1 diff -NurpP --minimal linux-4.9.217/arch/alpha/Kconfig linux-4.9.217-vs2.3.9.12/arch/alpha/Kconfig
2 --- linux-4.9.217/arch/alpha/Kconfig 2016-12-11 19:17:54.000000000 +0000
3 +++ linux-4.9.217-vs2.3.9.12/arch/alpha/Kconfig 2018-10-20 04:58:12.000000000 +0000
4 @@ -743,6 +743,8 @@ config DUMMY_CONSOLE
8 +source "kernel/vserver/Kconfig"
10 source "security/Kconfig"
12 source "crypto/Kconfig"
13 diff -NurpP --minimal linux-4.9.217/arch/alpha/kernel/systbls.S linux-4.9.217-vs2.3.9.12/arch/alpha/kernel/systbls.S
14 --- linux-4.9.217/arch/alpha/kernel/systbls.S 2016-12-11 19:17:54.000000000 +0000
15 +++ linux-4.9.217-vs2.3.9.12/arch/alpha/kernel/systbls.S 2018-10-20 04:58:12.000000000 +0000
16 @@ -446,7 +446,7 @@ sys_call_table:
17 .quad sys_stat64 /* 425 */
20 - .quad sys_ni_syscall /* sys_vserver */
21 + .quad sys_vserver /* sys_vserver */
22 .quad sys_ni_syscall /* sys_mbind */
23 .quad sys_ni_syscall /* sys_get_mempolicy */
24 .quad sys_ni_syscall /* sys_set_mempolicy */
25 diff -NurpP --minimal linux-4.9.217/arch/alpha/kernel/traps.c linux-4.9.217-vs2.3.9.12/arch/alpha/kernel/traps.c
26 --- linux-4.9.217/arch/alpha/kernel/traps.c 2020-03-27 00:50:40.331132545 +0000
27 +++ linux-4.9.217-vs2.3.9.12/arch/alpha/kernel/traps.c 2018-10-20 04:58:12.000000000 +0000
28 @@ -179,7 +179,8 @@ die_if_kernel(char * str, struct pt_regs
30 printk("CPU %d ", hard_smp_processor_id());
32 - printk("%s(%d): %s %ld\n", current->comm, task_pid_nr(current), str, err);
33 + printk("%s(%d:#%u): %s %ld\n", current->comm,
34 + task_pid_nr(current), current->xid, str, err);
35 dik_show_regs(regs, r9_15);
36 add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
37 dik_show_trace((unsigned long *)(regs+1));
38 diff -NurpP --minimal linux-4.9.217/arch/arm/Kconfig linux-4.9.217-vs2.3.9.12/arch/arm/Kconfig
39 --- linux-4.9.217/arch/arm/Kconfig 2020-03-27 00:50:40.441130813 +0000
40 +++ linux-4.9.217-vs2.3.9.12/arch/arm/Kconfig 2020-04-01 09:39:57.315984093 +0000
41 @@ -2200,6 +2200,8 @@ source "fs/Kconfig"
43 source "arch/arm/Kconfig.debug"
45 +source "kernel/vserver/Kconfig"
47 source "security/Kconfig"
49 source "crypto/Kconfig"
50 diff -NurpP --minimal linux-4.9.217/arch/arm/kernel/calls.S linux-4.9.217-vs2.3.9.12/arch/arm/kernel/calls.S
51 --- linux-4.9.217/arch/arm/kernel/calls.S 2016-12-11 19:17:54.000000000 +0000
52 +++ linux-4.9.217-vs2.3.9.12/arch/arm/kernel/calls.S 2018-10-20 04:58:12.000000000 +0000
54 /* 310 */ CALL(sys_request_key)
56 CALL(ABI(sys_semtimedop, sys_oabi_semtimedop))
57 -/* vserver */ CALL(sys_ni_syscall)
60 /* 315 */ CALL(sys_ioprio_get)
61 CALL(sys_inotify_init)
62 diff -NurpP --minimal linux-4.9.217/arch/arm/kernel/traps.c linux-4.9.217-vs2.3.9.12/arch/arm/kernel/traps.c
63 --- linux-4.9.217/arch/arm/kernel/traps.c 2020-03-27 00:50:40.901123578 +0000
64 +++ linux-4.9.217-vs2.3.9.12/arch/arm/kernel/traps.c 2018-10-20 04:58:12.000000000 +0000
65 @@ -279,8 +279,8 @@ static int __die(const char *str, int er
69 - pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n",
70 - TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), end_of_stack(tsk));
71 + pr_emerg("Process %.*s (pid: %d:%u, stack limit = 0x%p)\n",
72 + TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), tsk->xid, end_of_stack(tsk));
74 if (!user_mode(regs) || in_interrupt()) {
75 dump_mem(KERN_EMERG, "Stack: ", regs->ARM_sp,
76 diff -NurpP --minimal linux-4.9.217/arch/cris/Kconfig linux-4.9.217-vs2.3.9.12/arch/cris/Kconfig
77 --- linux-4.9.217/arch/cris/Kconfig 2016-12-11 19:17:54.000000000 +0000
78 +++ linux-4.9.217-vs2.3.9.12/arch/cris/Kconfig 2018-10-20 04:58:12.000000000 +0000
79 @@ -583,6 +583,8 @@ source "fs/Kconfig"
81 source "arch/cris/Kconfig.debug"
83 +source "kernel/vserver/Kconfig"
85 source "security/Kconfig"
87 source "crypto/Kconfig"
88 diff -NurpP --minimal linux-4.9.217/arch/ia64/Kconfig linux-4.9.217-vs2.3.9.12/arch/ia64/Kconfig
89 --- linux-4.9.217/arch/ia64/Kconfig 2016-12-11 19:17:54.000000000 +0000
90 +++ linux-4.9.217-vs2.3.9.12/arch/ia64/Kconfig 2018-10-20 04:58:12.000000000 +0000
91 @@ -602,6 +602,8 @@ source "fs/Kconfig"
93 source "arch/ia64/Kconfig.debug"
95 +source "kernel/vserver/Kconfig"
97 source "security/Kconfig"
99 source "crypto/Kconfig"
100 diff -NurpP --minimal linux-4.9.217/arch/ia64/kernel/entry.S linux-4.9.217-vs2.3.9.12/arch/ia64/kernel/entry.S
101 --- linux-4.9.217/arch/ia64/kernel/entry.S 2016-12-11 19:17:54.000000000 +0000
102 +++ linux-4.9.217-vs2.3.9.12/arch/ia64/kernel/entry.S 2018-10-20 04:58:12.000000000 +0000
103 @@ -1697,7 +1697,7 @@ sys_call_table:
105 data8 sys_mq_getsetattr
107 - data8 sys_ni_syscall // reserved for vserver
109 data8 sys_waitid // 1270
111 data8 sys_request_key
112 diff -NurpP --minimal linux-4.9.217/arch/ia64/kernel/ptrace.c linux-4.9.217-vs2.3.9.12/arch/ia64/kernel/ptrace.c
113 --- linux-4.9.217/arch/ia64/kernel/ptrace.c 2020-03-27 00:50:42.241102503 +0000
114 +++ linux-4.9.217-vs2.3.9.12/arch/ia64/kernel/ptrace.c 2018-10-20 04:58:12.000000000 +0000
116 #include <linux/regset.h>
117 #include <linux/elf.h>
118 #include <linux/tracehook.h>
119 +#include <linux/vs_base.h>
121 #include <asm/pgtable.h>
122 #include <asm/processor.h>
123 diff -NurpP --minimal linux-4.9.217/arch/ia64/kernel/traps.c linux-4.9.217-vs2.3.9.12/arch/ia64/kernel/traps.c
124 --- linux-4.9.217/arch/ia64/kernel/traps.c 2016-12-11 19:17:54.000000000 +0000
125 +++ linux-4.9.217-vs2.3.9.12/arch/ia64/kernel/traps.c 2018-10-20 04:58:12.000000000 +0000
126 @@ -60,8 +60,9 @@ die (const char *str, struct pt_regs *re
129 if (++die.lock_owner_depth < 3) {
130 - printk("%s[%d]: %s %ld [%d]\n",
131 - current->comm, task_pid_nr(current), str, err, ++die_counter);
132 + printk("%s[%d:#%u]: %s %ld [%d]\n",
133 + current->comm, task_pid_nr(current), current->xid,
134 + str, err, ++die_counter);
135 if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV)
138 @@ -324,8 +325,9 @@ handle_fpu_swa (int fp_fault, struct pt_
139 if ((last.count & 15) < 5 && (ia64_fetchadd(1, &last.count, acq) & 15) < 5) {
140 last.time = current_jiffies + 5 * HZ;
142 - "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n",
143 - current->comm, task_pid_nr(current), regs->cr_iip + ia64_psr(regs)->ri, isr);
144 + "%s(%d:#%u): floating-point assist fault at ip %016lx, isr %016lx\n",
145 + current->comm, task_pid_nr(current), current->xid,
146 + regs->cr_iip + ia64_psr(regs)->ri, isr);
150 diff -NurpP --minimal linux-4.9.217/arch/m32r/kernel/traps.c linux-4.9.217-vs2.3.9.12/arch/m32r/kernel/traps.c
151 --- linux-4.9.217/arch/m32r/kernel/traps.c 2016-12-11 19:17:54.000000000 +0000
152 +++ linux-4.9.217-vs2.3.9.12/arch/m32r/kernel/traps.c 2018-10-20 04:58:12.000000000 +0000
153 @@ -184,8 +184,9 @@ static void show_registers(struct pt_reg
155 printk("SPI: %08lx\n", sp);
157 - printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)",
158 - current->comm, task_pid_nr(current), 0xffff & i, 4096+(unsigned long)current);
159 + printk("Process %s (pid: %d:#%u, process nr: %d, stackpage=%08lx)",
160 + current->comm, task_pid_nr(current), current->xid,
161 + 0xffff & i, 4096+(unsigned long)current);
164 * When in-kernel, we also print out the stack and code at the
165 diff -NurpP --minimal linux-4.9.217/arch/m68k/Kconfig linux-4.9.217-vs2.3.9.12/arch/m68k/Kconfig
166 --- linux-4.9.217/arch/m68k/Kconfig 2016-12-11 19:17:54.000000000 +0000
167 +++ linux-4.9.217-vs2.3.9.12/arch/m68k/Kconfig 2018-10-20 04:58:12.000000000 +0000
168 @@ -163,6 +163,8 @@ source "fs/Kconfig"
170 source "arch/m68k/Kconfig.debug"
172 +source "kernel/vserver/Kconfig"
174 source "security/Kconfig"
176 source "crypto/Kconfig"
177 diff -NurpP --minimal linux-4.9.217/arch/mips/Kconfig linux-4.9.217-vs2.3.9.12/arch/mips/Kconfig
178 --- linux-4.9.217/arch/mips/Kconfig 2020-03-27 00:50:42.451099201 +0000
179 +++ linux-4.9.217-vs2.3.9.12/arch/mips/Kconfig 2019-12-25 15:37:43.808561393 +0000
180 @@ -3193,6 +3193,8 @@ source "fs/Kconfig"
182 source "arch/mips/Kconfig.debug"
184 +source "kernel/vserver/Kconfig"
186 source "security/Kconfig"
188 source "crypto/Kconfig"
189 diff -NurpP --minimal linux-4.9.217/arch/mips/kernel/ptrace.c linux-4.9.217-vs2.3.9.12/arch/mips/kernel/ptrace.c
190 --- linux-4.9.217/arch/mips/kernel/ptrace.c 2020-03-27 00:50:42.811093539 +0000
191 +++ linux-4.9.217-vs2.3.9.12/arch/mips/kernel/ptrace.c 2018-10-20 05:55:33.000000000 +0000
193 #include <linux/audit.h>
194 #include <linux/seccomp.h>
195 #include <linux/ftrace.h>
196 +#include <linux/vs_base.h>
198 #include <asm/byteorder.h>
200 @@ -798,6 +799,9 @@ long arch_ptrace(struct task_struct *chi
201 void __user *datavp = (void __user *) data;
202 unsigned long __user *datalp = (void __user *) data;
204 + if (!vx_check(vx_task_xid(child), VS_WATCH_P | VS_IDENT))
208 /* when I and D space are separate, these will need to be fixed. */
209 case PTRACE_PEEKTEXT: /* read word at location addr. */
210 diff -NurpP --minimal linux-4.9.217/arch/mips/kernel/scall32-o32.S linux-4.9.217-vs2.3.9.12/arch/mips/kernel/scall32-o32.S
211 --- linux-4.9.217/arch/mips/kernel/scall32-o32.S 2020-03-27 00:50:42.811093539 +0000
212 +++ linux-4.9.217-vs2.3.9.12/arch/mips/kernel/scall32-o32.S 2018-10-20 04:58:12.000000000 +0000
213 @@ -511,7 +511,7 @@ EXPORT(sys_call_table)
214 PTR sys_mq_timedreceive
215 PTR sys_mq_notify /* 4275 */
216 PTR sys_mq_getsetattr
217 - PTR sys_ni_syscall /* sys_vserver */
220 PTR sys_ni_syscall /* available, was setaltroot */
221 PTR sys_add_key /* 4280 */
222 diff -NurpP --minimal linux-4.9.217/arch/mips/kernel/scall64-64.S linux-4.9.217-vs2.3.9.12/arch/mips/kernel/scall64-64.S
223 --- linux-4.9.217/arch/mips/kernel/scall64-64.S 2020-03-27 00:50:42.811093539 +0000
224 +++ linux-4.9.217-vs2.3.9.12/arch/mips/kernel/scall64-64.S 2018-10-20 04:58:12.000000000 +0000
225 @@ -348,7 +348,7 @@ EXPORT(sys_call_table)
226 PTR sys_mq_timedreceive
228 PTR sys_mq_getsetattr /* 5235 */
229 - PTR sys_ni_syscall /* sys_vserver */
232 PTR sys_ni_syscall /* available, was setaltroot */
234 diff -NurpP --minimal linux-4.9.217/arch/mips/kernel/scall64-n32.S linux-4.9.217-vs2.3.9.12/arch/mips/kernel/scall64-n32.S
235 --- linux-4.9.217/arch/mips/kernel/scall64-n32.S 2020-03-27 00:50:42.811093539 +0000
236 +++ linux-4.9.217-vs2.3.9.12/arch/mips/kernel/scall64-n32.S 2018-10-20 04:58:12.000000000 +0000
237 @@ -343,7 +343,7 @@ EXPORT(sysn32_call_table)
238 PTR compat_sys_mq_timedreceive
239 PTR compat_sys_mq_notify
240 PTR compat_sys_mq_getsetattr
241 - PTR sys_ni_syscall /* 6240, sys_vserver */
242 + PTR sys32_vserver /* 6240 */
243 PTR compat_sys_waitid
244 PTR sys_ni_syscall /* available, was setaltroot */
246 diff -NurpP --minimal linux-4.9.217/arch/mips/kernel/scall64-o32.S linux-4.9.217-vs2.3.9.12/arch/mips/kernel/scall64-o32.S
247 --- linux-4.9.217/arch/mips/kernel/scall64-o32.S 2020-03-27 00:50:42.811093539 +0000
248 +++ linux-4.9.217-vs2.3.9.12/arch/mips/kernel/scall64-o32.S 2019-10-05 14:58:36.920447392 +0000
249 @@ -499,7 +499,7 @@ EXPORT(sys32_call_table)
250 PTR compat_sys_mq_timedreceive
251 PTR compat_sys_mq_notify /* 4275 */
252 PTR compat_sys_mq_getsetattr
253 - PTR sys_ni_syscall /* sys_vserver */
255 PTR compat_sys_waitid
256 PTR sys_ni_syscall /* available, was setaltroot */
257 PTR sys_add_key /* 4280 */
258 diff -NurpP --minimal linux-4.9.217/arch/mips/kernel/traps.c linux-4.9.217-vs2.3.9.12/arch/mips/kernel/traps.c
259 --- linux-4.9.217/arch/mips/kernel/traps.c 2020-03-27 00:50:42.811093539 +0000
260 +++ linux-4.9.217-vs2.3.9.12/arch/mips/kernel/traps.c 2018-10-20 04:58:13.000000000 +0000
261 @@ -361,9 +361,10 @@ void show_registers(struct pt_regs *regs
265 - printk("Process %s (pid: %d, threadinfo=%p, task=%p, tls=%0*lx)\n",
266 - current->comm, current->pid, current_thread_info(), current,
267 - field, current_thread_info()->tp_value);
268 + printk("Process %s (pid: %d:#%u, threadinfo=%p, task=%p, tls=%0*lx)\n",
269 + current->comm, task_pid_nr(current), current->xid,
270 + current_thread_info(), current,
271 + field, current_thread_info()->tp_value);
272 if (cpu_has_userlocal) {
275 diff -NurpP --minimal linux-4.9.217/arch/parisc/Kconfig linux-4.9.217-vs2.3.9.12/arch/parisc/Kconfig
276 --- linux-4.9.217/arch/parisc/Kconfig 2020-03-27 00:50:43.061089606 +0000
277 +++ linux-4.9.217-vs2.3.9.12/arch/parisc/Kconfig 2018-10-20 05:55:33.000000000 +0000
278 @@ -348,6 +348,8 @@ config SECCOMP
280 If unsure, say Y. Only embedded should say N here.
282 +source "kernel/vserver/Kconfig"
284 source "security/Kconfig"
286 source "crypto/Kconfig"
287 diff -NurpP --minimal linux-4.9.217/arch/parisc/kernel/syscall_table.S linux-4.9.217-vs2.3.9.12/arch/parisc/kernel/syscall_table.S
288 --- linux-4.9.217/arch/parisc/kernel/syscall_table.S 2020-03-27 00:50:43.151088192 +0000
289 +++ linux-4.9.217-vs2.3.9.12/arch/parisc/kernel/syscall_table.S 2018-10-20 04:58:13.000000000 +0000
291 ENTRY_COMP(mbind) /* 260 */
292 ENTRY_COMP(get_mempolicy)
293 ENTRY_COMP(set_mempolicy)
294 - ENTRY_SAME(ni_syscall) /* 263: reserved for vserver */
295 + ENTRY_DIFF(vserver)
297 ENTRY_SAME(request_key) /* 265 */
299 diff -NurpP --minimal linux-4.9.217/arch/parisc/kernel/traps.c linux-4.9.217-vs2.3.9.12/arch/parisc/kernel/traps.c
300 --- linux-4.9.217/arch/parisc/kernel/traps.c 2020-03-27 00:50:43.161088037 +0000
301 +++ linux-4.9.217-vs2.3.9.12/arch/parisc/kernel/traps.c 2019-02-22 08:37:49.463155825 +0000
302 @@ -235,8 +235,9 @@ void die_if_kernel(char *str, struct pt_
305 parisc_printk_ratelimited(1, regs,
306 - KERN_CRIT "%s (pid %d): %s (code %ld) at " RFMT "\n",
307 - current->comm, task_pid_nr(current), str, err, regs->iaoq[0]);
308 + KERN_CRIT "%s (pid %d:#%u): %s (code %ld) at " RFMT "\n",
309 + current->comm, task_pid_nr(current), current->xid,
310 + str, err, regs->iaoq[0]);
314 @@ -266,8 +267,8 @@ void die_if_kernel(char *str, struct pt_
315 pdc_console_restart();
318 - printk(KERN_CRIT "%s (pid %d): %s (code %ld)\n",
319 - current->comm, task_pid_nr(current), str, err);
320 + printk(KERN_CRIT "%s (pid %d:#%u): %s (code %ld)\n",
321 + current->comm, task_pid_nr(current), current->xid, str, err);
323 /* Wot's wrong wif bein' racy? */
324 if (current->thread.flags & PARISC_KERNEL_DEATH) {
325 diff -NurpP --minimal linux-4.9.217/arch/powerpc/include/uapi/asm/unistd.h linux-4.9.217-vs2.3.9.12/arch/powerpc/include/uapi/asm/unistd.h
326 --- linux-4.9.217/arch/powerpc/include/uapi/asm/unistd.h 2016-12-11 19:17:54.000000000 +0000
327 +++ linux-4.9.217-vs2.3.9.12/arch/powerpc/include/uapi/asm/unistd.h 2018-10-20 04:58:13.000000000 +0000
330 #define __NR_rtas 255
331 #define __NR_sys_debug_setcontext 256
332 -/* Number 257 is reserved for vserver */
333 +#define __NR_vserver 257
334 #define __NR_migrate_pages 258
335 #define __NR_mbind 259
336 #define __NR_get_mempolicy 260
337 diff -NurpP --minimal linux-4.9.217/arch/powerpc/Kconfig linux-4.9.217-vs2.3.9.12/arch/powerpc/Kconfig
338 --- linux-4.9.217/arch/powerpc/Kconfig 2020-03-27 00:50:43.171087876 +0000
339 +++ linux-4.9.217-vs2.3.9.12/arch/powerpc/Kconfig 2020-04-01 09:40:17.695646404 +0000
340 @@ -1092,6 +1092,8 @@ source "lib/Kconfig"
342 source "arch/powerpc/Kconfig.debug"
344 +source "kernel/vserver/Kconfig"
346 source "security/Kconfig"
348 source "crypto/Kconfig"
349 diff -NurpP --minimal linux-4.9.217/arch/s390/include/asm/tlb.h linux-4.9.217-vs2.3.9.12/arch/s390/include/asm/tlb.h
350 --- linux-4.9.217/arch/s390/include/asm/tlb.h 2016-12-11 19:17:54.000000000 +0000
351 +++ linux-4.9.217-vs2.3.9.12/arch/s390/include/asm/tlb.h 2018-10-20 04:58:13.000000000 +0000
353 #include <linux/mm.h>
354 #include <linux/pagemap.h>
355 #include <linux/swap.h>
357 #include <asm/processor.h>
358 #include <asm/pgalloc.h>
359 #include <asm/tlbflush.h>
360 diff -NurpP --minimal linux-4.9.217/arch/s390/include/uapi/asm/unistd.h linux-4.9.217-vs2.3.9.12/arch/s390/include/uapi/asm/unistd.h
361 --- linux-4.9.217/arch/s390/include/uapi/asm/unistd.h 2016-12-11 19:17:54.000000000 +0000
362 +++ linux-4.9.217-vs2.3.9.12/arch/s390/include/uapi/asm/unistd.h 2018-10-20 04:58:13.000000000 +0000
364 #define __NR_clock_gettime 260
365 #define __NR_clock_getres 261
366 #define __NR_clock_nanosleep 262
367 -/* Number 263 is reserved for vserver */
368 +#define __NR_vserver 263
369 #define __NR_statfs64 265
370 #define __NR_fstatfs64 266
371 #define __NR_remap_file_pages 267
372 diff -NurpP --minimal linux-4.9.217/arch/s390/Kconfig linux-4.9.217-vs2.3.9.12/arch/s390/Kconfig
373 --- linux-4.9.217/arch/s390/Kconfig 2020-03-27 00:50:44.061073883 +0000
374 +++ linux-4.9.217-vs2.3.9.12/arch/s390/Kconfig 2018-10-20 04:58:13.000000000 +0000
375 @@ -775,6 +775,8 @@ source "fs/Kconfig"
377 source "arch/s390/Kconfig.debug"
379 +source "kernel/vserver/Kconfig"
381 source "security/Kconfig"
383 source "crypto/Kconfig"
384 diff -NurpP --minimal linux-4.9.217/arch/s390/kernel/ptrace.c linux-4.9.217-vs2.3.9.12/arch/s390/kernel/ptrace.c
385 --- linux-4.9.217/arch/s390/kernel/ptrace.c 2020-03-27 00:50:44.171072152 +0000
386 +++ linux-4.9.217-vs2.3.9.12/arch/s390/kernel/ptrace.c 2018-10-20 04:58:13.000000000 +0000
388 #include <linux/tracehook.h>
389 #include <linux/seccomp.h>
390 #include <linux/compat.h>
391 +#include <linux/vs_base.h>
392 #include <trace/syscall.h>
393 #include <asm/segment.h>
394 #include <asm/page.h>
395 diff -NurpP --minimal linux-4.9.217/arch/s390/kernel/syscalls.S linux-4.9.217-vs2.3.9.12/arch/s390/kernel/syscalls.S
396 --- linux-4.9.217/arch/s390/kernel/syscalls.S 2020-03-27 00:50:44.171072152 +0000
397 +++ linux-4.9.217-vs2.3.9.12/arch/s390/kernel/syscalls.S 2018-10-20 04:58:13.000000000 +0000
398 @@ -271,7 +271,7 @@ SYSCALL(sys_clock_settime,compat_sys_clo
399 SYSCALL(sys_clock_gettime,compat_sys_clock_gettime) /* 260 */
400 SYSCALL(sys_clock_getres,compat_sys_clock_getres)
401 SYSCALL(sys_clock_nanosleep,compat_sys_clock_nanosleep)
402 -NI_SYSCALL /* reserved for vserver */
403 +SYSCALL(sys_vserver,sys32_vserver) /* vserver */
404 SYSCALL(sys_ni_syscall,compat_sys_s390_fadvise64_64)
405 SYSCALL(sys_statfs64,compat_sys_statfs64)
406 SYSCALL(sys_fstatfs64,compat_sys_fstatfs64)
407 diff -NurpP --minimal linux-4.9.217/arch/sh/Kconfig linux-4.9.217-vs2.3.9.12/arch/sh/Kconfig
408 --- linux-4.9.217/arch/sh/Kconfig 2016-12-11 19:17:54.000000000 +0000
409 +++ linux-4.9.217-vs2.3.9.12/arch/sh/Kconfig 2018-10-20 04:58:13.000000000 +0000
410 @@ -904,6 +904,8 @@ source "fs/Kconfig"
412 source "arch/sh/Kconfig.debug"
414 +source "kernel/vserver/Kconfig"
416 source "security/Kconfig"
418 source "crypto/Kconfig"
419 diff -NurpP --minimal linux-4.9.217/arch/sh/kernel/irq.c linux-4.9.217-vs2.3.9.12/arch/sh/kernel/irq.c
420 --- linux-4.9.217/arch/sh/kernel/irq.c 2016-12-11 19:17:54.000000000 +0000
421 +++ linux-4.9.217-vs2.3.9.12/arch/sh/kernel/irq.c 2018-10-20 04:58:13.000000000 +0000
423 #include <linux/ftrace.h>
424 #include <linux/delay.h>
425 #include <linux/ratelimit.h>
426 +// #include <linux/vs_context.h>
427 #include <asm/processor.h>
428 #include <asm/machvec.h>
429 #include <asm/uaccess.h>
430 diff -NurpP --minimal linux-4.9.217/arch/sparc/include/uapi/asm/unistd.h linux-4.9.217-vs2.3.9.12/arch/sparc/include/uapi/asm/unistd.h
431 --- linux-4.9.217/arch/sparc/include/uapi/asm/unistd.h 2016-12-11 19:17:54.000000000 +0000
432 +++ linux-4.9.217-vs2.3.9.12/arch/sparc/include/uapi/asm/unistd.h 2018-10-20 04:58:13.000000000 +0000
434 #define __NR_timer_getoverrun 264
435 #define __NR_timer_delete 265
436 #define __NR_timer_create 266
437 -/* #define __NR_vserver 267 Reserved for VSERVER */
438 +#define __NR_vserver 267
439 #define __NR_io_setup 268
440 #define __NR_io_destroy 269
441 #define __NR_io_submit 270
442 diff -NurpP --minimal linux-4.9.217/arch/sparc/Kconfig linux-4.9.217-vs2.3.9.12/arch/sparc/Kconfig
443 --- linux-4.9.217/arch/sparc/Kconfig 2020-03-27 00:50:44.741063185 +0000
444 +++ linux-4.9.217-vs2.3.9.12/arch/sparc/Kconfig 2019-02-22 08:37:49.523154806 +0000
445 @@ -584,6 +584,8 @@ source "fs/Kconfig"
447 source "arch/sparc/Kconfig.debug"
449 +source "kernel/vserver/Kconfig"
451 source "security/Kconfig"
453 source "crypto/Kconfig"
454 diff -NurpP --minimal linux-4.9.217/arch/sparc/kernel/systbls_32.S linux-4.9.217-vs2.3.9.12/arch/sparc/kernel/systbls_32.S
455 --- linux-4.9.217/arch/sparc/kernel/systbls_32.S 2016-12-11 19:17:54.000000000 +0000
456 +++ linux-4.9.217-vs2.3.9.12/arch/sparc/kernel/systbls_32.S 2018-10-20 04:58:13.000000000 +0000
457 @@ -70,7 +70,7 @@ sys_call_table:
458 /*250*/ .long sys_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_ni_syscall
459 /*255*/ .long sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
460 /*260*/ .long sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
461 -/*265*/ .long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy
462 +/*265*/ .long sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy
463 /*270*/ .long sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
464 /*275*/ .long sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid
465 /*280*/ .long sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat
466 diff -NurpP --minimal linux-4.9.217/arch/sparc/kernel/systbls_64.S linux-4.9.217-vs2.3.9.12/arch/sparc/kernel/systbls_64.S
467 --- linux-4.9.217/arch/sparc/kernel/systbls_64.S 2016-12-11 19:17:54.000000000 +0000
468 +++ linux-4.9.217-vs2.3.9.12/arch/sparc/kernel/systbls_64.S 2018-10-20 04:58:13.000000000 +0000
469 @@ -71,7 +71,7 @@ sys_call_table32:
470 /*250*/ .word sys_mremap, compat_sys_sysctl, sys_getsid, sys_fdatasync, sys_nis_syscall
471 .word sys32_sync_file_range, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, sys32_clock_nanosleep
472 /*260*/ .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun
473 - .word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy
474 + .word sys_timer_delete, compat_sys_timer_create, sys32_vserver, compat_sys_io_setup, sys_io_destroy
475 /*270*/ .word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink
476 .word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid
477 /*280*/ .word sys_tee, sys_add_key, sys_request_key, compat_sys_keyctl, compat_sys_openat
478 @@ -152,7 +152,7 @@ sys_call_table:
479 /*250*/ .word sys_64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nis_syscall
480 .word sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
481 /*260*/ .word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
482 - .word sys_timer_delete, sys_timer_create, sys_ni_syscall, sys_io_setup, sys_io_destroy
483 + .word sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy
484 /*270*/ .word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
485 .word sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid
486 /*280*/ .word sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat
487 diff -NurpP --minimal linux-4.9.217/arch/um/Kconfig.rest linux-4.9.217-vs2.3.9.12/arch/um/Kconfig.rest
488 --- linux-4.9.217/arch/um/Kconfig.rest 2016-12-11 19:17:54.000000000 +0000
489 +++ linux-4.9.217-vs2.3.9.12/arch/um/Kconfig.rest 2018-10-20 04:58:13.000000000 +0000
490 @@ -12,6 +12,8 @@ source "arch/um/Kconfig.net"
494 +source "kernel/vserver/Kconfig"
496 source "security/Kconfig"
498 source "crypto/Kconfig"
499 diff -NurpP --minimal linux-4.9.217/arch/x86/entry/syscalls/syscall_32.tbl linux-4.9.217-vs2.3.9.12/arch/x86/entry/syscalls/syscall_32.tbl
500 --- linux-4.9.217/arch/x86/entry/syscalls/syscall_32.tbl 2016-12-11 19:17:54.000000000 +0000
501 +++ linux-4.9.217-vs2.3.9.12/arch/x86/entry/syscalls/syscall_32.tbl 2018-10-20 04:58:13.000000000 +0000
503 270 i386 tgkill sys_tgkill
504 271 i386 utimes sys_utimes compat_sys_utimes
505 272 i386 fadvise64_64 sys_fadvise64_64 sys32_fadvise64_64
507 +273 i386 vserver sys_vserver sys32_vserver
508 274 i386 mbind sys_mbind
509 275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
510 276 i386 set_mempolicy sys_set_mempolicy
511 diff -NurpP --minimal linux-4.9.217/arch/x86/entry/syscalls/syscall_64.tbl linux-4.9.217-vs2.3.9.12/arch/x86/entry/syscalls/syscall_64.tbl
512 --- linux-4.9.217/arch/x86/entry/syscalls/syscall_64.tbl 2016-12-11 19:17:54.000000000 +0000
513 +++ linux-4.9.217-vs2.3.9.12/arch/x86/entry/syscalls/syscall_64.tbl 2018-10-20 04:58:13.000000000 +0000
515 233 common epoll_ctl sys_epoll_ctl
516 234 common tgkill sys_tgkill
517 235 common utimes sys_utimes
519 +236 64 vserver sys_vserver
520 237 common mbind sys_mbind
521 238 common set_mempolicy sys_set_mempolicy
522 239 common get_mempolicy sys_get_mempolicy
523 diff -NurpP --minimal linux-4.9.217/arch/x86/Kconfig linux-4.9.217-vs2.3.9.12/arch/x86/Kconfig
524 --- linux-4.9.217/arch/x86/Kconfig 2020-03-27 00:50:45.651048876 +0000
525 +++ linux-4.9.217-vs2.3.9.12/arch/x86/Kconfig 2019-12-25 15:37:44.298553484 +0000
526 @@ -2810,6 +2810,8 @@ source "fs/Kconfig"
528 source "arch/x86/Kconfig.debug"
530 +source "kernel/vserver/Kconfig"
532 source "security/Kconfig"
534 source "crypto/Kconfig"
535 diff -NurpP --minimal linux-4.9.217/block/ioprio.c linux-4.9.217-vs2.3.9.12/block/ioprio.c
536 --- linux-4.9.217/block/ioprio.c 2016-12-11 19:17:54.000000000 +0000
537 +++ linux-4.9.217-vs2.3.9.12/block/ioprio.c 2018-10-20 04:58:13.000000000 +0000
539 #include <linux/syscalls.h>
540 #include <linux/security.h>
541 #include <linux/pid_namespace.h>
542 +#include <linux/vs_base.h>
544 int set_task_ioprio(struct task_struct *task, int ioprio)
546 @@ -105,6 +106,8 @@ SYSCALL_DEFINE3(ioprio_set, int, which,
548 pgrp = find_vpid(who);
549 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
550 + if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
552 ret = set_task_ioprio(p, ioprio);
555 @@ -203,6 +206,8 @@ SYSCALL_DEFINE2(ioprio_get, int, which,
557 pgrp = find_vpid(who);
558 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
559 + if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
561 tmpio = get_task_ioprio(p);
564 diff -NurpP --minimal linux-4.9.217/Documentation/vserver/debug.txt linux-4.9.217-vs2.3.9.12/Documentation/vserver/debug.txt
565 --- linux-4.9.217/Documentation/vserver/debug.txt 1970-01-01 00:00:00.000000000 +0000
566 +++ linux-4.9.217-vs2.3.9.12/Documentation/vserver/debug.txt 2018-10-20 04:58:12.000000000 +0000
571 + 2 4 "vx_map_tgid: %p/%llx: %d -> %d"
572 + "vx_rmap_tgid: %p/%llx: %d -> %d"
576 + 0 1 "ALLOC (%p,#%d)%c inode (%d)"
577 + "FREE (%p,#%d)%c inode"
578 + 1 2 "ALLOC (%p,#%d)%c %lld bytes (%d)"
579 + "FREE (%p,#%d)%c %lld bytes"
580 + 2 4 "ADJUST: %lld,%lld on %ld,%ld [mult=%d]"
581 + 3 8 "ext3_has_free_blocks(%p): %lu<%lu+1, %c, %u!=%u r=%d"
582 + "ext3_has_free_blocks(%p): free=%lu, root=%lu"
583 + "rcu_free_dl_info(%p)"
584 + 4 10 "alloc_dl_info(%p,%d) = %p"
585 + "dealloc_dl_info(%p)"
586 + "get_dl_info(%p[#%d.%d])"
587 + "put_dl_info(%p[#%d.%d])"
588 + 5 20 "alloc_dl_info(%p,%d)*"
589 + 6 40 "__hash_dl_info: %p[#%d]"
590 + "__unhash_dl_info: %p[#%d]"
591 + 7 80 "locate_dl_info(%p,#%d) = %p"
595 + 0 1 "destroy_dqhash: %p [#0x%08x] c=%d"
596 + "new_dqhash: %p [#0x%08x]"
597 + "vroot[%d]_clr_dev: dev=%p[%lu,%d:%d]"
598 + "vroot[%d]_get_real_bdev: dev=%p[%lu,%d:%d]"
599 + "vroot[%d]_set_dev: dev=%p[%lu,%d:%d]"
600 + "vroot_get_real_bdev not set"
601 + 1 2 "cow_break_link(»%s«)"
603 + 2 4 "dentry_open(new): %p"
604 + "dentry_open(old): %p"
605 + "lookup_create(new): %p"
607 + "path_lookup(old): %d"
608 + "vfs_create(new): %d"
611 + 3 8 "fput(new_file=%p[#%d])"
612 + "fput(old_file=%p[#%d])"
613 + 4 10 "vx_info_kill(%p[#%d],%d,%d) = %d"
614 + "vx_info_kill(%p[#%d],%d,%d)*"
615 + 5 20 "vs_reboot(%p[#%d],%d)"
616 + 6 40 "dropping task %p[#%u,%u] for %p[#%u,%u]"
620 + 2 4 "nx_addr_conflict(%p,%p) %d.%d,%d.%d"
621 + 3 8 "inet_bind(%p) %d.%d.%d.%d, %d.%d.%d.%d, %d.%d.%d.%d"
622 + "inet_bind(%p)* %p,%p;%lx %d.%d.%d.%d"
623 + 4 10 "ip_route_connect(%p) %p,%p;%lx"
624 + 5 20 "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx"
625 + 6 40 "sk,egf: %p [#%d] (from %d)"
626 + "sk,egn: %p [#%d] (from %d)"
627 + "sk,req: %p [#%d] (from %d)"
628 + "sk: %p [#%d] (from %d)"
629 + "tw: %p [#%d] (from %d)"
630 + 7 80 "__sock_recvmsg: %p[%p,%p,%p;%d]:%d/%d"
631 + "__sock_sendmsg: %p[%p,%p,%p;%d]:%d/%d"
635 + 0 1 "__lookup_nx_info(#%u): %p[#%u]"
636 + "alloc_nx_info(%d) = %p"
637 + "create_nx_info(%d) (dynamic rejected)"
638 + "create_nx_info(%d) = %p (already there)"
639 + "create_nx_info(%d) = %p (new)"
640 + "dealloc_nx_info(%p)"
641 + 1 2 "alloc_nx_info(%d)*"
642 + "create_nx_info(%d)*"
643 + 2 4 "get_nx_info(%p[#%d.%d])"
644 + "put_nx_info(%p[#%d.%d])"
645 + 3 8 "claim_nx_info(%p[#%d.%d.%d]) %p"
646 + "clr_nx_info(%p[#%d.%d])"
647 + "init_nx_info(%p[#%d.%d])"
648 + "release_nx_info(%p[#%d.%d.%d]) %p"
649 + "set_nx_info(%p[#%d.%d])"
650 + 4 10 "__hash_nx_info: %p[#%d]"
651 + "__nx_dynamic_id: [#%d]"
652 + "__unhash_nx_info: %p[#%d.%d.%d]"
653 + 5 20 "moved task %p into nxi:%p[#%d]"
654 + "nx_migrate_task(%p,%p[#%d.%d.%d])"
655 + "task_get_nx_info(%p)"
656 + 6 40 "nx_clear_persistent(%p[#%d])"
660 + 0 1 "quota_sync_dqh(%p,%d) discard inode %p"
661 + 1 2 "quota_sync_dqh(%p,%d)"
662 + "sync_dquots(%p,%d)"
663 + "sync_dquots_dqh(%p,%d)"
664 + 3 8 "do_quotactl(%p,%d,cmd=%d,id=%d,%p)"
668 + 0 1 "vc: VCMD_%02d_%d[%d], %d,%p [%d,%d,%x,%x]"
669 + 1 2 "vc: VCMD_%02d_%d[%d] = %08lx(%ld) [%d,%d]"
670 + 4 10 "%s: (%s %s) returned %s with %d"
674 + 7 80 "dx_parse_tag(»%s«): %d:#%d"
675 + "dx_propagate_tag(%p[#%lu.%d]): %d,%d"
679 + 0 1 "__lookup_vx_info(#%u): %p[#%u]"
680 + "alloc_vx_info(%d) = %p"
681 + "alloc_vx_info(%d)*"
682 + "create_vx_info(%d) (dynamic rejected)"
683 + "create_vx_info(%d) = %p (already there)"
684 + "create_vx_info(%d) = %p (new)"
685 + "dealloc_vx_info(%p)"
686 + "loc_vx_info(%d) = %p (found)"
687 + "loc_vx_info(%d) = %p (new)"
688 + "loc_vx_info(%d) = %p (not available)"
689 + 1 2 "create_vx_info(%d)*"
691 + 2 4 "get_vx_info(%p[#%d.%d])"
692 + "put_vx_info(%p[#%d.%d])"
693 + 3 8 "claim_vx_info(%p[#%d.%d.%d]) %p"
694 + "clr_vx_info(%p[#%d.%d])"
695 + "init_vx_info(%p[#%d.%d])"
696 + "release_vx_info(%p[#%d.%d.%d]) %p"
697 + "set_vx_info(%p[#%d.%d])"
698 + 4 10 "__hash_vx_info: %p[#%d]"
699 + "__unhash_vx_info: %p[#%d.%d.%d]"
700 + "__vx_dynamic_id: [#%d]"
701 + 5 20 "enter_vx_info(%p[#%d],%p) %p[#%d,%p]"
702 + "leave_vx_info(%p[#%d,%p]) %p[#%d,%p]"
703 + "moved task %p into vxi:%p[#%d]"
704 + "task_get_vx_info(%p)"
705 + "vx_migrate_task(%p,%p[#%d.%d])"
706 + 6 40 "vx_clear_persistent(%p[#%d])"
707 + "vx_exit_init(%p[#%d],%p[#%d,%d,%d])"
708 + "vx_set_init(%p[#%d],%p[#%d,%d,%d])"
709 + "vx_set_persistent(%p[#%d])"
710 + "vx_set_reaper(%p[#%d],%p[#%d,%d])"
711 + 7 80 "vx_child_reaper(%p[#%u,%u]) = %p[#%u,%u]"
716 + n 2^n "vx_acc_cres[%5d,%s,%2d]: %5d%s"
717 + "vx_cres_avail[%5d,%s,%2d]: %5ld > %5d + %5d"
719 + m 2^m "vx_acc_page[%5d,%s,%2d]: %5d%s"
720 + "vx_acc_pages[%5d,%s,%2d]: %5d += %5d"
721 + "vx_pages_avail[%5d,%s,%2d]: %5ld > %5d + %5d"
722 diff -NurpP --minimal linux-4.9.217/drivers/block/Kconfig linux-4.9.217-vs2.3.9.12/drivers/block/Kconfig
723 --- linux-4.9.217/drivers/block/Kconfig 2016-12-11 19:17:54.000000000 +0000
724 +++ linux-4.9.217-vs2.3.9.12/drivers/block/Kconfig 2018-10-20 04:58:13.000000000 +0000
725 @@ -273,6 +273,13 @@ config BLK_DEV_CRYPTOLOOP
727 source "drivers/block/drbd/Kconfig"
729 +config BLK_DEV_VROOT
730 + tristate "Virtual Root device support"
731 + depends on QUOTACTL
733 + Saying Y here will allow you to use quota/fs ioctls on a shared
734 + partition within a virtual server without compromising security.
737 tristate "Network block device support"
739 diff -NurpP --minimal linux-4.9.217/drivers/block/loop.c linux-4.9.217-vs2.3.9.12/drivers/block/loop.c
740 --- linux-4.9.217/drivers/block/loop.c 2020-03-27 00:50:48.141009707 +0000
741 +++ linux-4.9.217-vs2.3.9.12/drivers/block/loop.c 2019-12-25 15:37:45.418535409 +0000
743 #include <linux/miscdevice.h>
744 #include <linux/falloc.h>
745 #include <linux/uio.h>
746 +#include <linux/vs_context.h>
749 #include <asm/uaccess.h>
750 @@ -936,6 +937,7 @@ static int loop_set_fd(struct loop_devic
751 lo->lo_blocksize = lo_blocksize;
752 lo->lo_device = bdev;
753 lo->lo_flags = lo_flags;
754 + lo->lo_xid = vx_current_xid();
755 lo->lo_backing_file = file;
758 @@ -1056,6 +1058,7 @@ static int loop_clr_fd(struct loop_devic
760 lo->lo_sizelimit = 0;
761 lo->lo_encrypt_key_size = 0;
763 memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
764 memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
765 memset(lo->lo_file_name, 0, LO_NAME_SIZE);
766 @@ -1102,7 +1105,7 @@ loop_set_status(struct loop_device *lo,
768 if (lo->lo_encrypt_key_size &&
769 !uid_eq(lo->lo_key_owner, uid) &&
770 - !capable(CAP_SYS_ADMIN))
771 + !vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_CLOOP))
773 if (lo->lo_state != Lo_bound)
775 @@ -1207,7 +1210,8 @@ loop_get_status(struct loop_device *lo,
776 memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
777 info->lo_encrypt_type =
778 lo->lo_encryption ? lo->lo_encryption->number : 0;
779 - if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) {
780 + if (lo->lo_encrypt_key_size &&
781 + vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_CLOOP)) {
782 info->lo_encrypt_key_size = lo->lo_encrypt_key_size;
783 memcpy(info->lo_encrypt_key, lo->lo_encrypt_key,
784 lo->lo_encrypt_key_size);
785 @@ -1569,6 +1573,11 @@ static int lo_open(struct block_device *
789 + if (!vx_check(lo->lo_xid, VS_IDENT|VS_HOSTID|VS_ADMIN_P)) {
794 atomic_inc(&lo->lo_refcnt);
796 mutex_unlock(&loop_index_mutex);
797 diff -NurpP --minimal linux-4.9.217/drivers/block/loop.h linux-4.9.217-vs2.3.9.12/drivers/block/loop.h
798 --- linux-4.9.217/drivers/block/loop.h 2020-03-27 00:50:48.151009551 +0000
799 +++ linux-4.9.217-vs2.3.9.12/drivers/block/loop.h 2019-10-05 14:58:39.110412393 +0000
800 @@ -43,6 +43,7 @@ struct loop_device {
801 struct loop_func_table *lo_encryption;
803 kuid_t lo_key_owner; /* Who set the key */
805 int (*ioctl)(struct loop_device *, int cmd,
808 diff -NurpP --minimal linux-4.9.217/drivers/block/Makefile linux-4.9.217-vs2.3.9.12/drivers/block/Makefile
809 --- linux-4.9.217/drivers/block/Makefile 2016-12-11 19:17:54.000000000 +0000
810 +++ linux-4.9.217-vs2.3.9.12/drivers/block/Makefile 2018-10-20 04:58:13.000000000 +0000
811 @@ -31,6 +31,7 @@ obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o
813 obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
814 obj-$(CONFIG_BLK_DEV_HD) += hd.o
815 +obj-$(CONFIG_BLK_DEV_VROOT) += vroot.o
817 obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
818 obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/
819 diff -NurpP --minimal linux-4.9.217/drivers/block/vroot.c linux-4.9.217-vs2.3.9.12/drivers/block/vroot.c
820 --- linux-4.9.217/drivers/block/vroot.c 1970-01-01 00:00:00.000000000 +0000
821 +++ linux-4.9.217-vs2.3.9.12/drivers/block/vroot.c 2018-10-20 04:58:13.000000000 +0000
824 + * linux/drivers/block/vroot.c
826 + * written by Herbert Pötzl, 9/11/2002
827 + * ported to 2.6.10 by Herbert Pötzl, 30/12/2004
829 + * based on the loop.c code by Theodore Ts'o.
831 + * Copyright (C) 2002-2007 by Herbert Pötzl.
832 + * Redistribution of this file is permitted under the
833 + * GNU General Public License.
837 +#include <linux/module.h>
838 +#include <linux/moduleparam.h>
839 +#include <linux/file.h>
840 +#include <linux/major.h>
841 +#include <linux/blkdev.h>
842 +#include <linux/slab.h>
844 +#include <linux/vroot.h>
845 +#include <linux/vs_context.h>
848 +static int max_vroot = 8;
850 +static struct vroot_device *vroot_dev;
851 +static struct gendisk **disks;
854 +static int vroot_set_dev(
855 + struct vroot_device *vr,
856 + struct block_device *bdev,
859 + struct block_device *real_bdev;
861 + struct inode *inode;
865 + if (vr->vr_state != Vr_unbound)
874 + inode = file->f_path.dentry->d_inode;
877 + if (S_ISBLK(inode->i_mode)) {
878 + real_bdev = inode->i_bdev;
879 + vr->vr_device = real_bdev;
880 + __iget(real_bdev->bd_inode);
884 + vxdprintk(VXD_CBIT(misc, 0),
885 + "vroot[%d]_set_dev: dev=" VXF_DEV,
886 + vr->vr_number, VXD_DEV(real_bdev));
888 + vr->vr_state = Vr_bound;
897 +static int vroot_clr_dev(
898 + struct vroot_device *vr,
899 + struct block_device *bdev)
901 + struct block_device *real_bdev;
903 + if (vr->vr_state != Vr_bound)
905 + if (vr->vr_refcnt > 1) /* we needed one fd for the ioctl */
908 + real_bdev = vr->vr_device;
910 + vxdprintk(VXD_CBIT(misc, 0),
911 + "vroot[%d]_clr_dev: dev=" VXF_DEV,
912 + vr->vr_number, VXD_DEV(real_bdev));
915 + vr->vr_state = Vr_unbound;
916 + vr->vr_device = NULL;
921 +static int vr_ioctl(struct block_device *bdev, fmode_t mode,
922 + unsigned int cmd, unsigned long arg)
924 + struct vroot_device *vr = bdev->bd_disk->private_data;
927 + down(&vr->vr_ctl_mutex);
929 + case VROOT_SET_DEV:
930 + err = vroot_set_dev(vr, bdev, arg);
932 + case VROOT_CLR_DEV:
933 + err = vroot_clr_dev(vr, bdev);
939 + up(&vr->vr_ctl_mutex);
943 +static int vr_open(struct block_device *bdev, fmode_t mode)
945 + struct vroot_device *vr = bdev->bd_disk->private_data;
947 + down(&vr->vr_ctl_mutex);
949 + up(&vr->vr_ctl_mutex);
953 +static void vr_release(struct gendisk *disk, fmode_t mode)
955 + struct vroot_device *vr = disk->private_data;
957 + down(&vr->vr_ctl_mutex);
959 + up(&vr->vr_ctl_mutex);
962 +static struct block_device_operations vr_fops = {
963 + .owner = THIS_MODULE,
965 + .release = vr_release,
969 +static blk_qc_t vroot_make_request(struct request_queue *q, struct bio *bio)
971 + printk("vroot_make_request %p, %p\n", q, bio);
973 + return BLK_QC_T_NONE;
976 +struct block_device *__vroot_get_real_bdev(struct block_device *bdev)
978 + struct inode *inode = bdev->bd_inode;
979 + struct vroot_device *vr;
980 + struct block_device *real_bdev;
981 + int minor = iminor(inode);
983 + vr = &vroot_dev[minor];
984 + real_bdev = vr->vr_device;
986 + vxdprintk(VXD_CBIT(misc, 0),
987 + "vroot[%d]_get_real_bdev: dev=" VXF_DEV,
988 + vr->vr_number, VXD_DEV(real_bdev));
990 + if (vr->vr_state != Vr_bound)
991 + return ERR_PTR(-ENXIO);
993 + __iget(real_bdev->bd_inode);
1000 + * And now the modules code and kernel interface.
1003 +module_param(max_vroot, int, 0);
1005 +MODULE_PARM_DESC(max_vroot, "Maximum number of vroot devices (1-256)");
1006 +MODULE_LICENSE("GPL");
1007 +MODULE_ALIAS_BLOCKDEV_MAJOR(VROOT_MAJOR);
1009 +MODULE_AUTHOR ("Herbert P?tzl");
1010 +MODULE_DESCRIPTION ("Virtual Root Device Mapper");
1013 +int __init vroot_init(void)
1017 + if (max_vroot < 1 || max_vroot > 256) {
1018 + max_vroot = MAX_VROOT_DEFAULT;
1019 + printk(KERN_WARNING "vroot: invalid max_vroot "
1020 + "(must be between 1 and 256), "
1021 + "using default (%d)\n", max_vroot);
1024 + if (register_blkdev(VROOT_MAJOR, "vroot"))
1028 + vroot_dev = kmalloc(max_vroot * sizeof(struct vroot_device), GFP_KERNEL);
1031 + memset(vroot_dev, 0, max_vroot * sizeof(struct vroot_device));
1033 + disks = kmalloc(max_vroot * sizeof(struct gendisk *), GFP_KERNEL);
1037 + for (i = 0; i < max_vroot; i++) {
1038 + disks[i] = alloc_disk(1);
1041 + disks[i]->queue = blk_alloc_queue(GFP_KERNEL);
1042 + if (!disks[i]->queue)
1044 + blk_queue_make_request(disks[i]->queue, vroot_make_request);
1047 + for (i = 0; i < max_vroot; i++) {
1048 + struct vroot_device *vr = &vroot_dev[i];
1049 + struct gendisk *disk = disks[i];
1051 + memset(vr, 0, sizeof(*vr));
1052 + sema_init(&vr->vr_ctl_mutex, 1);
1053 + vr->vr_number = i;
1054 + disk->major = VROOT_MAJOR;
1055 + disk->first_minor = i;
1056 + disk->fops = &vr_fops;
1057 + sprintf(disk->disk_name, "vroot%d", i);
1058 + disk->private_data = vr;
1061 + err = register_vroot_grb(&__vroot_get_real_bdev);
1065 + for (i = 0; i < max_vroot; i++)
1066 + add_disk(disks[i]);
1067 + printk(KERN_INFO "vroot: loaded (max %d devices)\n", max_vroot);
1072 + put_disk(disks[i]);
1077 + unregister_blkdev(VROOT_MAJOR, "vroot");
1078 + printk(KERN_ERR "vroot: ran out of memory\n");
1082 +void vroot_exit(void)
1086 + if (unregister_vroot_grb(&__vroot_get_real_bdev))
1087 + printk(KERN_WARNING "vroot: cannot unregister grb\n");
1089 + for (i = 0; i < max_vroot; i++) {
1090 + del_gendisk(disks[i]);
1091 + put_disk(disks[i]);
1093 + unregister_blkdev(VROOT_MAJOR, "vroot");
1099 +module_init(vroot_init);
1100 +module_exit(vroot_exit);
1104 +static int __init max_vroot_setup(char *str)
1106 + max_vroot = simple_strtol(str, NULL, 0);
1110 +__setup("max_vroot=", max_vroot_setup);
1114 diff -NurpP --minimal linux-4.9.217/drivers/md/dm.c linux-4.9.217-vs2.3.9.12/drivers/md/dm.c
1115 --- linux-4.9.217/drivers/md/dm.c 2020-03-27 00:50:57.810857303 +0000
1116 +++ linux-4.9.217-vs2.3.9.12/drivers/md/dm.c 2020-04-01 09:40:21.845577638 +0000
1118 #include <linux/wait.h>
1119 #include <linux/pr.h>
1120 #include <linux/vmalloc.h>
1121 +#include <linux/vs_base.h>
1123 #define DM_MSG_PREFIX "core"
1125 @@ -300,6 +301,7 @@ int dm_deleting_md(struct mapped_device
1126 static int dm_blk_open(struct block_device *bdev, fmode_t mode)
1128 struct mapped_device *md;
1131 spin_lock(&_minor_lock);
1133 @@ -308,17 +310,19 @@ static int dm_blk_open(struct block_devi
1136 if (test_bit(DMF_FREEING, &md->flags) ||
1137 - dm_deleting_md(md)) {
1139 + dm_deleting_md(md))
1143 + if (!vx_check(md->xid, VS_IDENT|VS_HOSTID))
1148 atomic_inc(&md->open_count);
1151 spin_unlock(&_minor_lock);
1153 - return md ? 0 : -ENXIO;
1157 static void dm_blk_close(struct gendisk *disk, fmode_t mode)
1158 @@ -744,6 +748,14 @@ int dm_set_geometry(struct mapped_device
1163 + * Get the xid associated with a dm device
1165 +vxid_t dm_get_xid(struct mapped_device *md)
1170 /*-----------------------------------------------------------------
1172 * A more elegant soln is in the works that uses the queue
1173 @@ -1549,6 +1561,7 @@ static struct mapped_device *alloc_dev(i
1174 INIT_LIST_HEAD(&md->uevent_list);
1175 INIT_LIST_HEAD(&md->table_devices);
1176 spin_lock_init(&md->uevent_lock);
1177 + md->xid = vx_current_xid();
1179 md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id);
1181 diff -NurpP --minimal linux-4.9.217/drivers/md/dm-core.h linux-4.9.217-vs2.3.9.12/drivers/md/dm-core.h
1182 --- linux-4.9.217/drivers/md/dm-core.h 2020-03-27 00:50:57.740858403 +0000
1183 +++ linux-4.9.217-vs2.3.9.12/drivers/md/dm-core.h 2018-10-20 04:58:13.000000000 +0000
1184 @@ -52,6 +52,7 @@ struct mapped_device {
1187 atomic_t open_count;
1190 struct dm_target *immutable_target;
1191 struct target_type *immutable_target_type;
1192 diff -NurpP --minimal linux-4.9.217/drivers/md/dm.h linux-4.9.217-vs2.3.9.12/drivers/md/dm.h
1193 --- linux-4.9.217/drivers/md/dm.h 2016-12-11 19:17:54.000000000 +0000
1194 +++ linux-4.9.217-vs2.3.9.12/drivers/md/dm.h 2018-10-20 04:58:13.000000000 +0000
1195 @@ -45,6 +45,8 @@ struct dm_dev_internal {
1197 struct dm_md_mempools;
1199 +vxid_t dm_get_xid(struct mapped_device *md);
1201 /*-----------------------------------------------------------------
1202 * Internal table functions.
1203 *---------------------------------------------------------------*/
1204 diff -NurpP --minimal linux-4.9.217/drivers/md/dm-ioctl.c linux-4.9.217-vs2.3.9.12/drivers/md/dm-ioctl.c
1205 --- linux-4.9.217/drivers/md/dm-ioctl.c 2020-03-27 00:50:57.760858091 +0000
1206 +++ linux-4.9.217-vs2.3.9.12/drivers/md/dm-ioctl.c 2019-02-22 08:37:51.173126752 +0000
1208 #include <linux/dm-ioctl.h>
1209 #include <linux/hdreg.h>
1210 #include <linux/compat.h>
1211 +#include <linux/vs_context.h>
1213 #include <asm/uaccess.h>
1215 @@ -114,7 +115,8 @@ static struct hash_cell *__get_name_cell
1216 unsigned int h = hash_str(str);
1218 list_for_each_entry (hc, _name_buckets + h, name_list)
1219 - if (!strcmp(hc->name, str)) {
1220 + if (vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT) &&
1221 + !strcmp(hc->name, str)) {
1225 @@ -128,7 +130,8 @@ static struct hash_cell *__get_uuid_cell
1226 unsigned int h = hash_str(str);
1228 list_for_each_entry (hc, _uuid_buckets + h, uuid_list)
1229 - if (!strcmp(hc->uuid, str)) {
1230 + if (vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT) &&
1231 + !strcmp(hc->uuid, str)) {
1235 @@ -139,13 +142,15 @@ static struct hash_cell *__get_uuid_cell
1236 static struct hash_cell *__get_dev_cell(uint64_t dev)
1238 struct mapped_device *md;
1239 - struct hash_cell *hc;
1240 + struct hash_cell *hc = NULL;
1242 md = dm_get_md(huge_decode_dev(dev));
1246 - hc = dm_get_mdptr(md);
1247 + if (vx_check(dm_get_xid(md), VS_WATCH_P | VS_IDENT))
1248 + hc = dm_get_mdptr(md);
1253 @@ -467,6 +472,9 @@ typedef int (*ioctl_fn)(struct dm_ioctl
1255 static int remove_all(struct dm_ioctl *param, size_t param_size)
1257 + if (!vx_check(0, VS_ADMIN))
1260 dm_hash_remove_all(true, !!(param->flags & DM_DEFERRED_REMOVE), false);
1261 param->data_size = 0;
1263 @@ -514,6 +522,8 @@ static int list_devices(struct dm_ioctl
1265 for (i = 0; i < NUM_BUCKETS; i++) {
1266 list_for_each_entry (hc, _name_buckets + i, name_list) {
1267 + if (!vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT))
1269 needed += sizeof(struct dm_name_list);
1270 needed += strlen(hc->name) + 1;
1271 needed += ALIGN_MASK;
1272 @@ -537,6 +547,8 @@ static int list_devices(struct dm_ioctl
1274 for (i = 0; i < NUM_BUCKETS; i++) {
1275 list_for_each_entry (hc, _name_buckets + i, name_list) {
1276 + if (!vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT))
1279 old_nl->next = (uint32_t) ((void *) nl -
1281 @@ -1799,8 +1811,8 @@ static int ctl_ioctl(uint command, struc
1282 size_t input_param_size;
1283 struct dm_ioctl param_kernel;
1285 - /* only root can play with this */
1286 - if (!capable(CAP_SYS_ADMIN))
1287 + /* only root and certain contexts can play with this */
1288 + if (!vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_MAPPER))
1291 if (_IOC_TYPE(command) != DM_IOCTL)
1292 diff -NurpP --minimal linux-4.9.217/drivers/net/tun.c linux-4.9.217-vs2.3.9.12/drivers/net/tun.c
1293 --- linux-4.9.217/drivers/net/tun.c 2020-03-27 00:51:04.880745870 +0000
1294 +++ linux-4.9.217-vs2.3.9.12/drivers/net/tun.c 2020-04-01 09:40:23.915543339 +0000
1296 #include <linux/nsproxy.h>
1297 #include <linux/virtio_net.h>
1298 #include <linux/rcupdate.h>
1299 +#include <linux/vs_network.h>
1300 #include <net/net_namespace.h>
1301 #include <net/netns/generic.h>
1302 #include <net/rtnetlink.h>
1303 @@ -194,6 +195,7 @@ struct tun_struct {
1309 struct net_device *dev;
1310 netdev_features_t set_features;
1311 @@ -490,6 +492,7 @@ static inline bool tun_not_capable(struc
1312 return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) ||
1313 (gid_valid(tun->group) && !in_egroup_p(tun->group))) &&
1314 !ns_capable(net->user_ns, CAP_NET_ADMIN);
1315 + /* !cap_raised(current_cap(), CAP_NET_ADMIN) */
1318 static void tun_set_real_num_queues(struct tun_struct *tun)
1319 @@ -1569,6 +1572,7 @@ static void tun_setup(struct net_device
1321 tun->owner = INVALID_UID;
1322 tun->group = INVALID_GID;
1323 + tun->nid = nx_current_nid();
1325 dev->ethtool_ops = &tun_ethtool_ops;
1326 dev->destructor = tun_free_netdev;
1327 @@ -1782,7 +1786,7 @@ static int tun_set_iff(struct net *net,
1328 int queues = ifr->ifr_flags & IFF_MULTI_QUEUE ?
1331 - if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1332 + if (!nx_ns_capable(net->user_ns, CAP_NET_ADMIN, NXC_TUN_CREATE))
1334 err = security_tun_dev_create();
1336 @@ -2151,6 +2155,16 @@ static long __tun_chr_ioctl(struct file
1337 from_kgid(&init_user_ns, tun->group));
1341 + if (!capable(CAP_CONTEXT))
1344 + /* Set nid owner of the device */
1345 + tun->nid = (vnid_t) arg;
1347 + tun_debug(KERN_INFO, tun, "nid owner set to %u\n", tun->nid);
1351 /* Only allow setting the type when the interface is down */
1352 if (tun->dev->flags & IFF_UP) {
1353 diff -NurpP --minimal linux-4.9.217/drivers/scsi/cxgbi/libcxgbi.c linux-4.9.217-vs2.3.9.12/drivers/scsi/cxgbi/libcxgbi.c
1354 --- linux-4.9.217/drivers/scsi/cxgbi/libcxgbi.c 2020-03-27 00:51:09.110679202 +0000
1355 +++ linux-4.9.217-vs2.3.9.12/drivers/scsi/cxgbi/libcxgbi.c 2020-04-01 09:40:26.325503408 +0000
1356 @@ -777,7 +777,8 @@ static struct cxgbi_sock *cxgbi_check_ro
1357 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt);
1359 err = ipv6_dev_get_saddr(&init_net, idev ? idev->dev : NULL,
1360 - &daddr6->sin6_addr, 0, &pref_saddr);
1361 + &daddr6->sin6_addr, 0, &pref_saddr,
1364 pr_info("failed to get source address to reach %pI6\n",
1365 &daddr6->sin6_addr);
1366 diff -NurpP --minimal linux-4.9.217/drivers/tty/sysrq.c linux-4.9.217-vs2.3.9.12/drivers/tty/sysrq.c
1367 --- linux-4.9.217/drivers/tty/sysrq.c 2020-03-27 00:51:14.160599611 +0000
1368 +++ linux-4.9.217-vs2.3.9.12/drivers/tty/sysrq.c 2020-04-01 09:40:27.395485676 +0000
1370 #include <linux/syscalls.h>
1371 #include <linux/of.h>
1372 #include <linux/rcupdate.h>
1373 +#include <linux/vserver/debug.h>
1375 #include <asm/ptrace.h>
1376 #include <asm/irq_regs.h>
1377 @@ -428,6 +429,21 @@ static struct sysrq_key_op sysrq_unrt_op
1378 .enable_mask = SYSRQ_ENABLE_RTNICE,
1382 +#ifdef CONFIG_VSERVER_DEBUG
1383 +static void sysrq_handle_vxinfo(int key)
1385 + dump_vx_info_inactive((key == 'x') ? 0 : 1);
1388 +static struct sysrq_key_op sysrq_showvxinfo_op = {
1389 + .handler = sysrq_handle_vxinfo,
1390 + .help_msg = "conteXt",
1391 + .action_msg = "Show Context Info",
1392 + .enable_mask = SYSRQ_ENABLE_DUMP,
1396 /* Key Operations table and lock */
1397 static DEFINE_SPINLOCK(sysrq_key_table_lock);
1399 @@ -484,7 +500,11 @@ static struct sysrq_key_op *sysrq_key_ta
1400 /* x: May be registered on mips for TLB dump */
1401 /* x: May be registered on ppc/powerpc for xmon */
1402 /* x: May be registered on sparc64 for global PMU dump */
1403 +#ifdef CONFIG_VSERVER_DEBUG
1404 + &sysrq_showvxinfo_op, /* x */
1408 /* y: May be registered on sparc64 for global register dump */
1410 &sysrq_ftrace_dump_op, /* z */
1411 @@ -499,6 +519,8 @@ static int sysrq_key_table_key2index(int
1413 else if ((key >= 'a') && (key <= 'z'))
1414 retval = key + 10 - 'a';
1415 + else if ((key >= 'A') && (key <= 'Z'))
1416 + retval = key + 10 - 'A';
1420 diff -NurpP --minimal linux-4.9.217/drivers/tty/tty_io.c linux-4.9.217-vs2.3.9.12/drivers/tty/tty_io.c
1421 --- linux-4.9.217/drivers/tty/tty_io.c 2020-03-27 00:51:14.190599135 +0000
1422 +++ linux-4.9.217-vs2.3.9.12/drivers/tty/tty_io.c 2019-10-05 14:58:43.980334565 +0000
1425 #include <linux/kmod.h>
1426 #include <linux/nsproxy.h>
1427 +#include <linux/vs_pid.h>
1429 #undef TTY_DEBUG_HANGUP
1430 #ifdef TTY_DEBUG_HANGUP
1431 @@ -2318,7 +2319,8 @@ static int tiocsti(struct tty_struct *tt
1433 struct tty_ldisc *ld;
1435 - if ((current->signal->tty != tty) && !capable(CAP_SYS_ADMIN))
1436 + if (((current->signal->tty != tty) &&
1437 + !vx_capable(CAP_SYS_ADMIN, VXC_TIOCSTI)))
1439 if (get_user(ch, p))
1441 @@ -2634,6 +2636,7 @@ static int tiocspgrp(struct tty_struct *
1443 if (get_user(pgrp_nr, p))
1445 + pgrp_nr = vx_rmap_pid(pgrp_nr);
1449 diff -NurpP --minimal linux-4.9.217/fs/attr.c linux-4.9.217-vs2.3.9.12/fs/attr.c
1450 --- linux-4.9.217/fs/attr.c 2016-12-11 19:17:54.000000000 +0000
1451 +++ linux-4.9.217-vs2.3.9.12/fs/attr.c 2018-10-20 04:58:13.000000000 +0000
1453 #include <linux/security.h>
1454 #include <linux/evm.h>
1455 #include <linux/ima.h>
1456 +#include <linux/proc_fs.h>
1457 +#include <linux/devpts_fs.h>
1458 +#include <linux/vs_tag.h>
1461 * setattr_prepare - check if attribute changes to a dentry are allowed
1462 @@ -90,6 +93,10 @@ kill_priv:
1466 + /* check for inode tag permission */
1467 + if (dx_permission(inode, MAY_WRITE))
1472 EXPORT_SYMBOL(setattr_prepare);
1473 @@ -160,6 +167,8 @@ void setattr_copy(struct inode *inode, c
1474 inode->i_uid = attr->ia_uid;
1475 if (ia_valid & ATTR_GID)
1476 inode->i_gid = attr->ia_gid;
1477 + if ((ia_valid & ATTR_TAG) && IS_TAGGED(inode))
1478 + inode->i_tag = attr->ia_tag;
1479 if (ia_valid & ATTR_ATIME)
1480 inode->i_atime = timespec_trunc(attr->ia_atime,
1481 inode->i_sb->s_time_gran);
1482 @@ -210,7 +219,8 @@ int notify_change(struct dentry * dentry
1484 WARN_ON_ONCE(!inode_is_locked(inode));
1486 - if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) {
1487 + if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID |
1488 + ATTR_TAG | ATTR_TIMES_SET)) {
1489 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
1492 diff -NurpP --minimal linux-4.9.217/fs/block_dev.c linux-4.9.217-vs2.3.9.12/fs/block_dev.c
1493 --- linux-4.9.217/fs/block_dev.c 2020-03-27 00:51:17.200551698 +0000
1494 +++ linux-4.9.217-vs2.3.9.12/fs/block_dev.c 2019-02-22 08:37:54.463070814 +0000
1496 #include <linux/dax.h>
1497 #include <linux/badblocks.h>
1498 #include <linux/falloc.h>
1499 +#include <linux/vs_device.h>
1500 #include <asm/uaccess.h>
1501 #include "internal.h"
1503 @@ -734,6 +735,7 @@ struct block_device *bdget(dev_t dev)
1504 bdev->bd_invalidated = 0;
1505 inode->i_mode = S_IFBLK;
1506 inode->i_rdev = dev;
1507 + inode->i_mdev = dev;
1508 inode->i_bdev = bdev;
1509 inode->i_data.a_ops = &def_blk_aops;
1510 mapping_set_gfp_mask(&inode->i_data, GFP_USER);
1511 @@ -780,6 +782,11 @@ EXPORT_SYMBOL(bdput);
1512 static struct block_device *bd_acquire(struct inode *inode)
1514 struct block_device *bdev;
1517 + if (!vs_map_blkdev(inode->i_rdev, &mdev, DATTR_OPEN))
1519 + inode->i_mdev = mdev;
1521 spin_lock(&bdev_lock);
1522 bdev = inode->i_bdev;
1523 @@ -790,7 +797,7 @@ static struct block_device *bd_acquire(s
1525 spin_unlock(&bdev_lock);
1527 - bdev = bdget(inode->i_rdev);
1528 + bdev = bdget(mdev);
1530 spin_lock(&bdev_lock);
1531 if (!inode->i_bdev) {
1532 diff -NurpP --minimal linux-4.9.217/fs/btrfs/ctree.h linux-4.9.217-vs2.3.9.12/fs/btrfs/ctree.h
1533 --- linux-4.9.217/fs/btrfs/ctree.h 2020-03-27 00:51:17.270550595 +0000
1534 +++ linux-4.9.217-vs2.3.9.12/fs/btrfs/ctree.h 2020-04-01 09:40:28.245471591 +0000
1535 @@ -1319,6 +1319,8 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(c
1536 #define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
1537 #define BTRFS_DEFAULT_MAX_INLINE (2048)
1539 +#define BTRFS_MOUNT_TAGGED (1 << 24)
1541 #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1542 #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
1543 #define btrfs_raw_test_opt(o, opt) ((o) & BTRFS_MOUNT_##opt)
1544 @@ -1669,6 +1671,7 @@ BTRFS_SETGET_FUNCS(inode_block_group, st
1545 BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32);
1546 BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32);
1547 BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32);
1548 +BTRFS_SETGET_FUNCS(inode_tag, struct btrfs_inode_item, tag, 16);
1549 BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32);
1550 BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64);
1551 BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64);
1552 @@ -1716,6 +1719,10 @@ BTRFS_SETGET_FUNCS(extent_flags, struct
1554 BTRFS_SETGET_FUNCS(extent_refs_v0, struct btrfs_extent_item_v0, refs, 32);
1556 +#define BTRFS_INODE_IXUNLINK (1 << 24)
1557 +#define BTRFS_INODE_BARRIER (1 << 25)
1558 +#define BTRFS_INODE_COW (1 << 26)
1561 BTRFS_SETGET_FUNCS(tree_block_level, struct btrfs_tree_block_info, level, 8);
1563 @@ -3199,6 +3206,7 @@ int btrfs_ioctl_get_supported_features(v
1564 void btrfs_update_iflags(struct inode *inode);
1565 void btrfs_inherit_iflags(struct inode *inode, struct inode *dir);
1566 int btrfs_is_empty_uuid(u8 *uuid);
1567 +int btrfs_sync_flags(struct inode *inode, int, int);
1568 int btrfs_defrag_file(struct inode *inode, struct file *file,
1569 struct btrfs_ioctl_defrag_range_args *range,
1570 u64 newer_than, unsigned long max_pages);
1571 diff -NurpP --minimal linux-4.9.217/fs/btrfs/disk-io.c linux-4.9.217-vs2.3.9.12/fs/btrfs/disk-io.c
1572 --- linux-4.9.217/fs/btrfs/disk-io.c 2020-03-27 00:51:17.290550279 +0000
1573 +++ linux-4.9.217-vs2.3.9.12/fs/btrfs/disk-io.c 2020-04-01 09:40:28.245471591 +0000
1574 @@ -2705,6 +2705,9 @@ int open_ctree(struct super_block *sb,
1578 + if (btrfs_test_opt(fs_info, TAGGED))
1579 + sb->s_flags |= MS_TAGGED;
1581 features = btrfs_super_incompat_flags(disk_super) &
1582 ~BTRFS_FEATURE_INCOMPAT_SUPP;
1584 diff -NurpP --minimal linux-4.9.217/fs/btrfs/inode.c linux-4.9.217-vs2.3.9.12/fs/btrfs/inode.c
1585 --- linux-4.9.217/fs/btrfs/inode.c 2020-03-27 00:51:17.350549330 +0000
1586 +++ linux-4.9.217-vs2.3.9.12/fs/btrfs/inode.c 2020-04-01 09:40:28.265471261 +0000
1588 #include <linux/blkdev.h>
1589 #include <linux/posix_acl_xattr.h>
1590 #include <linux/uio.h>
1591 +#include <linux/vs_tag.h>
1593 #include "disk-io.h"
1594 #include "transaction.h"
1595 @@ -3701,6 +3702,9 @@ static int btrfs_read_locked_inode(struc
1603 bool filled = false;
1604 int first_xattr_slot;
1605 @@ -3733,8 +3737,14 @@ static int btrfs_read_locked_inode(struc
1606 struct btrfs_inode_item);
1607 inode->i_mode = btrfs_inode_mode(leaf, inode_item);
1608 set_nlink(inode, btrfs_inode_nlink(leaf, inode_item));
1609 - i_uid_write(inode, btrfs_inode_uid(leaf, inode_item));
1610 - i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
1612 + kuid = make_kuid(&init_user_ns, btrfs_inode_uid(leaf, inode_item));
1613 + kgid = make_kgid(&init_user_ns, btrfs_inode_gid(leaf, inode_item));
1614 + ktag = make_ktag(&init_user_ns, btrfs_inode_tag(leaf, inode_item));
1616 + inode->i_uid = INOTAG_KUID(DX_TAG(inode), kuid, kgid);
1617 + inode->i_gid = INOTAG_KGID(DX_TAG(inode), kuid, kgid);
1618 + inode->i_tag = INOTAG_KTAG(DX_TAG(inode), kuid, kgid, ktag);
1619 btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
1621 inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime);
1622 @@ -3889,11 +3899,18 @@ static void fill_inode_item(struct btrfs
1623 struct inode *inode)
1625 struct btrfs_map_token token;
1626 + uid_t uid = from_kuid(&init_user_ns,
1627 + TAGINO_KUID(DX_TAG(inode), inode->i_uid, inode->i_tag));
1628 + gid_t gid = from_kgid(&init_user_ns,
1629 + TAGINO_KGID(DX_TAG(inode), inode->i_gid, inode->i_tag));
1631 btrfs_init_map_token(&token);
1633 - btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
1634 - btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
1635 + btrfs_set_token_inode_uid(leaf, item, uid, &token);
1636 + btrfs_set_token_inode_gid(leaf, item, gid, &token);
1637 +#ifdef CONFIG_TAGGING_INTERN
1638 + btrfs_set_token_inode_tag(leaf, item, i_tag_read(inode), &token);
1640 btrfs_set_token_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size,
1642 btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
1643 @@ -10646,6 +10663,7 @@ static const struct inode_operations btr
1644 .mknod = btrfs_mknod,
1645 .listxattr = btrfs_listxattr,
1646 .permission = btrfs_permission,
1647 + .sync_flags = btrfs_sync_flags,
1648 .get_acl = btrfs_get_acl,
1649 .set_acl = btrfs_set_acl,
1650 .update_time = btrfs_update_time,
1651 @@ -10654,6 +10672,7 @@ static const struct inode_operations btr
1652 static const struct inode_operations btrfs_dir_ro_inode_operations = {
1653 .lookup = btrfs_lookup,
1654 .permission = btrfs_permission,
1655 + .sync_flags = btrfs_sync_flags,
1656 .update_time = btrfs_update_time,
1659 @@ -10719,6 +10738,7 @@ static const struct inode_operations btr
1660 .listxattr = btrfs_listxattr,
1661 .permission = btrfs_permission,
1662 .fiemap = btrfs_fiemap,
1663 + .sync_flags = btrfs_sync_flags,
1664 .get_acl = btrfs_get_acl,
1665 .set_acl = btrfs_set_acl,
1666 .update_time = btrfs_update_time,
1667 diff -NurpP --minimal linux-4.9.217/fs/btrfs/ioctl.c linux-4.9.217-vs2.3.9.12/fs/btrfs/ioctl.c
1668 --- linux-4.9.217/fs/btrfs/ioctl.c 2020-03-27 00:51:17.350549330 +0000
1669 +++ linux-4.9.217-vs2.3.9.12/fs/btrfs/ioctl.c 2020-04-01 09:40:28.265471261 +0000
1670 @@ -110,10 +110,13 @@ static unsigned int btrfs_flags_to_ioctl
1672 unsigned int iflags = 0;
1674 - if (flags & BTRFS_INODE_SYNC)
1675 - iflags |= FS_SYNC_FL;
1676 if (flags & BTRFS_INODE_IMMUTABLE)
1677 iflags |= FS_IMMUTABLE_FL;
1678 + if (flags & BTRFS_INODE_IXUNLINK)
1679 + iflags |= FS_IXUNLINK_FL;
1681 + if (flags & BTRFS_INODE_SYNC)
1682 + iflags |= FS_SYNC_FL;
1683 if (flags & BTRFS_INODE_APPEND)
1684 iflags |= FS_APPEND_FL;
1685 if (flags & BTRFS_INODE_NODUMP)
1686 @@ -130,34 +133,84 @@ static unsigned int btrfs_flags_to_ioctl
1687 else if (flags & BTRFS_INODE_COMPRESS)
1688 iflags |= FS_COMPR_FL;
1690 + if (flags & BTRFS_INODE_BARRIER)
1691 + iflags |= FS_BARRIER_FL;
1692 + if (flags & BTRFS_INODE_COW)
1693 + iflags |= FS_COW_FL;
1698 - * Update inode->i_flags based on the btrfs internal flags.
1699 + * Update inode->i_(v)flags based on the btrfs internal flags.
1701 void btrfs_update_iflags(struct inode *inode)
1703 struct btrfs_inode *ip = BTRFS_I(inode);
1704 unsigned int new_fl = 0;
1706 - if (ip->flags & BTRFS_INODE_SYNC)
1708 if (ip->flags & BTRFS_INODE_IMMUTABLE)
1709 new_fl |= S_IMMUTABLE;
1710 + if (ip->flags & BTRFS_INODE_IXUNLINK)
1711 + new_fl |= S_IXUNLINK;
1713 + if (ip->flags & BTRFS_INODE_SYNC)
1715 if (ip->flags & BTRFS_INODE_APPEND)
1717 if (ip->flags & BTRFS_INODE_NOATIME)
1718 new_fl |= S_NOATIME;
1719 if (ip->flags & BTRFS_INODE_DIRSYNC)
1720 new_fl |= S_DIRSYNC;
1722 set_mask_bits(&inode->i_flags,
1723 - S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC,
1724 + S_SYNC | S_APPEND | S_IMMUTABLE | S_IXUNLINK | S_NOATIME | S_DIRSYNC,
1728 + if (ip->flags & BTRFS_INODE_BARRIER)
1729 + new_fl |= V_BARRIER;
1730 + if (ip->flags & BTRFS_INODE_COW)
1733 + set_mask_bits(&inode->i_vflags,
1734 + V_BARRIER | V_COW, new_fl);
1738 + * Update btrfs internal flags from inode->i_(v)flags.
1740 +void btrfs_update_flags(struct inode *inode)
1742 + struct btrfs_inode *ip = BTRFS_I(inode);
1744 + unsigned int flags = inode->i_flags;
1745 + unsigned int vflags = inode->i_vflags;
1747 + ip->flags &= ~(BTRFS_INODE_SYNC | BTRFS_INODE_APPEND |
1748 + BTRFS_INODE_IMMUTABLE | BTRFS_INODE_IXUNLINK |
1749 + BTRFS_INODE_NOATIME | BTRFS_INODE_DIRSYNC |
1750 + BTRFS_INODE_BARRIER | BTRFS_INODE_COW);
1752 + if (flags & S_IMMUTABLE)
1753 + ip->flags |= BTRFS_INODE_IMMUTABLE;
1754 + if (flags & S_IXUNLINK)
1755 + ip->flags |= BTRFS_INODE_IXUNLINK;
1757 + if (flags & S_SYNC)
1758 + ip->flags |= BTRFS_INODE_SYNC;
1759 + if (flags & S_APPEND)
1760 + ip->flags |= BTRFS_INODE_APPEND;
1761 + if (flags & S_NOATIME)
1762 + ip->flags |= BTRFS_INODE_NOATIME;
1763 + if (flags & S_DIRSYNC)
1764 + ip->flags |= BTRFS_INODE_DIRSYNC;
1766 + if (vflags & V_BARRIER)
1767 + ip->flags |= BTRFS_INODE_BARRIER;
1768 + if (vflags & V_COW)
1769 + ip->flags |= BTRFS_INODE_COW;
1773 * Inherit flags from the parent inode.
1775 * Currently only the compression flags and the cow flags are inherited.
1776 @@ -170,6 +223,7 @@ void btrfs_inherit_iflags(struct inode *
1779 flags = BTRFS_I(dir)->flags;
1780 + flags &= ~BTRFS_INODE_BARRIER;
1782 if (flags & BTRFS_INODE_NOCOMPRESS) {
1783 BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
1784 @@ -188,6 +242,30 @@ void btrfs_inherit_iflags(struct inode *
1785 btrfs_update_iflags(inode);
1788 +int btrfs_sync_flags(struct inode *inode, int flags, int vflags)
1790 + struct btrfs_inode *ip = BTRFS_I(inode);
1791 + struct btrfs_root *root = ip->root;
1792 + struct btrfs_trans_handle *trans;
1795 + trans = btrfs_join_transaction(root);
1798 + inode->i_flags = flags;
1799 + inode->i_vflags = vflags;
1800 + btrfs_update_flags(inode);
1802 + ret = btrfs_update_inode(trans, root, inode);
1805 + btrfs_update_iflags(inode);
1806 + inode->i_ctime = CURRENT_TIME;
1807 + btrfs_end_transaction(trans, root);
1812 static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
1814 struct btrfs_inode *ip = BTRFS_I(file_inode(file));
1815 @@ -250,21 +328,27 @@ static int btrfs_ioctl_setflags(struct f
1817 flags = btrfs_mask_flags(inode->i_mode, flags);
1818 oldflags = btrfs_flags_to_ioctl(ip->flags);
1819 - if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
1820 + if ((flags ^ oldflags) & (FS_APPEND_FL |
1821 + FS_IMMUTABLE_FL | FS_IXUNLINK_FL)) {
1822 if (!capable(CAP_LINUX_IMMUTABLE)) {
1828 - if (flags & FS_SYNC_FL)
1829 - ip->flags |= BTRFS_INODE_SYNC;
1831 - ip->flags &= ~BTRFS_INODE_SYNC;
1832 if (flags & FS_IMMUTABLE_FL)
1833 ip->flags |= BTRFS_INODE_IMMUTABLE;
1835 ip->flags &= ~BTRFS_INODE_IMMUTABLE;
1836 + if (flags & FS_IXUNLINK_FL)
1837 + ip->flags |= BTRFS_INODE_IXUNLINK;
1839 + ip->flags &= ~BTRFS_INODE_IXUNLINK;
1841 + if (flags & FS_SYNC_FL)
1842 + ip->flags |= BTRFS_INODE_SYNC;
1844 + ip->flags &= ~BTRFS_INODE_SYNC;
1845 if (flags & FS_APPEND_FL)
1846 ip->flags |= BTRFS_INODE_APPEND;
1848 diff -NurpP --minimal linux-4.9.217/fs/btrfs/super.c linux-4.9.217-vs2.3.9.12/fs/btrfs/super.c
1849 --- linux-4.9.217/fs/btrfs/super.c 2020-03-27 00:51:17.390548703 +0000
1850 +++ linux-4.9.217-vs2.3.9.12/fs/btrfs/super.c 2020-04-01 09:40:28.265471261 +0000
1851 @@ -327,7 +327,7 @@ enum {
1852 #ifdef CONFIG_BTRFS_DEBUG
1853 Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
1856 + Opt_tag, Opt_notag, Opt_tagid, Opt_err,
1859 static const match_table_t tokens = {
1860 @@ -388,6 +388,9 @@ static const match_table_t tokens = {
1861 {Opt_fragment_metadata, "fragment=metadata"},
1862 {Opt_fragment_all, "fragment=all"},
1865 + {Opt_notag, "notag"},
1866 + {Opt_tagid, "tagid=%u"},
1870 @@ -833,6 +836,22 @@ int btrfs_parse_options(struct btrfs_roo
1871 btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
1874 +#ifndef CONFIG_TAGGING_NONE
1876 + printk(KERN_INFO "btrfs: use tagging\n");
1877 + btrfs_set_opt(info->mount_opt, TAGGED);
1880 + printk(KERN_INFO "btrfs: disabled tagging\n");
1881 + btrfs_clear_opt(info->mount_opt, TAGGED);
1884 +#ifdef CONFIG_PROPAGATE
1887 + btrfs_set_opt(info->mount_opt, TAGGED);
1891 btrfs_info(root->fs_info,
1892 "unrecognized mount option '%s'", p);
1893 @@ -1754,6 +1773,12 @@ static int btrfs_remount(struct super_bl
1894 btrfs_resize_thread_pool(fs_info,
1895 fs_info->thread_pool_size, old_thread_pool_size);
1897 + if (btrfs_test_opt(fs_info, TAGGED) && !(sb->s_flags & MS_TAGGED)) {
1898 + printk("btrfs: %s: tagging not permitted on remount.\n",
1903 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
1906 diff -NurpP --minimal linux-4.9.217/fs/char_dev.c linux-4.9.217-vs2.3.9.12/fs/char_dev.c
1907 --- linux-4.9.217/fs/char_dev.c 2020-03-27 00:51:17.560546024 +0000
1908 +++ linux-4.9.217-vs2.3.9.12/fs/char_dev.c 2020-04-01 09:40:28.315470432 +0000
1910 #include <linux/mutex.h>
1911 #include <linux/backing-dev.h>
1912 #include <linux/tty.h>
1913 +#include <linux/vs_context.h>
1914 +#include <linux/vs_device.h>
1916 #include "internal.h"
1918 @@ -360,14 +362,21 @@ static int chrdev_open(struct inode *ino
1920 struct cdev *new = NULL;
1924 + if (!vs_map_chrdev(inode->i_rdev, &mdev, DATTR_OPEN))
1926 + inode->i_mdev = mdev;
1928 spin_lock(&cdev_lock);
1931 struct kobject *kobj;
1934 spin_unlock(&cdev_lock);
1935 - kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
1937 + kobj = kobj_lookup(cdev_map, mdev, &idx);
1940 new = container_of(kobj, struct cdev, kobj);
1941 diff -NurpP --minimal linux-4.9.217/fs/dcache.c linux-4.9.217-vs2.3.9.12/fs/dcache.c
1942 --- linux-4.9.217/fs/dcache.c 2020-03-27 00:51:17.880540977 +0000
1943 +++ linux-4.9.217-vs2.3.9.12/fs/dcache.c 2019-10-05 14:58:45.150315864 +0000
1945 #include <linux/ratelimit.h>
1946 #include <linux/list_lru.h>
1947 #include <linux/kasan.h>
1948 +#include <linux/vs_limit.h>
1950 #include "internal.h"
1952 @@ -690,6 +691,7 @@ static inline bool fast_dput(struct dent
1953 spin_lock(&dentry->d_lock);
1954 if (dentry->d_lockref.count > 1) {
1955 dentry->d_lockref.count--;
1956 + vx_dentry_dec(dentry);
1957 spin_unlock(&dentry->d_lock);
1960 @@ -821,6 +823,7 @@ repeat:
1961 dentry_lru_add(dentry);
1963 dentry->d_lockref.count--;
1964 + vx_dentry_dec(dentry);
1965 spin_unlock(&dentry->d_lock);
1968 @@ -838,6 +841,7 @@ EXPORT_SYMBOL(dput);
1969 static inline void __dget_dlock(struct dentry *dentry)
1971 dentry->d_lockref.count++;
1972 + vx_dentry_inc(dentry);
1975 static inline void __dget(struct dentry *dentry)
1976 @@ -850,6 +854,8 @@ struct dentry *dget_parent(struct dentry
1980 + vx_dentry_dec(dentry);
1983 * Do optimistic parent lookup without any
1985 @@ -880,6 +886,7 @@ repeat:
1987 BUG_ON(!ret->d_lockref.count);
1988 ret->d_lockref.count++;
1989 + vx_dentry_inc(ret);
1990 spin_unlock(&ret->d_lock);
1993 @@ -1034,6 +1041,7 @@ static void shrink_dentry_list(struct li
1994 parent = lock_parent(dentry);
1995 if (dentry->d_lockref.count != 1) {
1996 dentry->d_lockref.count--;
1997 + vx_dentry_dec(dentry);
1998 spin_unlock(&dentry->d_lock);
2000 spin_unlock(&parent->d_lock);
2001 @@ -1594,6 +1602,9 @@ struct dentry *__d_alloc(struct super_bl
2005 + if (!vx_dentry_avail(1))
2008 dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
2011 @@ -1637,6 +1648,7 @@ struct dentry *__d_alloc(struct super_bl
2013 dentry->d_lockref.count = 1;
2014 dentry->d_flags = 0;
2015 + vx_dentry_inc(dentry);
2016 spin_lock_init(&dentry->d_lock);
2017 seqcount_init(&dentry->d_seq);
2018 dentry->d_inode = NULL;
2019 @@ -2310,6 +2322,7 @@ struct dentry *__d_lookup(const struct d
2022 dentry->d_lockref.count++;
2023 + vx_dentry_inc(dentry);
2025 spin_unlock(&dentry->d_lock);
2027 @@ -3568,6 +3581,7 @@ static enum d_walk_ret d_genocide_kill(v
2028 if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
2029 dentry->d_flags |= DCACHE_GENOCIDE;
2030 dentry->d_lockref.count--;
2031 + vx_dentry_dec(dentry);
2034 return D_WALK_CONTINUE;
2035 diff -NurpP --minimal linux-4.9.217/fs/devpts/inode.c linux-4.9.217-vs2.3.9.12/fs/devpts/inode.c
2036 --- linux-4.9.217/fs/devpts/inode.c 2020-03-27 00:51:18.080537825 +0000
2037 +++ linux-4.9.217-vs2.3.9.12/fs/devpts/inode.c 2019-10-05 14:58:45.150315864 +0000
2039 #include <linux/parser.h>
2040 #include <linux/fsnotify.h>
2041 #include <linux/seq_file.h>
2042 +#include <linux/vs_base.h>
2044 #define DEVPTS_DEFAULT_MODE 0600
2047 #define DEVPTS_DEFAULT_PTMX_MODE 0000
2048 #define PTMX_MINOR 2
2050 +static int devpts_permission(struct inode *inode, int mask)
2052 + int ret = -EACCES;
2054 + /* devpts is xid tagged */
2055 + if (vx_check((vxid_t)i_tag_read(inode), VS_WATCH_P | VS_IDENT))
2056 + ret = generic_permission(inode, mask);
2060 +static struct inode_operations devpts_file_inode_operations = {
2061 + .permission = devpts_permission,
2066 * sysctl support for setting limits on the number of Unix98 ptys allocated.
2067 * Otherwise one can eat up all kernel memory by opening /dev/ptmx repeatedly.
2068 @@ -363,6 +379,34 @@ static int devpts_show_options(struct se
2072 +static int devpts_filter(struct dentry *de)
2076 + /* devpts is xid tagged */
2077 + if (de && de->d_inode)
2078 + xid = (vxid_t)i_tag_read(de->d_inode);
2079 +#ifdef CONFIG_VSERVER_WARN_DEVPTS
2081 + vxwprintk_task(1, "devpts " VS_Q("%.*s") " without inode.",
2082 + de->d_name.len, de->d_name.name);
2084 + return vx_check(xid, VS_WATCH_P | VS_IDENT);
2087 +static int devpts_readdir(struct file * filp, struct dir_context *ctx)
2089 + return dcache_readdir_filter(filp, ctx, devpts_filter);
2092 +static struct file_operations devpts_dir_operations = {
2093 + .open = dcache_dir_open,
2094 + .release = dcache_dir_close,
2095 + .llseek = dcache_dir_lseek,
2096 + .read = generic_read_dir,
2097 + .iterate = devpts_readdir,
2100 static const struct super_operations devpts_sops = {
2101 .statfs = simple_statfs,
2102 .remount_fs = devpts_remount,
2103 @@ -416,8 +460,10 @@ devpts_fill_super(struct super_block *s,
2104 inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
2105 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
2106 inode->i_op = &simple_dir_inode_operations;
2107 - inode->i_fop = &simple_dir_operations;
2108 + inode->i_fop = &devpts_dir_operations;
2109 set_nlink(inode, 2);
2110 + /* devpts is xid tagged */
2111 + i_tag_write(inode, (vtag_t)vx_current_xid());
2113 s->s_root = d_make_root(inode);
2115 @@ -543,6 +589,9 @@ struct dentry *devpts_pty_new(struct pts
2116 inode->i_gid = opts->setgid ? opts->gid : current_fsgid();
2117 inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
2118 init_special_inode(inode, S_IFCHR|opts->mode, MKDEV(UNIX98_PTY_SLAVE_MAJOR, index));
2119 + /* devpts is xid tagged */
2120 + i_tag_write(inode, (vtag_t)vx_current_xid());
2121 + inode->i_op = &devpts_file_inode_operations;
2123 sprintf(s, "%d", index);
2125 diff -NurpP --minimal linux-4.9.217/fs/ext2/balloc.c linux-4.9.217-vs2.3.9.12/fs/ext2/balloc.c
2126 --- linux-4.9.217/fs/ext2/balloc.c 2016-12-11 19:17:54.000000000 +0000
2127 +++ linux-4.9.217-vs2.3.9.12/fs/ext2/balloc.c 2018-10-20 04:58:13.000000000 +0000
2128 @@ -693,7 +693,6 @@ ext2_try_to_allocate(struct super_block
2130 end = EXT2_BLOCKS_PER_GROUP(sb);
2133 BUG_ON(start > EXT2_BLOCKS_PER_GROUP(sb));
2136 diff -NurpP --minimal linux-4.9.217/fs/ext2/ext2.h linux-4.9.217-vs2.3.9.12/fs/ext2/ext2.h
2137 --- linux-4.9.217/fs/ext2/ext2.h 2016-12-11 19:17:54.000000000 +0000
2138 +++ linux-4.9.217-vs2.3.9.12/fs/ext2/ext2.h 2018-10-20 04:58:13.000000000 +0000
2139 @@ -247,8 +247,12 @@ struct ext2_group_desc
2140 #define EXT2_NOTAIL_FL FS_NOTAIL_FL /* file tail should not be merged */
2141 #define EXT2_DIRSYNC_FL FS_DIRSYNC_FL /* dirsync behaviour (directories only) */
2142 #define EXT2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/
2143 +#define EXT2_IXUNLINK_FL FS_IXUNLINK_FL /* Immutable invert on unlink */
2144 #define EXT2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */
2146 +#define EXT2_BARRIER_FL FS_BARRIER_FL /* Barrier for chroot() */
2147 +#define EXT2_COW_FL FS_COW_FL /* Copy on Write marker */
2149 #define EXT2_FL_USER_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */
2150 #define EXT2_FL_USER_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */
2152 @@ -332,7 +336,8 @@ struct ext2_inode {
2154 __le16 l_i_uid_high; /* these 2 fields */
2155 __le16 l_i_gid_high; /* were reserved2[0] */
2156 - __u32 l_i_reserved2;
2157 + __le16 l_i_tag; /* Context Tag */
2158 + __u16 l_i_reserved2;
2161 __u8 h_i_frag; /* Fragment number */
2162 @@ -360,6 +365,7 @@ struct ext2_inode {
2163 #define i_gid_low i_gid
2164 #define i_uid_high osd2.linux2.l_i_uid_high
2165 #define i_gid_high osd2.linux2.l_i_gid_high
2166 +#define i_raw_tag osd2.linux2.l_i_tag
2167 #define i_reserved2 osd2.linux2.l_i_reserved2
2170 @@ -393,6 +399,7 @@ struct ext2_inode {
2172 #define EXT2_MOUNT_DAX 0
2174 +#define EXT2_MOUNT_TAGGED 0x200000 /* Enable Context Tags */
2177 #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt
2178 @@ -782,6 +789,7 @@ extern void ext2_set_inode_flags(struct
2179 extern void ext2_get_inode_flags(struct ext2_inode_info *);
2180 extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2181 u64 start, u64 len);
2182 +extern int ext2_sync_flags(struct inode *, int, int);
2185 extern long ext2_ioctl(struct file *, unsigned int, unsigned long);
2186 diff -NurpP --minimal linux-4.9.217/fs/ext2/file.c linux-4.9.217-vs2.3.9.12/fs/ext2/file.c
2187 --- linux-4.9.217/fs/ext2/file.c 2016-12-11 19:17:54.000000000 +0000
2188 +++ linux-4.9.217-vs2.3.9.12/fs/ext2/file.c 2018-10-20 04:58:13.000000000 +0000
2189 @@ -247,4 +247,5 @@ const struct inode_operations ext2_file_
2190 .get_acl = ext2_get_acl,
2191 .set_acl = ext2_set_acl,
2192 .fiemap = ext2_fiemap,
2193 + .sync_flags = ext2_sync_flags,
2195 diff -NurpP --minimal linux-4.9.217/fs/ext2/ialloc.c linux-4.9.217-vs2.3.9.12/fs/ext2/ialloc.c
2196 --- linux-4.9.217/fs/ext2/ialloc.c 2016-12-11 19:17:54.000000000 +0000
2197 +++ linux-4.9.217-vs2.3.9.12/fs/ext2/ialloc.c 2018-10-20 04:58:13.000000000 +0000
2199 #include <linux/backing-dev.h>
2200 #include <linux/buffer_head.h>
2201 #include <linux/random.h>
2202 +#include <linux/vs_tag.h>
2206 @@ -551,6 +552,7 @@ got:
2207 inode->i_mode = mode;
2208 inode->i_uid = current_fsuid();
2209 inode->i_gid = dir->i_gid;
2210 + i_tag_write(inode, dx_current_fstag(sb));
2212 inode_init_owner(inode, dir, mode);
2214 diff -NurpP --minimal linux-4.9.217/fs/ext2/inode.c linux-4.9.217-vs2.3.9.12/fs/ext2/inode.c
2215 --- linux-4.9.217/fs/ext2/inode.c 2020-03-27 00:51:18.820526161 +0000
2216 +++ linux-4.9.217-vs2.3.9.12/fs/ext2/inode.c 2019-12-25 15:37:51.438438255 +0000
2218 #include <linux/iomap.h>
2219 #include <linux/namei.h>
2220 #include <linux/uio.h>
2221 +#include <linux/vs_tag.h>
2225 @@ -1360,39 +1361,61 @@ void ext2_set_inode_flags(struct inode *
2227 unsigned int flags = EXT2_I(inode)->i_flags;
2229 - inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME |
2230 - S_DIRSYNC | S_DAX);
2231 + inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK | S_DAX |
2232 + S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
2234 + if (flags & EXT2_IMMUTABLE_FL)
2235 + inode->i_flags |= S_IMMUTABLE;
2236 + if (flags & EXT2_IXUNLINK_FL)
2237 + inode->i_flags |= S_IXUNLINK;
2238 if (flags & EXT2_SYNC_FL)
2239 inode->i_flags |= S_SYNC;
2240 if (flags & EXT2_APPEND_FL)
2241 inode->i_flags |= S_APPEND;
2242 - if (flags & EXT2_IMMUTABLE_FL)
2243 - inode->i_flags |= S_IMMUTABLE;
2244 if (flags & EXT2_NOATIME_FL)
2245 inode->i_flags |= S_NOATIME;
2246 if (flags & EXT2_DIRSYNC_FL)
2247 inode->i_flags |= S_DIRSYNC;
2248 if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
2249 inode->i_flags |= S_DAX;
2251 + inode->i_vflags &= ~(V_BARRIER | V_COW);
2253 + if (flags & EXT2_BARRIER_FL)
2254 + inode->i_vflags |= V_BARRIER;
2255 + if (flags & EXT2_COW_FL)
2256 + inode->i_vflags |= V_COW;
2259 /* Propagate flags from i_flags to EXT2_I(inode)->i_flags */
2260 void ext2_get_inode_flags(struct ext2_inode_info *ei)
2262 unsigned int flags = ei->vfs_inode.i_flags;
2263 + unsigned int vflags = ei->vfs_inode.i_vflags;
2265 + ei->i_flags &= ~(EXT2_SYNC_FL | EXT2_APPEND_FL |
2266 + EXT2_IMMUTABLE_FL | EXT2_IXUNLINK_FL |
2267 + EXT2_NOATIME_FL | EXT2_DIRSYNC_FL |
2268 + EXT2_BARRIER_FL | EXT2_COW_FL);
2270 + if (flags & S_IMMUTABLE)
2271 + ei->i_flags |= EXT2_IMMUTABLE_FL;
2272 + if (flags & S_IXUNLINK)
2273 + ei->i_flags |= EXT2_IXUNLINK_FL;
2275 - ei->i_flags &= ~(EXT2_SYNC_FL|EXT2_APPEND_FL|
2276 - EXT2_IMMUTABLE_FL|EXT2_NOATIME_FL|EXT2_DIRSYNC_FL);
2278 ei->i_flags |= EXT2_SYNC_FL;
2279 if (flags & S_APPEND)
2280 ei->i_flags |= EXT2_APPEND_FL;
2281 - if (flags & S_IMMUTABLE)
2282 - ei->i_flags |= EXT2_IMMUTABLE_FL;
2283 if (flags & S_NOATIME)
2284 ei->i_flags |= EXT2_NOATIME_FL;
2285 if (flags & S_DIRSYNC)
2286 ei->i_flags |= EXT2_DIRSYNC_FL;
2288 + if (vflags & V_BARRIER)
2289 + ei->i_flags |= EXT2_BARRIER_FL;
2290 + if (vflags & V_COW)
2291 + ei->i_flags |= EXT2_COW_FL;
2294 struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
2295 @@ -1428,8 +1451,10 @@ struct inode *ext2_iget (struct super_bl
2296 i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
2297 i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
2299 - i_uid_write(inode, i_uid);
2300 - i_gid_write(inode, i_gid);
2301 + i_uid_write(inode, INOTAG_UID(DX_TAG(inode), i_uid, i_gid));
2302 + i_gid_write(inode, INOTAG_GID(DX_TAG(inode), i_uid, i_gid));
2303 + i_tag_write(inode, INOTAG_TAG(DX_TAG(inode), i_uid, i_gid,
2304 + le16_to_cpu(raw_inode->i_raw_tag)));
2305 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
2306 inode->i_size = le32_to_cpu(raw_inode->i_size);
2307 inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
2308 @@ -1536,8 +1561,10 @@ static int __ext2_write_inode(struct ino
2309 struct ext2_inode_info *ei = EXT2_I(inode);
2310 struct super_block *sb = inode->i_sb;
2311 ino_t ino = inode->i_ino;
2312 - uid_t uid = i_uid_read(inode);
2313 - gid_t gid = i_gid_read(inode);
2314 + uid_t uid = from_kuid(&init_user_ns,
2315 + TAGINO_KUID(DX_TAG(inode), inode->i_uid, inode->i_tag));
2316 + gid_t gid = from_kgid(&init_user_ns,
2317 + TAGINO_KGID(DX_TAG(inode), inode->i_gid, inode->i_tag));
2318 struct buffer_head * bh;
2319 struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh);
2321 @@ -1573,6 +1600,9 @@ static int __ext2_write_inode(struct ino
2322 raw_inode->i_uid_high = 0;
2323 raw_inode->i_gid_high = 0;
2325 +#ifdef CONFIG_TAGGING_INTERN
2326 + raw_inode->i_raw_tag = cpu_to_le16(i_tag_read(inode));
2328 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
2329 raw_inode->i_size = cpu_to_le32(inode->i_size);
2330 raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
2331 @@ -1656,7 +1686,8 @@ int ext2_setattr(struct dentry *dentry,
2334 if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) ||
2335 - (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) {
2336 + (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid)) ||
2337 + (iattr->ia_valid & ATTR_TAG && !tag_eq(iattr->ia_tag, inode->i_tag))) {
2338 error = dquot_transfer(inode, iattr);
2341 diff -NurpP --minimal linux-4.9.217/fs/ext2/ioctl.c linux-4.9.217-vs2.3.9.12/fs/ext2/ioctl.c
2342 --- linux-4.9.217/fs/ext2/ioctl.c 2016-12-11 19:17:54.000000000 +0000
2343 +++ linux-4.9.217-vs2.3.9.12/fs/ext2/ioctl.c 2018-10-20 04:58:13.000000000 +0000
2345 #include <asm/uaccess.h>
2348 +int ext2_sync_flags(struct inode *inode, int flags, int vflags)
2350 + inode->i_flags = flags;
2351 + inode->i_vflags = vflags;
2352 + ext2_get_inode_flags(EXT2_I(inode));
2353 + inode->i_ctime = CURRENT_TIME_SEC;
2354 + mark_inode_dirty(inode);
2358 long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
2360 struct inode *inode = file_inode(filp);
2361 @@ -51,6 +61,11 @@ long ext2_ioctl(struct file *filp, unsig
2363 flags = ext2_mask_flags(inode->i_mode, flags);
2365 + if (IS_BARRIER(inode)) {
2366 + vxwprintk_task(1, "messing with the barrier.");
2371 /* Is it quota file? Do not allow user to mess with it */
2372 if (IS_NOQUOTA(inode)) {
2373 @@ -66,7 +81,9 @@ long ext2_ioctl(struct file *filp, unsig
2375 * This test looks nicer. Thanks to Pauline Middelink
2377 - if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) {
2378 + if ((oldflags & EXT2_IMMUTABLE_FL) ||
2379 + ((flags ^ oldflags) & (EXT2_APPEND_FL |
2380 + EXT2_IMMUTABLE_FL | EXT2_IXUNLINK_FL))) {
2381 if (!capable(CAP_LINUX_IMMUTABLE)) {
2382 inode_unlock(inode);
2384 @@ -74,7 +91,7 @@ long ext2_ioctl(struct file *filp, unsig
2388 - flags = flags & EXT2_FL_USER_MODIFIABLE;
2389 + flags &= EXT2_FL_USER_MODIFIABLE;
2390 flags |= oldflags & ~EXT2_FL_USER_MODIFIABLE;
2391 ei->i_flags = flags;
2393 diff -NurpP --minimal linux-4.9.217/fs/ext2/namei.c linux-4.9.217-vs2.3.9.12/fs/ext2/namei.c
2394 --- linux-4.9.217/fs/ext2/namei.c 2020-03-27 00:51:18.830526005 +0000
2395 +++ linux-4.9.217-vs2.3.9.12/fs/ext2/namei.c 2018-10-20 04:58:13.000000000 +0000
2398 #include <linux/pagemap.h>
2399 #include <linux/quotaops.h>
2400 +#include <linux/vs_tag.h>
2404 @@ -71,6 +72,7 @@ static struct dentry *ext2_lookup(struct
2405 (unsigned long) ino);
2406 return ERR_PTR(-EIO);
2408 + dx_propagate_tag(nd, inode);
2410 return d_splice_alias(inode, dentry);
2412 @@ -443,6 +445,7 @@ const struct inode_operations ext2_speci
2413 .listxattr = ext2_listxattr,
2415 .setattr = ext2_setattr,
2416 + .sync_flags = ext2_sync_flags,
2417 .get_acl = ext2_get_acl,
2418 .set_acl = ext2_set_acl,
2420 diff -NurpP --minimal linux-4.9.217/fs/ext2/super.c linux-4.9.217-vs2.3.9.12/fs/ext2/super.c
2421 --- linux-4.9.217/fs/ext2/super.c 2020-03-27 00:51:18.940524270 +0000
2422 +++ linux-4.9.217-vs2.3.9.12/fs/ext2/super.c 2020-04-01 09:40:28.405468940 +0000
2423 @@ -411,7 +411,8 @@ enum {
2424 Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug,
2425 Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr,
2426 Opt_acl, Opt_noacl, Opt_xip, Opt_dax, Opt_ignore, Opt_err, Opt_quota,
2427 - Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation
2428 + Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation,
2429 + Opt_tag, Opt_notag, Opt_tagid
2432 static const match_table_t tokens = {
2433 @@ -439,6 +440,9 @@ static const match_table_t tokens = {
2435 {Opt_noacl, "noacl"},
2438 + {Opt_notag, "notag"},
2439 + {Opt_tagid, "tagid=%u"},
2441 {Opt_grpquota, "grpquota"},
2442 {Opt_ignore, "noquota"},
2443 @@ -523,6 +527,20 @@ static int parse_options(char *options,
2445 set_opt (sbi->s_mount_opt, NO_UID32);
2447 +#ifndef CONFIG_TAGGING_NONE
2449 + set_opt (sbi->s_mount_opt, TAGGED);
2452 + clear_opt (sbi->s_mount_opt, TAGGED);
2455 +#ifdef CONFIG_PROPAGATE
2458 + set_opt (sbi->s_mount_opt, TAGGED);
2462 clear_opt (sbi->s_mount_opt, CHECK);
2464 @@ -898,6 +916,8 @@ static int ext2_fill_super(struct super_
2465 if (!parse_options((char *) data, sb))
2468 + if (EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_TAGGED)
2469 + sb->s_flags |= MS_TAGGED;
2470 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2471 ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
2473 @@ -1311,6 +1331,14 @@ static int ext2_remount (struct super_bl
2478 + if ((sbi->s_mount_opt & EXT2_MOUNT_TAGGED) &&
2479 + !(sb->s_flags & MS_TAGGED)) {
2480 + printk("EXT2-fs: %s: tagging not permitted on remount.\n",
2483 + goto restore_opts;
2486 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2487 ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
2488 diff -NurpP --minimal linux-4.9.217/fs/ext4/ext4.h linux-4.9.217-vs2.3.9.12/fs/ext4/ext4.h
2489 --- linux-4.9.217/fs/ext4/ext4.h 2020-03-27 00:51:19.290518755 +0000
2490 +++ linux-4.9.217-vs2.3.9.12/fs/ext4/ext4.h 2020-04-01 09:40:28.435468445 +0000
2491 @@ -392,8 +392,11 @@ struct flex_groups {
2492 #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
2493 #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */
2494 #define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */
2495 +#define EXT4_BARRIER_FL 0x04000000 /* Barrier for chroot() */
2496 +#define EXT4_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */
2497 #define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
2498 #define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
2499 +#define EXT4_COW_FL 0x40000000 /* Copy on Write marker */
2500 #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
2502 #define EXT4_FL_USER_VISIBLE 0x304BDFFF /* User visible flags */
2503 @@ -735,7 +738,7 @@ struct ext4_inode {
2504 __le16 l_i_uid_high; /* these 2 fields */
2505 __le16 l_i_gid_high; /* were reserved2[0] */
2506 __le16 l_i_checksum_lo;/* crc32c(uuid+inum+inode) LE */
2507 - __le16 l_i_reserved;
2508 + __le16 l_i_tag; /* Context Tag */
2511 __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
2512 @@ -892,6 +895,7 @@ do { \
2513 #define i_gid_low i_gid
2514 #define i_uid_high osd2.linux2.l_i_uid_high
2515 #define i_gid_high osd2.linux2.l_i_gid_high
2516 +#define i_raw_tag osd2.linux2.l_i_tag
2517 #define i_checksum_lo osd2.linux2.l_i_checksum_lo
2519 #elif defined(__GNU__)
2520 @@ -1133,6 +1137,7 @@ struct ext4_inode_info {
2521 #define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */
2522 #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
2523 #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
2524 +#define EXT4_MOUNT_TAGGED 0x2000000 /* Enable Context Tags */
2525 #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
2526 #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
2527 #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
2528 @@ -2515,6 +2520,7 @@ extern int ext4_punch_hole(struct inode
2529 extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
2530 extern void ext4_set_inode_flags(struct inode *);
2531 extern void ext4_get_inode_flags(struct ext4_inode_info *);
2532 +extern int ext4_sync_flags(struct inode *, int, int);
2533 extern int ext4_alloc_da_blocks(struct inode *inode);
2534 extern void ext4_set_aops(struct inode *inode);
2535 extern int ext4_writepage_trans_blocks(struct inode *);
2536 diff -NurpP --minimal linux-4.9.217/fs/ext4/file.c linux-4.9.217-vs2.3.9.12/fs/ext4/file.c
2537 --- linux-4.9.217/fs/ext4/file.c 2020-03-27 00:51:19.360517651 +0000
2538 +++ linux-4.9.217-vs2.3.9.12/fs/ext4/file.c 2019-10-05 14:58:45.150315864 +0000
2539 @@ -698,5 +698,6 @@ const struct inode_operations ext4_file_
2540 .get_acl = ext4_get_acl,
2541 .set_acl = ext4_set_acl,
2542 .fiemap = ext4_fiemap,
2543 + .sync_flags = ext4_sync_flags,
2546 diff -NurpP --minimal linux-4.9.217/fs/ext4/ialloc.c linux-4.9.217-vs2.3.9.12/fs/ext4/ialloc.c
2547 --- linux-4.9.217/fs/ext4/ialloc.c 2020-03-27 00:51:19.360517651 +0000
2548 +++ linux-4.9.217-vs2.3.9.12/fs/ext4/ialloc.c 2020-04-01 09:40:28.435468445 +0000
2550 #include <linux/random.h>
2551 #include <linux/bitops.h>
2552 #include <linux/blkdev.h>
2553 +#include <linux/vs_tag.h>
2554 #include <asm/byteorder.h>
2557 @@ -780,6 +781,7 @@ struct inode *__ext4_new_inode(handle_t
2558 inode->i_mode = mode;
2559 inode->i_uid = current_fsuid();
2560 inode->i_gid = dir->i_gid;
2561 + i_tag_write(inode, dx_current_fstag(sb));
2563 inode_init_owner(inode, dir, mode);
2565 diff -NurpP --minimal linux-4.9.217/fs/ext4/inode.c linux-4.9.217-vs2.3.9.12/fs/ext4/inode.c
2566 --- linux-4.9.217/fs/ext4/inode.c 2020-03-27 00:51:19.610513714 +0000
2567 +++ linux-4.9.217-vs2.3.9.12/fs/ext4/inode.c 2020-04-01 09:40:28.445468277 +0000
2569 #include <linux/printk.h>
2570 #include <linux/slab.h>
2571 #include <linux/bitops.h>
2572 +#include <linux/vs_tag.h>
2574 #include "ext4_jbd2.h"
2576 @@ -4390,12 +4391,15 @@ void ext4_set_inode_flags(struct inode *
2577 unsigned int flags = EXT4_I(inode)->i_flags;
2578 unsigned int new_fl = 0;
2580 + if (flags & EXT4_IMMUTABLE_FL)
2581 + new_fl |= S_IMMUTABLE;
2582 + if (flags & EXT4_IXUNLINK_FL)
2583 + new_fl |= S_IXUNLINK;
2585 if (flags & EXT4_SYNC_FL)
2587 if (flags & EXT4_APPEND_FL)
2589 - if (flags & EXT4_IMMUTABLE_FL)
2590 - new_fl |= S_IMMUTABLE;
2591 if (flags & EXT4_NOATIME_FL)
2592 new_fl |= S_NOATIME;
2593 if (flags & EXT4_DIRSYNC_FL)
2594 @@ -4403,31 +4407,52 @@ void ext4_set_inode_flags(struct inode *
2595 if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
2597 inode_set_flags(inode, new_fl,
2598 - S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
2599 + S_IXUNLINK | S_IMMUTABLE | S_DAX |
2600 + S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
2603 + if (flags & EXT4_BARRIER_FL)
2604 + new_fl |= V_BARRIER;
2605 + if (flags & EXT4_COW_FL)
2608 + set_mask_bits(&inode->i_vflags,
2609 + V_BARRIER | V_COW, new_fl);
2612 /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
2613 void ext4_get_inode_flags(struct ext4_inode_info *ei)
2615 - unsigned int vfs_fl;
2616 + unsigned int vfs_fl, vfs_vf;
2617 unsigned long old_fl, new_fl;
2620 vfs_fl = ei->vfs_inode.i_flags;
2621 + vfs_vf = ei->vfs_inode.i_vflags;
2622 old_fl = ei->i_flags;
2623 new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
2624 EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|
2626 + EXT4_DIRSYNC_FL|EXT4_BARRIER_FL|
2629 + if (vfs_fl & S_IMMUTABLE)
2630 + new_fl |= EXT4_IMMUTABLE_FL;
2631 + if (vfs_fl & S_IXUNLINK)
2632 + new_fl |= EXT4_IXUNLINK_FL;
2634 if (vfs_fl & S_SYNC)
2635 new_fl |= EXT4_SYNC_FL;
2636 if (vfs_fl & S_APPEND)
2637 new_fl |= EXT4_APPEND_FL;
2638 - if (vfs_fl & S_IMMUTABLE)
2639 - new_fl |= EXT4_IMMUTABLE_FL;
2640 if (vfs_fl & S_NOATIME)
2641 new_fl |= EXT4_NOATIME_FL;
2642 if (vfs_fl & S_DIRSYNC)
2643 new_fl |= EXT4_DIRSYNC_FL;
2645 + if (vfs_vf & V_BARRIER)
2646 + new_fl |= EXT4_BARRIER_FL;
2647 + if (vfs_vf & V_COW)
2648 + new_fl |= EXT4_COW_FL;
2649 } while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl);
2652 @@ -4553,8 +4578,10 @@ struct inode *ext4_iget(struct super_blo
2653 i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
2654 i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
2656 - i_uid_write(inode, i_uid);
2657 - i_gid_write(inode, i_gid);
2658 + i_uid_write(inode, INOTAG_UID(DX_TAG(inode), i_uid, i_gid));
2659 + i_gid_write(inode, INOTAG_GID(DX_TAG(inode), i_uid, i_gid));
2660 + i_tag_write(inode, INOTAG_TAG(DX_TAG(inode), i_uid, i_gid,
2661 + le16_to_cpu(raw_inode->i_raw_tag)));
2662 ei->i_projid = make_kprojid(&init_user_ns, i_projid);
2663 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
2665 @@ -4882,8 +4909,10 @@ static int ext4_do_update_inode(handle_t
2667 ext4_get_inode_flags(ei);
2668 raw_inode->i_mode = cpu_to_le16(inode->i_mode);
2669 - i_uid = i_uid_read(inode);
2670 - i_gid = i_gid_read(inode);
2671 + i_uid = from_kuid(&init_user_ns,
2672 + TAGINO_KUID(DX_TAG(inode), inode->i_uid, inode->i_tag));
2673 + i_gid = from_kgid(&init_user_ns,
2674 + TAGINO_KGID(DX_TAG(inode), inode->i_gid, inode->i_tag));
2675 i_projid = from_kprojid(&init_user_ns, ei->i_projid);
2676 if (!(test_opt(inode->i_sb, NO_UID32))) {
2677 raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid));
2678 @@ -4907,6 +4936,9 @@ static int ext4_do_update_inode(handle_t
2679 raw_inode->i_uid_high = 0;
2680 raw_inode->i_gid_high = 0;
2682 +#ifdef CONFIG_TAGGING_INTERN
2683 + raw_inode->i_raw_tag = cpu_to_le16(i_tag_read(inode));
2685 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
2687 EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
2688 @@ -5164,7 +5196,8 @@ int ext4_setattr(struct dentry *dentry,
2691 if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) ||
2692 - (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
2693 + (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid)) ||
2694 + (ia_valid & ATTR_TAG && !tag_eq(attr->ia_tag, inode->i_tag))) {
2697 /* (user+group)*(old+new) structure, inode write (sb,
2698 @@ -5187,6 +5220,8 @@ int ext4_setattr(struct dentry *dentry,
2699 inode->i_uid = attr->ia_uid;
2700 if (attr->ia_valid & ATTR_GID)
2701 inode->i_gid = attr->ia_gid;
2702 + if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode))
2703 + inode->i_tag = attr->ia_tag;
2704 error = ext4_mark_inode_dirty(handle, inode);
2705 ext4_journal_stop(handle);
2707 diff -NurpP --minimal linux-4.9.217/fs/ext4/ioctl.c linux-4.9.217-vs2.3.9.12/fs/ext4/ioctl.c
2708 --- linux-4.9.217/fs/ext4/ioctl.c 2020-03-27 00:51:19.620513553 +0000
2709 +++ linux-4.9.217-vs2.3.9.12/fs/ext4/ioctl.c 2019-10-05 14:58:45.150315864 +0000
2711 #include <linux/file.h>
2712 #include <linux/quotaops.h>
2713 #include <linux/uuid.h>
2714 +#include <linux/vs_tag.h>
2715 #include <asm/uaccess.h>
2716 #include "ext4_jbd2.h"
2718 @@ -226,7 +227,9 @@ static int ext4_ioctl_setflags(struct in
2720 * This test looks nicer. Thanks to Pauline Middelink
2722 - if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
2723 + if ((oldflags & EXT4_IMMUTABLE_FL) ||
2724 + ((flags ^ oldflags) & (EXT4_APPEND_FL |
2725 + EXT4_IMMUTABLE_FL | EXT4_IXUNLINK_FL))) {
2726 if (!capable(CAP_LINUX_IMMUTABLE))
2729 @@ -432,6 +435,33 @@ static inline unsigned long ext4_xflags_
2733 +int ext4_sync_flags(struct inode *inode, int flags, int vflags)
2735 + handle_t *handle = NULL;
2736 + struct ext4_iloc iloc;
2739 + handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
2740 + if (IS_ERR(handle))
2741 + return PTR_ERR(handle);
2743 + if (IS_SYNC(inode))
2744 + ext4_handle_sync(handle);
2745 + err = ext4_reserve_inode_write(handle, inode, &iloc);
2749 + inode->i_flags = flags;
2750 + inode->i_vflags = vflags;
2751 + ext4_get_inode_flags(EXT4_I(inode));
2752 + inode->i_ctime = ext4_current_time(inode);
2754 + err = ext4_mark_iloc_dirty(handle, inode, &iloc);
2756 + ext4_journal_stop(handle);
2760 long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
2762 struct inode *inode = file_inode(filp);
2763 @@ -461,6 +491,11 @@ long ext4_ioctl(struct file *filp, unsig
2765 flags = ext4_mask_flags(inode->i_mode, flags);
2767 + if (IS_BARRIER(inode)) {
2768 + vxwprintk_task(1, "messing with the barrier.");
2773 err = ext4_ioctl_setflags(inode, flags);
2774 inode_unlock(inode);
2775 diff -NurpP --minimal linux-4.9.217/fs/ext4/namei.c linux-4.9.217-vs2.3.9.12/fs/ext4/namei.c
2776 --- linux-4.9.217/fs/ext4/namei.c 2020-03-27 00:51:19.650513081 +0000
2777 +++ linux-4.9.217-vs2.3.9.12/fs/ext4/namei.c 2020-04-01 09:40:28.445468277 +0000
2779 #include <linux/quotaops.h>
2780 #include <linux/buffer_head.h>
2781 #include <linux/bio.h>
2782 +#include <linux/vs_tag.h>
2784 #include "ext4_jbd2.h"
2786 @@ -1475,6 +1476,7 @@ restart:
2787 REQ_META | REQ_PRIO,
2790 + dx_propagate_tag(nd, inode);
2792 if ((bh = bh_use[ra_ptr++]) == NULL)
2794 @@ -3954,6 +3956,7 @@ const struct inode_operations ext4_dir_i
2795 .get_acl = ext4_get_acl,
2796 .set_acl = ext4_set_acl,
2797 .fiemap = ext4_fiemap,
2798 + .sync_flags = ext4_sync_flags,
2801 const struct inode_operations ext4_special_inode_operations = {
2802 diff -NurpP --minimal linux-4.9.217/fs/ext4/super.c linux-4.9.217-vs2.3.9.12/fs/ext4/super.c
2803 --- linux-4.9.217/fs/ext4/super.c 2020-03-27 00:51:19.660512926 +0000
2804 +++ linux-4.9.217-vs2.3.9.12/fs/ext4/super.c 2020-04-01 09:40:28.445468277 +0000
2805 @@ -1302,6 +1302,7 @@ enum {
2806 Opt_dioread_nolock, Opt_dioread_lock,
2807 Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
2808 Opt_max_dir_size_kb, Opt_nojournal_checksum,
2809 + Opt_tag, Opt_notag, Opt_tagid
2812 static const match_table_t tokens = {
2813 @@ -1388,6 +1389,9 @@ static const match_table_t tokens = {
2814 {Opt_removed, "reservation"}, /* mount option from ext2/3 */
2815 {Opt_removed, "noreservation"}, /* mount option from ext2/3 */
2816 {Opt_removed, "journal=%u"}, /* mount option from ext2/3 */
2818 + {Opt_notag, "notag"},
2819 + {Opt_tagid, "tagid=%u"},
2823 @@ -1633,6 +1637,20 @@ static int handle_mount_opt(struct super
2824 case Opt_nolazytime:
2825 sb->s_flags &= ~MS_LAZYTIME;
2827 +#ifndef CONFIG_TAGGING_NONE
2829 + set_opt(sb, TAGGED);
2832 + clear_opt(sb, TAGGED);
2835 +#ifdef CONFIG_PROPAGATE
2838 + set_opt(sb, TAGGED);
2843 for (m = ext4_mount_opts; m->token != Opt_err; m++)
2844 @@ -3652,6 +3670,9 @@ static int ext4_fill_super(struct super_
2845 sb->s_iflags |= SB_I_CGROUPWB;
2848 + if (EXT4_SB(sb)->s_mount_opt & EXT4_MOUNT_TAGGED)
2849 + sb->s_flags |= MS_TAGGED;
2851 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2852 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
2854 @@ -5045,6 +5066,14 @@ static int ext4_remount(struct super_blo
2855 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
2856 ext4_abort(sb, "Abort forced by user");
2858 + if ((sbi->s_mount_opt & EXT4_MOUNT_TAGGED) &&
2859 + !(sb->s_flags & MS_TAGGED)) {
2860 + printk("EXT4-fs: %s: tagging not permitted on remount.\n",
2863 + goto restore_opts;
2866 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2867 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
2869 diff -NurpP --minimal linux-4.9.217/fs/fcntl.c linux-4.9.217-vs2.3.9.12/fs/fcntl.c
2870 --- linux-4.9.217/fs/fcntl.c 2020-03-27 00:51:19.840510086 +0000
2871 +++ linux-4.9.217-vs2.3.9.12/fs/fcntl.c 2018-10-20 04:58:13.000000000 +0000
2873 #include <linux/pid_namespace.h>
2874 #include <linux/user_namespace.h>
2875 #include <linux/shmem_fs.h>
2876 +#include <linux/vs_limit.h>
2878 #include <asm/poll.h>
2879 #include <asm/siginfo.h>
2880 @@ -390,6 +391,8 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, f
2884 + if (!vx_files_avail(1))
2887 if (unlikely(f.file->f_mode & FMODE_PATH)) {
2888 if (!check_fcntl_cmd(cmd))
2889 diff -NurpP --minimal linux-4.9.217/fs/file.c linux-4.9.217-vs2.3.9.12/fs/file.c
2890 --- linux-4.9.217/fs/file.c 2020-03-27 00:51:19.860509770 +0000
2891 +++ linux-4.9.217-vs2.3.9.12/fs/file.c 2019-10-05 14:58:45.150315864 +0000
2893 #include <linux/spinlock.h>
2894 #include <linux/rcupdate.h>
2895 #include <linux/workqueue.h>
2896 +#include <linux/vs_limit.h>
2898 unsigned int sysctl_nr_open __read_mostly = 1024*1024;
2899 unsigned int sysctl_nr_open_min = BITS_PER_LONG;
2900 @@ -357,6 +358,8 @@ struct files_struct *dup_fd(struct files
2901 struct file *f = *old_fds++;
2904 + /* TODO: sum it first for check and performance */
2905 + vx_openfd_inc(open_files - i);
2908 * The fd may be claimed in the fd bitmap but not yet
2909 @@ -406,9 +409,11 @@ static struct fdtable *close_files(struc
2910 filp_close(file, files);
2911 cond_resched_rcu_qs();
2921 @@ -540,6 +545,7 @@ repeat:
2923 __clear_close_on_exec(fd, fdt);
2925 + vx_openfd_inc(fd);
2928 if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
2929 @@ -570,6 +576,7 @@ static void __put_unused_fd(struct files
2930 __clear_open_fd(fd, fdt);
2931 if (fd < files->next_fd)
2932 files->next_fd = fd;
2933 + vx_openfd_dec(fd);
2936 void put_unused_fd(unsigned int fd)
2937 @@ -857,6 +864,8 @@ __releases(&files->file_lock)
2940 filp_close(tofree, files);
2942 + vx_openfd_inc(fd); /* fd was unused */
2946 diff -NurpP --minimal linux-4.9.217/fs/file_table.c linux-4.9.217-vs2.3.9.12/fs/file_table.c
2947 --- linux-4.9.217/fs/file_table.c 2016-12-11 19:17:54.000000000 +0000
2948 +++ linux-4.9.217-vs2.3.9.12/fs/file_table.c 2018-10-20 04:58:13.000000000 +0000
2950 #include <linux/task_work.h>
2951 #include <linux/ima.h>
2952 #include <linux/swap.h>
2953 +#include <linux/vs_limit.h>
2954 +#include <linux/vs_context.h>
2956 #include <linux/atomic.h>
2958 @@ -137,6 +139,8 @@ struct file *get_empty_filp(void)
2959 mutex_init(&f->f_pos_lock);
2960 eventpoll_init_file(f);
2961 /* f->f_version: 0 */
2962 + f->f_xid = vx_current_xid();
2967 @@ -219,6 +223,8 @@ static void __fput(struct file *file)
2968 put_write_access(inode);
2969 __mnt_drop_write(mnt);
2971 + vx_files_dec(file);
2973 file->f_path.dentry = NULL;
2974 file->f_path.mnt = NULL;
2975 file->f_inode = NULL;
2976 @@ -305,6 +311,8 @@ void put_filp(struct file *file)
2978 if (atomic_long_dec_and_test(&file->f_count)) {
2979 security_file_free(file);
2980 + vx_files_dec(file);
2985 diff -NurpP --minimal linux-4.9.217/fs/fs_struct.c linux-4.9.217-vs2.3.9.12/fs/fs_struct.c
2986 --- linux-4.9.217/fs/fs_struct.c 2016-12-11 19:17:54.000000000 +0000
2987 +++ linux-4.9.217-vs2.3.9.12/fs/fs_struct.c 2018-10-20 04:58:13.000000000 +0000
2989 #include <linux/path.h>
2990 #include <linux/slab.h>
2991 #include <linux/fs_struct.h>
2992 +#include <linux/vserver/global.h>
2993 #include "internal.h"
2996 @@ -87,6 +88,7 @@ void free_fs_struct(struct fs_struct *fs
2998 path_put(&fs->root);
3000 + atomic_dec(&vs_global_fs);
3001 kmem_cache_free(fs_cachep, fs);
3004 @@ -124,6 +126,7 @@ struct fs_struct *copy_fs_struct(struct
3007 spin_unlock(&old->lock);
3008 + atomic_inc(&vs_global_fs);
3012 diff -NurpP --minimal linux-4.9.217/fs/gfs2/file.c linux-4.9.217-vs2.3.9.12/fs/gfs2/file.c
3013 --- linux-4.9.217/fs/gfs2/file.c 2020-03-27 00:51:20.390501417 +0000
3014 +++ linux-4.9.217-vs2.3.9.12/fs/gfs2/file.c 2018-10-20 04:58:13.000000000 +0000
3015 @@ -137,6 +137,9 @@ static const u32 fsflags_to_gfs2[32] = {
3016 [12] = GFS2_DIF_EXHASH,
3017 [14] = GFS2_DIF_INHERIT_JDATA,
3018 [17] = GFS2_DIF_TOPDIR,
3019 + [27] = GFS2_DIF_IXUNLINK,
3020 + [26] = GFS2_DIF_BARRIER,
3021 + [29] = GFS2_DIF_COW,
3024 static const u32 gfs2_to_fsflags[32] = {
3025 @@ -147,6 +150,9 @@ static const u32 gfs2_to_fsflags[32] = {
3026 [gfs2fl_ExHash] = FS_INDEX_FL,
3027 [gfs2fl_TopLevel] = FS_TOPDIR_FL,
3028 [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL,
3029 + [gfs2fl_IXUnlink] = FS_IXUNLINK_FL,
3030 + [gfs2fl_Barrier] = FS_BARRIER_FL,
3031 + [gfs2fl_Cow] = FS_COW_FL,
3034 static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
3035 @@ -178,12 +184,17 @@ void gfs2_set_inode_flags(struct inode *
3037 struct gfs2_inode *ip = GFS2_I(inode);
3038 unsigned int flags = inode->i_flags;
3039 + unsigned int vflags = inode->i_vflags;
3041 + flags &= ~(S_IMMUTABLE | S_IXUNLINK |
3042 + S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC | S_NOSEC);
3044 - flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_NOSEC);
3045 if ((ip->i_eattr == 0) && !is_sxid(inode->i_mode))
3047 if (ip->i_diskflags & GFS2_DIF_IMMUTABLE)
3048 flags |= S_IMMUTABLE;
3049 + if (ip->i_diskflags & GFS2_DIF_IXUNLINK)
3050 + flags |= S_IXUNLINK;
3051 if (ip->i_diskflags & GFS2_DIF_APPENDONLY)
3053 if (ip->i_diskflags & GFS2_DIF_NOATIME)
3054 @@ -191,6 +202,43 @@ void gfs2_set_inode_flags(struct inode *
3055 if (ip->i_diskflags & GFS2_DIF_SYNC)
3057 inode->i_flags = flags;
3059 + vflags &= ~(V_BARRIER | V_COW);
3061 + if (ip->i_diskflags & GFS2_DIF_BARRIER)
3062 + vflags |= V_BARRIER;
3063 + if (ip->i_diskflags & GFS2_DIF_COW)
3065 + inode->i_vflags = vflags;
3068 +void gfs2_get_inode_flags(struct inode *inode)
3070 + struct gfs2_inode *ip = GFS2_I(inode);
3071 + unsigned int flags = inode->i_flags;
3072 + unsigned int vflags = inode->i_vflags;
3074 + ip->i_diskflags &= ~(GFS2_DIF_APPENDONLY |
3075 + GFS2_DIF_NOATIME | GFS2_DIF_SYNC |
3076 + GFS2_DIF_IMMUTABLE | GFS2_DIF_IXUNLINK |
3077 + GFS2_DIF_BARRIER | GFS2_DIF_COW);
3079 + if (flags & S_IMMUTABLE)
3080 + ip->i_diskflags |= GFS2_DIF_IMMUTABLE;
3081 + if (flags & S_IXUNLINK)
3082 + ip->i_diskflags |= GFS2_DIF_IXUNLINK;
3084 + if (flags & S_APPEND)
3085 + ip->i_diskflags |= GFS2_DIF_APPENDONLY;
3086 + if (flags & S_NOATIME)
3087 + ip->i_diskflags |= GFS2_DIF_NOATIME;
3088 + if (flags & S_SYNC)
3089 + ip->i_diskflags |= GFS2_DIF_SYNC;
3091 + if (vflags & V_BARRIER)
3092 + ip->i_diskflags |= GFS2_DIF_BARRIER;
3093 + if (vflags & V_COW)
3094 + ip->i_diskflags |= GFS2_DIF_COW;
3097 /* Flags that can be set by user space */
3098 @@ -306,6 +354,37 @@ static int gfs2_set_flags(struct file *f
3099 return do_gfs2_set_flags(filp, gfsflags, ~(GFS2_DIF_SYSTEM | GFS2_DIF_JDATA));
3102 +int gfs2_sync_flags(struct inode *inode, int flags, int vflags)
3104 + struct gfs2_inode *ip = GFS2_I(inode);
3105 + struct gfs2_sbd *sdp = GFS2_SB(inode);
3106 + struct buffer_head *bh;
3107 + struct gfs2_holder gh;
3110 + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
3113 + error = gfs2_trans_begin(sdp, RES_DINODE, 0);
3116 + error = gfs2_meta_inode_buffer(ip, &bh);
3118 + goto out_trans_end;
3119 + gfs2_trans_add_meta(ip->i_gl, bh);
3120 + inode->i_flags = flags;
3121 + inode->i_vflags = vflags;
3122 + gfs2_get_inode_flags(inode);
3123 + gfs2_dinode_out(ip, bh->b_data);
3125 + gfs2_set_aops(inode);
3127 + gfs2_trans_end(sdp);
3129 + gfs2_glock_dq_uninit(&gh);
3133 static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
3136 diff -NurpP --minimal linux-4.9.217/fs/gfs2/inode.h linux-4.9.217-vs2.3.9.12/fs/gfs2/inode.h
3137 --- linux-4.9.217/fs/gfs2/inode.h 2016-12-11 19:17:54.000000000 +0000
3138 +++ linux-4.9.217-vs2.3.9.12/fs/gfs2/inode.h 2018-10-20 04:58:13.000000000 +0000
3139 @@ -117,6 +117,7 @@ extern const struct file_operations gfs2
3140 extern const struct file_operations gfs2_dir_fops_nolock;
3142 extern void gfs2_set_inode_flags(struct inode *inode);
3143 +extern int gfs2_sync_flags(struct inode *inode, int flags, int vflags);
3145 #ifdef CONFIG_GFS2_FS_LOCKING_DLM
3146 extern const struct file_operations gfs2_file_fops;
3147 diff -NurpP --minimal linux-4.9.217/fs/hostfs/hostfs.h linux-4.9.217-vs2.3.9.12/fs/hostfs/hostfs.h
3148 --- linux-4.9.217/fs/hostfs/hostfs.h 2016-12-11 19:17:54.000000000 +0000
3149 +++ linux-4.9.217-vs2.3.9.12/fs/hostfs/hostfs.h 2018-10-20 04:58:13.000000000 +0000
3150 @@ -42,6 +42,7 @@ struct hostfs_iattr {
3151 unsigned short ia_mode;
3156 struct timespec ia_atime;
3157 struct timespec ia_mtime;
3158 diff -NurpP --minimal linux-4.9.217/fs/inode.c linux-4.9.217-vs2.3.9.12/fs/inode.c
3159 --- linux-4.9.217/fs/inode.c 2020-03-27 00:51:20.860494012 +0000
3160 +++ linux-4.9.217-vs2.3.9.12/fs/inode.c 2019-10-05 14:58:45.170315544 +0000
3162 #include <linux/buffer_head.h> /* for inode_has_buffers */
3163 #include <linux/ratelimit.h>
3164 #include <linux/list_lru.h>
3165 +#include <linux/vs_tag.h>
3166 #include <trace/events/writeback.h>
3167 #include "internal.h"
3169 @@ -133,6 +134,8 @@ int inode_init_always(struct super_block
3170 struct address_space *const mapping = &inode->i_data;
3174 + /* essential because of inode slab reuse */
3175 inode->i_blkbits = sb->s_blocksize_bits;
3177 atomic_set(&inode->i_count, 1);
3178 @@ -144,6 +147,7 @@ int inode_init_always(struct super_block
3179 inode->i_opflags |= IOP_XATTR;
3180 i_uid_write(inode, 0);
3181 i_gid_write(inode, 0);
3182 + i_tag_write(inode, 0);
3183 atomic_set(&inode->i_writecount, 0);
3185 inode->i_blocks = 0;
3186 @@ -155,6 +159,7 @@ int inode_init_always(struct super_block
3187 inode->i_link = NULL;
3188 inode->i_dir_seq = 0;
3190 + inode->i_mdev = 0;
3191 inode->dirtied_when = 0;
3193 #ifdef CONFIG_CGROUP_WRITEBACK
3194 @@ -479,6 +484,8 @@ void __insert_inode_hash(struct inode *i
3196 EXPORT_SYMBOL(__insert_inode_hash);
3198 +EXPORT_SYMBOL_GPL(__iget);
3201 * __remove_inode_hash - remove an inode from the hash
3202 * @inode: inode to unhash
3203 @@ -1982,9 +1989,11 @@ void init_special_inode(struct inode *in
3204 if (S_ISCHR(mode)) {
3205 inode->i_fop = &def_chr_fops;
3206 inode->i_rdev = rdev;
3207 + inode->i_mdev = rdev;
3208 } else if (S_ISBLK(mode)) {
3209 inode->i_fop = &def_blk_fops;
3210 inode->i_rdev = rdev;
3211 + inode->i_mdev = rdev;
3212 } else if (S_ISFIFO(mode))
3213 inode->i_fop = &pipefifo_fops;
3214 else if (S_ISSOCK(mode))
3215 @@ -2019,6 +2028,7 @@ void inode_init_owner(struct inode *inod
3217 inode->i_gid = current_fsgid();
3218 inode->i_mode = mode;
3219 + i_tag_write(inode, dx_current_fstag(inode->i_sb));
3221 EXPORT_SYMBOL(inode_init_owner);
3223 diff -NurpP --minimal linux-4.9.217/fs/ioctl.c linux-4.9.217-vs2.3.9.12/fs/ioctl.c
3224 --- linux-4.9.217/fs/ioctl.c 2016-12-11 19:17:54.000000000 +0000
3225 +++ linux-4.9.217-vs2.3.9.12/fs/ioctl.c 2018-10-20 04:58:13.000000000 +0000
3227 #include <linux/writeback.h>
3228 #include <linux/buffer_head.h>
3229 #include <linux/falloc.h>
3230 +#include <linux/proc_fs.h>
3231 +#include <linux/vserver/inode.h>
3232 +#include <linux/vs_tag.h>
3233 #include "internal.h"
3235 #include <asm/ioctls.h>
3236 diff -NurpP --minimal linux-4.9.217/fs/jfs/file.c linux-4.9.217-vs2.3.9.12/fs/jfs/file.c
3237 --- linux-4.9.217/fs/jfs/file.c 2016-12-11 19:17:54.000000000 +0000
3238 +++ linux-4.9.217-vs2.3.9.12/fs/jfs/file.c 2018-10-20 04:58:13.000000000 +0000
3239 @@ -113,7 +113,8 @@ int jfs_setattr(struct dentry *dentry, s
3242 if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) ||
3243 - (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) {
3244 + (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid)) ||
3245 + (iattr->ia_valid & ATTR_TAG && !tag_eq(iattr->ia_tag, inode->i_tag))) {
3246 rc = dquot_transfer(inode, iattr);
3249 @@ -146,6 +147,7 @@ const struct inode_operations jfs_file_i
3250 .get_acl = jfs_get_acl,
3251 .set_acl = jfs_set_acl,
3253 + .sync_flags = jfs_sync_flags,
3256 const struct file_operations jfs_file_operations = {
3257 diff -NurpP --minimal linux-4.9.217/fs/jfs/ioctl.c linux-4.9.217-vs2.3.9.12/fs/jfs/ioctl.c
3258 --- linux-4.9.217/fs/jfs/ioctl.c 2016-12-11 19:17:54.000000000 +0000
3259 +++ linux-4.9.217-vs2.3.9.12/fs/jfs/ioctl.c 2018-10-20 04:58:13.000000000 +0000
3261 #include <linux/time.h>
3262 #include <linux/sched.h>
3263 #include <linux/blkdev.h>
3264 +#include <linux/mount.h>
3265 #include <asm/current.h>
3266 #include <asm/uaccess.h>
3268 @@ -56,6 +57,16 @@ static long jfs_map_ext2(unsigned long f
3272 +int jfs_sync_flags(struct inode *inode, int flags, int vflags)
3274 + inode->i_flags = flags;
3275 + inode->i_vflags = vflags;
3276 + jfs_get_inode_flags(JFS_IP(inode));
3277 + inode->i_ctime = CURRENT_TIME_SEC;
3278 + mark_inode_dirty(inode);
3282 long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
3284 struct inode *inode = file_inode(filp);
3285 @@ -89,6 +100,11 @@ long jfs_ioctl(struct file *filp, unsign
3286 if (!S_ISDIR(inode->i_mode))
3287 flags &= ~JFS_DIRSYNC_FL;
3289 + if (IS_BARRIER(inode)) {
3290 + vxwprintk_task(1, "messing with the barrier.");
3294 /* Is it quota file? Do not allow user to mess with it */
3295 if (IS_NOQUOTA(inode)) {
3297 @@ -106,8 +122,8 @@ long jfs_ioctl(struct file *filp, unsign
3298 * the relevant capability.
3300 if ((oldflags & JFS_IMMUTABLE_FL) ||
3301 - ((flags ^ oldflags) &
3302 - (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) {
3303 + ((flags ^ oldflags) & (JFS_APPEND_FL |
3304 + JFS_IMMUTABLE_FL | JFS_IXUNLINK_FL))) {
3305 if (!capable(CAP_LINUX_IMMUTABLE)) {
3306 inode_unlock(inode);
3308 @@ -115,7 +131,7 @@ long jfs_ioctl(struct file *filp, unsign
3312 - flags = flags & JFS_FL_USER_MODIFIABLE;
3313 + flags &= JFS_FL_USER_MODIFIABLE;
3314 flags |= oldflags & ~JFS_FL_USER_MODIFIABLE;
3315 jfs_inode->mode2 = flags;
3317 diff -NurpP --minimal linux-4.9.217/fs/jfs/jfs_dinode.h linux-4.9.217-vs2.3.9.12/fs/jfs/jfs_dinode.h
3318 --- linux-4.9.217/fs/jfs/jfs_dinode.h 2016-12-11 19:17:54.000000000 +0000
3319 +++ linux-4.9.217-vs2.3.9.12/fs/jfs/jfs_dinode.h 2018-10-20 04:58:13.000000000 +0000
3320 @@ -161,9 +161,13 @@ struct dinode {
3322 #define JFS_APPEND_FL 0x01000000 /* writes to file may only append */
3323 #define JFS_IMMUTABLE_FL 0x02000000 /* Immutable file */
3324 +#define JFS_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */
3326 -#define JFS_FL_USER_VISIBLE 0x03F80000
3327 -#define JFS_FL_USER_MODIFIABLE 0x03F80000
3328 +#define JFS_BARRIER_FL 0x04000000 /* Barrier for chroot() */
3329 +#define JFS_COW_FL 0x20000000 /* Copy on Write marker */
3331 +#define JFS_FL_USER_VISIBLE 0x07F80000
3332 +#define JFS_FL_USER_MODIFIABLE 0x07F80000
3333 #define JFS_FL_INHERIT 0x03C80000
3335 /* These are identical to EXT[23]_IOC_GETFLAGS/SETFLAGS */
3336 diff -NurpP --minimal linux-4.9.217/fs/jfs/jfs_filsys.h linux-4.9.217-vs2.3.9.12/fs/jfs/jfs_filsys.h
3337 --- linux-4.9.217/fs/jfs/jfs_filsys.h 2016-12-11 19:17:54.000000000 +0000
3338 +++ linux-4.9.217-vs2.3.9.12/fs/jfs/jfs_filsys.h 2018-10-20 04:58:13.000000000 +0000
3340 #define JFS_NAME_MAX 255
3341 #define JFS_PATH_MAX BPSIZE
3343 +#define JFS_TAGGED 0x00800000 /* Context Tagging */
3346 * file system state (superblock state)
3347 diff -NurpP --minimal linux-4.9.217/fs/jfs/jfs_imap.c linux-4.9.217-vs2.3.9.12/fs/jfs/jfs_imap.c
3348 --- linux-4.9.217/fs/jfs/jfs_imap.c 2016-12-11 19:17:54.000000000 +0000
3349 +++ linux-4.9.217-vs2.3.9.12/fs/jfs/jfs_imap.c 2018-10-20 04:58:13.000000000 +0000
3351 #include <linux/pagemap.h>
3352 #include <linux/quotaops.h>
3353 #include <linux/slab.h>
3354 +#include <linux/vs_tag.h>
3356 #include "jfs_incore.h"
3357 #include "jfs_inode.h"
3358 @@ -3046,6 +3047,8 @@ static int copy_from_dinode(struct dinod
3360 struct jfs_inode_info *jfs_ip = JFS_IP(ip);
3361 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
3365 jfs_ip->fileset = le32_to_cpu(dip->di_fileset);
3366 jfs_ip->mode2 = le32_to_cpu(dip->di_mode);
3367 @@ -3066,14 +3069,18 @@ static int copy_from_dinode(struct dinod
3369 set_nlink(ip, le32_to_cpu(dip->di_nlink));
3371 - jfs_ip->saved_uid = make_kuid(&init_user_ns, le32_to_cpu(dip->di_uid));
3372 + kuid = make_kuid(&init_user_ns, le32_to_cpu(dip->di_uid));
3373 + kgid = make_kgid(&init_user_ns, le32_to_cpu(dip->di_gid));
3374 + ip->i_tag = INOTAG_KTAG(DX_TAG(ip), kuid, kgid, GLOBAL_ROOT_TAG);
3376 + jfs_ip->saved_uid = INOTAG_KUID(DX_TAG(ip), kuid, kgid);
3377 if (!uid_valid(sbi->uid))
3378 ip->i_uid = jfs_ip->saved_uid;
3380 ip->i_uid = sbi->uid;
3383 - jfs_ip->saved_gid = make_kgid(&init_user_ns, le32_to_cpu(dip->di_gid));
3384 + jfs_ip->saved_gid = INOTAG_KGID(DX_TAG(ip), kuid, kgid);
3385 if (!gid_valid(sbi->gid))
3386 ip->i_gid = jfs_ip->saved_gid;
3388 @@ -3138,16 +3145,14 @@ static void copy_to_dinode(struct dinode
3389 dip->di_size = cpu_to_le64(ip->i_size);
3390 dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks));
3391 dip->di_nlink = cpu_to_le32(ip->i_nlink);
3392 - if (!uid_valid(sbi->uid))
3393 - dip->di_uid = cpu_to_le32(i_uid_read(ip));
3395 - dip->di_uid =cpu_to_le32(from_kuid(&init_user_ns,
3396 - jfs_ip->saved_uid));
3397 - if (!gid_valid(sbi->gid))
3398 - dip->di_gid = cpu_to_le32(i_gid_read(ip));
3400 - dip->di_gid = cpu_to_le32(from_kgid(&init_user_ns,
3401 - jfs_ip->saved_gid));
3402 + dip->di_uid = cpu_to_le32(from_kuid(&init_user_ns,
3403 + TAGINO_KUID(DX_TAG(ip),
3404 + !uid_valid(sbi->uid) ? ip->i_uid : jfs_ip->saved_uid,
3406 + dip->di_gid = cpu_to_le32(from_kgid(&init_user_ns,
3407 + TAGINO_KGID(DX_TAG(ip),
3408 + !gid_valid(sbi->gid) ? ip->i_gid : jfs_ip->saved_gid,
3410 jfs_get_inode_flags(jfs_ip);
3412 * mode2 is only needed for storing the higher order bits.
3413 diff -NurpP --minimal linux-4.9.217/fs/jfs/jfs_inode.c linux-4.9.217-vs2.3.9.12/fs/jfs/jfs_inode.c
3414 --- linux-4.9.217/fs/jfs/jfs_inode.c 2016-12-11 19:17:54.000000000 +0000
3415 +++ linux-4.9.217-vs2.3.9.12/fs/jfs/jfs_inode.c 2018-10-20 04:58:14.000000000 +0000
3418 #include <linux/fs.h>
3419 #include <linux/quotaops.h>
3420 +#include <linux/vs_tag.h>
3421 #include "jfs_incore.h"
3422 #include "jfs_inode.h"
3423 #include "jfs_filsys.h"
3424 @@ -33,6 +34,9 @@ void jfs_set_inode_flags(struct inode *i
3426 if (flags & JFS_IMMUTABLE_FL)
3427 new_fl |= S_IMMUTABLE;
3428 + if (flags & JFS_IXUNLINK_FL)
3429 + new_fl |= S_IXUNLINK;
3431 if (flags & JFS_APPEND_FL)
3433 if (flags & JFS_NOATIME_FL)
3434 @@ -41,18 +45,35 @@ void jfs_set_inode_flags(struct inode *i
3435 new_fl |= S_DIRSYNC;
3436 if (flags & JFS_SYNC_FL)
3438 - inode_set_flags(inode, new_fl, S_IMMUTABLE | S_APPEND | S_NOATIME |
3439 - S_DIRSYNC | S_SYNC);
3441 + inode_set_flags(inode, new_fl, S_IMMUTABLE | S_IXUNLINK |
3442 + S_APPEND | S_NOATIME | S_DIRSYNC | S_SYNC);
3445 + if (flags & JFS_BARRIER_FL)
3446 + new_fl |= V_BARRIER;
3447 + if (flags & JFS_COW_FL)
3450 + set_mask_bits(&inode->i_vflags,
3451 + V_BARRIER | V_COW, new_fl);
3454 void jfs_get_inode_flags(struct jfs_inode_info *jfs_ip)
3456 unsigned int flags = jfs_ip->vfs_inode.i_flags;
3457 + unsigned int vflags = jfs_ip->vfs_inode.i_vflags;
3459 + jfs_ip->mode2 &= ~(JFS_IMMUTABLE_FL | JFS_IXUNLINK_FL |
3460 + JFS_APPEND_FL | JFS_NOATIME_FL |
3461 + JFS_DIRSYNC_FL | JFS_SYNC_FL |
3462 + JFS_BARRIER_FL | JFS_COW_FL);
3464 - jfs_ip->mode2 &= ~(JFS_IMMUTABLE_FL | JFS_APPEND_FL | JFS_NOATIME_FL |
3465 - JFS_DIRSYNC_FL | JFS_SYNC_FL);
3466 if (flags & S_IMMUTABLE)
3467 jfs_ip->mode2 |= JFS_IMMUTABLE_FL;
3468 + if (flags & S_IXUNLINK)
3469 + jfs_ip->mode2 |= JFS_IXUNLINK_FL;
3471 if (flags & S_APPEND)
3472 jfs_ip->mode2 |= JFS_APPEND_FL;
3473 if (flags & S_NOATIME)
3474 @@ -61,6 +82,11 @@ void jfs_get_inode_flags(struct jfs_inod
3475 jfs_ip->mode2 |= JFS_DIRSYNC_FL;
3477 jfs_ip->mode2 |= JFS_SYNC_FL;
3479 + if (vflags & V_BARRIER)
3480 + jfs_ip->mode2 |= JFS_BARRIER_FL;
3481 + if (vflags & V_COW)
3482 + jfs_ip->mode2 |= JFS_COW_FL;
3486 diff -NurpP --minimal linux-4.9.217/fs/jfs/jfs_inode.h linux-4.9.217-vs2.3.9.12/fs/jfs/jfs_inode.h
3487 --- linux-4.9.217/fs/jfs/jfs_inode.h 2016-12-11 19:17:54.000000000 +0000
3488 +++ linux-4.9.217-vs2.3.9.12/fs/jfs/jfs_inode.h 2018-10-20 04:58:14.000000000 +0000
3489 @@ -39,6 +39,7 @@ extern struct dentry *jfs_fh_to_dentry(s
3490 extern struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid,
3491 int fh_len, int fh_type);
3492 extern void jfs_set_inode_flags(struct inode *);
3493 +extern int jfs_sync_flags(struct inode *, int, int);
3494 extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
3495 extern int jfs_setattr(struct dentry *, struct iattr *);
3497 diff -NurpP --minimal linux-4.9.217/fs/jfs/namei.c linux-4.9.217-vs2.3.9.12/fs/jfs/namei.c
3498 --- linux-4.9.217/fs/jfs/namei.c 2020-03-27 00:51:21.670481243 +0000
3499 +++ linux-4.9.217-vs2.3.9.12/fs/jfs/namei.c 2018-10-20 04:58:14.000000000 +0000
3501 #include <linux/ctype.h>
3502 #include <linux/quotaops.h>
3503 #include <linux/exportfs.h>
3504 +#include <linux/vs_tag.h>
3505 #include "jfs_incore.h"
3506 #include "jfs_superblock.h"
3507 #include "jfs_inode.h"
3508 @@ -1480,6 +1481,7 @@ static struct dentry *jfs_lookup(struct
3509 jfs_err("jfs_lookup: iget failed on inum %d", (uint)inum);
3512 + dx_propagate_tag(nd, ip);
3513 return d_splice_alias(ip, dentry);
3516 @@ -1542,6 +1544,7 @@ const struct inode_operations jfs_dir_in
3517 .get_acl = jfs_get_acl,
3518 .set_acl = jfs_set_acl,
3520 + .sync_flags = jfs_sync_flags,
3523 const struct file_operations jfs_dir_operations = {
3524 diff -NurpP --minimal linux-4.9.217/fs/jfs/super.c linux-4.9.217-vs2.3.9.12/fs/jfs/super.c
3525 --- linux-4.9.217/fs/jfs/super.c 2020-03-27 00:51:21.670481243 +0000
3526 +++ linux-4.9.217-vs2.3.9.12/fs/jfs/super.c 2018-10-20 04:58:14.000000000 +0000
3527 @@ -206,7 +206,8 @@ enum {
3528 Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize,
3529 Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota,
3530 Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask,
3531 - Opt_discard, Opt_nodiscard, Opt_discard_minblk
3532 + Opt_discard, Opt_nodiscard, Opt_discard_minblk,
3533 + Opt_tag, Opt_notag, Opt_tagid
3536 static const match_table_t tokens = {
3537 @@ -216,6 +217,10 @@ static const match_table_t tokens = {
3538 {Opt_resize, "resize=%u"},
3539 {Opt_resize_nosize, "resize"},
3540 {Opt_errors, "errors=%s"},
3542 + {Opt_notag, "notag"},
3543 + {Opt_tagid, "tagid=%u"},
3544 + {Opt_tag, "tagxid"},
3545 {Opt_ignore, "noquota"},
3546 {Opt_ignore, "quota"},
3547 {Opt_usrquota, "usrquota"},
3548 @@ -405,7 +410,20 @@ static int parse_options(char *options,
3549 pr_err("JFS: discard option not supported on device\n");
3553 +#ifndef CONFIG_TAGGING_NONE
3555 + *flag |= JFS_TAGGED;
3558 + *flag &= ~JFS_TAGGED; /* clear only the tagging bit, keep all other mount flags */
3561 +#ifdef CONFIG_PROPAGATE
3564 + *flag |= JFS_TAGGED;
3568 printk("jfs: Unrecognized mount option \"%s\" or missing value\n",
3570 @@ -437,6 +455,12 @@ static int jfs_remount(struct super_bloc
3571 if (!parse_options(data, sb, &newLVSize, &flag))
3574 + if ((flag & JFS_TAGGED) && !(sb->s_flags & MS_TAGGED)) {
3575 + printk(KERN_ERR "JFS: %s: tagging not permitted on remount.\n",
3581 if (sb->s_flags & MS_RDONLY) {
3582 pr_err("JFS: resize requires volume to be mounted read-write\n");
3583 @@ -517,6 +541,9 @@ static int jfs_fill_super(struct super_b
3584 #ifdef CONFIG_JFS_POSIX_ACL
3585 sb->s_flags |= MS_POSIXACL;
3587 + /* map mount option tagxid */
3588 + if (sbi->flag & JFS_TAGGED)
3589 + sb->s_flags |= MS_TAGGED;
3592 pr_err("resize option for remount only\n");
3593 diff -NurpP --minimal linux-4.9.217/fs/libfs.c linux-4.9.217-vs2.3.9.12/fs/libfs.c
3594 --- linux-4.9.217/fs/libfs.c 2020-03-27 00:51:21.710480616 +0000
3595 +++ linux-4.9.217-vs2.3.9.12/fs/libfs.c 2019-10-22 13:49:06.407707139 +0000
3596 @@ -180,7 +180,8 @@ static inline unsigned char dt_type(stru
3597 * both impossible due to the lock on directory.
3600 -int dcache_readdir(struct file *file, struct dir_context *ctx)
3601 +static inline int do_dcache_readdir_filter(struct file *file,
3602 + struct dir_context *ctx, int (*filter)(struct dentry *dentry))
3604 struct dentry *dentry = file->f_path.dentry;
3605 struct dentry *cursor = file->private_data;
3606 @@ -197,9 +198,10 @@ int dcache_readdir(struct file *file, st
3607 p = &cursor->d_child;
3609 while ((p = scan_positives(cursor, p, 1, &next)) != anchor) {
3610 - if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
3611 + if (!filter || filter(next))
3612 + if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
3613 d_inode(next)->i_ino, dt_type(d_inode(next))))
3618 spin_lock(&dentry->d_lock);
3619 @@ -209,8 +211,22 @@ int dcache_readdir(struct file *file, st
3624 EXPORT_SYMBOL(dcache_readdir);
3626 +int dcache_readdir(struct file *filp, struct dir_context *ctx)
3628 + return do_dcache_readdir_filter(filp, ctx, NULL);
3631 +EXPORT_SYMBOL(dcache_readdir_filter);
3633 +int dcache_readdir_filter(struct file *filp, struct dir_context *ctx,
3634 + int (*filter)(struct dentry *))
3636 + return do_dcache_readdir_filter(filp, ctx, filter);
3639 ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
3642 diff -NurpP --minimal linux-4.9.217/fs/locks.c linux-4.9.217-vs2.3.9.12/fs/locks.c
3643 --- linux-4.9.217/fs/locks.c 2020-03-27 00:51:21.800479197 +0000
3644 +++ linux-4.9.217-vs2.3.9.12/fs/locks.c 2020-04-01 09:40:28.545466623 +0000
3646 #include <linux/pid_namespace.h>
3647 #include <linux/hashtable.h>
3648 #include <linux/percpu.h>
3649 +#include <linux/vs_base.h>
3650 +#include <linux/vs_limit.h>
3652 #define CREATE_TRACE_POINTS
3653 #include <trace/events/filelock.h>
3654 @@ -292,11 +294,15 @@ static void locks_init_lock_heads(struct
3655 /* Allocate an empty lock structure. */
3656 struct file_lock *locks_alloc_lock(void)
3658 - struct file_lock *fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL);
3659 + struct file_lock *fl;
3662 - locks_init_lock_heads(fl);
3663 + fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL);
3666 + locks_init_lock_heads(fl);
3672 EXPORT_SYMBOL_GPL(locks_alloc_lock);
3673 @@ -348,6 +354,7 @@ void locks_init_lock(struct file_lock *f
3675 memset(fl, 0, sizeof(struct file_lock));
3676 locks_init_lock_heads(fl);
3680 EXPORT_SYMBOL(locks_init_lock);
3681 @@ -365,6 +372,7 @@ void locks_copy_conflock(struct file_loc
3682 new->fl_start = fl->fl_start;
3683 new->fl_end = fl->fl_end;
3684 new->fl_lmops = fl->fl_lmops;
3685 + new->fl_xid = fl->fl_xid;
3689 @@ -426,7 +434,10 @@ flock_make_lock(struct file *filp, unsig
3690 fl->fl_flags = FL_FLOCK;
3692 fl->fl_end = OFFSET_MAX;
3695 + vxd_assert(filp->f_xid == vx_current_xid(),
3696 + "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
3697 + fl->fl_xid = filp->f_xid;
3701 @@ -548,6 +559,7 @@ static int lease_init(struct file *filp,
3703 fl->fl_owner = filp;
3704 fl->fl_pid = current->tgid;
3705 + fl->fl_xid = vx_current_xid();
3708 fl->fl_flags = FL_LEASE;
3709 @@ -567,6 +579,10 @@ static struct file_lock *lease_alloc(str
3711 return ERR_PTR(error);
3713 + fl->fl_xid = vx_current_xid();
3715 + vxd_assert(filp->f_xid == fl->fl_xid,
3716 + "f_xid(%d) == fl_xid(%d)", filp->f_xid, fl->fl_xid);
3717 error = lease_init(filp, type, fl);
3719 locks_free_lock(fl);
3720 @@ -956,6 +972,7 @@ static int flock_lock_inode(struct inode
3724 + new_fl->fl_xid = -1;
3726 list_for_each_entry(fl, &ctx->flc_flock, fl_list) {
3727 if (!flock_locks_conflict(request, fl))
3728 @@ -984,7 +1001,7 @@ out:
3731 static int posix_lock_inode(struct inode *inode, struct file_lock *request,
3732 - struct file_lock *conflock)
3733 + struct file_lock *conflock, vxid_t xid)
3735 struct file_lock *fl, *tmp;
3736 struct file_lock *new_fl = NULL;
3737 @@ -1000,6 +1017,9 @@ static int posix_lock_inode(struct inode
3739 return (request->fl_type == F_UNLCK) ? 0 : -ENOMEM;
3742 + vxd_assert(xid == vx_current_xid(),
3743 + "xid(%d) == current(%d)", xid, vx_current_xid());
3745 * We may need two file_lock structures for this operation,
3746 * so we get them in advance to avoid races.
3747 @@ -1010,7 +1030,11 @@ static int posix_lock_inode(struct inode
3748 (request->fl_type != F_UNLCK ||
3749 request->fl_start != 0 || request->fl_end != OFFSET_MAX)) {
3750 new_fl = locks_alloc_lock();
3751 + if (new_fl) /* locks_alloc_lock() yields NULL when the allocation fails */
3752 + new_fl->fl_xid = xid; // vx_locks_inc(new_fl);
3753 new_fl2 = locks_alloc_lock();
3754 + if (new_fl2) /* same guard for the second preallocated lock */
3755 + new_fl2->fl_xid = xid; // vx_locks_inc(new_fl2);
3758 percpu_down_read_preempt_disable(&file_rwsem);
3759 @@ -1216,7 +1240,7 @@ static int posix_lock_inode(struct inode
3760 int posix_lock_file(struct file *filp, struct file_lock *fl,
3761 struct file_lock *conflock)
3763 - return posix_lock_inode(locks_inode(filp), fl, conflock);
3764 + return posix_lock_inode(locks_inode(filp), fl, conflock, filp->f_xid);
3766 EXPORT_SYMBOL(posix_lock_file);
3768 @@ -1232,7 +1256,7 @@ static int posix_lock_inode_wait(struct
3772 - error = posix_lock_inode(inode, fl, NULL);
3773 + error = posix_lock_inode(inode, fl, NULL, 0);
3774 if (error != FILE_LOCK_DEFERRED)
3776 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
3777 @@ -1308,10 +1332,13 @@ int locks_mandatory_area(struct inode *i
3785 fl.fl_flags &= ~FL_SLEEP;
3786 - error = posix_lock_inode(inode, &fl, NULL);
3787 + f_xid = filp->f_xid;
3788 + error = posix_lock_inode(inode, &fl, NULL, f_xid);
3792 @@ -1319,7 +1346,7 @@ int locks_mandatory_area(struct inode *i
3794 fl.fl_flags |= FL_SLEEP;
3795 fl.fl_owner = current->files;
3796 - error = posix_lock_inode(inode, &fl, NULL);
3797 + error = posix_lock_inode(inode, &fl, NULL, f_xid);
3798 if (error != FILE_LOCK_DEFERRED)
3800 error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
3801 @@ -2374,6 +2401,16 @@ int fcntl_setlk64(unsigned int fd, struc
3802 if (file_lock == NULL)
3805 + vxd_assert(filp->f_xid == vx_current_xid(),
3806 + "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
3807 + file_lock->fl_xid = filp->f_xid;
3808 + // vx_locks_inc(file_lock);
3810 + vxd_assert(filp->f_xid == vx_current_xid(),
3811 + "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
3812 + file_lock->fl_xid = filp->f_xid;
3813 + // vx_locks_inc(file_lock);
3816 * This might block, so we do it before checking the inode.
3818 @@ -2710,8 +2747,11 @@ static int locks_show(struct seq_file *f
3820 lock_get_status(f, fl, iter->li_pos, "");
3822 - list_for_each_entry(bfl, &fl->fl_block, fl_block)
3823 + list_for_each_entry(bfl, &fl->fl_block, fl_block) {
3824 + if (!vx_check(fl->fl_xid, VS_WATCH_P | VS_IDENT))
3826 lock_get_status(f, bfl, iter->li_pos, " ->");
3831 diff -NurpP --minimal linux-4.9.217/fs/mount.h linux-4.9.217-vs2.3.9.12/fs/mount.h
3832 --- linux-4.9.217/fs/mount.h 2020-03-27 00:51:21.850478409 +0000
3833 +++ linux-4.9.217-vs2.3.9.12/fs/mount.h 2018-10-20 04:58:14.000000000 +0000
3834 @@ -69,6 +69,7 @@ struct mount {
3835 struct hlist_head mnt_pins;
3836 struct fs_pin mnt_umount;
3837 struct dentry *mnt_ex_mountpoint;
3838 + vtag_t mnt_tag; /* tagging used for vfsmount */
3841 #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
3842 diff -NurpP --minimal linux-4.9.217/fs/namei.c linux-4.9.217-vs2.3.9.12/fs/namei.c
3843 --- linux-4.9.217/fs/namei.c 2020-03-27 00:51:21.910477461 +0000
3844 +++ linux-4.9.217-vs2.3.9.12/fs/namei.c 2020-04-01 09:40:28.545466623 +0000
3846 #include <linux/hash.h>
3847 #include <linux/bitops.h>
3848 #include <linux/init_task.h>
3849 +#include <linux/proc_fs.h>
3850 +#include <linux/magic.h>
3851 +#include <linux/vserver/inode.h>
3852 +#include <linux/vs_base.h>
3853 +#include <linux/vs_tag.h>
3854 +#include <linux/vs_cowbl.h>
3855 +#include <linux/vs_device.h>
3856 +#include <linux/vs_context.h>
3857 +#include <linux/pid_namespace.h>
3858 #include <asm/uaccess.h>
3860 #include "internal.h"
3861 +#include "proc/internal.h"
3864 /* [Feb-1997 T. Schoebel-Theuer]
3865 @@ -286,6 +296,93 @@ static int check_acl(struct inode *inode
3869 +static inline int dx_barrier(const struct inode *inode)
3871 + if (IS_BARRIER(inode) && !vx_check(0, VS_ADMIN | VS_WATCH)) {
3872 + vxwprintk_task(1, "did hit the barrier.");
3878 +static int __dx_permission(const struct inode *inode, int mask)
3880 + if (dx_barrier(inode))
3883 + if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC) {
3884 + /* devpts is xid tagged */
3885 + if (S_ISDIR(inode->i_mode) ||
3886 + vx_check((vxid_t)i_tag_read(inode), VS_IDENT | VS_WATCH_P))
3889 + /* just pretend we didn't find anything */
3892 + else if (inode->i_sb->s_magic == PROC_SUPER_MAGIC) {
3893 + struct proc_dir_entry *de = PDE(inode);
3895 + if (de && !vx_hide_check(0, de->vx_flags)) {
3896 + vxdprintk(VXD_CBIT(misc, 9), /* bound the name by de->namelen */
3897 + VS_Q("%.*s") " hidden by _dx_permission",
3898 + de->namelen, de->name);
3902 + if ((mask & (MAY_WRITE | MAY_APPEND))) {
3904 + struct task_struct *tsk;
3906 + if (vx_check(0, VS_ADMIN | VS_WATCH_P) ||
3907 + vx_flags(VXF_STATE_SETUP, 0))
3910 + pid = PROC_I(inode)->pid;
3915 + tsk = pid_task(pid, PIDTYPE_PID);
3916 + vxdprintk(VXD_CBIT(tag, 0), "accessing %p[#%u]",
3917 + tsk, (tsk ? vx_task_xid(tsk) : 0));
3919 + vx_check(vx_task_xid(tsk), VS_IDENT | VS_WATCH_P)) {
3920 + rcu_read_unlock();
3923 + rcu_read_unlock();
3926 + /* FIXME: Should we block some entries here? */
3931 + if (dx_notagcheck(inode->i_sb) ||
3932 + dx_check((vxid_t)i_tag_read(inode),
3933 + DX_HOSTID | DX_ADMIN | DX_WATCH | DX_IDENT))
3941 +int dx_permission(const struct inode *inode, int mask)
3943 + int ret = __dx_permission(inode, mask);
3944 + if (unlikely(ret)) {
3945 +#ifndef CONFIG_VSERVER_WARN_DEVPTS
3946 + if (inode->i_sb->s_magic != DEVPTS_SUPER_MAGIC)
3949 + "denied [0x%x] access to inode %s:%p[#%d,%lu]",
3950 + mask, inode->i_sb->s_id, inode,
3951 + i_tag_read(inode), inode->i_ino);
3957 * This does the basic permission checking
3959 @@ -410,7 +507,7 @@ int __inode_permission(struct inode *ino
3961 * Nobody gets write access to an immutable file.
3963 - if (IS_IMMUTABLE(inode))
3964 + if (IS_IMMUTABLE(inode) && !IS_COW(inode))
3968 @@ -422,6 +519,10 @@ int __inode_permission(struct inode *ino
3972 + retval = dx_permission(inode, mask);
3976 retval = do_inode_permission(inode, mask);
3979 @@ -2825,7 +2926,7 @@ static int may_delete(struct inode *dir,
3982 if (check_sticky(dir, inode) || IS_APPEND(inode) ||
3983 - IS_IMMUTABLE(inode) || IS_SWAPFILE(inode) || HAS_UNMAPPED_ID(inode))
3984 + IS_IXORUNLINK(inode) || IS_SWAPFILE(inode) || HAS_UNMAPPED_ID(inode))
3987 if (!d_is_dir(victim))
3988 @@ -2913,19 +3014,25 @@ int vfs_create(struct inode *dir, struct
3991 int error = may_create(dir, dentry);
3994 + vxdprintk(VXD_CBIT(misc, 3), "may_create failed with %d", error);
3998 if (!dir->i_op->create)
3999 return -EACCES; /* shouldn't it be ENOSYS? */
4002 error = security_inode_create(dir, dentry, mode);
4005 + vxdprintk(VXD_CBIT(misc, 3), "security_inode_create failed with %d", error);
4008 error = dir->i_op->create(dir, dentry, mode, want_excl);
4010 fsnotify_create(dir, dentry);
4012 + vxdprintk(VXD_CBIT(misc, 3), "i_op->create failed with %d", error);
4015 EXPORT_SYMBOL(vfs_create);
4016 @@ -2963,6 +3070,15 @@ static int may_open(struct path *path, i
4020 +#ifdef CONFIG_VSERVER_COWBL
4021 + if (IS_COW(inode) &&
4022 + ((flag & O_ACCMODE) != O_RDONLY)) {
4023 + if (IS_COW_LINK(inode))
4025 + inode->i_flags &= ~(S_IXUNLINK|S_IMMUTABLE);
4026 + mark_inode_dirty(inode);
4029 error = inode_permission(inode, MAY_OPEN | acc_mode);
4032 @@ -3423,6 +3539,16 @@ finish_open:
4034 finish_open_created:
4035 error = may_open(&nd->path, acc_mode, open_flag);
4036 +#ifdef CONFIG_VSERVER_COWBL
4037 + if (error == -EMLINK) {
4038 + struct dentry *dentry;
4039 + dentry = cow_break_link(nd->name->name);
4040 + if (IS_ERR(dentry))
4041 + error = PTR_ERR(dentry);
4048 BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
4049 @@ -3526,6 +3652,9 @@ static struct file *path_openat(struct n
4053 +#ifdef CONFIG_VSERVER_COWBL
4056 file = get_empty_filp();
4059 @@ -3559,6 +3688,12 @@ static struct file *path_openat(struct n
4063 +#ifdef CONFIG_VSERVER_COWBL
4064 + if (error == -EMLINK) {
4065 + // path_cleanup(nd);
4070 if (!(opened & FILE_OPENED)) {
4072 @@ -3679,6 +3814,11 @@ static struct dentry *filename_create(in
4076 + vxdprintk(VXD_CBIT(misc, 3), "filename_create path.dentry = %p (%.*s), dentry = %p (%.*s), d_inode = %p",
4077 + path->dentry, path->dentry->d_name.len,
4078 + path->dentry->d_name.name, dentry,
4079 + dentry->d_name.len, dentry->d_name.name,
4080 + path->dentry->d_inode);
4084 @@ -3797,6 +3937,7 @@ retry:
4085 error = vfs_mknod(path.dentry->d_inode,dentry,mode,0);
4090 done_path_create(&path, dentry);
4091 if (retry_estale(error, lookup_flags)) {
4092 @@ -4218,7 +4359,7 @@ int vfs_link(struct dentry *old_dentry,
4094 * A link to an append-only or immutable file cannot be created.
4096 - if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
4097 + if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
4100 * Updating the link count will likely cause i_uid and i_gid to
4101 @@ -4748,6 +4889,326 @@ const char *vfs_get_link(struct dentry *
4103 EXPORT_SYMBOL(vfs_get_link);
4106 +#ifdef CONFIG_VSERVER_COWBL
4109 +void dump_path(const char *name, struct path *path)
4111 + vxdprintk(VXD_CBIT(misc, 3),
4112 + "%s: path=%p mnt=%p dentry=%p", name, path,
4113 + path ? path->mnt : NULL,
4114 + path ? path->dentry : NULL);
4116 + if (path && path->mnt)
4117 + vxdprintk(VXD_CBIT(misc, 3),
4118 + "%s: path mnt_sb=%p[#%d,#%d] mnt_root=%p[#%d]", name,
4119 + path->mnt->mnt_sb,
4120 + path->mnt->mnt_sb ? path->mnt->mnt_sb->s_count : -1,
4121 + path->mnt->mnt_sb ? atomic_read(&path->mnt->mnt_sb->s_active) : -1,
4122 + path->mnt->mnt_root,
4123 + path->mnt->mnt_root ? path->mnt->mnt_root->d_lockref.count : -1);
4125 + if (path && path->dentry)
4126 + vxdprintk(VXD_CBIT(misc, 3),
4127 + "%s: path dentry=%p[#%d]", name,
4129 + path->dentry ? path->dentry->d_lockref.count : -1);
4133 +long do_cow_splice(struct file *in, struct file *out, size_t len)
4138 + return do_splice_direct(in, &ppos, out, &opos, len, 0);
4141 +struct dentry *cow_break_link(const char *pathname)
4143 + int ret, mode, pathlen, redo = 0, drop = 1;
4144 + struct path old_path = {}, par_path = {}, dir_path = {}, *new_path = NULL;
4145 + struct dentry *dir, *old_dentry, *new_dentry = NULL;
4146 + struct file *old_file;
4147 + struct file *new_file;
4148 + struct qstr new_qstr;
4150 + char *to, *path, pad='\251';
4152 + struct filename *filename = getname_kernel(pathname);
4153 + struct filename *to_filename;
4155 + vxdprintk(VXD_CBIT(misc, 1),
4156 + "cow_break_link(" VS_Q("%s") ")", pathname);
4158 + path = kmalloc(PATH_MAX, GFP_KERNEL);
4160 + if (!path || IS_ERR(filename))
4163 + /* old_path will have refs to dentry and mnt */
4164 + ret = filename_lookup(AT_FDCWD, filename, LOOKUP_FOLLOW, &old_path, NULL);
4165 + vxdprintk(VXD_CBIT(misc, 2),
4166 + "do_path_lookup(old): %d", ret);
4168 + goto out_free_path;
4170 + dump_path("cow (old)", &old_path);
4172 + /* no explicit reference for old_dentry here */
4173 + old_dentry = old_path.dentry;
4175 + mode = old_dentry->d_inode->i_mode;
4176 + to = d_path(&old_path, path, PATH_MAX-2);
4177 + pathlen = strlen(to);
4178 + vxdprintk(VXD_CBIT(misc, 2),
4179 + "old path " VS_Q("%s") " [%p:" VS_Q("%.*s") ":%d]", to,
4181 + old_dentry->d_name.len, old_dentry->d_name.name,
4182 + old_dentry->d_name.len);
4184 + to[pathlen + 1] = 0;
4186 + new_dentry = NULL;
4187 + to[pathlen] = pad--;
4189 + if (pad <= '\240')
4192 + vxdprintk(VXD_CBIT(misc, 1), "temp copy " VS_Q("%s"), to);
4194 + /* dir_path will have refs to dentry and mnt */
4195 + to_filename = getname_kernel(to);
4196 + to_filename = filename_parentat(AT_FDCWD, to_filename,
4197 + LOOKUP_PARENT | LOOKUP_OPEN | LOOKUP_CREATE, &par_path, &new_qstr, &new_type);
4198 + vxdprintk(VXD_CBIT(misc, 2), "filename_parentat(new): %p", to_filename);
4199 + dump_path("cow (par)", &par_path);
4200 + if (IS_ERR(to_filename))
4203 + vxdprintk(VXD_CBIT(misc, 2), "to_filename refcnt=%d", to_filename->refcnt);
4205 + /* this puppy downs the dir inode mutex if successful.
4206 + dir_path will hold refs to dentry and mnt and
4207 + we'll have write access to the mnt */
4208 + new_dentry = filename_create(AT_FDCWD, to_filename, &dir_path, 0);
4209 + if (!new_dentry || IS_ERR(new_dentry)) {
4210 + path_put(&par_path);
4211 + vxdprintk(VXD_CBIT(misc, 2),
4212 + "filename_create(new) failed with %ld",
4213 + PTR_ERR(new_dentry));
4216 + vxdprintk(VXD_CBIT(misc, 2),
4217 + "filename_create(new): %p [" VS_Q("%.*s") ":%d]",
4219 + new_dentry->d_name.len, new_dentry->d_name.name,
4220 + new_dentry->d_name.len);
4222 + dump_path("cow (dir)", &dir_path);
4224 + /* take a reference on new_dentry */
4227 + /* dentry/mnt refs handed over to new_path */
4228 + new_path = &dir_path;
4230 + /* dentry for old/new dir */
4231 + dir = par_path.dentry;
4233 + /* give up reference on dir */
4234 + dput(new_path->dentry);
4236 + /* new_dentry already has a reference */
4237 + new_path->dentry = new_dentry;
4239 + ret = vfs_create(dir->d_inode, new_dentry, mode, 1);
4240 + vxdprintk(VXD_CBIT(misc, 2),
4241 + "vfs_create(new): %d", ret);
4242 + if (ret == -EEXIST) {
4243 + path_put(&par_path);
4244 + inode_unlock(dir->d_inode);
4245 + mnt_drop_write(new_path->mnt);
4246 + path_put(new_path);
4247 + new_dentry = NULL;
4251 + goto out_unlock_new;
4253 + /* the old file went away */
4255 + if ((redo = d_unhashed(old_dentry)))
4256 + goto out_unlock_new;
4258 + /* doesn't change refs for old_path */
4259 + old_file = dentry_open(&old_path, O_RDONLY, current_cred());
4260 + vxdprintk(VXD_CBIT(misc, 2),
4261 + "dentry_open(old): %p", old_file);
4262 + if (IS_ERR(old_file)) {
4263 + ret = PTR_ERR(old_file);
4264 + goto out_unlock_new;
4267 + /* doesn't change refs for new_path */
4268 + new_file = dentry_open(new_path, O_WRONLY, current_cred());
4269 + vxdprintk(VXD_CBIT(misc, 2),
4270 + "dentry_open(new): %p", new_file);
4271 + if (IS_ERR(new_file)) {
4272 + ret = PTR_ERR(new_file);
4273 + goto out_fput_old;
4276 + /* unlock the inode from filename_create() */
4277 + inode_unlock(dir->d_inode);
4279 + /* drop write access to mnt */
4280 + mnt_drop_write(new_path->mnt);
4284 + size = i_size_read(old_file->f_path.dentry->d_inode);
4285 + ret = do_cow_splice(old_file, new_file, size);
4286 + vxdprintk(VXD_CBIT(misc, 2), "do_splice_direct: %d", ret);
4288 + goto out_fput_both;
4289 + } else if (ret < size) {
4291 + goto out_fput_both;
4293 + struct inode *old_inode = old_dentry->d_inode;
4294 + struct inode *new_inode = new_dentry->d_inode;
4295 + struct iattr attr = {
4296 + .ia_uid = old_inode->i_uid,
4297 + .ia_gid = old_inode->i_gid,
4298 + .ia_valid = ATTR_UID | ATTR_GID
4301 + setattr_copy(new_inode, &attr);
4302 + mark_inode_dirty(new_inode);
4305 + /* lock rename mutex */
4306 + mutex_lock(&old_dentry->d_inode->i_sb->s_vfs_rename_mutex);
4308 + /* drop out late */
4310 + if ((redo = d_unhashed(old_dentry)))
4313 + vxdprintk(VXD_CBIT(misc, 2), /* print each name bounded by its d_name.len */
4314 + "vfs_rename: [" VS_Q("%.*s") ":%d] -> [" VS_Q("%.*s") ":%d]",
4315 + new_dentry->d_name.len, new_dentry->d_name.name,
4316 + new_dentry->d_name.len,
4317 + old_dentry->d_name.len, old_dentry->d_name.name,
4318 + old_dentry->d_name.len);
4319 + ret = vfs_rename(par_path.dentry->d_inode, new_dentry,
4320 + old_dentry->d_parent->d_inode, old_dentry, NULL, 0);
4321 + vxdprintk(VXD_CBIT(misc, 2), "vfs_rename: %d", ret);
4324 + mutex_unlock(&old_dentry->d_inode->i_sb->s_vfs_rename_mutex);
4327 + vxdprintk(VXD_CBIT(misc, 3),
4328 + "fput(new_file=%p[#%ld])", new_file,
4329 + atomic_long_read(&new_file->f_count));
4333 + vxdprintk(VXD_CBIT(misc, 3),
4334 + "fput(old_file=%p[#%ld])", old_file,
4335 + atomic_long_read(&old_file->f_count));
4339 + /* drop references from par_path */
4340 + path_put(&par_path);
4343 + /* unlock the inode from filename_create() */
4344 + inode_unlock(dir->d_inode);
4346 + /* drop write access to mnt */
4347 + mnt_drop_write(new_path->mnt);
4353 + /* error path cleanup */
4354 + vfs_unlink(dir->d_inode, new_dentry, NULL);
4358 + goto out_rel_both;
4360 + /* lookup dentry once again
4361 + old_path will be freed as old_path in out_rel_old */
4362 + ret = filename_lookup(AT_FDCWD, filename, LOOKUP_FOLLOW, &old_path, NULL);
4364 + goto out_rel_both;
4366 + /* drop reference on new_dentry */
4368 + new_dentry = old_path.dentry;
4370 + vxdprintk(VXD_CBIT(misc, 2),
4371 + "do_path_lookup(redo): %p [" VS_Q("%.*s") ":%d]",
4373 + new_dentry->d_name.len, new_dentry->d_name.name,
4374 + new_dentry->d_name.len);
4377 + dump_path("put (new)", new_path);
4379 + path_put(new_path);
4381 + dump_path("put (old)", &old_path);
4382 + path_put(&old_path);
4388 + new_dentry = ERR_PTR(ret);
4390 + // if (!IS_ERR(filename))
4391 + // putname(filename);
4392 + vxdprintk(VXD_CBIT(misc, 3),
4393 + "cow_break_link returning with %p", new_dentry);
4394 + return new_dentry;
4399 +int vx_info_mnt_namespace(struct mnt_namespace *ns, char *buffer)
4402 + struct vfsmount *vmnt;
4403 + char *pstr, *root;
4406 + pstr = kmalloc(PATH_MAX, GFP_KERNEL);
4410 + vmnt = &ns->root->mnt;
4412 + path.dentry = vmnt->mnt_root;
4413 + root = d_path(&path, pstr, PATH_MAX - 2);
4414 + length = sprintf(buffer + length,
4415 + "Namespace:\t%p [#%u]\n"
4416 + "RootPath:\t%s\n",
4417 + ns, atomic_read(&ns->count),
4423 +EXPORT_SYMBOL(vx_info_mnt_namespace);
4425 /* get the link contents into pagecache */
4426 const char *page_get_link(struct dentry *dentry, struct inode *inode,
4427 struct delayed_call *callback)
4428 diff -NurpP --minimal linux-4.9.217/fs/namespace.c linux-4.9.217-vs2.3.9.12/fs/namespace.c
4429 --- linux-4.9.217/fs/namespace.c 2020-03-27 00:51:21.950476834 +0000
4430 +++ linux-4.9.217-vs2.3.9.12/fs/namespace.c 2019-02-22 08:37:55.023061290 +0000
4432 #include <linux/magic.h>
4433 #include <linux/bootmem.h>
4434 #include <linux/task_work.h>
4435 +#include <linux/vs_base.h>
4436 +#include <linux/vs_context.h>
4437 +#include <linux/vs_tag.h>
4438 +#include <linux/vserver/space.h>
4439 +#include <linux/vserver/global.h>
4441 #include "internal.h"
4443 @@ -980,6 +985,10 @@ vfs_kern_mount(struct file_system_type *
4445 return ERR_PTR(-ENODEV);
4447 + if ((type->fs_flags & FS_BINARY_MOUNTDATA) &&
4448 + !vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT))
4449 + return ERR_PTR(-EPERM);
4451 mnt = alloc_vfsmnt(name);
4453 return ERR_PTR(-ENOMEM);
4454 @@ -1071,6 +1080,7 @@ static struct mount *clone_mnt(struct mo
4455 mnt->mnt.mnt_root = dget(root);
4456 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
4457 mnt->mnt_parent = mnt;
4458 + mnt->mnt_tag = old->mnt_tag;
4460 list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
4461 unlock_mount_hash();
4462 @@ -1666,7 +1676,8 @@ out_unlock:
4464 static inline bool may_mount(void)
4466 - return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
4467 + return vx_ns_capable(current->nsproxy->mnt_ns->user_ns,
4468 + CAP_SYS_ADMIN, VXC_SECURE_MOUNT);
4471 static inline bool may_mandlock(void)
4472 @@ -2181,6 +2192,7 @@ static int do_change_type(struct path *p
4476 + // mnt->mnt_flags = mnt_flags;
4479 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
4480 @@ -2209,12 +2221,14 @@ static bool has_locked_children(struct m
4481 * do loopback mount.
4483 static int do_loopback(struct path *path, const char *old_name,
4485 + vtag_t tag, unsigned long flags, int mnt_flags)
4487 struct path old_path;
4488 struct mount *mnt = NULL, *old, *parent;
4489 struct mountpoint *mp;
4490 + int recurse = flags & MS_REC;
4493 if (!old_name || !*old_name)
4495 err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
4496 @@ -2294,7 +2308,7 @@ static int change_mount_flags(struct vfs
4497 * on it - tough luck.
4499 static int do_remount(struct path *path, int flags, int mnt_flags,
4501 + void *data, vxid_t xid)
4504 struct super_block *sb = path->mnt->mnt_sb;
4505 @@ -2779,6 +2793,7 @@ long do_mount(const char *dev_name, cons
4512 if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
4513 @@ -2806,6 +2821,12 @@ long do_mount(const char *dev_name, cons
4514 if (!(flags & MS_NOATIME))
4515 mnt_flags |= MNT_RELATIME;
4517 + if (dx_parse_tag(data_page, &tag, 1, &mnt_flags, &flags)) {
4518 + /* FIXME: bind and re-mounts get the tag flag? */
4519 + if (flags & (MS_BIND|MS_REMOUNT))
4520 + flags |= MS_TAGID;
4523 /* Separate the per-mountpoint flags */
4524 if (flags & MS_NOSUID)
4525 mnt_flags |= MNT_NOSUID;
4526 @@ -2830,15 +2851,18 @@ long do_mount(const char *dev_name, cons
4527 mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
4530 + if (!vx_capable(CAP_SYS_ADMIN, VXC_DEV_MOUNT))
4531 + mnt_flags |= MNT_NODEV;
4533 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
4534 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
4535 MS_STRICTATIME | MS_NOREMOTELOCK | MS_SUBMOUNT);
4537 if (flags & MS_REMOUNT)
4538 retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
4541 else if (flags & MS_BIND)
4542 - retval = do_loopback(&path, dev_name, flags & MS_REC);
4543 + retval = do_loopback(&path, dev_name, tag, flags, mnt_flags);
4544 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
4545 retval = do_change_type(&path, flags);
4546 else if (flags & MS_MOVE)
4547 @@ -2979,6 +3003,7 @@ struct mnt_namespace *copy_mnt_ns(unsign
4548 p = next_mnt(p, old);
4551 + atomic_inc(&vs_global_mnt_ns);
4555 @@ -3154,9 +3179,10 @@ SYSCALL_DEFINE2(pivot_root, const char _
4556 new_mnt = real_mount(new.mnt);
4557 root_mnt = real_mount(root.mnt);
4558 old_mnt = real_mount(old.mnt);
4559 - if (IS_MNT_SHARED(old_mnt) ||
4560 + if ((IS_MNT_SHARED(old_mnt) ||
4561 IS_MNT_SHARED(new_mnt->mnt_parent) ||
4562 - IS_MNT_SHARED(root_mnt->mnt_parent))
4563 + IS_MNT_SHARED(root_mnt->mnt_parent)) &&
4564 + !vx_flags(VXF_STATE_SETUP, 0))
4566 if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
4568 @@ -3294,6 +3320,7 @@ void put_mnt_ns(struct mnt_namespace *ns
4569 if (!atomic_dec_and_test(&ns->count))
4571 drop_collected_mounts(&ns->root->mnt);
4572 + atomic_dec(&vs_global_mnt_ns);
4576 diff -NurpP --minimal linux-4.9.217/fs/nfs/client.c linux-4.9.217-vs2.3.9.12/fs/nfs/client.c
4577 --- linux-4.9.217/fs/nfs/client.c 2020-03-27 00:51:22.630466112 +0000
4578 +++ linux-4.9.217-vs2.3.9.12/fs/nfs/client.c 2019-10-05 14:58:45.410311711 +0000
4579 @@ -586,6 +586,9 @@ int nfs_init_server_rpcclient(struct nfs
4580 if (server->flags & NFS_MOUNT_SOFT)
4581 server->client->cl_softrtry = 1;
4583 + server->client->cl_tag = 0;
4584 + if (server->flags & NFS_MOUNT_TAGGED)
4585 + server->client->cl_tag = 1;
4588 EXPORT_SYMBOL_GPL(nfs_init_server_rpcclient);
4589 @@ -762,6 +765,10 @@ static void nfs_server_set_fsinfo(struct
4590 server->acdirmin = server->acdirmax = 0;
4593 + /* FIXME: needs fsinfo
4594 + if (server->flags & NFS_MOUNT_TAGGED)
4595 + sb->s_flags |= MS_TAGGED; */
4597 server->maxfilesize = fsinfo->maxfilesize;
4599 server->time_delta = fsinfo->time_delta;
4600 diff -NurpP --minimal linux-4.9.217/fs/nfs/dir.c linux-4.9.217-vs2.3.9.12/fs/nfs/dir.c
4601 --- linux-4.9.217/fs/nfs/dir.c 2020-03-27 00:51:22.830462961 +0000
4602 +++ linux-4.9.217-vs2.3.9.12/fs/nfs/dir.c 2020-04-01 09:40:28.555466454 +0000
4604 #include <linux/sched.h>
4605 #include <linux/kmemleak.h>
4606 #include <linux/xattr.h>
4607 +#include <linux/vs_tag.h>
4609 #include "delegation.h"
4611 @@ -1428,6 +1429,7 @@ struct dentry *nfs_lookup(struct inode *
4612 /* Success: notify readdir to use READDIRPLUS */
4613 nfs_advise_use_readdirplus(dir);
4615 + dx_propagate_tag(nd, inode);
4617 res = d_splice_alias(inode, dentry);
4619 diff -NurpP --minimal linux-4.9.217/fs/nfs/inode.c linux-4.9.217-vs2.3.9.12/fs/nfs/inode.c
4620 --- linux-4.9.217/fs/nfs/inode.c 2020-03-27 00:51:22.940461230 +0000
4621 +++ linux-4.9.217-vs2.3.9.12/fs/nfs/inode.c 2019-10-05 14:58:45.430311390 +0000
4623 #include <linux/slab.h>
4624 #include <linux/compat.h>
4625 #include <linux/freezer.h>
4626 +#include <linux/vs_tag.h>
4628 #include <asm/uaccess.h>
4630 @@ -383,6 +384,8 @@ nfs_fhget(struct super_block *sb, struct
4631 if (inode->i_state & I_NEW) {
4632 struct nfs_inode *nfsi = NFS_I(inode);
4633 unsigned long now = jiffies;
4637 /* We set i_ino for the few things that still rely on it,
4638 * such as stat(2) */
4639 @@ -427,8 +430,8 @@ nfs_fhget(struct super_block *sb, struct
4640 inode->i_version = 0;
4643 - inode->i_uid = make_kuid(&init_user_ns, -2);
4644 - inode->i_gid = make_kgid(&init_user_ns, -2);
4645 + kuid = make_kuid(&init_user_ns, -2);
4646 + kgid = make_kgid(&init_user_ns, -2);
4647 inode->i_blocks = 0;
4648 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
4650 @@ -463,11 +466,11 @@ nfs_fhget(struct super_block *sb, struct
4651 else if (nfs_server_capable(inode, NFS_CAP_NLINK))
4652 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
4653 if (fattr->valid & NFS_ATTR_FATTR_OWNER)
4654 - inode->i_uid = fattr->uid;
4655 + kuid = fattr->uid;
4656 else if (nfs_server_capable(inode, NFS_CAP_OWNER))
4657 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
4658 if (fattr->valid & NFS_ATTR_FATTR_GROUP)
4659 - inode->i_gid = fattr->gid;
4660 + kgid = fattr->gid;
4661 else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
4662 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
4663 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
4664 @@ -478,6 +481,10 @@ nfs_fhget(struct super_block *sb, struct
4666 inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
4668 + inode->i_uid = INOTAG_KUID(DX_TAG(inode), kuid, kgid);
4669 + inode->i_gid = INOTAG_KGID(DX_TAG(inode), kuid, kgid);
4670 + inode->i_tag = INOTAG_KTAG(DX_TAG(inode), kuid, kgid, GLOBAL_ROOT_TAG);
4671 + /* maybe fattr->xid someday */
4673 nfs_setsecurity(inode, fattr, label);
4675 @@ -619,6 +626,8 @@ void nfs_setattr_update_inode(struct ino
4676 inode->i_uid = attr->ia_uid;
4677 if ((attr->ia_valid & ATTR_GID) != 0)
4678 inode->i_gid = attr->ia_gid;
4679 + if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode))
4680 + inode->i_tag = attr->ia_tag;
4681 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS
4682 | NFS_INO_INVALID_ACL);
4684 @@ -1273,7 +1282,9 @@ static int nfs_check_inode_attributes(st
4685 struct nfs_inode *nfsi = NFS_I(inode);
4686 loff_t cur_size, new_isize;
4687 unsigned long invalid = 0;
4693 if (nfs_have_delegated_attributes(inode))
4695 @@ -1302,13 +1313,18 @@ static int nfs_check_inode_attributes(st
4699 + kuid = INOTAG_KUID(DX_TAG(inode), fattr->uid, fattr->gid);
4700 + kgid = INOTAG_KGID(DX_TAG(inode), fattr->uid, fattr->gid);
4701 + ktag = INOTAG_KTAG(DX_TAG(inode), fattr->uid, fattr->gid, GLOBAL_ROOT_TAG);
4703 /* Have any file permissions changed? */
4704 if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO))
4705 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
4706 - if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && !uid_eq(inode->i_uid, fattr->uid))
4707 + if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && !uid_eq(inode->i_uid, kuid))
4708 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
4709 - if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && !gid_eq(inode->i_gid, fattr->gid))
4710 + if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && !gid_eq(inode->i_gid, kgid))
4711 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
4712 + /* maybe check for tag too? */
4714 /* Has the link count changed? */
4715 if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink)
4716 @@ -1667,6 +1683,9 @@ static int nfs_update_inode(struct inode
4717 unsigned long save_cache_validity;
4718 bool have_writers = nfs_file_has_buffered_writers(nfsi);
4719 bool cache_revalidated = true;
4724 dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n",
4725 __func__, inode->i_sb->s_id, inode->i_ino,
4726 @@ -1786,6 +1805,9 @@ static int nfs_update_inode(struct inode
4727 cache_revalidated = false;
4730 + kuid = TAGINO_KUID(DX_TAG(inode), inode->i_uid, inode->i_tag);
4731 + kgid = TAGINO_KGID(DX_TAG(inode), inode->i_gid, inode->i_tag);
4732 + ktag = TAGINO_KTAG(DX_TAG(inode), inode->i_tag);
4734 if (fattr->valid & NFS_ATTR_FATTR_ATIME)
4735 memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
4736 @@ -1840,6 +1862,10 @@ static int nfs_update_inode(struct inode
4737 cache_revalidated = false;
4740 + inode->i_uid = INOTAG_KUID(DX_TAG(inode), kuid, kgid);
4741 + inode->i_gid = INOTAG_KGID(DX_TAG(inode), kuid, kgid);
4742 + inode->i_tag = INOTAG_KTAG(DX_TAG(inode), kuid, kgid, ktag);
4744 if (fattr->valid & NFS_ATTR_FATTR_NLINK) {
4745 if (inode->i_nlink != fattr->nlink) {
4746 invalid |= NFS_INO_INVALID_ATTR;
4747 diff -NurpP --minimal linux-4.9.217/fs/nfs/nfs3xdr.c linux-4.9.217-vs2.3.9.12/fs/nfs/nfs3xdr.c
4748 --- linux-4.9.217/fs/nfs/nfs3xdr.c 2016-12-11 19:17:54.000000000 +0000
4749 +++ linux-4.9.217-vs2.3.9.12/fs/nfs/nfs3xdr.c 2018-10-20 04:58:14.000000000 +0000
4751 #include <linux/nfs3.h>
4752 #include <linux/nfs_fs.h>
4753 #include <linux/nfsacl.h>
4754 +#include <linux/vs_tag.h>
4755 #include "internal.h"
4757 #define NFSDBG_FACILITY NFSDBG_XDR
4758 @@ -558,7 +559,8 @@ static __be32 *xdr_decode_nfstime3(__be3
4762 -static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
4763 +static void encode_sattr3(struct xdr_stream *xdr,
4764 + const struct iattr *attr, int tag)
4768 @@ -590,15 +592,19 @@ static void encode_sattr3(struct xdr_str
4772 - if (attr->ia_valid & ATTR_UID) {
4773 + if (attr->ia_valid & ATTR_UID ||
4774 + (tag && (attr->ia_valid & ATTR_TAG))) {
4776 - *p++ = cpu_to_be32(from_kuid(&init_user_ns, attr->ia_uid));
4777 + *p++ = cpu_to_be32(from_kuid(&init_user_ns,
4778 + TAGINO_KUID(tag, attr->ia_uid, attr->ia_tag)));
4782 - if (attr->ia_valid & ATTR_GID) {
4783 + if (attr->ia_valid & ATTR_GID ||
4784 + (tag && (attr->ia_valid & ATTR_TAG))) {
4786 - *p++ = cpu_to_be32(from_kgid(&init_user_ns, attr->ia_gid));
4787 + *p++ = cpu_to_be32(from_kgid(&init_user_ns,
4788 + TAGINO_KGID(tag, attr->ia_gid, attr->ia_tag)));
4792 @@ -887,7 +893,7 @@ static void nfs3_xdr_enc_setattr3args(st
4793 const struct nfs3_sattrargs *args)
4795 encode_nfs_fh3(xdr, args->fh);
4796 - encode_sattr3(xdr, args->sattr);
4797 + encode_sattr3(xdr, args->sattr, req->rq_task->tk_client->cl_tag);
4798 encode_sattrguard3(xdr, args);
4801 @@ -1037,13 +1043,13 @@ static void nfs3_xdr_enc_write3args(stru
4804 static void encode_createhow3(struct xdr_stream *xdr,
4805 - const struct nfs3_createargs *args)
4806 + const struct nfs3_createargs *args, int tag)
4808 encode_uint32(xdr, args->createmode);
4809 switch (args->createmode) {
4810 case NFS3_CREATE_UNCHECKED:
4811 case NFS3_CREATE_GUARDED:
4812 - encode_sattr3(xdr, args->sattr);
4813 + encode_sattr3(xdr, args->sattr, tag);
4815 case NFS3_CREATE_EXCLUSIVE:
4816 encode_createverf3(xdr, args->verifier);
4817 @@ -1058,7 +1064,7 @@ static void nfs3_xdr_enc_create3args(str
4818 const struct nfs3_createargs *args)
4820 encode_diropargs3(xdr, args->fh, args->name, args->len);
4821 - encode_createhow3(xdr, args);
4822 + encode_createhow3(xdr, args, req->rq_task->tk_client->cl_tag);
4826 @@ -1074,7 +1080,7 @@ static void nfs3_xdr_enc_mkdir3args(stru
4827 const struct nfs3_mkdirargs *args)
4829 encode_diropargs3(xdr, args->fh, args->name, args->len);
4830 - encode_sattr3(xdr, args->sattr);
4831 + encode_sattr3(xdr, args->sattr, req->rq_task->tk_client->cl_tag);
4835 @@ -1091,9 +1097,9 @@ static void nfs3_xdr_enc_mkdir3args(stru
4838 static void encode_symlinkdata3(struct xdr_stream *xdr,
4839 - const struct nfs3_symlinkargs *args)
4840 + const struct nfs3_symlinkargs *args, int tag)
4842 - encode_sattr3(xdr, args->sattr);
4843 + encode_sattr3(xdr, args->sattr, tag);
4844 encode_nfspath3(xdr, args->pages, args->pathlen);
4847 @@ -1102,7 +1108,7 @@ static void nfs3_xdr_enc_symlink3args(st
4848 const struct nfs3_symlinkargs *args)
4850 encode_diropargs3(xdr, args->fromfh, args->fromname, args->fromlen);
4851 - encode_symlinkdata3(xdr, args);
4852 + encode_symlinkdata3(xdr, args, req->rq_task->tk_client->cl_tag);
4853 xdr->buf->flags |= XDRBUF_WRITE;
4856 @@ -1131,24 +1137,24 @@ static void nfs3_xdr_enc_symlink3args(st
4859 static void encode_devicedata3(struct xdr_stream *xdr,
4860 - const struct nfs3_mknodargs *args)
4861 + const struct nfs3_mknodargs *args, int tag)
4863 - encode_sattr3(xdr, args->sattr);
4864 + encode_sattr3(xdr, args->sattr, tag);
4865 encode_specdata3(xdr, args->rdev);
4868 static void encode_mknoddata3(struct xdr_stream *xdr,
4869 - const struct nfs3_mknodargs *args)
4870 + const struct nfs3_mknodargs *args, int tag)
4872 encode_ftype3(xdr, args->type);
4873 switch (args->type) {
4876 - encode_devicedata3(xdr, args);
4877 + encode_devicedata3(xdr, args, tag);
4881 - encode_sattr3(xdr, args->sattr);
4882 + encode_sattr3(xdr, args->sattr, tag);
4886 @@ -1163,7 +1169,7 @@ static void nfs3_xdr_enc_mknod3args(stru
4887 const struct nfs3_mknodargs *args)
4889 encode_diropargs3(xdr, args->fh, args->name, args->len);
4890 - encode_mknoddata3(xdr, args);
4891 + encode_mknoddata3(xdr, args, req->rq_task->tk_client->cl_tag);
4895 diff -NurpP --minimal linux-4.9.217/fs/nfs/super.c linux-4.9.217-vs2.3.9.12/fs/nfs/super.c
4896 --- linux-4.9.217/fs/nfs/super.c 2020-03-27 00:51:23.470452877 +0000
4897 +++ linux-4.9.217-vs2.3.9.12/fs/nfs/super.c 2020-04-01 09:40:28.555466454 +0000
4899 #include <linux/parser.h>
4900 #include <linux/nsproxy.h>
4901 #include <linux/rcupdate.h>
4902 +#include <linux/vs_tag.h>
4904 #include <asm/uaccess.h>
4906 @@ -102,6 +103,7 @@ enum {
4912 /* Mount options that take string arguments */
4914 @@ -114,6 +116,9 @@ enum {
4915 /* Special mount options */
4916 Opt_userspace, Opt_deprecated, Opt_sloppy,
4918 + /* Linux-VServer tagging options */
4919 + Opt_tag, Opt_notag,
4924 @@ -183,6 +188,10 @@ static const match_table_t nfs_mount_opt
4925 { Opt_fscache_uniq, "fsc=%s" },
4926 { Opt_local_lock, "local_lock=%s" },
4928 + { Opt_tag, "tag" },
4929 + { Opt_notag, "notag" },
4930 + { Opt_tagid, "tagid=%u" },
4932 /* The following needs to be listed after all other options */
4933 { Opt_nfsvers, "v%s" },
4935 @@ -644,6 +653,7 @@ static void nfs_show_mount_options(struc
4936 { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" },
4937 { NFS_MOUNT_UNSHARED, ",nosharecache", "" },
4938 { NFS_MOUNT_NORESVPORT, ",noresvport", "" },
4939 + { NFS_MOUNT_TAGGED, ",tag", "" },
4942 const struct proc_nfs_info *nfs_infop;
4943 @@ -1341,6 +1351,14 @@ static int nfs_parse_mount_options(char
4944 case Opt_nomigration:
4945 mnt->options &= ~NFS_OPTION_MIGRATION;
4947 +#ifndef CONFIG_TAGGING_NONE
4949 + mnt->flags |= NFS_MOUNT_TAGGED;
4952 + mnt->flags &= ~NFS_MOUNT_TAGGED;
4957 * options that take numeric values
4958 @@ -1427,6 +1445,12 @@ static int nfs_parse_mount_options(char
4959 goto out_invalid_value;
4960 mnt->minorversion = option;
4962 +#ifdef CONFIG_PROPAGATE
4965 + mnt->flags |= NFS_MOUNT_TAGGED;
4970 * options that take text values
4971 diff -NurpP --minimal linux-4.9.217/fs/nfsd/auth.c linux-4.9.217-vs2.3.9.12/fs/nfsd/auth.c
4972 --- linux-4.9.217/fs/nfsd/auth.c 2020-03-27 00:51:23.680449564 +0000
4973 +++ linux-4.9.217-vs2.3.9.12/fs/nfsd/auth.c 2018-10-20 04:58:14.000000000 +0000
4975 /* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */
4977 #include <linux/sched.h>
4978 +#include <linux/vs_tag.h>
4982 @@ -35,6 +36,9 @@ int nfsd_setuser(struct svc_rqst *rqstp,
4984 new->fsuid = rqstp->rq_cred.cr_uid;
4985 new->fsgid = rqstp->rq_cred.cr_gid;
4986 + /* FIXME: this desperately needs a tag :)
4987 + new->xid = (vxid_t)INOTAG_TAG(DX_TAG_NFSD, cred.cr_uid, cred.cr_gid, 0);
4990 rqgi = rqstp->rq_cred.cr_group_info;
4992 diff -NurpP --minimal linux-4.9.217/fs/nfsd/nfs3xdr.c linux-4.9.217-vs2.3.9.12/fs/nfsd/nfs3xdr.c
4993 --- linux-4.9.217/fs/nfsd/nfs3xdr.c 2020-03-27 00:51:23.700449249 +0000
4994 +++ linux-4.9.217-vs2.3.9.12/fs/nfsd/nfs3xdr.c 2019-10-05 14:58:45.440311232 +0000
4997 #include <linux/namei.h>
4998 #include <linux/sunrpc/svc_xprt.h>
4999 +#include <linux/vs_tag.h>
5003 @@ -98,6 +99,8 @@ static __be32 *
5004 decode_sattr3(__be32 *p, struct iattr *iap)
5007 + kuid_t kuid = GLOBAL_ROOT_UID;
5008 + kgid_t kgid = GLOBAL_ROOT_GID;
5012 @@ -106,15 +109,18 @@ decode_sattr3(__be32 *p, struct iattr *i
5013 iap->ia_mode = ntohl(*p++);
5016 - iap->ia_uid = make_kuid(&init_user_ns, ntohl(*p++));
5017 + iap->ia_uid = kuid = make_kuid(&init_user_ns, ntohl(*p++));
5018 if (uid_valid(iap->ia_uid))
5019 iap->ia_valid |= ATTR_UID;
5022 - iap->ia_gid = make_kgid(&init_user_ns, ntohl(*p++));
5023 + iap->ia_gid = kgid = make_kgid(&init_user_ns, ntohl(*p++));
5024 if (gid_valid(iap->ia_gid))
5025 iap->ia_valid |= ATTR_GID;
5027 + iap->ia_uid = INOTAG_KUID(DX_TAG_NFSD, kuid, kgid);
5028 + iap->ia_gid = INOTAG_KGID(DX_TAG_NFSD, kuid, kgid);
5029 + iap->ia_tag = INOTAG_KTAG(DX_TAG_NFSD, kuid, kgid, GLOBAL_ROOT_TAG);
5033 @@ -167,8 +173,12 @@ encode_fattr3(struct svc_rqst *rqstp, __
5034 *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]);
5035 *p++ = htonl((u32) (stat->mode & S_IALLUGO));
5036 *p++ = htonl((u32) stat->nlink);
5037 - *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
5038 - *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
5039 + *p++ = htonl((u32) from_kuid(&init_user_ns,
5040 + TAGINO_KUID(0 /* FIXME: DX_TAG(dentry->d_inode) */,
5041 + stat->uid, stat->tag)));
5042 + *p++ = htonl((u32) from_kgid(&init_user_ns,
5043 + TAGINO_KGID(0 /* FIXME: DX_TAG(dentry->d_inode) */,
5044 + stat->gid, stat->tag)));
5045 if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) {
5046 p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN);
5048 diff -NurpP --minimal linux-4.9.217/fs/nfsd/nfs4xdr.c linux-4.9.217-vs2.3.9.12/fs/nfsd/nfs4xdr.c
5049 --- linux-4.9.217/fs/nfsd/nfs4xdr.c 2020-03-27 00:51:23.950445311 +0000
5050 +++ linux-4.9.217-vs2.3.9.12/fs/nfsd/nfs4xdr.c 2018-10-20 05:55:43.000000000 +0000
5052 #include <linux/utsname.h>
5053 #include <linux/pagemap.h>
5054 #include <linux/sunrpc/svcauth_gss.h>
5055 +#include <linux/vs_tag.h>
5059 @@ -2679,12 +2680,16 @@ out_acl:
5060 *p++ = cpu_to_be32(stat.nlink);
5062 if (bmval1 & FATTR4_WORD1_OWNER) {
5063 - status = nfsd4_encode_user(xdr, rqstp, stat.uid);
5064 + status = nfsd4_encode_user(xdr, rqstp,
5065 + TAGINO_KUID(DX_TAG(dentry->d_inode),
5066 + stat.uid, stat.tag));
5070 if (bmval1 & FATTR4_WORD1_OWNER_GROUP) {
5071 - status = nfsd4_encode_group(xdr, rqstp, stat.gid);
5072 + status = nfsd4_encode_group(xdr, rqstp,
5073 + TAGINO_KGID(DX_TAG(dentry->d_inode),
5074 + stat.gid, stat.tag));
5078 diff -NurpP --minimal linux-4.9.217/fs/nfsd/nfsxdr.c linux-4.9.217-vs2.3.9.12/fs/nfsd/nfsxdr.c
5079 --- linux-4.9.217/fs/nfsd/nfsxdr.c 2020-03-27 00:51:24.030444047 +0000
5080 +++ linux-4.9.217-vs2.3.9.12/fs/nfsd/nfsxdr.c 2018-10-20 04:58:14.000000000 +0000
5085 +#include <linux/vs_tag.h>
5087 #define NFSDDBG_FACILITY NFSDDBG_XDR
5089 @@ -89,6 +90,8 @@ static __be32 *
5090 decode_sattr(__be32 *p, struct iattr *iap)
5093 + kuid_t kuid = GLOBAL_ROOT_UID;
5094 + kgid_t kgid = GLOBAL_ROOT_GID;
5098 @@ -101,15 +104,18 @@ decode_sattr(__be32 *p, struct iattr *ia
5101 if ((tmp = ntohl(*p++)) != (u32)-1) {
5102 - iap->ia_uid = make_kuid(&init_user_ns, tmp);
5103 + iap->ia_uid = kuid = make_kuid(&init_user_ns, tmp);
5104 if (uid_valid(iap->ia_uid))
5105 iap->ia_valid |= ATTR_UID;
5107 if ((tmp = ntohl(*p++)) != (u32)-1) {
5108 - iap->ia_gid = make_kgid(&init_user_ns, tmp);
5109 + iap->ia_gid = kgid = make_kgid(&init_user_ns, tmp);
5110 if (gid_valid(iap->ia_gid))
5111 iap->ia_valid |= ATTR_GID;
5113 + iap->ia_uid = INOTAG_KUID(DX_TAG_NFSD, kuid, kgid);
5114 + iap->ia_gid = INOTAG_KGID(DX_TAG_NFSD, kuid, kgid);
5115 + iap->ia_tag = INOTAG_KTAG(DX_TAG_NFSD, kuid, kgid, GLOBAL_ROOT_TAG);
5116 if ((tmp = ntohl(*p++)) != (u32)-1) {
5117 iap->ia_valid |= ATTR_SIZE;
5119 @@ -154,8 +160,10 @@ encode_fattr(struct svc_rqst *rqstp, __b
5120 *p++ = htonl(nfs_ftypes[type >> 12]);
5121 *p++ = htonl((u32) stat->mode);
5122 *p++ = htonl((u32) stat->nlink);
5123 - *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
5124 - *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
5125 + *p++ = htonl((u32) from_kuid(&init_user_ns,
5126 + TAGINO_KUID(DX_TAG(dentry->d_inode), stat->uid, stat->tag)));
5127 + *p++ = htonl((u32) from_kgid(&init_user_ns,
5128 + TAGINO_KGID(DX_TAG(dentry->d_inode), stat->gid, stat->tag)));
5130 if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) {
5131 *p++ = htonl(NFS_MAXPATHLEN);
5132 diff -NurpP --minimal linux-4.9.217/fs/ocfs2/dlmglue.c linux-4.9.217-vs2.3.9.12/fs/ocfs2/dlmglue.c
5133 --- linux-4.9.217/fs/ocfs2/dlmglue.c 2020-03-27 00:51:25.080427501 +0000
5134 +++ linux-4.9.217-vs2.3.9.12/fs/ocfs2/dlmglue.c 2019-12-25 15:37:51.918430506 +0000
5135 @@ -2120,6 +2120,7 @@ static void __ocfs2_stuff_meta_lvb(struc
5136 lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
5137 lvb->lvb_iuid = cpu_to_be32(i_uid_read(inode));
5138 lvb->lvb_igid = cpu_to_be32(i_gid_read(inode));
5139 + lvb->lvb_itag = cpu_to_be16(i_tag_read(inode));
5140 lvb->lvb_imode = cpu_to_be16(inode->i_mode);
5141 lvb->lvb_inlink = cpu_to_be16(inode->i_nlink);
5142 lvb->lvb_iatime_packed =
5143 @@ -2170,6 +2171,7 @@ static void ocfs2_refresh_inode_from_lvb
5145 i_uid_write(inode, be32_to_cpu(lvb->lvb_iuid));
5146 i_gid_write(inode, be32_to_cpu(lvb->lvb_igid));
5147 + i_tag_write(inode, be16_to_cpu(lvb->lvb_itag));
5148 inode->i_mode = be16_to_cpu(lvb->lvb_imode);
5149 set_nlink(inode, be16_to_cpu(lvb->lvb_inlink));
5150 ocfs2_unpack_timespec(&inode->i_atime,
5151 diff -NurpP --minimal linux-4.9.217/fs/ocfs2/dlmglue.h linux-4.9.217-vs2.3.9.12/fs/ocfs2/dlmglue.h
5152 --- linux-4.9.217/fs/ocfs2/dlmglue.h 2020-03-27 00:51:25.080427501 +0000
5153 +++ linux-4.9.217-vs2.3.9.12/fs/ocfs2/dlmglue.h 2018-10-20 04:58:14.000000000 +0000
5154 @@ -46,7 +46,8 @@ struct ocfs2_meta_lvb {
5157 __be32 lvb_igeneration;
5158 - __be32 lvb_reserved2;
5160 + __be16 lvb_reserved2;
5163 #define OCFS2_QINFO_LVB_VERSION 1
5164 diff -NurpP --minimal linux-4.9.217/fs/ocfs2/file.c linux-4.9.217-vs2.3.9.12/fs/ocfs2/file.c
5165 --- linux-4.9.217/fs/ocfs2/file.c 2020-03-27 00:51:25.110427029 +0000
5166 +++ linux-4.9.217-vs2.3.9.12/fs/ocfs2/file.c 2018-10-20 04:58:14.000000000 +0000
5167 @@ -1151,7 +1151,7 @@ int ocfs2_setattr(struct dentry *dentry,
5168 attr->ia_valid &= ~ATTR_SIZE;
5170 #define OCFS2_VALID_ATTRS (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE \
5171 - | ATTR_GID | ATTR_UID | ATTR_MODE)
5172 + | ATTR_GID | ATTR_UID | ATTR_TAG | ATTR_MODE)
5173 if (!(attr->ia_valid & OCFS2_VALID_ATTRS))
5176 diff -NurpP --minimal linux-4.9.217/fs/ocfs2/inode.c linux-4.9.217-vs2.3.9.12/fs/ocfs2/inode.c
5177 --- linux-4.9.217/fs/ocfs2/inode.c 2016-12-11 19:17:54.000000000 +0000
5178 +++ linux-4.9.217-vs2.3.9.12/fs/ocfs2/inode.c 2018-10-20 04:58:14.000000000 +0000
5180 #include <linux/highmem.h>
5181 #include <linux/pagemap.h>
5182 #include <linux/quotaops.h>
5183 +#include <linux/vs_tag.h>
5185 #include <asm/byteorder.h>
5187 @@ -87,11 +88,13 @@ void ocfs2_set_inode_flags(struct inode
5189 unsigned int flags = OCFS2_I(inode)->ip_attr;
5191 - inode->i_flags &= ~(S_IMMUTABLE |
5192 + inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK |
5193 S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
5195 if (flags & OCFS2_IMMUTABLE_FL)
5196 inode->i_flags |= S_IMMUTABLE;
5197 + if (flags & OCFS2_IXUNLINK_FL)
5198 + inode->i_flags |= S_IXUNLINK;
5200 if (flags & OCFS2_SYNC_FL)
5201 inode->i_flags |= S_SYNC;
5202 @@ -101,25 +104,44 @@ void ocfs2_set_inode_flags(struct inode
5203 inode->i_flags |= S_NOATIME;
5204 if (flags & OCFS2_DIRSYNC_FL)
5205 inode->i_flags |= S_DIRSYNC;
5207 + inode->i_vflags &= ~(V_BARRIER | V_COW);
5209 + if (flags & OCFS2_BARRIER_FL)
5210 + inode->i_vflags |= V_BARRIER;
5211 + if (flags & OCFS2_COW_FL)
5212 + inode->i_vflags |= V_COW;
5215 /* Propagate flags from i_flags to OCFS2_I(inode)->ip_attr */
5216 void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi)
5218 unsigned int flags = oi->vfs_inode.i_flags;
5219 + unsigned int vflags = oi->vfs_inode.i_vflags;
5221 + oi->ip_attr &= ~(OCFS2_SYNC_FL | OCFS2_APPEND_FL |
5222 + OCFS2_IMMUTABLE_FL | OCFS2_IXUNLINK_FL |
5223 + OCFS2_NOATIME_FL | OCFS2_DIRSYNC_FL |
5224 + OCFS2_BARRIER_FL | OCFS2_COW_FL);
5226 + if (flags & S_IMMUTABLE)
5227 + oi->ip_attr |= OCFS2_IMMUTABLE_FL;
5228 + if (flags & S_IXUNLINK)
5229 + oi->ip_attr |= OCFS2_IXUNLINK_FL;
5231 - oi->ip_attr &= ~(OCFS2_SYNC_FL|OCFS2_APPEND_FL|
5232 - OCFS2_IMMUTABLE_FL|OCFS2_NOATIME_FL|OCFS2_DIRSYNC_FL);
5234 oi->ip_attr |= OCFS2_SYNC_FL;
5235 if (flags & S_APPEND)
5236 oi->ip_attr |= OCFS2_APPEND_FL;
5237 - if (flags & S_IMMUTABLE)
5238 - oi->ip_attr |= OCFS2_IMMUTABLE_FL;
5239 if (flags & S_NOATIME)
5240 oi->ip_attr |= OCFS2_NOATIME_FL;
5241 if (flags & S_DIRSYNC)
5242 oi->ip_attr |= OCFS2_DIRSYNC_FL;
5244 + if (vflags & V_BARRIER)
5245 + oi->ip_attr |= OCFS2_BARRIER_FL;
5246 + if (vflags & V_COW)
5247 + oi->ip_attr |= OCFS2_COW_FL;
5250 struct inode *ocfs2_ilookup(struct super_block *sb, u64 blkno)
5251 @@ -278,6 +300,8 @@ void ocfs2_populate_inode(struct inode *
5252 struct super_block *sb;
5253 struct ocfs2_super *osb;
5260 @@ -306,8 +330,12 @@ void ocfs2_populate_inode(struct inode *
5261 inode->i_generation = le32_to_cpu(fe->i_generation);
5262 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
5263 inode->i_mode = le16_to_cpu(fe->i_mode);
5264 - i_uid_write(inode, le32_to_cpu(fe->i_uid));
5265 - i_gid_write(inode, le32_to_cpu(fe->i_gid));
5266 + uid = le32_to_cpu(fe->i_uid);
5267 + gid = le32_to_cpu(fe->i_gid);
5268 + i_uid_write(inode, INOTAG_UID(DX_TAG(inode), uid, gid));
5269 + i_gid_write(inode, INOTAG_GID(DX_TAG(inode), uid, gid));
5270 + i_tag_write(inode, INOTAG_TAG(DX_TAG(inode), uid, gid,
5271 + /* le16_to_cpu(raw_inode->i_raw_tag) */ 0));
5273 /* Fast symlinks will have i_size but no allocated clusters. */
5274 if (S_ISLNK(inode->i_mode) && !fe->i_clusters) {
5275 diff -NurpP --minimal linux-4.9.217/fs/ocfs2/inode.h linux-4.9.217-vs2.3.9.12/fs/ocfs2/inode.h
5276 --- linux-4.9.217/fs/ocfs2/inode.h 2016-12-11 19:17:54.000000000 +0000
5277 +++ linux-4.9.217-vs2.3.9.12/fs/ocfs2/inode.h 2018-10-20 04:58:14.000000000 +0000
5278 @@ -155,6 +155,7 @@ int ocfs2_mark_inode_dirty(handle_t *han
5280 void ocfs2_set_inode_flags(struct inode *inode);
5281 void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi);
5282 +int ocfs2_sync_flags(struct inode *inode, int, int);
5284 static inline blkcnt_t ocfs2_inode_sector_count(struct inode *inode)
5286 diff -NurpP --minimal linux-4.9.217/fs/ocfs2/ioctl.c linux-4.9.217-vs2.3.9.12/fs/ocfs2/ioctl.c
5287 --- linux-4.9.217/fs/ocfs2/ioctl.c 2020-03-27 00:51:25.160426241 +0000
5288 +++ linux-4.9.217-vs2.3.9.12/fs/ocfs2/ioctl.c 2019-12-25 15:37:51.918430506 +0000
5289 @@ -76,7 +76,41 @@ static int ocfs2_get_inode_attr(struct i
5293 -static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
5294 +int ocfs2_sync_flags(struct inode *inode, int flags, int vflags)
5296 + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5297 + struct buffer_head *bh = NULL;
5298 + handle_t *handle = NULL;
5301 + status = ocfs2_inode_lock(inode, &bh, 1);
5303 + mlog_errno(status);
5306 + handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
5307 + if (IS_ERR(handle)) {
5308 + status = PTR_ERR(handle);
5309 + mlog_errno(status);
5313 + inode->i_flags = flags;
5314 + inode->i_vflags = vflags;
5315 + ocfs2_get_inode_flags(OCFS2_I(inode));
5317 + status = ocfs2_mark_inode_dirty(handle, inode, bh);
5319 + mlog_errno(status);
5321 + ocfs2_commit_trans(osb, handle);
5323 + ocfs2_inode_unlock(inode, 1);
5328 +int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
5331 struct ocfs2_inode_info *ocfs2_inode = OCFS2_I(inode);
5332 @@ -116,6 +150,11 @@ static int ocfs2_set_inode_attr(struct i
5336 + if (IS_BARRIER(inode)) {
5337 + vxwprintk_task(1, "messing with the barrier.");
5341 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
5342 if (IS_ERR(handle)) {
5343 status = PTR_ERR(handle);
5344 @@ -839,6 +878,7 @@ bail:
5349 long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
5351 struct inode *inode = file_inode(filp);
5352 diff -NurpP --minimal linux-4.9.217/fs/ocfs2/namei.c linux-4.9.217-vs2.3.9.12/fs/ocfs2/namei.c
5353 --- linux-4.9.217/fs/ocfs2/namei.c 2016-12-11 19:17:54.000000000 +0000
5354 +++ linux-4.9.217-vs2.3.9.12/fs/ocfs2/namei.c 2018-10-20 04:58:14.000000000 +0000
5356 #include <linux/slab.h>
5357 #include <linux/highmem.h>
5358 #include <linux/quotaops.h>
5359 +#include <linux/vs_tag.h>
5361 #include <cluster/masklog.h>
5363 @@ -516,6 +517,7 @@ static int __ocfs2_mknod_locked(struct i
5364 struct ocfs2_extent_list *fel;
5366 struct ocfs2_inode_info *oi = OCFS2_I(inode);
5371 @@ -553,8 +555,13 @@ static int __ocfs2_mknod_locked(struct i
5372 fe->i_suballoc_loc = cpu_to_le64(suballoc_loc);
5373 fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
5374 fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
5375 - fe->i_uid = cpu_to_le32(i_uid_read(inode));
5376 - fe->i_gid = cpu_to_le32(i_gid_read(inode));
5378 + ktag = make_ktag(&init_user_ns, dx_current_fstag(osb->sb));
5379 + fe->i_uid = cpu_to_le32(from_kuid(&init_user_ns,
5380 + TAGINO_KUID(DX_TAG(inode), inode->i_uid, ktag)));
5381 + fe->i_gid = cpu_to_le32(from_kgid(&init_user_ns,
5382 + TAGINO_KGID(DX_TAG(inode), inode->i_gid, ktag)));
5383 + inode->i_tag = ktag; /* is this correct? */
5384 fe->i_mode = cpu_to_le16(inode->i_mode);
5385 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
5386 fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
5387 diff -NurpP --minimal linux-4.9.217/fs/ocfs2/ocfs2_fs.h linux-4.9.217-vs2.3.9.12/fs/ocfs2/ocfs2_fs.h
5388 --- linux-4.9.217/fs/ocfs2/ocfs2_fs.h 2016-12-11 19:17:54.000000000 +0000
5389 +++ linux-4.9.217-vs2.3.9.12/fs/ocfs2/ocfs2_fs.h 2018-10-20 04:58:14.000000000 +0000
5390 @@ -275,6 +275,11 @@
5391 #define OCFS2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/
5392 #define OCFS2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */
5394 +#define OCFS2_IXUNLINK_FL FS_IXUNLINK_FL /* Immutable invert on unlink */
5396 +#define OCFS2_BARRIER_FL FS_BARRIER_FL /* Barrier for chroot() */
5397 +#define OCFS2_COW_FL FS_COW_FL /* Copy on Write marker */
5399 #define OCFS2_FL_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */
5400 #define OCFS2_FL_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */
5402 diff -NurpP --minimal linux-4.9.217/fs/ocfs2/ocfs2.h linux-4.9.217-vs2.3.9.12/fs/ocfs2/ocfs2.h
5403 --- linux-4.9.217/fs/ocfs2/ocfs2.h 2020-03-27 00:51:25.320423716 +0000
5404 +++ linux-4.9.217-vs2.3.9.12/fs/ocfs2/ocfs2.h 2018-10-20 04:58:14.000000000 +0000
5405 @@ -289,6 +289,7 @@ enum ocfs2_mount_options
5406 OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */
5407 OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */
5408 OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */
5409 + OCFS2_MOUNT_TAGGED = 1 << 18, /* use tagging */
5412 #define OCFS2_OSB_SOFT_RO 0x0001
5413 diff -NurpP --minimal linux-4.9.217/fs/ocfs2/super.c linux-4.9.217-vs2.3.9.12/fs/ocfs2/super.c
5414 --- linux-4.9.217/fs/ocfs2/super.c 2020-03-27 00:51:25.350423246 +0000
5415 +++ linux-4.9.217-vs2.3.9.12/fs/ocfs2/super.c 2018-10-20 04:58:14.000000000 +0000
5416 @@ -188,6 +188,7 @@ enum {
5418 Opt_journal_async_commit,
5420 + Opt_tag, Opt_notag, Opt_tagid,
5424 @@ -221,6 +222,9 @@ static const match_table_t tokens = {
5425 {Opt_dir_resv_level, "dir_resv_level=%u"},
5426 {Opt_journal_async_commit, "journal_async_commit"},
5427 {Opt_err_cont, "errors=continue"},
5429 + {Opt_notag, "notag"},
5430 + {Opt_tagid, "tagid=%u"},
5434 @@ -672,6 +676,13 @@ static int ocfs2_remount(struct super_bl
5438 + if ((osb->s_mount_opt & OCFS2_MOUNT_TAGGED) !=
5439 + (parsed_options.mount_opt & OCFS2_MOUNT_TAGGED)) {
5441 + mlog(ML_ERROR, "Cannot change tagging on remount\n");
5445 /* We're going to/from readonly mode. */
5446 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
5447 /* Disable quota accounting before remounting RO */
5448 @@ -1161,6 +1172,9 @@ static int ocfs2_fill_super(struct super
5450 ocfs2_complete_mount_recovery(osb);
5452 + if (osb->s_mount_opt & OCFS2_MOUNT_TAGGED)
5453 + sb->s_flags |= MS_TAGGED;
5455 if (ocfs2_mount_local(osb))
5456 snprintf(nodestr, sizeof(nodestr), "local");
5458 @@ -1480,6 +1494,20 @@ static int ocfs2_parse_options(struct su
5459 case Opt_journal_async_commit:
5460 mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT;
5462 +#ifndef CONFIG_TAGGING_NONE
5464 + mopt->mount_opt |= OCFS2_MOUNT_TAGGED;
5467 + mopt->mount_opt &= ~OCFS2_MOUNT_TAGGED;
5470 +#ifdef CONFIG_PROPAGATE
5473 + mopt->mount_opt |= OCFS2_MOUNT_TAGGED;
5478 "Unrecognized mount option \"%s\" "
5479 diff -NurpP --minimal linux-4.9.217/fs/open.c linux-4.9.217-vs2.3.9.12/fs/open.c
5480 --- linux-4.9.217/fs/open.c 2020-03-27 00:51:25.400422458 +0000
5481 +++ linux-4.9.217-vs2.3.9.12/fs/open.c 2020-04-01 09:40:28.635465130 +0000
5483 #include <linux/ima.h>
5484 #include <linux/dnotify.h>
5485 #include <linux/compat.h>
5486 +#include <linux/vs_base.h>
5487 +#include <linux/vs_limit.h>
5488 +#include <linux/vs_tag.h>
5489 +#include <linux/vs_cowbl.h>
5490 +#include <linux/vserver/dlimit.h>
5492 #include "internal.h"
5494 @@ -65,12 +70,17 @@ int do_truncate(struct dentry *dentry, l
5498 -long vfs_truncate(const struct path *path, loff_t length)
5499 +long vfs_truncate(struct path *path, loff_t length)
5501 struct inode *inode;
5502 struct dentry *upperdentry;
5505 +#ifdef CONFIG_VSERVER_COWBL
5506 + error = cow_check_and_break(path);
5510 inode = path->dentry->d_inode;
5512 /* For directories it's -EISDIR, for other non-regulars - -EINVAL */
5513 @@ -584,6 +594,13 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, cons
5514 unsigned int lookup_flags = LOOKUP_FOLLOW;
5516 error = user_path_at(dfd, filename, lookup_flags, &path);
5517 +#ifdef CONFIG_VSERVER_COWBL
5519 + error = cow_check_and_break(&path);
5525 error = chmod_common(&path, mode);
5527 @@ -618,13 +635,15 @@ retry_deleg:
5528 if (!uid_valid(uid))
5530 newattrs.ia_valid |= ATTR_UID;
5531 - newattrs.ia_uid = uid;
5532 + newattrs.ia_uid = make_kuid(&init_user_ns,
5533 + dx_map_uid(user));
5535 if (group != (gid_t) -1) {
5536 if (!gid_valid(gid))
5538 newattrs.ia_valid |= ATTR_GID;
5539 - newattrs.ia_gid = gid;
5540 + newattrs.ia_gid = make_kgid(&init_user_ns,
5541 + dx_map_gid(group));
5543 if (!S_ISDIR(inode->i_mode))
5544 newattrs.ia_valid |=
5545 @@ -662,6 +681,10 @@ retry:
5546 error = mnt_want_write(path.mnt);
5549 +#ifdef CONFIG_VSERVER_COWBL
5550 + error = cow_check_and_break(&path);
5553 error = chown_common(&path, user, group);
5554 mnt_drop_write(path.mnt);
5556 diff -NurpP --minimal linux-4.9.217/fs/proc/array.c linux-4.9.217-vs2.3.9.12/fs/proc/array.c
5557 --- linux-4.9.217/fs/proc/array.c 2020-03-27 00:51:25.650418515 +0000
5558 +++ linux-4.9.217-vs2.3.9.12/fs/proc/array.c 2019-10-05 14:58:45.660307716 +0000
5560 #include <linux/string_helpers.h>
5561 #include <linux/user_namespace.h>
5562 #include <linux/fs_struct.h>
5563 +#include <linux/vs_context.h>
5564 +#include <linux/vs_network.h>
5566 #include <asm/pgtable.h>
5567 #include <asm/processor.h>
5568 @@ -170,6 +172,9 @@ static inline void task_state(struct seq
5569 ppid = pid_alive(p) ?
5570 task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
5572 + if (unlikely(vx_current_initpid(p->pid)))
5575 tracer = ptrace_parent(p);
5577 tpid = task_pid_nr_ns(tracer, ns);
5578 @@ -307,8 +312,8 @@ static inline void task_sig(struct seq_f
5579 render_sigset_t(m, "SigCgt:\t", &caught);
5582 -static void render_cap_t(struct seq_file *m, const char *header,
5584 +void render_cap_t(struct seq_file *m, const char *header,
5585 + struct vx_info *vxi, kernel_cap_t *a)
5589 @@ -335,11 +340,12 @@ static inline void task_cap(struct seq_f
5590 cap_ambient = cred->cap_ambient;
5593 - render_cap_t(m, "CapInh:\t", &cap_inheritable);
5594 - render_cap_t(m, "CapPrm:\t", &cap_permitted);
5595 - render_cap_t(m, "CapEff:\t", &cap_effective);
5596 - render_cap_t(m, "CapBnd:\t", &cap_bset);
5597 - render_cap_t(m, "CapAmb:\t", &cap_ambient);
5598 + /* FIXME: maybe move the p->vx_info masking to __task_cred() ? */
5599 + render_cap_t(m, "CapInh:\t", p->vx_info, &cap_inheritable);
5600 + render_cap_t(m, "CapPrm:\t", p->vx_info, &cap_permitted);
5601 + render_cap_t(m, "CapEff:\t", p->vx_info, &cap_effective);
5602 + render_cap_t(m, "CapBnd:\t", p->vx_info, &cap_bset);
5603 + render_cap_t(m, "CapAmb:\t", p->vx_info, &cap_ambient);
5606 static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
5607 @@ -391,6 +397,43 @@ static void task_cpus_allowed(struct seq
5608 cpumask_pr_args(&task->cpus_allowed));
5611 +int proc_pid_nsproxy(struct seq_file *m, struct pid_namespace *ns,
5612 + struct pid *pid, struct task_struct *task)
5614 + seq_printf(m, "Proxy:\t%p(%c)\n"
5622 + (task->nsproxy == init_task.nsproxy ? 'I' : '-'),
5623 + atomic_read(&task->nsproxy->count),
5624 + task->nsproxy->uts_ns,
5625 + (task->nsproxy->uts_ns == init_task.nsproxy->uts_ns ? 'I' : '-'),
5626 + task->nsproxy->ipc_ns,
5627 + (task->nsproxy->ipc_ns == init_task.nsproxy->ipc_ns ? 'I' : '-'),
5628 + task->nsproxy->mnt_ns,
5629 + (task->nsproxy->mnt_ns == init_task.nsproxy->mnt_ns ? 'I' : '-'),
5630 + task->nsproxy->pid_ns_for_children,
5631 + (task->nsproxy->pid_ns_for_children ==
5632 + init_task.nsproxy->pid_ns_for_children ? 'I' : '-'),
5633 + task->nsproxy->net_ns,
5634 + (task->nsproxy->net_ns == init_task.nsproxy->net_ns ? 'I' : '-'));
5638 +void task_vs_id(struct seq_file *m, struct task_struct *task)
5640 + if (task_vx_flags(task, VXF_HIDE_VINFO, 0))
5643 + seq_printf(m, "VxID:\t%d\n", vx_task_xid(task));
5644 + seq_printf(m, "NxID:\t%d\n", nx_task_nid(task));
5648 int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
5649 struct pid *pid, struct task_struct *task)
5651 @@ -408,6 +451,7 @@ int proc_pid_status(struct seq_file *m,
5652 task_seccomp(m, task);
5653 task_cpus_allowed(m, task);
5654 cpuset_task_status_allowed(m, task);
5655 + task_vs_id(m, task);
5656 task_context_switch_counts(m, task);
5659 @@ -523,6 +567,17 @@ static int do_task_stat(struct seq_file
5660 /* convert nsec -> ticks */
5661 start_time = nsec_to_clock_t(task->real_start_time);
5663 + /* fixup start time for virt uptime */
5664 + if (vx_flags(VXF_VIRT_UPTIME, 0)) {
5665 + unsigned long long bias =
5666 + current->vx_info->cvirt.bias_clock;
5668 + if (start_time > bias)
5669 + start_time -= bias;
5674 seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state);
5675 seq_put_decimal_ll(m, " ", ppid);
5676 seq_put_decimal_ll(m, " ", pgid);
5677 diff -NurpP --minimal linux-4.9.217/fs/proc/base.c linux-4.9.217-vs2.3.9.12/fs/proc/base.c
5678 --- linux-4.9.217/fs/proc/base.c 2020-03-27 00:51:25.650418515 +0000
5679 +++ linux-4.9.217-vs2.3.9.12/fs/proc/base.c 2019-10-05 14:58:45.660307716 +0000
5681 #include <linux/slab.h>
5682 #include <linux/flex_array.h>
5683 #include <linux/posix-timers.h>
5684 +#include <linux/vs_context.h>
5685 +#include <linux/vs_network.h>
5686 #ifdef CONFIG_HARDWALL
5687 #include <asm/hardwall.h>
5689 @@ -1079,10 +1081,15 @@ static int __set_oom_adj(struct file *fi
5690 mutex_lock(&oom_adj_mutex);
5692 if (oom_adj < task->signal->oom_score_adj &&
5693 - !capable(CAP_SYS_RESOURCE)) {
5694 + !vx_capable(CAP_SYS_RESOURCE, VXC_OOM_ADJUST)) {
5699 + /* prevent guest processes from circumventing the oom killer */
5700 + if (vx_current_xid() && (oom_adj == OOM_DISABLE))
5701 + oom_adj = OOM_ADJUST_MIN;
5704 * /proc/pid/oom_adj is provided for legacy purposes, ask users to use
5705 * /proc/pid/oom_score_adj instead.
5706 @@ -1708,6 +1715,8 @@ struct inode *proc_pid_make_inode(struct
5707 inode->i_gid = cred->egid;
5710 + /* procfs is xid tagged */
5711 + i_tag_write(inode, (vtag_t)vx_task_xid(task));
5712 security_task_to_inode(task, inode);
5715 @@ -1753,6 +1762,8 @@ int pid_getattr(struct vfsmount *mnt, st
5719 +// static unsigned name_to_int(struct dentry *dentry);
5722 * Exceptional case: normally we are not allowed to unhash a busy
5723 * directory. In this case, however, we can do it - no aliasing problems
5724 @@ -1781,6 +1792,19 @@ int pid_revalidate(struct dentry *dentry
5725 task = get_proc_task(inode);
5728 + unsigned pid = name_to_int(&dentry->d_name);
5730 + if (pid != ~0U && pid != vx_map_pid(task->pid) &&
5731 + pid != __task_pid_nr_ns(task, PIDTYPE_PID,
5732 + task_active_pid_ns(task))) {
5733 + vxdprintk(VXD_CBIT(misc, 10),
5734 + VS_Q("%*s") " dropped by pid_revalidate(%d!=%d)",
5735 + dentry->d_name.len, dentry->d_name.name,
5736 + pid, vx_map_pid(task->pid));
5737 + put_task_struct(task);
5741 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
5742 task_dumpable(task)) {
5744 @@ -2445,6 +2469,13 @@ static struct dentry *proc_pident_lookup
5748 + /* TODO: maybe we can come up with a generic approach? */
5749 + if (task_vx_flags(task, VXF_HIDE_VINFO, 0) &&
5750 + (dentry->d_name.len == 5) &&
5751 + (!memcmp(dentry->d_name.name, "vinfo", 5) ||
5752 + !memcmp(dentry->d_name.name, "ninfo", 5)))
5756 * Yes, it does not scale. And it should not. Don't add
5757 * new entries into /proc/<tgid>/ without very good reasons.
5758 @@ -2884,6 +2915,11 @@ static int proc_pid_personality(struct s
5759 static const struct file_operations proc_task_operations;
5760 static const struct inode_operations proc_task_inode_operations;
5762 +extern int proc_pid_vx_info(struct seq_file *,
5763 + struct pid_namespace *, struct pid *, struct task_struct *);
5764 +extern int proc_pid_nx_info(struct seq_file *,
5765 + struct pid_namespace *, struct pid *, struct task_struct *);
5767 static const struct pid_entry tgid_base_stuff[] = {
5768 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
5769 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
5770 @@ -2948,6 +2984,8 @@ static const struct pid_entry tgid_base_
5771 #ifdef CONFIG_CGROUPS
5772 ONE("cgroup", S_IRUGO, proc_cgroup_show),
5774 + ONE("vinfo", S_IRUGO, proc_pid_vx_info),
5775 + ONE("ninfo", S_IRUGO, proc_pid_nx_info),
5776 ONE("oom_score", S_IRUGO, proc_oom_score),
5777 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
5778 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
5779 @@ -3163,7 +3201,7 @@ retry:
5781 pid = find_ge_pid(iter.tgid, ns);
5783 - iter.tgid = pid_nr_ns(pid, ns);
5784 + iter.tgid = pid_unmapped_nr_ns(pid, ns);
5785 iter.task = pid_task(pid, PIDTYPE_PID);
5786 /* What we to know is if the pid we have find is the
5787 * pid of a thread_group_leader. Testing for task
5788 @@ -3223,8 +3261,10 @@ int proc_pid_readdir(struct file *file,
5789 if (!has_pid_permissions(ns, iter.task, 2))
5792 - len = snprintf(name, sizeof(name), "%d", iter.tgid);
5793 + len = snprintf(name, sizeof(name), "%d", vx_map_tgid(iter.tgid));
5794 ctx->pos = iter.tgid + TGID_OFFSET;
5795 + if (!vx_proc_task_visible(iter.task))
5797 if (!proc_fill_cache(file, ctx, name, len,
5798 proc_pid_instantiate, iter.task, NULL)) {
5799 put_task_struct(iter.task);
5800 @@ -3361,6 +3401,7 @@ static const struct pid_entry tid_base_s
5801 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
5802 REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
5804 + ONE("nsproxy", S_IRUGO, proc_pid_nsproxy),
5807 static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
5808 @@ -3427,6 +3468,8 @@ static struct dentry *proc_task_lookup(s
5809 tid = name_to_int(&dentry->d_name);
5812 + if (vx_current_initpid(tid))
5815 ns = dentry->d_sb->s_fs_info;
5817 diff -NurpP --minimal linux-4.9.217/fs/proc/generic.c linux-4.9.217-vs2.3.9.12/fs/proc/generic.c
5818 --- linux-4.9.217/fs/proc/generic.c 2020-03-27 00:51:25.650418515 +0000
5819 +++ linux-4.9.217-vs2.3.9.12/fs/proc/generic.c 2018-10-20 04:58:14.000000000 +0000
5821 #include <linux/bitops.h>
5822 #include <linux/spinlock.h>
5823 #include <linux/completion.h>
5824 +#include <linux/vserver/inode.h>
5825 #include <asm/uaccess.h>
5827 #include "internal.h"
5828 @@ -66,8 +67,16 @@ static struct proc_dir_entry *pde_subdir
5829 node = node->rb_left;
5830 else if (result > 0)
5831 node = node->rb_right;
5834 + if (!vx_hide_check(0, de->vx_flags)) {
5835 + vxdprintk(VXD_CBIT(misc, 9),
5837 + " hidden in pde_subdir_find()",
5838 + de->namelen, de->name);
5846 @@ -241,6 +250,8 @@ struct dentry *proc_lookup_de(struct pro
5847 return ERR_PTR(-ENOMEM);
5848 d_set_d_op(dentry, &simple_dentry_operations);
5849 d_add(dentry, inode);
5850 + /* generic proc entries belong to the host */
5851 + i_tag_write(inode, 0);
5854 read_unlock(&proc_subdir_lock);
5855 @@ -287,6 +298,12 @@ int proc_readdir_de(struct proc_dir_entr
5857 struct proc_dir_entry *next;
5859 + if (!vx_hide_check(0, de->vx_flags)) {
5860 + vxdprintk(VXD_CBIT(misc, 9),
5861 + VS_Q("%*s") " hidden in proc_readdir_de()",
5862 + de->namelen, de->name);
5865 read_unlock(&proc_subdir_lock);
5866 if (!dir_emit(ctx, de->name, de->namelen,
5867 de->low_ino, de->mode >> 12)) {
5868 @@ -294,6 +311,7 @@ int proc_readdir_de(struct proc_dir_entr
5871 read_lock(&proc_subdir_lock);
5874 next = pde_subdir_next(de);
5876 @@ -387,6 +405,7 @@ static struct proc_dir_entry *__proc_cre
5879 ent->subdir = RB_ROOT;
5880 + ent->vx_flags = IATTR_PROC_DEFAULT;
5881 atomic_set(&ent->count, 1);
5882 spin_lock_init(&ent->pde_unload_lock);
5883 INIT_LIST_HEAD(&ent->pde_openers);
5884 @@ -413,7 +432,8 @@ struct proc_dir_entry *proc_symlink(cons
5890 + ent->vx_flags = IATTR_PROC_SYMLINK;
5894 diff -NurpP --minimal linux-4.9.217/fs/proc/inode.c linux-4.9.217-vs2.3.9.12/fs/proc/inode.c
5895 --- linux-4.9.217/fs/proc/inode.c 2020-03-27 00:51:25.670418199 +0000
5896 +++ linux-4.9.217-vs2.3.9.12/fs/proc/inode.c 2018-10-20 05:55:43.000000000 +0000
5897 @@ -433,6 +433,8 @@ struct inode *proc_get_inode(struct supe
5898 inode->i_uid = de->uid;
5899 inode->i_gid = de->gid;
5902 + PROC_I(inode)->vx_flags = de->vx_flags;
5904 inode->i_size = de->size;
5906 diff -NurpP --minimal linux-4.9.217/fs/proc/internal.h linux-4.9.217-vs2.3.9.12/fs/proc/internal.h
5907 --- linux-4.9.217/fs/proc/internal.h 2020-03-27 00:51:25.680418042 +0000
5908 +++ linux-4.9.217-vs2.3.9.12/fs/proc/internal.h 2018-10-20 05:55:43.000000000 +0000
5910 #include <linux/spinlock.h>
5911 #include <linux/atomic.h>
5912 #include <linux/binfmts.h>
5913 +#include <linux/vs_pid.h>
5915 struct ctl_table_header;
5917 @@ -34,6 +35,7 @@ struct proc_dir_entry {
5923 const struct inode_operations *proc_iops;
5924 const struct file_operations *proc_fops;
5925 @@ -51,15 +53,22 @@ struct proc_dir_entry {
5933 int (*proc_get_link)(struct dentry *, struct path *);
5934 int (*proc_show)(struct seq_file *m,
5935 struct pid_namespace *ns, struct pid *pid,
5936 struct task_struct *task);
5937 + int (*proc_vs_read)(char *page);
5938 + int (*proc_vxi_read)(struct vx_info *vxi, char *page);
5939 + int (*proc_nxi_read)(struct nx_info *nxi, char *page);
5947 struct proc_dir_entry *pde;
5948 @@ -93,11 +102,16 @@ static inline struct pid *proc_pid(struc
5949 return PROC_I(inode)->pid;
5952 -static inline struct task_struct *get_proc_task(struct inode *inode)
5953 +static inline struct task_struct *get_proc_task_real(struct inode *inode)
5955 return get_pid_task(proc_pid(inode), PIDTYPE_PID);
5958 +static inline struct task_struct *get_proc_task(struct inode *inode)
5960 + return vx_get_proc_task(inode, proc_pid(inode));
5963 static inline int task_dumpable(struct task_struct *task)
5966 @@ -156,6 +170,8 @@ extern int proc_pid_status(struct seq_fi
5967 struct pid *, struct task_struct *);
5968 extern int proc_pid_statm(struct seq_file *, struct pid_namespace *,
5969 struct pid *, struct task_struct *);
5970 +extern int proc_pid_nsproxy(struct seq_file *m, struct pid_namespace *ns,
5971 + struct pid *pid, struct task_struct *task);
5975 diff -NurpP --minimal linux-4.9.217/fs/proc/loadavg.c linux-4.9.217-vs2.3.9.12/fs/proc/loadavg.c
5976 --- linux-4.9.217/fs/proc/loadavg.c 2016-12-11 19:17:54.000000000 +0000
5977 +++ linux-4.9.217-vs2.3.9.12/fs/proc/loadavg.c 2018-10-20 04:58:14.000000000 +0000
5980 static int loadavg_proc_show(struct seq_file *m, void *v)
5982 + unsigned long running;
5983 + unsigned int threads;
5984 unsigned long avnrun[3];
5986 get_avenrun(avnrun, FIXED_1/200, 0);
5988 + if (vx_flags(VXF_VIRT_LOAD, 0)) {
5989 + struct vx_info *vxi = current_vx_info();
5991 + running = atomic_read(&vxi->cvirt.nr_running);
5992 + threads = atomic_read(&vxi->cvirt.nr_threads);
5994 + running = nr_running();
5995 + threads = nr_threads;
5998 seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %ld/%d %d\n",
5999 LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]),
6000 LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
6001 LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]),
6002 - nr_running(), nr_threads,
6004 task_active_pid_ns(current)->last_pid);
6007 diff -NurpP --minimal linux-4.9.217/fs/proc/meminfo.c linux-4.9.217-vs2.3.9.12/fs/proc/meminfo.c
6008 --- linux-4.9.217/fs/proc/meminfo.c 2016-12-11 19:17:54.000000000 +0000
6009 +++ linux-4.9.217-vs2.3.9.12/fs/proc/meminfo.c 2018-10-20 04:58:14.000000000 +0000
6010 @@ -55,7 +55,8 @@ static int meminfo_proc_show(struct seq_
6012 committed = percpu_counter_read_positive(&vm_committed_as);
6014 - cached = global_node_page_state(NR_FILE_PAGES) -
6015 + cached = vx_flags(VXF_VIRT_MEM, 0) ?
6016 + vx_vsi_cached(&i) : global_node_page_state(NR_FILE_PAGES) -
6017 total_swapcache_pages() - i.bufferram;
6020 diff -NurpP --minimal linux-4.9.217/fs/proc/root.c linux-4.9.217-vs2.3.9.12/fs/proc/root.c
6021 --- linux-4.9.217/fs/proc/root.c 2016-12-11 19:17:54.000000000 +0000
6022 +++ linux-4.9.217-vs2.3.9.12/fs/proc/root.c 2018-10-20 04:58:14.000000000 +0000
6024 #include <linux/mount.h>
6025 #include <linux/pid_namespace.h>
6026 #include <linux/parser.h>
6027 +#include <linux/vserver/inode.h>
6029 #include "internal.h"
6031 +struct proc_dir_entry *proc_virtual;
6033 +extern void proc_vx_init(void);
6036 Opt_gid, Opt_hidepid, Opt_err,
6038 @@ -145,6 +150,7 @@ void __init proc_root_init(void)
6040 proc_mkdir("bus", NULL);
6045 static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat
6046 @@ -206,6 +212,7 @@ struct proc_dir_entry proc_root = {
6047 .proc_iops = &proc_root_inode_operations,
6048 .proc_fops = &proc_root_operations,
6049 .parent = &proc_root,
6050 + .vx_flags = IATTR_ADMIN | IATTR_WATCH,
6054 diff -NurpP --minimal linux-4.9.217/fs/proc/self.c linux-4.9.217-vs2.3.9.12/fs/proc/self.c
6055 --- linux-4.9.217/fs/proc/self.c 2016-12-11 19:17:54.000000000 +0000
6056 +++ linux-4.9.217-vs2.3.9.12/fs/proc/self.c 2018-10-20 04:58:14.000000000 +0000
6058 #include <linux/sched.h>
6059 #include <linux/slab.h>
6060 #include <linux/pid_namespace.h>
6061 +#include <linux/vserver/inode.h>
6062 #include "internal.h"
6065 @@ -54,6 +55,8 @@ int proc_setup_self(struct super_block *
6066 self = d_alloc_name(s->s_root, "self");
6068 struct inode *inode = new_inode(s);
6070 + // self->vx_flags = IATTR_PROC_SYMLINK;
6072 inode->i_ino = self_inum;
6073 inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
6074 diff -NurpP --minimal linux-4.9.217/fs/proc/stat.c linux-4.9.217-vs2.3.9.12/fs/proc/stat.c
6075 --- linux-4.9.217/fs/proc/stat.c 2020-03-27 00:51:25.690417888 +0000
6076 +++ linux-4.9.217-vs2.3.9.12/fs/proc/stat.c 2019-10-13 15:58:54.758080005 +0000
6078 #include <linux/slab.h>
6079 #include <linux/time.h>
6080 #include <linux/irqnr.h>
6081 +#include <linux/vserver/cvirt.h>
6082 #include <linux/cputime.h>
6083 #include <linux/tick.h>
6084 +#include <linux/cpuset.h>
6086 #ifndef arch_irq_stat_cpu
6087 #define arch_irq_stat_cpu(cpu) 0
6088 @@ -86,13 +88,21 @@ static int show_stat(struct seq_file *p,
6089 u64 sum_softirq = 0;
6090 unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
6091 struct timespec64 boottime;
6092 + cpumask_var_t cpus_allowed;
6093 + bool virt_cpu = vx_flags(VXF_VIRT_CPU, 0);
6095 user = nice = system = idle = iowait =
6096 irq = softirq = steal = 0;
6097 guest = guest_nice = 0;
6098 getboottime64(&boottime);
6101 + cpuset_cpus_allowed(current, cpus_allowed);
6103 for_each_possible_cpu(i) {
6104 + if (virt_cpu && !cpumask_test_cpu(i, cpus_allowed))
6107 user += kcpustat_cpu(i).cpustat[CPUTIME_USER];
6108 nice += kcpustat_cpu(i).cpustat[CPUTIME_NICE];
6109 system += kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
6110 @@ -128,6 +138,9 @@ static int show_stat(struct seq_file *p,
6113 for_each_online_cpu(i) {
6114 + if (virt_cpu && !cpumask_test_cpu(i, cpus_allowed))
6117 /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
6118 user = kcpustat_cpu(i).cpustat[CPUTIME_USER];
6119 nice = kcpustat_cpu(i).cpustat[CPUTIME_NICE];
6120 diff -NurpP --minimal linux-4.9.217/fs/proc/uptime.c linux-4.9.217-vs2.3.9.12/fs/proc/uptime.c
6121 --- linux-4.9.217/fs/proc/uptime.c 2020-03-27 00:51:25.690417888 +0000
6122 +++ linux-4.9.217-vs2.3.9.12/fs/proc/uptime.c 2019-10-13 16:02:19.324763467 +0000
6124 #include <linux/seq_file.h>
6125 #include <linux/time.h>
6126 #include <linux/kernel_stat.h>
6127 +#include <linux/vserver/cvirt.h>
6129 static int uptime_proc_show(struct seq_file *m, void *v)
6131 @@ -21,6 +22,7 @@ static int uptime_proc_show(struct seq_f
6132 get_monotonic_boottime(&uptime);
6133 idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem);
6136 seq_printf(m, "%lu.%02lu %lu.%02lu\n",
6137 (unsigned long) uptime.tv_sec,
6138 (uptime.tv_nsec / (NSEC_PER_SEC / 100)),
6139 diff -NurpP --minimal linux-4.9.217/fs/proc_namespace.c linux-4.9.217-vs2.3.9.12/fs/proc_namespace.c
6140 --- linux-4.9.217/fs/proc_namespace.c 2016-12-11 19:17:54.000000000 +0000
6141 +++ linux-4.9.217-vs2.3.9.12/fs/proc_namespace.c 2018-10-20 04:58:14.000000000 +0000
6142 @@ -46,6 +46,8 @@ static int show_sb_opts(struct seq_file
6143 { MS_DIRSYNC, ",dirsync" },
6144 { MS_MANDLOCK, ",mand" },
6145 { MS_LAZYTIME, ",lazytime" },
6146 + { MS_TAGGED, ",tag" },
6147 + { MS_NOTAGCHECK, ",notagcheck" },
6150 const struct proc_fs_info *fs_infop;
6151 @@ -82,6 +84,38 @@ static inline void mangle(struct seq_fil
6152 seq_escape(m, s, " \t\n\\");
6155 +#ifdef CONFIG_VSERVER_EXTRA_MNT_CHECK
6157 +static int mnt_is_reachable(struct vfsmount *vfsmnt)
6160 + struct dentry *point;
6161 + struct mount *mnt = real_mount(vfsmnt);
6162 + struct mount *root_mnt;
6165 + if (mnt == mnt->mnt_ns->root)
6169 + root = current->fs->root;
6170 + root_mnt = real_mount(root.mnt);
6171 + point = root.dentry;
6173 + while ((mnt != mnt->mnt_parent) && (mnt != root_mnt)) {
6174 + point = mnt->mnt_mountpoint;
6175 + mnt = mnt->mnt_parent;
6177 + rcu_read_unlock();
6179 + ret = (mnt == root_mnt) && is_subdir(point, root.dentry);
6184 +#define mnt_is_reachable(v) (1)
6187 static void show_type(struct seq_file *m, struct super_block *sb)
6189 mangle(m, sb->s_type->name);
6190 @@ -99,6 +133,17 @@ static int show_vfsmnt(struct seq_file *
6191 struct super_block *sb = mnt_path.dentry->d_sb;
6194 + if (vx_flags(VXF_HIDE_MOUNT, 0))
6196 + if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P))
6199 + if (!vx_check(0, VS_ADMIN|VS_WATCH) &&
6200 + mnt == current->fs->root.mnt) {
6201 + seq_puts(m, "/dev/root / ");
6205 if (sb->s_op->show_devname) {
6206 err = sb->s_op->show_devname(m, mnt_path.dentry);
6208 @@ -112,6 +157,7 @@ static int show_vfsmnt(struct seq_file *
6214 seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
6215 err = show_sb_opts(m, sb);
6216 @@ -133,6 +179,11 @@ static int show_mountinfo(struct seq_fil
6217 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
6220 + if (vx_flags(VXF_HIDE_MOUNT, 0))
6222 + if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P))
6225 seq_printf(m, "%i %i %u:%u ", r->mnt_id, r->mnt_parent->mnt_id,
6226 MAJOR(sb->s_dev), MINOR(sb->s_dev));
6227 if (sb->s_op->show_path) {
6228 @@ -195,6 +246,17 @@ static int show_vfsstat(struct seq_file
6229 struct super_block *sb = mnt_path.dentry->d_sb;
6232 + if (vx_flags(VXF_HIDE_MOUNT, 0))
6234 + if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P))
6237 + if (!vx_check(0, VS_ADMIN|VS_WATCH) &&
6238 + mnt == current->fs->root.mnt) {
6239 + seq_puts(m, "device /dev/root mounted on / ");
6244 if (sb->s_op->show_devname) {
6245 seq_puts(m, "device ");
6246 @@ -216,7 +278,7 @@ static int show_vfsstat(struct seq_file
6252 /* file system type */
6253 seq_puts(m, "with fstype ");
6255 diff -NurpP --minimal linux-4.9.217/fs/quota/dquot.c linux-4.9.217-vs2.3.9.12/fs/quota/dquot.c
6256 --- linux-4.9.217/fs/quota/dquot.c 2020-03-27 00:51:25.790416308 +0000
6257 +++ linux-4.9.217-vs2.3.9.12/fs/quota/dquot.c 2020-04-01 09:40:28.705463971 +0000
6258 @@ -1659,6 +1659,9 @@ int __dquot_alloc_space(struct inode *in
6259 int reserve = flags & DQUOT_SPACE_RESERVE;
6260 struct dquot **dquots;
6262 + if ((ret = dl_alloc_space(inode, number)))
6265 if (!dquot_active(inode)) {
6266 inode_incr_space(inode, number, reserve);
6268 @@ -1711,6 +1714,9 @@ int dquot_alloc_inode(struct inode *inod
6269 struct dquot_warn warn[MAXQUOTAS];
6270 struct dquot * const *dquots;
6272 + if ((ret = dl_alloc_inode(inode)))
6275 if (!dquot_active(inode))
6277 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
6278 @@ -1813,6 +1819,8 @@ void __dquot_free_space(struct inode *in
6279 struct dquot **dquots;
6280 int reserve = flags & DQUOT_SPACE_RESERVE, index;
6282 + dl_free_space(inode, number);
6284 if (!dquot_active(inode)) {
6285 inode_decr_space(inode, number, reserve);
6287 @@ -1857,6 +1865,8 @@ void dquot_free_inode(struct inode *inod
6288 struct dquot * const *dquots;
6291 + dl_free_inode(inode);
6293 if (!dquot_active(inode))
6296 diff -NurpP --minimal linux-4.9.217/fs/quota/quota.c linux-4.9.217-vs2.3.9.12/fs/quota/quota.c
6297 --- linux-4.9.217/fs/quota/quota.c 2020-03-27 00:51:25.840415520 +0000
6298 +++ linux-4.9.217-vs2.3.9.12/fs/quota/quota.c 2018-10-20 05:55:43.000000000 +0000
6300 #include <linux/fs.h>
6301 #include <linux/namei.h>
6302 #include <linux/slab.h>
6303 +#include <linux/vs_context.h>
6304 #include <asm/current.h>
6305 #include <linux/uaccess.h>
6306 #include <linux/kernel.h>
6307 @@ -39,7 +40,7 @@ static int check_quotactl_permission(str
6311 - if (!capable(CAP_SYS_ADMIN))
6312 + if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
6316 @@ -770,6 +771,46 @@ static int do_quotactl(struct super_bloc
6320 +#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE)
6322 +#include <linux/vroot.h>
6323 +#include <linux/major.h>
6324 +#include <linux/module.h>
6325 +#include <linux/kallsyms.h>
6326 +#include <linux/vserver/debug.h>
6328 +static vroot_grb_func *vroot_get_real_bdev = NULL;
6330 +static DEFINE_SPINLOCK(vroot_grb_lock);
6332 +int register_vroot_grb(vroot_grb_func *func) {
6335 + spin_lock(&vroot_grb_lock);
6336 + if (!vroot_get_real_bdev) {
6337 + vroot_get_real_bdev = func;
6340 + spin_unlock(&vroot_grb_lock);
6343 +EXPORT_SYMBOL(register_vroot_grb);
6345 +int unregister_vroot_grb(vroot_grb_func *func) {
6346 + int ret = -EINVAL;
6348 + spin_lock(&vroot_grb_lock);
6349 + if (vroot_get_real_bdev) {
6350 + vroot_get_real_bdev = NULL;
6353 + spin_unlock(&vroot_grb_lock);
6356 +EXPORT_SYMBOL(unregister_vroot_grb);
6360 /* Return 1 if 'cmd' will block on frozen filesystem */
6361 static int quotactl_cmd_write(int cmd)
6363 @@ -811,6 +852,22 @@ static struct super_block *quotactl_bloc
6366 return ERR_CAST(bdev);
6367 +#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE)
6368 + if (bdev && bdev->bd_inode &&
6369 + imajor(bdev->bd_inode) == VROOT_MAJOR) {
6370 + struct block_device *bdnew = ERR_PTR(-EINVAL); /* kept if no resolver hook is set */
6372 + if (vroot_get_real_bdev)
6373 + bdnew = vroot_get_real_bdev(bdev);
6375 + vxdprintk(VXD_CBIT(misc, 0),
6376 + "vroot_get_real_bdev not set");
6378 + if (IS_ERR(bdnew))
6379 + return ERR_CAST(bdnew); /* propagate the errno unchanged, like the bdev error return */
6383 if (quotactl_cmd_write(cmd))
6384 sb = get_super_thawed(bdev);
6386 diff -NurpP --minimal linux-4.9.217/fs/stat.c linux-4.9.217-vs2.3.9.12/fs/stat.c
6387 --- linux-4.9.217/fs/stat.c 2020-03-27 00:51:26.590403699 +0000
6388 +++ linux-4.9.217-vs2.3.9.12/fs/stat.c 2018-10-20 04:58:14.000000000 +0000
6389 @@ -26,6 +26,7 @@ void generic_fillattr(struct inode *inod
6390 stat->nlink = inode->i_nlink;
6391 stat->uid = inode->i_uid;
6392 stat->gid = inode->i_gid;
6393 + stat->tag = inode->i_tag;
6394 stat->rdev = inode->i_rdev;
6395 stat->size = i_size_read(inode);
6396 stat->atime = inode->i_atime;
6397 diff -NurpP --minimal linux-4.9.217/fs/statfs.c linux-4.9.217-vs2.3.9.12/fs/statfs.c
6398 --- linux-4.9.217/fs/statfs.c 2016-12-11 19:17:54.000000000 +0000
6399 +++ linux-4.9.217-vs2.3.9.12/fs/statfs.c 2018-10-20 04:58:14.000000000 +0000
6401 #include <linux/statfs.h>
6402 #include <linux/security.h>
6403 #include <linux/uaccess.h>
6404 +#include <linux/vs_base.h>
6405 +#include <linux/vs_dlimit.h>
6406 #include "internal.h"
6408 static int flags_by_mnt(int mnt_flags)
6409 @@ -60,6 +62,8 @@ static int statfs_by_dentry(struct dentr
6410 retval = dentry->d_sb->s_op->statfs(dentry, buf);
6411 if (retval == 0 && buf->f_frsize == 0)
6412 buf->f_frsize = buf->f_bsize;
6413 + if (!vx_check(0, VS_ADMIN|VS_WATCH))
6414 + vx_vsi_statfs(dentry->d_sb, buf);
6418 diff -NurpP --minimal linux-4.9.217/fs/super.c linux-4.9.217-vs2.3.9.12/fs/super.c
6419 --- linux-4.9.217/fs/super.c 2020-03-27 00:51:26.590403699 +0000
6420 +++ linux-4.9.217-vs2.3.9.12/fs/super.c 2019-10-13 10:11:07.125382902 +0000
6422 #include <linux/fsnotify.h>
6423 #include <linux/lockdep.h>
6424 #include <linux/user_namespace.h>
6425 +#include <linux/magic.h>
6426 +#include <linux/vs_context.h>
6427 #include "internal.h"
6430 @@ -482,7 +484,7 @@ struct super_block *sget_userns(struct f
6432 if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) &&
6433 !(type->fs_flags & FS_USERNS_MOUNT) &&
6434 - !capable(CAP_SYS_ADMIN))
6435 + !vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
6436 return ERR_PTR(-EPERM);
6438 spin_lock(&sb_lock);
6439 @@ -563,7 +565,8 @@ struct super_block *sget(struct file_sys
6440 user_ns = &init_user_ns;
6442 /* Ensure the requestor has permissions over the target filesystem */
6443 - if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) && !ns_capable(user_ns, CAP_SYS_ADMIN))
6444 + if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) &&
6445 + !vx_ns_capable(user_ns, CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
6446 return ERR_PTR(-EPERM);
6448 return sget_userns(type, test, set, flags, user_ns, data);
6449 @@ -995,7 +998,8 @@ struct dentry *mount_ns(struct file_syst
6450 /* Don't allow mounting unless the caller has CAP_SYS_ADMIN
6451 * over the namespace.
6453 - if (!(flags & MS_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN))
6454 + if (!(flags & MS_KERNMOUNT) &&
6455 + !vx_ns_capable(user_ns, CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
6456 return ERR_PTR(-EPERM);
6458 sb = sget_userns(fs_type, ns_test_super, ns_set_super, flags,
6459 @@ -1213,6 +1217,13 @@ mount_fs(struct file_system_type *type,
6461 sb->s_flags |= MS_BORN;
6464 + if (!vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT) &&
6466 + (sb->s_magic != PROC_SUPER_MAGIC) &&
6467 + (sb->s_magic != DEVPTS_SUPER_MAGIC))
6470 error = security_sb_kern_mount(sb, flags, secdata);
6473 diff -NurpP --minimal linux-4.9.217/fs/utimes.c linux-4.9.217-vs2.3.9.12/fs/utimes.c
6474 --- linux-4.9.217/fs/utimes.c 2016-12-11 19:17:54.000000000 +0000
6475 +++ linux-4.9.217-vs2.3.9.12/fs/utimes.c 2018-10-20 04:58:14.000000000 +0000
6477 #include <linux/stat.h>
6478 #include <linux/utime.h>
6479 #include <linux/syscalls.h>
6480 +#include <linux/mount.h>
6481 +#include <linux/vs_cowbl.h>
6482 #include <asm/uaccess.h>
6483 #include <asm/unistd.h>
6485 @@ -52,13 +54,19 @@ static int utimes_common(struct path *pa
6488 struct iattr newattrs;
6489 - struct inode *inode = path->dentry->d_inode;
6490 struct inode *delegated_inode = NULL;
6491 + struct inode *inode;
6493 + error = cow_check_and_break(path);
6497 error = mnt_want_write(path->mnt);
6501 + inode = path->dentry->d_inode;
6503 if (times && times[0].tv_nsec == UTIME_NOW &&
6504 times[1].tv_nsec == UTIME_NOW)
6506 diff -NurpP --minimal linux-4.9.217/fs/xattr.c linux-4.9.217-vs2.3.9.12/fs/xattr.c
6507 --- linux-4.9.217/fs/xattr.c 2020-03-27 00:51:27.560388412 +0000
6508 +++ linux-4.9.217-vs2.3.9.12/fs/xattr.c 2018-10-20 05:55:43.000000000 +0000
6510 #include <linux/audit.h>
6511 #include <linux/vmalloc.h>
6512 #include <linux/posix_acl_xattr.h>
6513 +#include <linux/mount.h>
6515 #include <asm/uaccess.h>
6517 @@ -112,7 +113,7 @@ xattr_permission(struct inode *inode, co
6518 * The trusted.* namespace can only be accessed by privileged users.
6520 if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) {
6521 - if (!capable(CAP_SYS_ADMIN))
6522 + if (!vx_capable(CAP_SYS_ADMIN, VXC_FS_TRUSTED))
6523 return (mask & MAY_WRITE) ? -EPERM : -ENODATA;
6526 diff -NurpP --minimal linux-4.9.217/include/linux/capability.h linux-4.9.217-vs2.3.9.12/include/linux/capability.h
6527 --- linux-4.9.217/include/linux/capability.h 2020-03-27 00:51:31.040333567 +0000
6528 +++ linux-4.9.217-vs2.3.9.12/include/linux/capability.h 2018-10-20 04:58:14.000000000 +0000
6529 @@ -78,7 +78,8 @@ extern const kernel_cap_t __cap_init_eff
6530 #else /* HAND-CODED capability initializers */
6532 #define CAP_LAST_U32 ((_KERNEL_CAPABILITY_U32S) - 1)
6533 -#define CAP_LAST_U32_VALID_MASK (CAP_TO_MASK(CAP_LAST_CAP + 1) -1)
6534 +#define CAP_LAST_U32_VALID_MASK ((CAP_TO_MASK(CAP_LAST_CAP + 1) -1) \
6535 + | CAP_TO_MASK(CAP_CONTEXT))
6537 # define CAP_EMPTY_SET ((kernel_cap_t){{ 0, 0 }})
6538 # define CAP_FULL_SET ((kernel_cap_t){{ ~0, CAP_LAST_U32_VALID_MASK }})
6539 diff -NurpP --minimal linux-4.9.217/include/linux/cred.h linux-4.9.217-vs2.3.9.12/include/linux/cred.h
6540 --- linux-4.9.217/include/linux/cred.h 2020-03-27 00:51:31.560325368 +0000
6541 +++ linux-4.9.217-vs2.3.9.12/include/linux/cred.h 2019-10-05 14:58:45.680307395 +0000
6542 @@ -156,6 +156,7 @@ extern void exit_creds(struct task_struc
6543 extern int copy_creds(struct task_struct *, unsigned long);
6544 extern const struct cred *get_task_cred(struct task_struct *);
6545 extern struct cred *cred_alloc_blank(void);
6546 +extern struct cred *__prepare_creds(const struct cred *);
6547 extern struct cred *prepare_creds(void);
6548 extern struct cred *prepare_exec_creds(void);
6549 extern int commit_creds(struct cred *);
6550 @@ -216,6 +217,31 @@ static inline bool cap_ambient_invariant
6551 cred->cap_inheritable));
6554 +static inline void set_cred_subscribers(struct cred *cred, int n)
6556 +#ifdef CONFIG_DEBUG_CREDENTIALS
6557 + atomic_set(&cred->subscribers, n);
6561 +static inline int read_cred_subscribers(const struct cred *cred)
6563 +#ifdef CONFIG_DEBUG_CREDENTIALS
6564 + return atomic_read(&cred->subscribers);
6570 +static inline void alter_cred_subscribers(const struct cred *_cred, int n)
6572 +#ifdef CONFIG_DEBUG_CREDENTIALS
6573 + struct cred *cred = (struct cred *) _cred;
6575 + atomic_add(n, &cred->subscribers);
6580 * get_new_cred - Get a reference on a new set of credentials
6581 * @cred: The new credentials to reference
6582 diff -NurpP --minimal linux-4.9.217/include/linux/dcache.h linux-4.9.217-vs2.3.9.12/include/linux/dcache.h
6583 --- linux-4.9.217/include/linux/dcache.h 2020-03-27 00:51:31.570325213 +0000
6584 +++ linux-4.9.217-vs2.3.9.12/include/linux/dcache.h 2018-10-20 04:58:14.000000000 +0000
6585 @@ -308,8 +308,10 @@ extern char *dentry_path(struct dentry *
6587 static inline struct dentry *dget_dlock(struct dentry *dentry)
6591 dentry->d_lockref.count++;
6592 + // vx_dentry_inc(dentry);
6597 diff -NurpP --minimal linux-4.9.217/include/linux/devpts_fs.h linux-4.9.217-vs2.3.9.12/include/linux/devpts_fs.h
6598 --- linux-4.9.217/include/linux/devpts_fs.h 2016-12-11 19:17:54.000000000 +0000
6599 +++ linux-4.9.217-vs2.3.9.12/include/linux/devpts_fs.h 2018-10-20 04:58:14.000000000 +0000
6600 @@ -34,5 +34,4 @@ void devpts_pty_kill(struct dentry *);
6605 #endif /* _LINUX_DEVPTS_FS_H */
6606 diff -NurpP --minimal linux-4.9.217/include/linux/fs.h linux-4.9.217-vs2.3.9.12/include/linux/fs.h
6607 --- linux-4.9.217/include/linux/fs.h 2020-03-27 00:51:31.760322216 +0000
6608 +++ linux-4.9.217-vs2.3.9.12/include/linux/fs.h 2019-10-05 14:58:45.690307237 +0000
6609 @@ -231,6 +231,7 @@ typedef int (dio_iodone_t)(struct kiocb
6610 #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */
6611 #define ATTR_TIMES_SET (1 << 16)
6612 #define ATTR_TOUCH (1 << 17)
6613 +#define ATTR_TAG (1 << 18)
6616 * Whiteout is represented by a char device. The following constants define the
6617 @@ -253,6 +254,7 @@ struct iattr {
6623 struct timespec ia_atime;
6624 struct timespec ia_mtime;
6625 @@ -612,7 +614,9 @@ struct inode {
6626 unsigned short i_opflags;
6629 - unsigned int i_flags;
6631 + unsigned short i_flags;
6632 + unsigned short i_vflags;
6634 #ifdef CONFIG_FS_POSIX_ACL
6635 struct posix_acl *i_acl;
6636 @@ -641,6 +645,7 @@ struct inode {
6637 unsigned int __i_nlink;
6642 struct timespec i_atime;
6643 struct timespec i_mtime;
6644 @@ -845,14 +850,19 @@ static inline void i_size_write(struct i
6648 +static inline void i_tag_write(struct inode *inode, vtag_t tag)
6650 + inode->i_tag = make_ktag(&init_user_ns, tag);
6653 static inline unsigned iminor(const struct inode *inode)
6655 - return MINOR(inode->i_rdev);
6656 + return MINOR(inode->i_mdev);
6659 static inline unsigned imajor(const struct inode *inode)
6661 - return MAJOR(inode->i_rdev);
6662 + return MAJOR(inode->i_mdev);
6665 extern struct block_device *I_BDEV(struct inode *inode);
6666 @@ -909,6 +919,7 @@ struct file {
6668 struct fown_struct f_owner;
6669 const struct cred *f_cred;
6671 struct file_ra_state f_ra;
6674 @@ -1043,6 +1054,7 @@ struct file_lock {
6675 struct file *fl_file;
6680 struct fasync_struct * fl_fasync; /* for lease break notifications */
6681 /* for lease breaks: */
6682 @@ -1476,6 +1488,11 @@ static inline gid_t i_gid_read(const str
6683 return from_kgid(inode->i_sb->s_user_ns, inode->i_gid);
6686 +static inline vtag_t i_tag_read(const struct inode *inode)
6688 + return from_ktag(&init_user_ns, inode->i_tag);
6691 static inline void i_uid_write(struct inode *inode, uid_t uid)
6693 inode->i_uid = make_kuid(inode->i_sb->s_user_ns, uid);
6694 @@ -1765,6 +1782,7 @@ struct inode_operations {
6695 int (*setattr) (struct dentry *, struct iattr *);
6696 int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
6697 ssize_t (*listxattr) (struct dentry *, char *, size_t);
6698 + int (*sync_flags) (struct inode *, int, int);
6699 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
6701 int (*update_time)(struct inode *, struct timespec *, int);
6702 @@ -1779,6 +1797,7 @@ ssize_t rw_copy_check_uvector(int type,
6703 unsigned long nr_segs, unsigned long fast_segs,
6704 struct iovec *fast_pointer,
6705 struct iovec **ret_pointer);
6706 +ssize_t vfs_sendfile(struct file *, struct file *, loff_t *, size_t, loff_t);
6708 extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *);
6709 extern ssize_t __vfs_write(struct file *, const char __user *, size_t, loff_t *);
6710 @@ -1850,6 +1869,14 @@ struct super_operations {
6712 #define S_DAX 0 /* Make all the DAX code disappear */
6714 +#define S_IXUNLINK 16384 /* Immutable Invert on unlink */
6716 +/* Linux-VServer related Inode flags */
6720 +#define V_BARRIER 4 /* Barrier for chroot() */
6721 +#define V_COW 8 /* Copy on Write */
6724 * Note that nosuid etc flags are inode-specific: setting some file-system
6725 @@ -1874,10 +1901,13 @@ struct super_operations {
6726 #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK)
6727 #define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME)
6728 #define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION)
6729 +#define IS_TAGGED(inode) __IS_FLG(inode, MS_TAGGED)
6731 #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA)
6732 #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND)
6733 #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE)
6734 +#define IS_IXUNLINK(inode) ((inode)->i_flags & S_IXUNLINK)
6735 +#define IS_IXORUNLINK(inode) ((IS_IXUNLINK(inode) ? S_IMMUTABLE : 0) ^ IS_IMMUTABLE(inode))
6736 #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL)
6738 #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD)
6739 @@ -1897,6 +1927,16 @@ static inline bool HAS_UNMAPPED_ID(struc
6740 return !uid_valid(inode->i_uid) || !gid_valid(inode->i_gid);
6743 +#define IS_BARRIER(inode) (S_ISDIR((inode)->i_mode) && ((inode)->i_vflags & V_BARRIER))
6745 +#ifdef CONFIG_VSERVER_COWBL
6746 +# define IS_COW(inode) (IS_IXUNLINK(inode) && IS_IMMUTABLE(inode))
6747 +# define IS_COW_LINK(inode) (S_ISREG((inode)->i_mode) && ((inode)->i_nlink > 1))
6749 +# define IS_COW(inode) (0)
6750 +# define IS_COW_LINK(inode) (0)
6754 * Inode state bits. Protected by inode->i_lock
6756 @@ -2162,6 +2202,9 @@ extern struct kobject *fs_kobj;
6757 extern int locks_mandatory_locked(struct file *);
6758 extern int locks_mandatory_area(struct inode *, struct file *, loff_t, loff_t, unsigned char);
6760 +#define ATTR_FLAG_BARRIER 512 /* Barrier for chroot() */
6761 +#define ATTR_FLAG_IXUNLINK 1024 /* Immutable invert on unlink */
6764 * Candidates for mandatory locking have the setgid bit set
6765 * but no group execute bit - an otherwise meaningless combination.
6766 @@ -2342,7 +2385,7 @@ struct filename {
6770 -extern long vfs_truncate(const struct path *, loff_t);
6771 +extern long vfs_truncate(struct path *, loff_t);
6772 extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
6774 extern int vfs_fallocate(struct file *file, int mode, loff_t offset,
6775 @@ -2973,6 +3016,7 @@ extern int dcache_dir_open(struct inode
6776 extern int dcache_dir_close(struct inode *, struct file *);
6777 extern loff_t dcache_dir_lseek(struct file *, loff_t, int);
6778 extern int dcache_readdir(struct file *, struct dir_context *);
6779 +extern int dcache_readdir_filter(struct file *, struct dir_context *, int (*)(struct dentry *));
6780 extern int simple_setattr(struct dentry *, struct iattr *);
6781 extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *);
6782 extern int simple_statfs(struct dentry *, struct kstatfs *);
6783 diff -NurpP --minimal linux-4.9.217/include/linux/init_task.h linux-4.9.217-vs2.3.9.12/include/linux/init_task.h
6784 --- linux-4.9.217/include/linux/init_task.h 2016-12-11 19:17:54.000000000 +0000
6785 +++ linux-4.9.217-vs2.3.9.12/include/linux/init_task.h 2018-10-20 04:58:14.000000000 +0000
6786 @@ -271,6 +271,10 @@ extern struct task_group root_task_group
6788 INIT_NUMA_BALANCING(tsk) \
6791 + .vx_info = NULL, \
6793 + .nx_info = NULL, \
6797 diff -NurpP --minimal linux-4.9.217/include/linux/ipc.h linux-4.9.217-vs2.3.9.12/include/linux/ipc.h
6798 --- linux-4.9.217/include/linux/ipc.h 2016-12-11 19:17:54.000000000 +0000
6799 +++ linux-4.9.217-vs2.3.9.12/include/linux/ipc.h 2018-10-20 04:58:14.000000000 +0000
6800 @@ -16,6 +16,7 @@ struct kern_ipc_perm
6808 diff -NurpP --minimal linux-4.9.217/include/linux/memcontrol.h linux-4.9.217-vs2.3.9.12/include/linux/memcontrol.h
6809 --- linux-4.9.217/include/linux/memcontrol.h 2020-03-27 00:51:33.430295898 +0000
6810 +++ linux-4.9.217-vs2.3.9.12/include/linux/memcontrol.h 2018-10-20 04:58:14.000000000 +0000
6811 @@ -92,6 +92,7 @@ enum mem_cgroup_events_target {
6812 MEM_CGROUP_NTARGETS,
6818 #define MEM_CGROUP_ID_SHIFT 16
6819 @@ -402,6 +403,12 @@ static inline bool mem_cgroup_is_descend
6820 return cgroup_is_descendant(memcg->css.cgroup, root->css.cgroup);
6823 +extern unsigned long mem_cgroup_mem_usage_pages(struct mem_cgroup *memcg);
6824 +extern unsigned long mem_cgroup_mem_limit_pages(struct mem_cgroup *memcg);
6825 +extern unsigned long mem_cgroup_memsw_usage_pages(struct mem_cgroup *memcg);
6826 +extern unsigned long mem_cgroup_memsw_limit_pages(struct mem_cgroup *memcg);
6827 +extern void dump_mem_cgroup(struct mem_cgroup *memcg);
6829 static inline bool mm_match_cgroup(struct mm_struct *mm,
6830 struct mem_cgroup *memcg)
6832 diff -NurpP --minimal linux-4.9.217/include/linux/mount.h linux-4.9.217-vs2.3.9.12/include/linux/mount.h
6833 --- linux-4.9.217/include/linux/mount.h 2020-03-27 00:51:37.020239318 +0000
6834 +++ linux-4.9.217-vs2.3.9.12/include/linux/mount.h 2018-10-20 04:58:14.000000000 +0000
6835 @@ -63,6 +63,9 @@ struct mnt_namespace;
6836 #define MNT_MARKED 0x4000000
6837 #define MNT_UMOUNT 0x8000000
6839 +#define MNT_TAGID 0x10000
6840 +#define MNT_NOTAG 0x20000
6843 struct dentry *mnt_root; /* root of the mounted tree */
6844 struct super_block *mnt_sb; /* pointer to superblock */
6845 diff -NurpP --minimal linux-4.9.217/include/linux/netdevice.h linux-4.9.217-vs2.3.9.12/include/linux/netdevice.h
6846 --- linux-4.9.217/include/linux/netdevice.h 2020-03-27 00:51:37.160237109 +0000
6847 +++ linux-4.9.217-vs2.3.9.12/include/linux/netdevice.h 2019-12-25 15:37:52.148426794 +0000
6848 @@ -2489,6 +2489,7 @@ static inline int dev_recursion_level(vo
6850 struct net_device *dev_get_by_index(struct net *net, int ifindex);
6851 struct net_device *__dev_get_by_index(struct net *net, int ifindex);
6852 +struct net_device *dev_get_by_index_real_rcu(struct net *net, int ifindex);
6853 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
6854 int netdev_get_name(struct net *net, char *name, int ifindex);
6855 int dev_restart(struct net_device *dev);
6856 diff -NurpP --minimal linux-4.9.217/include/linux/net.h linux-4.9.217-vs2.3.9.12/include/linux/net.h
6857 --- linux-4.9.217/include/linux/net.h 2016-12-11 19:17:54.000000000 +0000
6858 +++ linux-4.9.217-vs2.3.9.12/include/linux/net.h 2018-10-20 04:58:14.000000000 +0000
6859 @@ -44,6 +44,7 @@ struct net;
6860 #define SOCK_NOSPACE 2
6861 #define SOCK_PASSCRED 3
6862 #define SOCK_PASSSEC 4
6863 +#define SOCK_USER_SOCKET 5
6865 #ifndef ARCH_HAS_SOCKET_TYPES
6867 diff -NurpP --minimal linux-4.9.217/include/linux/nsproxy.h linux-4.9.217-vs2.3.9.12/include/linux/nsproxy.h
6868 --- linux-4.9.217/include/linux/nsproxy.h 2016-12-11 19:17:54.000000000 +0000
6869 +++ linux-4.9.217-vs2.3.9.12/include/linux/nsproxy.h 2018-10-20 04:58:14.000000000 +0000
6872 #include <linux/spinlock.h>
6873 #include <linux/sched.h>
6874 +#include <linux/vserver/debug.h>
6876 struct mnt_namespace;
6877 struct uts_namespace;
6878 @@ -65,6 +66,7 @@ extern struct nsproxy init_nsproxy;
6881 int copy_namespaces(unsigned long flags, struct task_struct *tsk);
6882 +struct nsproxy *copy_nsproxy(struct nsproxy *orig);
6883 void exit_task_namespaces(struct task_struct *tsk);
6884 void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
6885 void free_nsproxy(struct nsproxy *ns);
6886 @@ -72,16 +74,26 @@ int unshare_nsproxy_namespaces(unsigned
6887 struct cred *, struct fs_struct *);
6888 int __init nsproxy_cache_init(void);
6890 -static inline void put_nsproxy(struct nsproxy *ns)
6891 +#define get_nsproxy(n) __get_nsproxy(n, __FILE__, __LINE__)
6893 +static inline void __get_nsproxy(struct nsproxy *ns,
6894 + const char *_file, int _line)
6896 - if (atomic_dec_and_test(&ns->count)) {
6899 + vxlprintk(VXD_CBIT(space, 0), "get_nsproxy(%p[%u])",
6900 + ns, atomic_read(&ns->count), _file, _line);
6901 + atomic_inc(&ns->count);
6904 -static inline void get_nsproxy(struct nsproxy *ns)
6905 +#define put_nsproxy(n) __put_nsproxy(n, __FILE__, __LINE__)
6907 +static inline void __put_nsproxy(struct nsproxy *ns,
6908 + const char *_file, int _line)
6910 - atomic_inc(&ns->count);
6911 + vxlprintk(VXD_CBIT(space, 0), "put_nsproxy(%p[%u])",
6912 + ns, atomic_read(&ns->count), _file, _line);
6913 + if (atomic_dec_and_test(&ns->count)) {
6919 diff -NurpP --minimal linux-4.9.217/include/linux/pid.h linux-4.9.217-vs2.3.9.12/include/linux/pid.h
6920 --- linux-4.9.217/include/linux/pid.h 2020-03-27 00:51:38.850210473 +0000
6921 +++ linux-4.9.217-vs2.3.9.12/include/linux/pid.h 2018-10-20 04:58:14.000000000 +0000
6922 @@ -10,7 +10,8 @@ enum pid_type
6925 /* only valid to __task_pid_nr_ns() */
6932 @@ -172,6 +173,7 @@ static inline pid_t pid_nr(struct pid *p
6935 pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns);
6936 +pid_t pid_unmapped_nr_ns(struct pid *pid, struct pid_namespace *ns);
6937 pid_t pid_vnr(struct pid *pid);
6939 #define do_each_pid_task(pid, type, task) \
6940 diff -NurpP --minimal linux-4.9.217/include/linux/quotaops.h linux-4.9.217-vs2.3.9.12/include/linux/quotaops.h
6941 --- linux-4.9.217/include/linux/quotaops.h 2020-03-27 00:51:40.950177377 +0000
6942 +++ linux-4.9.217-vs2.3.9.12/include/linux/quotaops.h 2019-12-25 15:37:52.158426633 +0000
6944 #define _LINUX_QUOTAOPS_
6946 #include <linux/fs.h>
6947 +#include <linux/vs_dlimit.h>
6949 #define DQUOT_SPACE_WARN 0x1
6950 #define DQUOT_SPACE_RESERVE 0x2
6951 @@ -224,11 +225,12 @@ static inline void dquot_drop(struct ino
6953 static inline int dquot_alloc_inode(struct inode *inode)
6956 + return dl_alloc_inode(inode);
6959 static inline void dquot_free_inode(struct inode *inode)
6961 + dl_free_inode(inode);
6964 static inline int dquot_transfer(struct inode *inode, struct iattr *iattr)
6965 @@ -239,6 +241,10 @@ static inline int dquot_transfer(struct
6966 static inline int __dquot_alloc_space(struct inode *inode, qsize_t number,
6971 + if ((ret = dl_alloc_space(inode, number)))
6973 if (!(flags & DQUOT_SPACE_RESERVE))
6974 inode_add_bytes(inode, number);
6976 @@ -249,6 +255,7 @@ static inline void __dquot_free_space(st
6978 if (!(flags & DQUOT_SPACE_RESERVE))
6979 inode_sub_bytes(inode, number);
6980 + dl_free_space(inode, number);
6983 static inline int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
6984 diff -NurpP --minimal linux-4.9.217/include/linux/sched.h linux-4.9.217-vs2.3.9.12/include/linux/sched.h
6985 --- linux-4.9.217/include/linux/sched.h 2020-03-27 00:51:41.060175640 +0000
6986 +++ linux-4.9.217-vs2.3.9.12/include/linux/sched.h 2019-10-05 14:58:45.710306917 +0000
6987 @@ -1719,6 +1719,14 @@ struct task_struct {
6989 struct seccomp seccomp;
6991 +/* vserver context data */
6992 + struct vx_info *vx_info;
6993 + struct nx_info *nx_info;
6999 /* Thread group tracking */
7002 @@ -2112,6 +2120,11 @@ struct pid_namespace;
7003 pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
7004 struct pid_namespace *ns);
7006 +#include <linux/vserver/base.h>
7007 +#include <linux/vserver/context.h>
7008 +#include <linux/vserver/debug.h>
7009 +#include <linux/vserver/pid.h>
7011 static inline pid_t task_pid_nr(struct task_struct *tsk)
7014 @@ -2125,7 +2138,8 @@ static inline pid_t task_pid_nr_ns(struc
7016 static inline pid_t task_pid_vnr(struct task_struct *tsk)
7018 - return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
7019 + // return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
7020 + return vx_map_pid(__task_pid_nr_ns(tsk, PIDTYPE_PID, NULL));
7024 diff -NurpP --minimal linux-4.9.217/include/linux/shmem_fs.h linux-4.9.217-vs2.3.9.12/include/linux/shmem_fs.h
7025 --- linux-4.9.217/include/linux/shmem_fs.h 2016-12-11 19:17:54.000000000 +0000
7026 +++ linux-4.9.217-vs2.3.9.12/include/linux/shmem_fs.h 2018-10-20 04:58:14.000000000 +0000
7029 /* inode in-kernel data */
7031 +#define TMPFS_SUPER_MAGIC 0x01021994
7034 struct shmem_inode_info {
7036 unsigned int seals; /* shmem seals */
7037 diff -NurpP --minimal linux-4.9.217/include/linux/stat.h linux-4.9.217-vs2.3.9.12/include/linux/stat.h
7038 --- linux-4.9.217/include/linux/stat.h 2016-12-11 19:17:54.000000000 +0000
7039 +++ linux-4.9.217-vs2.3.9.12/include/linux/stat.h 2018-10-20 04:58:14.000000000 +0000
7040 @@ -25,6 +25,7 @@ struct kstat {
7047 struct timespec atime;
7048 diff -NurpP --minimal linux-4.9.217/include/linux/sunrpc/auth.h linux-4.9.217-vs2.3.9.12/include/linux/sunrpc/auth.h
7049 --- linux-4.9.217/include/linux/sunrpc/auth.h 2016-12-11 19:17:54.000000000 +0000
7050 +++ linux-4.9.217-vs2.3.9.12/include/linux/sunrpc/auth.h 2018-10-20 04:58:14.000000000 +0000
7051 @@ -46,6 +46,7 @@ enum {
7056 struct group_info *group_info;
7057 const char *principal;
7058 unsigned long ac_flags;
7059 diff -NurpP --minimal linux-4.9.217/include/linux/sunrpc/clnt.h linux-4.9.217-vs2.3.9.12/include/linux/sunrpc/clnt.h
7060 --- linux-4.9.217/include/linux/sunrpc/clnt.h 2020-03-27 00:51:41.170173910 +0000
7061 +++ linux-4.9.217-vs2.3.9.12/include/linux/sunrpc/clnt.h 2018-10-20 05:55:43.000000000 +0000
7062 @@ -52,7 +52,8 @@ struct rpc_clnt {
7063 cl_discrtry : 1,/* disconnect before retry */
7064 cl_noretranstimeo: 1,/* No retransmit timeouts */
7065 cl_autobind : 1,/* use getport() */
7066 - cl_chatty : 1;/* be verbose */
7067 + cl_chatty : 1,/* be verbose */
7068 + cl_tag : 1;/* context tagging */
7070 struct rpc_rtt * cl_rtt; /* RTO estimator data */
7071 const struct rpc_timeout *cl_timeout; /* Timeout strategy */
7072 diff -NurpP --minimal linux-4.9.217/include/linux/types.h linux-4.9.217-vs2.3.9.12/include/linux/types.h
7073 --- linux-4.9.217/include/linux/types.h 2016-12-11 19:17:54.000000000 +0000
7074 +++ linux-4.9.217-vs2.3.9.12/include/linux/types.h 2018-10-20 04:58:14.000000000 +0000
7075 @@ -32,6 +32,9 @@ typedef __kernel_uid32_t uid_t;
7076 typedef __kernel_gid32_t gid_t;
7077 typedef __kernel_uid16_t uid16_t;
7078 typedef __kernel_gid16_t gid16_t;
7079 +typedef unsigned int vxid_t;
7080 +typedef unsigned int vnid_t;
7081 +typedef unsigned int vtag_t;
7083 typedef unsigned long uintptr_t;
7085 diff -NurpP --minimal linux-4.9.217/include/linux/uidgid.h linux-4.9.217-vs2.3.9.12/include/linux/uidgid.h
7086 --- linux-4.9.217/include/linux/uidgid.h 2016-12-11 19:17:54.000000000 +0000
7087 +++ linux-4.9.217-vs2.3.9.12/include/linux/uidgid.h 2018-10-20 04:58:14.000000000 +0000
7088 @@ -21,13 +21,17 @@ typedef struct {
7101 #define KUIDT_INIT(value) (kuid_t){ value }
7102 #define KGIDT_INIT(value) (kgid_t){ value }
7103 +#define KTAGT_INIT(value) (ktag_t){ value }
7105 #ifdef CONFIG_MULTIUSER
7106 static inline uid_t __kuid_val(kuid_t uid)
7107 @@ -51,11 +55,18 @@ static inline gid_t __kgid_val(kgid_t gi
7111 +static inline vtag_t __ktag_val(ktag_t tag)
7116 #define GLOBAL_ROOT_UID KUIDT_INIT(0)
7117 #define GLOBAL_ROOT_GID KGIDT_INIT(0)
7118 +#define GLOBAL_ROOT_TAG KTAGT_INIT(0)
7120 #define INVALID_UID KUIDT_INIT(-1)
7121 #define INVALID_GID KGIDT_INIT(-1)
7122 +#define INVALID_TAG KTAGT_INIT(-1)
7124 static inline bool uid_eq(kuid_t left, kuid_t right)
7126 @@ -67,6 +78,11 @@ static inline bool gid_eq(kgid_t left, k
7127 return __kgid_val(left) == __kgid_val(right);
7130 +static inline bool tag_eq(ktag_t left, ktag_t right)
7132 + return __ktag_val(left) == __ktag_val(right);
7135 static inline bool uid_gt(kuid_t left, kuid_t right)
7137 return __kuid_val(left) > __kuid_val(right);
7138 @@ -117,13 +133,21 @@ static inline bool gid_valid(kgid_t gid)
7139 return __kgid_val(gid) != (gid_t) -1;
7142 +static inline bool tag_valid(ktag_t tag)
7144 + return !tag_eq(tag, INVALID_TAG);
7147 #ifdef CONFIG_USER_NS
7149 extern kuid_t make_kuid(struct user_namespace *from, uid_t uid);
7150 extern kgid_t make_kgid(struct user_namespace *from, gid_t gid);
7151 +extern ktag_t make_ktag(struct user_namespace *from, vtag_t tag); /* takes a tag id (vtag_t), like the !CONFIG_USER_NS inline */
7153 extern uid_t from_kuid(struct user_namespace *to, kuid_t uid);
7154 extern gid_t from_kgid(struct user_namespace *to, kgid_t gid);
7155 +extern vtag_t from_ktag(struct user_namespace *to, ktag_t tag);
7157 extern uid_t from_kuid_munged(struct user_namespace *to, kuid_t uid);
7158 extern gid_t from_kgid_munged(struct user_namespace *to, kgid_t gid);
7160 @@ -149,6 +173,11 @@ static inline kgid_t make_kgid(struct us
7161 return KGIDT_INIT(gid);
7164 +static inline ktag_t make_ktag(struct user_namespace *from, vtag_t tag)
7166 + return KTAGT_INIT(tag);
7169 static inline uid_t from_kuid(struct user_namespace *to, kuid_t kuid)
7171 return __kuid_val(kuid);
7172 @@ -159,6 +188,11 @@ static inline gid_t from_kgid(struct use
7173 return __kgid_val(kgid);
7176 +static inline vtag_t from_ktag(struct user_namespace *to, ktag_t ktag)
7178 + return __ktag_val(ktag);
7181 static inline uid_t from_kuid_munged(struct user_namespace *to, kuid_t kuid)
7183 uid_t uid = from_kuid(to, kuid);
7184 diff -NurpP --minimal linux-4.9.217/include/linux/vroot.h linux-4.9.217-vs2.3.9.12/include/linux/vroot.h
7185 --- linux-4.9.217/include/linux/vroot.h 1970-01-01 00:00:00.000000000 +0000
7186 +++ linux-4.9.217-vs2.3.9.12/include/linux/vroot.h 2018-10-20 04:58:14.000000000 +0000
7190 + * include/linux/vroot.h
7192 + * written by Herbert Pötzl, 9/11/2002
7193 + * ported to 2.6 by Herbert Pötzl, 30/12/2004
7195 + * Copyright (C) 2002-2007 by Herbert Pötzl.
7196 + * Redistribution of this file is permitted under the
7197 + * GNU General Public License.
7200 +#ifndef _LINUX_VROOT_H
7201 +#define _LINUX_VROOT_H
7206 +/* Possible states of device */
7212 +struct vroot_device {
7216 + struct semaphore vr_ctl_mutex;
7217 + struct block_device *vr_device;
7222 +typedef struct block_device *(vroot_grb_func)(struct block_device *);
7224 +extern int register_vroot_grb(vroot_grb_func *);
7225 +extern int unregister_vroot_grb(vroot_grb_func *);
7227 +#endif /* __KERNEL__ */
7229 +#define MAX_VROOT_DEFAULT 8
7232 + * IOCTL commands --- we will commandeer 0x56 ('V')
7235 +#define VROOT_SET_DEV 0x5600
7236 +#define VROOT_CLR_DEV 0x5601
7238 +#endif /* _LINUX_VROOT_H */
7239 diff -NurpP --minimal linux-4.9.217/include/linux/vs_base.h linux-4.9.217-vs2.3.9.12/include/linux/vs_base.h
7240 --- linux-4.9.217/include/linux/vs_base.h 1970-01-01 00:00:00.000000000 +0000
7241 +++ linux-4.9.217-vs2.3.9.12/include/linux/vs_base.h 2018-10-20 04:58:14.000000000 +0000
7246 +#include "vserver/base.h"
7247 +#include "vserver/check.h"
7248 +#include "vserver/debug.h"
7251 +#warning duplicate inclusion
7253 diff -NurpP --minimal linux-4.9.217/include/linux/vs_context.h linux-4.9.217-vs2.3.9.12/include/linux/vs_context.h
7254 --- linux-4.9.217/include/linux/vs_context.h 1970-01-01 00:00:00.000000000 +0000
7255 +++ linux-4.9.217-vs2.3.9.12/include/linux/vs_context.h 2018-10-20 04:58:14.000000000 +0000
7257 +#ifndef _VS_CONTEXT_H
7258 +#define _VS_CONTEXT_H
7260 +#include "vserver/base.h"
7261 +#include "vserver/check.h"
7262 +#include "vserver/context.h"
7263 +#include "vserver/history.h"
7264 +#include "vserver/debug.h"
7266 +#include <linux/sched.h>
7269 +#define get_vx_info(i) __get_vx_info(i, __FILE__, __LINE__, __HERE__)
7271 +static inline struct vx_info *__get_vx_info(struct vx_info *vxi,
7272 + const char *_file, int _line, void *_here)
7277 + vxlprintk(VXD_CBIT(xid, 2), "get_vx_info(%p[#%d.%d])",
7278 + vxi, vxi ? vxi->vx_id : 0,
7279 + vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7281 + __vxh_get_vx_info(vxi, _here);
7283 + atomic_inc(&vxi->vx_usecnt);
7288 +extern void free_vx_info(struct vx_info *);
7290 +#define put_vx_info(i) __put_vx_info(i, __FILE__, __LINE__, __HERE__)
7292 +static inline void __put_vx_info(struct vx_info *vxi,
7293 + const char *_file, int _line, void *_here)
7298 + vxlprintk(VXD_CBIT(xid, 2), "put_vx_info(%p[#%d.%d])",
7299 + vxi, vxi ? vxi->vx_id : 0,
7300 + vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7302 + __vxh_put_vx_info(vxi, _here);
7304 + if (atomic_dec_and_test(&vxi->vx_usecnt))
7305 + free_vx_info(vxi);
7309 +#define init_vx_info(p, i) \
7310 + __init_vx_info(p, i, __FILE__, __LINE__, __HERE__)
7312 +static inline void __init_vx_info(struct vx_info **vxp, struct vx_info *vxi,
7313 + const char *_file, int _line, void *_here)
7316 + vxlprintk(VXD_CBIT(xid, 3),
7317 + "init_vx_info(%p[#%d.%d])",
7318 + vxi, vxi ? vxi->vx_id : 0,
7319 + vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7321 + __vxh_init_vx_info(vxi, vxp, _here);
7323 + atomic_inc(&vxi->vx_usecnt);
7329 +#define set_vx_info(p, i) \
7330 + __set_vx_info(p, i, __FILE__, __LINE__, __HERE__)
7332 +static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi,
7333 + const char *_file, int _line, void *_here)
7335 + struct vx_info *vxo;
7340 + vxlprintk(VXD_CBIT(xid, 3), "set_vx_info(%p[#%d.%d])",
7341 + vxi, vxi ? vxi->vx_id : 0,
7342 + vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7344 + __vxh_set_vx_info(vxi, vxp, _here);
7346 + atomic_inc(&vxi->vx_usecnt);
7347 + vxo = xchg(vxp, vxi);
7352 +#define clr_vx_info(p) __clr_vx_info(p, __FILE__, __LINE__, __HERE__)
7354 +static inline void __clr_vx_info(struct vx_info **vxp,
7355 + const char *_file, int _line, void *_here)
7357 + struct vx_info *vxo;
7359 + vxo = xchg(vxp, NULL);
7363 + vxlprintk(VXD_CBIT(xid, 3), "clr_vx_info(%p[#%d.%d])",
7364 + vxo, vxo ? vxo->vx_id : 0,
7365 + vxo ? atomic_read(&vxo->vx_usecnt) : 0,
7367 + __vxh_clr_vx_info(vxo, vxp, _here);
7369 + if (atomic_dec_and_test(&vxo->vx_usecnt))
7370 + free_vx_info(vxo);
7374 +#define claim_vx_info(v, p) \
7375 + __claim_vx_info(v, p, __FILE__, __LINE__, __HERE__)
7377 +static inline void __claim_vx_info(struct vx_info *vxi,
7378 + struct task_struct *task,
7379 + const char *_file, int _line, void *_here)
7381 + vxlprintk(VXD_CBIT(xid, 3), "claim_vx_info(%p[#%d.%d.%d]) %p",
7382 + vxi, vxi ? vxi->vx_id : 0,
7383 + vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7384 + vxi ? atomic_read(&vxi->vx_tasks) : 0,
7385 + task, _file, _line);
7386 + __vxh_claim_vx_info(vxi, task, _here);
7388 + atomic_inc(&vxi->vx_tasks);
7392 +extern void unhash_vx_info(struct vx_info *);
7394 +#define release_vx_info(v, p) \
7395 + __release_vx_info(v, p, __FILE__, __LINE__, __HERE__)
7397 +static inline void __release_vx_info(struct vx_info *vxi,
7398 + struct task_struct *task,
7399 + const char *_file, int _line, void *_here)
7401 + vxlprintk(VXD_CBIT(xid, 3), "release_vx_info(%p[#%d.%d.%d]) %p",
7402 + vxi, vxi ? vxi->vx_id : 0,
7403 + vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7404 + vxi ? atomic_read(&vxi->vx_tasks) : 0,
7405 + task, _file, _line);
7406 + __vxh_release_vx_info(vxi, task, _here);
7410 + if (atomic_dec_and_test(&vxi->vx_tasks))
7411 + unhash_vx_info(vxi);
7415 +#define task_get_vx_info(p) \
7416 + __task_get_vx_info(p, __FILE__, __LINE__, __HERE__)
7418 +static inline struct vx_info *__task_get_vx_info(struct task_struct *p,
7419 + const char *_file, int _line, void *_here)
7421 + struct vx_info *vxi;
7424 + vxlprintk(VXD_CBIT(xid, 5), "task_get_vx_info(%p)",
7426 + vxi = __get_vx_info(p->vx_info, _file, _line, _here);
7432 +static inline void __wakeup_vx_info(struct vx_info *vxi)
7434 + if (waitqueue_active(&vxi->vx_wait))
7435 + wake_up_interruptible(&vxi->vx_wait);
7439 +#define enter_vx_info(v, s) __enter_vx_info(v, s, __FILE__, __LINE__)
7441 +static inline void __enter_vx_info(struct vx_info *vxi,
7442 + struct vx_info_save *vxis, const char *_file, int _line)
7444 + vxlprintk(VXD_CBIT(xid, 5), "enter_vx_info(%p[#%d],%p) %p[#%d,%p]",
7445 + vxi, vxi ? vxi->vx_id : 0, vxis, current,
7446 + current->xid, current->vx_info, _file, _line);
7447 + vxis->vxi = xchg(&current->vx_info, vxi); /* install vxi on current, stash the previous pointer in *vxis */
7448 + vxis->xid = current->xid; /* stash the previous xid next to it */
7449 + current->xid = vxi ? vxi->vx_id : 0; /* a NULL vxi yields xid 0 */
7452 +#define leave_vx_info(s) __leave_vx_info(s, __FILE__, __LINE__)
7454 +static inline void __leave_vx_info(struct vx_info_save *vxis,
7455 + const char *_file, int _line)
7457 + vxlprintk(VXD_CBIT(xid, 5), "leave_vx_info(%p[#%d,%p]) %p[#%d,%p]",
7458 + vxis, vxis->xid, vxis->vxi, current,
7459 + current->xid, current->vx_info, _file, _line);
7460 + (void)xchg(&current->vx_info, vxis->vxi); /* put back the pointer saved in *vxis; old value is discarded */
7461 + current->xid = vxis->xid; /* put back the saved xid */
7465 +static inline void __enter_vx_admin(struct vx_info_save *vxis)
7467 + vxis->vxi = xchg(&current->vx_info, NULL); /* clear current's vx_info, saving the old pointer */
7468 + vxis->xid = xchg(&current->xid, (vxid_t)0); /* set current's xid to 0, saving the old xid */
7471 +static inline void __leave_vx_admin(struct vx_info_save *vxis)
7473 + (void)xchg(&current->xid, vxis->xid); /* put back the xid saved in *vxis */
7474 + (void)xchg(&current->vx_info, vxis->vxi); /* put back the vx_info pointer saved in *vxis */
7477 +#define task_is_init(p) \
7478 + __task_is_init(p, __FILE__, __LINE__, __HERE__)
7480 +static inline int __task_is_init(struct task_struct *p,
7481 + const char *_file, int _line, void *_here)
7483 + int is_init = is_global_init(p);
7487 + is_init = p->vx_info->vx_initpid == p->pid;
7492 +extern void exit_vx_info(struct task_struct *, int);
7493 +extern void exit_vx_info_early(struct task_struct *, int);
7497 +#warning duplicate inclusion
7499 diff -NurpP --minimal linux-4.9.217/include/linux/vs_cowbl.h linux-4.9.217-vs2.3.9.12/include/linux/vs_cowbl.h
7500 --- linux-4.9.217/include/linux/vs_cowbl.h 1970-01-01 00:00:00.000000000 +0000
7501 +++ linux-4.9.217-vs2.3.9.12/include/linux/vs_cowbl.h 2018-10-20 04:58:14.000000000 +0000
7503 +#ifndef _VS_COWBL_H
7504 +#define _VS_COWBL_H
7506 +#include <linux/fs.h>
7507 +#include <linux/dcache.h>
7508 +#include <linux/namei.h>
7509 +#include <linux/slab.h>
7511 +extern struct dentry *cow_break_link(const char *pathname);
7513 +static inline int cow_check_and_break(struct path *path)
7515 + struct inode *inode = path->dentry->d_inode;
7518 + /* do we need this check? */
7519 + if (IS_RDONLY(inode))
7522 + if (IS_COW(inode)) {
7523 + if (IS_COW_LINK(inode)) {
7524 + struct dentry *new_dentry, *old_dentry = path->dentry;
7527 + buf = kmalloc(PATH_MAX, GFP_KERNEL);
7531 + pp = d_path(path, buf, PATH_MAX);
7532 + new_dentry = cow_break_link(pp);
7534 + if (!IS_ERR(new_dentry)) {
7535 + path->dentry = new_dentry;
7538 + error = PTR_ERR(new_dentry);
7540 + inode->i_flags &= ~(S_IXUNLINK | S_IMMUTABLE);
7541 + inode->i_ctime = CURRENT_TIME;
7542 + mark_inode_dirty(inode);
7549 +#warning duplicate inclusion
7551 diff -NurpP --minimal linux-4.9.217/include/linux/vs_cvirt.h linux-4.9.217-vs2.3.9.12/include/linux/vs_cvirt.h
7552 --- linux-4.9.217/include/linux/vs_cvirt.h 1970-01-01 00:00:00.000000000 +0000
7553 +++ linux-4.9.217-vs2.3.9.12/include/linux/vs_cvirt.h 2018-10-20 04:58:14.000000000 +0000
7555 +#ifndef _VS_CVIRT_H
7556 +#define _VS_CVIRT_H
7558 +#include "vserver/cvirt.h"
7559 +#include "vserver/context.h"
7560 +#include "vserver/base.h"
7561 +#include "vserver/check.h"
7562 +#include "vserver/debug.h"
7565 +static inline void vx_activate_task(struct task_struct *p)
7567 + struct vx_info *vxi;
7569 + if ((vxi = p->vx_info)) {
7570 + vx_update_load(vxi);
7571 + atomic_inc(&vxi->cvirt.nr_running);
7575 +static inline void vx_deactivate_task(struct task_struct *p)
7577 + struct vx_info *vxi;
7579 + if ((vxi = p->vx_info)) {
7580 + vx_update_load(vxi);
7581 + atomic_dec(&vxi->cvirt.nr_running);
7585 +static inline void vx_uninterruptible_inc(struct task_struct *p)
7587 + struct vx_info *vxi;
7589 + if ((vxi = p->vx_info))
7590 + atomic_inc(&vxi->cvirt.nr_uninterruptible);
7593 +static inline void vx_uninterruptible_dec(struct task_struct *p)
7595 + struct vx_info *vxi;
7597 + if ((vxi = p->vx_info))
7598 + atomic_dec(&vxi->cvirt.nr_uninterruptible);
7603 +#warning duplicate inclusion
7605 diff -NurpP --minimal linux-4.9.217/include/linux/vs_device.h linux-4.9.217-vs2.3.9.12/include/linux/vs_device.h
7606 --- linux-4.9.217/include/linux/vs_device.h 1970-01-01 00:00:00.000000000 +0000
7607 +++ linux-4.9.217-vs2.3.9.12/include/linux/vs_device.h 2018-10-20 04:58:14.000000000 +0000
7609 +#ifndef _VS_DEVICE_H
7610 +#define _VS_DEVICE_H
7612 +#include "vserver/base.h"
7613 +#include "vserver/device.h"
7614 +#include "vserver/debug.h"
7617 +#ifdef CONFIG_VSERVER_DEVICE
7619 +int vs_map_device(struct vx_info *, dev_t, dev_t *, umode_t);
7621 +#define vs_device_perm(v, d, m, p) \
7622 + ((vs_map_device(current_vx_info(), d, NULL, m) & (p)) == (p))
7627 +int vs_map_device(struct vx_info *vxi,
7628 + dev_t device, dev_t *target, umode_t mode)
7635 +#define vs_device_perm(v, d, m, p) ((p) == (p))
7640 +#define vs_map_chrdev(d, t, p) \
7641 + ((vs_map_device(current_vx_info(), d, t, S_IFCHR) & (p)) == (p))
7642 +#define vs_map_blkdev(d, t, p) \
7643 + ((vs_map_device(current_vx_info(), d, t, S_IFBLK) & (p)) == (p))
7645 +#define vs_chrdev_perm(d, p) \
7646 + vs_device_perm(current_vx_info(), d, S_IFCHR, p)
7647 +#define vs_blkdev_perm(d, p) \
7648 + vs_device_perm(current_vx_info(), d, S_IFBLK, p)
7652 +#warning duplicate inclusion
7654 diff -NurpP --minimal linux-4.9.217/include/linux/vs_dlimit.h linux-4.9.217-vs2.3.9.12/include/linux/vs_dlimit.h
7655 --- linux-4.9.217/include/linux/vs_dlimit.h 1970-01-01 00:00:00.000000000 +0000
7656 +++ linux-4.9.217-vs2.3.9.12/include/linux/vs_dlimit.h 2018-10-20 04:58:14.000000000 +0000
7658 +#ifndef _VS_DLIMIT_H
7659 +#define _VS_DLIMIT_H
7661 +#include <linux/fs.h>
7663 +#include "vserver/dlimit.h"
7664 +#include "vserver/base.h"
7665 +#include "vserver/debug.h"
7668 +#define get_dl_info(i) __get_dl_info(i, __FILE__, __LINE__)
7670 +static inline struct dl_info *__get_dl_info(struct dl_info *dli,
7671 + const char *_file, int _line)
7675 + vxlprintk(VXD_CBIT(dlim, 4), "get_dl_info(%p[#%d.%d])",
7676 + dli, dli ? dli->dl_tag : 0,
7677 + dli ? atomic_read(&dli->dl_usecnt) : 0,
7679 + atomic_inc(&dli->dl_usecnt);
7684 +#define free_dl_info(i) \
7685 + call_rcu(&(i)->dl_rcu, rcu_free_dl_info)
7687 +#define put_dl_info(i) __put_dl_info(i, __FILE__, __LINE__)
7689 +static inline void __put_dl_info(struct dl_info *dli,
7690 + const char *_file, int _line)
7694 + vxlprintk(VXD_CBIT(dlim, 4), "put_dl_info(%p[#%d.%d])",
7695 + dli, dli ? dli->dl_tag : 0,
7696 + dli ? atomic_read(&dli->dl_usecnt) : 0,
7698 + if (atomic_dec_and_test(&dli->dl_usecnt))
7699 + free_dl_info(dli);
7703 +#define __dlimit_char(d) ((d) ? '*' : ' ')
7705 +static inline int __dl_alloc_space(struct super_block *sb,
7706 + vtag_t tag, dlsize_t nr, const char *file, int line)
7708 + struct dl_info *dli = NULL;
7713 + dli = locate_dl_info(sb, tag);
7717 + spin_lock(&dli->dl_lock);
7718 + ret = (dli->dl_space_used + nr > dli->dl_space_total);
7720 + dli->dl_space_used += nr;
7721 + spin_unlock(&dli->dl_lock);
7724 + vxlprintk(VXD_CBIT(dlim, 1),
7725 + "ALLOC (%p,#%d)%c %lld bytes (%d)",
7726 + sb, tag, __dlimit_char(dli), (long long)nr,
7728 + return ret ? -ENOSPC : 0;
7731 +static inline void __dl_free_space(struct super_block *sb,
7732 + vtag_t tag, dlsize_t nr, const char *_file, int _line)
7734 + struct dl_info *dli = NULL;
7738 + dli = locate_dl_info(sb, tag);
7742 + spin_lock(&dli->dl_lock);
7743 + if (dli->dl_space_used > nr)
7744 + dli->dl_space_used -= nr;
7746 + dli->dl_space_used = 0;
7747 + spin_unlock(&dli->dl_lock);
7750 + vxlprintk(VXD_CBIT(dlim, 1),
7751 + "FREE (%p,#%d)%c %lld bytes",
7752 + sb, tag, __dlimit_char(dli), (long long)nr,
7756 +static inline int __dl_alloc_inode(struct super_block *sb,
7757 + vtag_t tag, const char *_file, int _line)
7759 + struct dl_info *dli;
7762 + dli = locate_dl_info(sb, tag);
7766 + spin_lock(&dli->dl_lock);
7767 + dli->dl_inodes_used++;
7768 + ret = (dli->dl_inodes_used > dli->dl_inodes_total);
7769 + spin_unlock(&dli->dl_lock);
7772 + vxlprintk(VXD_CBIT(dlim, 0),
7773 + "ALLOC (%p,#%d)%c inode (%d)",
7774 + sb, tag, __dlimit_char(dli), ret, _file, _line);
7775 + return ret ? -ENOSPC : 0;
7778 +static inline void __dl_free_inode(struct super_block *sb,
7779 + vtag_t tag, const char *_file, int _line)
7781 + struct dl_info *dli;
7783 + dli = locate_dl_info(sb, tag);
7787 + spin_lock(&dli->dl_lock);
7788 + if (dli->dl_inodes_used > 1)
7789 + dli->dl_inodes_used--;
7791 + dli->dl_inodes_used = 0;
7792 + spin_unlock(&dli->dl_lock);
7795 + vxlprintk(VXD_CBIT(dlim, 0),
7796 + "FREE (%p,#%d)%c inode",
7797 + sb, tag, __dlimit_char(dli), _file, _line);
7800 +static inline void __dl_adjust_block(struct super_block *sb, vtag_t tag,
7801 + unsigned long long *free_blocks, unsigned long long *root_blocks,
7802 + const char *_file, int _line)
7804 + struct dl_info *dli;
7805 + uint64_t broot, bfree;
7807 + dli = locate_dl_info(sb, tag);
7811 + spin_lock(&dli->dl_lock);
7812 + broot = (dli->dl_space_total -
7813 + (dli->dl_space_total >> 10) * dli->dl_nrlmult)
7814 + >> sb->s_blocksize_bits;
7815 + bfree = (dli->dl_space_total - dli->dl_space_used)
7816 + >> sb->s_blocksize_bits;
7817 + spin_unlock(&dli->dl_lock);
7819 + vxlprintk(VXD_CBIT(dlim, 2),
7820 + "ADJUST: %lld,%lld on %lld,%lld [mult=%d]",
7821 + (long long)bfree, (long long)broot,
7822 + (free_blocks ? *free_blocks : 0ULL), (root_blocks ? *root_blocks : 0ULL), dli->dl_nrlmult, /* either pointer may be NULL (checked below), so do not dereference blindly */
7824 + if (free_blocks) {
7825 + if (*free_blocks > bfree)
7826 + *free_blocks = bfree;
7828 + if (root_blocks) {
7829 + if (*root_blocks > broot)
7830 + *root_blocks = broot;
7835 +#define dl_prealloc_space(in, bytes) \
7836 + __dl_alloc_space((in)->i_sb, i_tag_read(in), (dlsize_t)(bytes), \
7837 + __FILE__, __LINE__ )
7839 +#define dl_alloc_space(in, bytes) \
7840 + __dl_alloc_space((in)->i_sb, i_tag_read(in), (dlsize_t)(bytes), \
7841 + __FILE__, __LINE__ )
7843 +#define dl_reserve_space(in, bytes) \
7844 + __dl_alloc_space((in)->i_sb, i_tag_read(in), (dlsize_t)(bytes), \
7845 + __FILE__, __LINE__ )
7847 +#define dl_claim_space(in, bytes) (0)
7849 +#define dl_release_space(in, bytes) \
7850 + __dl_free_space((in)->i_sb, i_tag_read(in), (dlsize_t)(bytes), \
7851 + __FILE__, __LINE__ )
7853 +#define dl_free_space(in, bytes) \
7854 + __dl_free_space((in)->i_sb, i_tag_read(in), (dlsize_t)(bytes), \
7855 + __FILE__, __LINE__ )
7859 +#define dl_alloc_inode(in) \
7860 + __dl_alloc_inode((in)->i_sb, i_tag_read(in), __FILE__, __LINE__ )
7862 +#define dl_free_inode(in) \
7863 + __dl_free_inode((in)->i_sb, i_tag_read(in), __FILE__, __LINE__ )
7866 +#define dl_adjust_block(sb, tag, fb, rb) \
7867 + __dl_adjust_block(sb, tag, fb, rb, __FILE__, __LINE__ )
7871 +#warning duplicate inclusion
7873 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/base.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/base.h
7874 --- linux-4.9.217/include/linux/vserver/base.h 1970-01-01 00:00:00.000000000 +0000
7875 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/base.h 2018-10-20 04:58:14.000000000 +0000
7877 +#ifndef _VSERVER_BASE_H
7878 +#define _VSERVER_BASE_H
7881 +/* context state changes */
7893 +#define vx_task_xid(t) ((t)->xid)
7895 +#define vx_current_xid() vx_task_xid(current)
7897 +#define current_vx_info() (current->vx_info)
7900 +#define nx_task_nid(t) ((t)->nid)
7902 +#define nx_current_nid() nx_task_nid(current)
7904 +#define current_nx_info() (current->nx_info)
7907 +/* generic flag merging */
7909 +#define vs_check_flags(v, m, f) (((v) & (m)) ^ (f))
7911 +#define vs_mask_flags(v, f, m) (((v) & ~(m)) | ((f) & (m)))
7913 +#define vs_mask_mask(v, f, m) (((v) & ~(m)) | ((v) & (f) & (m)))
7915 +#define vs_check_bit(v, n) ((v) & (1LL << (n)))
7918 +/* context flags */
7920 +#define __vx_flags(v) ((v) ? (v)->vx_flags : 0)
7922 +#define vx_current_flags() __vx_flags(current_vx_info())
7924 +#define vx_info_flags(v, m, f) \
7925 + vs_check_flags(__vx_flags(v), m, f)
7927 +#define task_vx_flags(t, m, f) \
7928 + ((t) && vx_info_flags((t)->vx_info, m, f))
7930 +#define vx_flags(m, f) vx_info_flags(current_vx_info(), m, f)
7935 +#define __vx_ccaps(v) ((v) ? (v)->vx_ccaps : 0)
7937 +#define vx_current_ccaps() __vx_ccaps(current_vx_info())
7939 +#define vx_info_ccaps(v, c) (__vx_ccaps(v) & (c))
7941 +#define vx_ccaps(c) vx_info_ccaps(current_vx_info(), (c))
7945 +/* network flags */
7947 +#define __nx_flags(n) ((n) ? (n)->nx_flags : 0)
7949 +#define nx_current_flags() __nx_flags(current_nx_info())
7951 +#define nx_info_flags(n, m, f) \
7952 + vs_check_flags(__nx_flags(n), m, f)
7954 +#define task_nx_flags(t, m, f) \
7955 + ((t) && nx_info_flags((t)->nx_info, m, f))
7957 +#define nx_flags(m, f) nx_info_flags(current_nx_info(), m, f)
7962 +#define __nx_ncaps(n) ((n) ? (n)->nx_ncaps : 0)
7964 +#define nx_current_ncaps() __nx_ncaps(current_nx_info())
7966 +#define nx_info_ncaps(n, c) (__nx_ncaps(n) & (c))
7968 +#define nx_ncaps(c) nx_info_ncaps(current_nx_info(), c)
7971 +/* context mask capabilities */
7973 +#define __vx_mcaps(v) ((v) ? (v)->vx_ccaps >> 32UL : ~0 )
7975 +#define vx_info_mcaps(v, c) (__vx_mcaps(v) & (c))
7977 +#define vx_mcaps(c) vx_info_mcaps(current_vx_info(), c)
7980 +/* context bcap mask */
7982 +#define __vx_bcaps(v) ((v)->vx_bcaps)
7984 +#define vx_current_bcaps() __vx_bcaps(current_vx_info())
7987 +/* mask given bcaps */
7989 +#define vx_info_mbcaps(v, c) ((v) ? cap_intersect(__vx_bcaps(v), c) : c)
7991 +#define vx_mbcaps(c) vx_info_mbcaps(current_vx_info(), c)
7994 +/* masked cap_bset */
7996 +#define vx_info_cap_bset(v) vx_info_mbcaps(v, current->cap_bset)
7998 +#define vx_current_cap_bset() vx_info_cap_bset(current_vx_info())
8001 +#define vx_info_mbcap(v, b) \
8002 + (!vx_info_flags(v, VXF_STATE_SETUP, 0) ? \
8003 + vx_info_bcaps(v, b) : (b))
8005 +#define task_vx_mbcap(t, b) \
8006 + vx_info_mbcap((t)->vx_info, (t)->b)
8008 +#define vx_mbcap(b) task_vx_mbcap(current, b)
8011 +#define vx_cap_raised(v, c, f) cap_raised(vx_info_mbcaps(v, c), f)
8013 +#define vx_capable(b, c) (capable(b) || \
8014 + (cap_raised(current_cap(), b) && vx_ccaps(c)))
8016 +#define vx_ns_capable(n, b, c) (ns_capable(n, b) || \
8017 + (cap_raised(current_cap(), b) && vx_ccaps(c)))
8019 +#define nx_capable(b, c) (capable(b) || \
8020 + (cap_raised(current_cap(), b) && nx_ncaps(c)))
8022 +#define nx_ns_capable(n, b, c) (ns_capable(n, b) || \
8023 + (cap_raised(current_cap(), b) && nx_ncaps(c)))
8025 +#define vx_task_initpid(t, n) \
8026 + ((t)->vx_info && \
8027 + ((t)->vx_info->vx_initpid == (n)))
8029 +#define vx_current_initpid(n) vx_task_initpid(current, n)
8032 +/* context unshare mask */
8034 +#define __vx_umask(v) ((v)->vx_umask)
8036 +#define vx_current_umask() __vx_umask(current_vx_info())
8038 +#define vx_can_unshare(b, f) (capable(b) || \
8039 + (cap_raised(current_cap(), b) && \
8040 + !((f) & ~vx_current_umask())))
8042 +#define vx_ns_can_unshare(n, b, f) (ns_capable(n, b) || \
8043 + (cap_raised(current_cap(), b) && \
8044 + !((f) & ~vx_current_umask())))
8046 +#define __vx_wmask(v) ((v)->vx_wmask)
8048 +#define vx_current_wmask() __vx_wmask(current_vx_info())
8051 +#define __vx_state(v) ((v) ? ((v)->vx_state) : 0)
8053 +#define vx_info_state(v, m) (__vx_state(v) & (m))
8056 +#define __nx_state(n) ((n) ? ((n)->nx_state) : 0)
8058 +#define nx_info_state(n, m) (__nx_state(n) & (m))
8061 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/cacct_cmd.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/cacct_cmd.h
8062 --- linux-4.9.217/include/linux/vserver/cacct_cmd.h 1970-01-01 00:00:00.000000000 +0000
8063 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/cacct_cmd.h 2018-10-20 04:58:14.000000000 +0000
8065 +#ifndef _VSERVER_CACCT_CMD_H
8066 +#define _VSERVER_CACCT_CMD_H
8069 +#include <linux/compiler.h>
8070 +#include <uapi/vserver/cacct_cmd.h>
8072 +extern int vc_sock_stat(struct vx_info *, void __user *);
8074 +#endif /* _VSERVER_CACCT_CMD_H */
8075 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/cacct_def.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/cacct_def.h
8076 --- linux-4.9.217/include/linux/vserver/cacct_def.h 1970-01-01 00:00:00.000000000 +0000
8077 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/cacct_def.h 2018-10-20 04:58:14.000000000 +0000
8079 +#ifndef _VSERVER_CACCT_DEF_H
8080 +#define _VSERVER_CACCT_DEF_H
8082 +#include <asm/atomic.h>
8083 +#include <linux/vserver/cacct.h>
8086 +struct _vx_sock_acc {
8087 + atomic_long_t count;
8088 + atomic_long_t total;
8091 +/* context sub struct */
8094 + struct _vx_sock_acc sock[VXA_SOCK_SIZE][3];
8096 + atomic_t page[6][8];
8099 +#ifdef CONFIG_VSERVER_DEBUG
8101 +static inline void __dump_vx_cacct(struct _vx_cacct *cacct) /* debug build only: print every socket accounting counter held in @cacct */
8105 + printk("\t_vx_cacct:");
8106 + for (i = 0; i < VXA_SOCK_SIZE; i++) { /* bound follows the sock[VXA_SOCK_SIZE][3] declaration instead of a literal 6 */
8107 + struct _vx_sock_acc *ptr = cacct->sock[i];
8109 + printk("\t [%d] =", i);
8110 + for (j = 0; j < 3; j++) { /* second dimension of sock[][3] */
8111 + printk(" [%d] = %8lu, %8lu", j,
8112 + atomic_long_read(&ptr[j].count),
8113 + atomic_long_read(&ptr[j].total));
8121 +#endif /* _VSERVER_CACCT_DEF_H */
8122 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/cacct.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/cacct.h
8123 --- linux-4.9.217/include/linux/vserver/cacct.h 1970-01-01 00:00:00.000000000 +0000
8124 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/cacct.h 2018-10-20 04:58:14.000000000 +0000
8126 +#ifndef _VSERVER_CACCT_H
8127 +#define _VSERVER_CACCT_H
8130 +enum sock_acc_field {
8131 + VXA_SOCK_UNSPEC = 0,
8137 + VXA_SOCK_SIZE /* array size */
8140 +#endif /* _VSERVER_CACCT_H */
8141 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/cacct_int.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/cacct_int.h
8142 --- linux-4.9.217/include/linux/vserver/cacct_int.h 1970-01-01 00:00:00.000000000 +0000
8143 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/cacct_int.h 2018-10-20 04:58:14.000000000 +0000
8145 +#ifndef _VSERVER_CACCT_INT_H
8146 +#define _VSERVER_CACCT_INT_H
8149 +unsigned long vx_sock_count(struct _vx_cacct *cacct, int type, int pos)
8151 + return atomic_long_read(&cacct->sock[type][pos].count);
8156 +unsigned long vx_sock_total(struct _vx_cacct *cacct, int type, int pos)
8158 + return atomic_long_read(&cacct->sock[type][pos].total);
8161 +#endif /* _VSERVER_CACCT_INT_H */
8162 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/check.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/check.h
8163 --- linux-4.9.217/include/linux/vserver/check.h 1970-01-01 00:00:00.000000000 +0000
8164 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/check.h 2018-10-20 04:58:14.000000000 +0000
8166 +#ifndef _VSERVER_CHECK_H
8167 +#define _VSERVER_CHECK_H
8170 +#define MAX_S_CONTEXT 65535 /* Arbitrary limit; also the upper bound of the VS_DYNAMIC id range in __vs_check() */
8172 +#ifdef CONFIG_VSERVER_DYNAMIC_IDS
8173 +#define MIN_D_CONTEXT 49152 /* dynamic contexts start here */
8175 +#define MIN_D_CONTEXT 65536 /* above MAX_S_CONTEXT, so the VS_DYNAMIC range test in __vs_check() can never be true */
8178 +/* check conditions */
8180 +#define VS_ADMIN 0x0001 /* __vs_check(): current context id == 0 */
8181 +#define VS_WATCH 0x0002 /* __vs_check(): current context id == 1 */
8182 +#define VS_HIDE 0x0004
8183 +#define VS_HOSTID 0x0008 /* __vs_check(): supplied id == 0 */
8185 +#define VS_IDENT 0x0010 /* __vs_check(): supplied id == current context id */
8186 +#define VS_EQUIV 0x0020
8187 +#define VS_PARENT 0x0040
8188 +#define VS_CHILD 0x0080
8190 +#define VS_ARG_MASK 0x00F0 /* union of VS_IDENT, VS_EQUIV, VS_PARENT and VS_CHILD */
8192 +#define VS_DYNAMIC 0x0100 /* __vs_check(): MIN_D_CONTEXT <= id <= MAX_S_CONTEXT */
8193 +#define VS_STATIC 0x0200 /* __vs_check(): 1 < id < MIN_D_CONTEXT */
8195 +#define VS_ATR_MASK 0x0F00 /* nibble holding VS_DYNAMIC and VS_STATIC */
8197 +#ifdef CONFIG_VSERVER_PRIVACY
8198 +#define VS_ADMIN_P (0) /* privacy build: the _P variant contributes no mode bit */
8199 +#define VS_WATCH_P (0) /* privacy build: the _P variant contributes no mode bit */
8201 +#define VS_ADMIN_P VS_ADMIN
8202 +#define VS_WATCH_P VS_WATCH
8205 +#define VS_HARDIRQ 0x1000 /* __vs_check(): tested with in_irq() */
8206 +#define VS_SOFTIRQ 0x2000 /* __vs_check(): tested with in_softirq() */
8207 +#define VS_IRQ 0x4000 /* __vs_check(): tested with in_interrupt() */
8209 +#define VS_IRQ_MASK 0xF000 /* nibble holding VS_HARDIRQ, VS_SOFTIRQ and VS_IRQ */
8211 +#include <linux/hardirq.h>
8214 + * check current context for ADMIN/WATCH and
8215 + * optionally against supplied argument
8217 +static inline int __vs_check(int cid, int id, unsigned int mode)
8219 + if (mode & VS_ARG_MASK) {
8220 + if ((mode & VS_IDENT) && (id == cid))
8223 + if (mode & VS_ATR_MASK) {
8224 + if ((mode & VS_DYNAMIC) &&
8225 + (id >= MIN_D_CONTEXT) &&
8226 + (id <= MAX_S_CONTEXT))
8228 + if ((mode & VS_STATIC) &&
8229 + (id > 1) && (id < MIN_D_CONTEXT))
8232 + if (mode & VS_IRQ_MASK) {
8233 + if ((mode & VS_IRQ) && unlikely(in_interrupt()))
8235 + if ((mode & VS_HARDIRQ) && unlikely(in_irq()))
8237 + if ((mode & VS_SOFTIRQ) && unlikely(in_softirq()))
8240 + return (((mode & VS_ADMIN) && (cid == 0)) ||
8241 + ((mode & VS_WATCH) && (cid == 1)) ||
8242 + ((mode & VS_HOSTID) && (id == 0)));
8245 +#define vx_check(c, m) __vs_check(vx_current_xid(), c, (m) | VS_IRQ) /* checks id c against the current xid; VS_IRQ is always added to the mode */
8247 +#define vx_weak_check(c, m) ((m) ? vx_check(c, m) : 1) /* a zero mode evaluates to 1 without calling vx_check() */
8250 +#define nx_check(c, m) __vs_check(nx_current_nid(), c, m) /* checks id c against the current nid; mode is passed through unchanged */
8252 +#define nx_weak_check(c, m) ((m) ? nx_check(c, m) : 1) /* a zero mode evaluates to 1 without calling nx_check() */
8255 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/context_cmd.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/context_cmd.h
8256 --- linux-4.9.217/include/linux/vserver/context_cmd.h 1970-01-01 00:00:00.000000000 +0000
8257 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/context_cmd.h 2018-10-20 04:58:14.000000000 +0000
8259 +#ifndef _VSERVER_CONTEXT_CMD_H
8260 +#define _VSERVER_CONTEXT_CMD_H
8262 +#include <uapi/vserver/context_cmd.h>
8264 +extern int vc_task_xid(uint32_t);
8266 +extern int vc_vx_info(struct vx_info *, void __user *);
8268 +extern int vc_ctx_stat(struct vx_info *, void __user *);
8270 +extern int vc_ctx_create(uint32_t, void __user *);
8271 +extern int vc_ctx_migrate(struct vx_info *, void __user *);
8273 +extern int vc_get_cflags(struct vx_info *, void __user *);
8274 +extern int vc_set_cflags(struct vx_info *, void __user *);
8276 +extern int vc_get_ccaps(struct vx_info *, void __user *);
8277 +extern int vc_set_ccaps(struct vx_info *, void __user *);
8279 +extern int vc_get_bcaps(struct vx_info *, void __user *);
8280 +extern int vc_set_bcaps(struct vx_info *, void __user *);
8282 +extern int vc_get_umask(struct vx_info *, void __user *);
8283 +extern int vc_set_umask(struct vx_info *, void __user *);
8285 +extern int vc_get_wmask(struct vx_info *, void __user *);
8286 +extern int vc_set_wmask(struct vx_info *, void __user *);
8288 +extern int vc_get_badness(struct vx_info *, void __user *);
8289 +extern int vc_set_badness(struct vx_info *, void __user *);
8291 +#endif /* _VSERVER_CONTEXT_CMD_H */
8292 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/context.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/context.h
8293 --- linux-4.9.217/include/linux/vserver/context.h 1970-01-01 00:00:00.000000000 +0000
8294 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/context.h 2018-10-20 04:58:14.000000000 +0000
8296 +#ifndef _VSERVER_CONTEXT_H
8297 +#define _VSERVER_CONTEXT_H
8300 +#include <linux/list.h>
8301 +#include <linux/spinlock.h>
8302 +#include <linux/rcupdate.h>
8303 +#include <uapi/vserver/context.h>
8305 +#include "limit_def.h"
8306 +#include "sched_def.h"
8307 +#include "cvirt_def.h"
8308 +#include "cacct_def.h"
8309 +#include "device_def.h"
8311 +#define VX_SPACES 2
8313 +struct _vx_info_pc {
8314 + struct _vx_sched_pc sched_pc;
8315 + struct _vx_cvirt_pc cvirt_pc;
8319 + unsigned long vx_nsmask; /* assignment mask */
8320 + struct nsproxy *vx_nsproxy; /* private namespaces */
8321 + struct fs_struct *vx_fs; /* private namespace fs */
8322 + const struct cred *vx_cred; /* task credentials */
8326 + struct hlist_node vx_hlist; /* linked list of contexts */
8327 + vxid_t vx_id; /* context id */
8328 + atomic_t vx_usecnt; /* usage count */
8329 + atomic_t vx_tasks; /* tasks count */
8330 + struct vx_info *vx_parent; /* parent context */
8331 + int vx_state; /* context state */
8333 + struct _vx_space space[VX_SPACES]; /* namespace store */
8335 + uint64_t vx_flags; /* context flags */
8336 + uint64_t vx_ccaps; /* context caps (vserver) */
8337 + uint64_t vx_umask; /* unshare mask (guest) */
8338 + uint64_t vx_wmask; /* warn mask (guest) */
8339 + kernel_cap_t vx_bcaps; /* bounding caps (system) */
8341 + struct task_struct *vx_reaper; /* guest reaper process */
8342 + pid_t vx_initpid; /* PID of guest init */
8343 + int64_t vx_badness_bias; /* OOM points bias */
8345 + struct _vx_limit limit; /* vserver limits */
8346 + struct _vx_sched sched; /* vserver scheduler */
8347 + struct _vx_cvirt cvirt; /* virtual/bias stuff */
8348 + struct _vx_cacct cacct; /* context accounting */
8350 + struct _vx_device dmap; /* default device map targets */
8353 + struct _vx_info_pc info_pc; /* per cpu data */
8355 + struct _vx_info_pc *ptr_pc; /* per cpu array */
8358 + wait_queue_head_t vx_wait; /* context exit waitqueue */
8359 + int reboot_cmd; /* last sys_reboot() cmd */
8360 + int exit_code; /* last process exit code */
8362 + char vx_name[65]; /* vserver name */
8366 +#define vx_ptr_pc(vxi) (&(vxi)->info_pc)
8367 +#define vx_per_cpu(vxi, v, id) vx_ptr_pc(vxi)->v
8369 +#define vx_ptr_pc(vxi) ((vxi)->ptr_pc)
8370 +#define vx_per_cpu(vxi, v, id) per_cpu_ptr(vx_ptr_pc(vxi), id)->v
8373 +#define vx_cpu(vxi, v) vx_per_cpu(vxi, v, smp_processor_id())
8376 +struct vx_info_save {
8377 + struct vx_info *vxi;
8384 +#define VXS_HASHED 0x0001
8385 +#define VXS_PAUSED 0x0010
8386 +#define VXS_SHUTDOWN 0x0100
8387 +#define VXS_HELPER 0x1000
8388 +#define VXS_RELEASED 0x8000
8391 +extern void claim_vx_info(struct vx_info *, struct task_struct *);
8392 +extern void release_vx_info(struct vx_info *, struct task_struct *);
8394 +extern struct vx_info *lookup_vx_info(int);
8395 +extern struct vx_info *lookup_or_create_vx_info(int);
8397 +extern int get_xid_list(int, unsigned int *, int);
8398 +extern int xid_is_hashed(vxid_t);
8400 +extern int vx_migrate_task(struct task_struct *, struct vx_info *, int);
8402 +extern long vs_state_change(struct vx_info *, unsigned int);
8405 +#endif /* _VSERVER_CONTEXT_H */
8406 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/cvirt_cmd.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/cvirt_cmd.h
8407 --- linux-4.9.217/include/linux/vserver/cvirt_cmd.h 1970-01-01 00:00:00.000000000 +0000
8408 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/cvirt_cmd.h 2018-10-20 04:58:14.000000000 +0000
8410 +#ifndef _VSERVER_CVIRT_CMD_H
8411 +#define _VSERVER_CVIRT_CMD_H
8414 +#include <linux/compiler.h>
8415 +#include <uapi/vserver/cvirt_cmd.h>
8417 +extern int vc_set_vhi_name(struct vx_info *, void __user *);
8418 +extern int vc_get_vhi_name(struct vx_info *, void __user *);
8420 +extern int vc_virt_stat(struct vx_info *, void __user *);
8422 +#endif /* _VSERVER_CVIRT_CMD_H */
8423 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/cvirt_def.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/cvirt_def.h
8424 --- linux-4.9.217/include/linux/vserver/cvirt_def.h 1970-01-01 00:00:00.000000000 +0000
8425 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/cvirt_def.h 2018-10-20 04:58:14.000000000 +0000
8427 +#ifndef _VSERVER_CVIRT_DEF_H
8428 +#define _VSERVER_CVIRT_DEF_H
8430 +#include <linux/jiffies.h>
8431 +#include <linux/spinlock.h>
8432 +#include <linux/wait.h>
8433 +#include <linux/time.h>
8434 +#include <asm/atomic.h>
8437 +struct _vx_usage_stat {
8447 +struct _vx_syslog {
8448 + wait_queue_head_t log_wait;
8449 + spinlock_t logbuf_lock; /* lock for the log buffer */
8451 + unsigned long log_start; /* next char to be read by syslog() */
8452 + unsigned long con_start; /* next char to be sent to consoles */
8453 + unsigned long log_end; /* most-recently-written-char + 1 */
8454 + unsigned long logged_chars; /* #chars since last read+clear operation */
8456 + char log_buf[1024];
8460 +/* context sub struct */
8463 + atomic_t nr_threads; /* number of current threads */
8464 + atomic_t nr_running; /* number of running threads */
8465 + atomic_t nr_uninterruptible; /* number of uninterruptible threads */
8467 + atomic_t nr_onhold; /* processes on hold */
8468 + uint32_t onhold_last; /* jiffies when put on hold */
8470 + struct timespec64 bias_ts; /* time offset to the host */
8471 + struct timespec64 bias_idle;
8472 + struct timespec64 bias_uptime; /* context creation point */
8473 + uint64_t bias_clock; /* offset in clock_t */
8475 + spinlock_t load_lock; /* lock for the load averages */
8476 + atomic_t load_updates; /* nr of load updates done so far */
8477 + uint32_t load_last; /* last time load was calculated */
8478 + uint32_t load[3]; /* load averages 1,5,15 */
8480 + atomic_t total_forks; /* number of forks so far */
8482 + struct _vx_syslog syslog;
8485 +struct _vx_cvirt_pc {
8486 + struct _vx_usage_stat cpustat;
8490 +#ifdef CONFIG_VSERVER_DEBUG
8492 +static inline void __dump_vx_cvirt(struct _vx_cvirt *cvirt)
8494 + printk("\t_vx_cvirt:\n");
8495 + printk("\t threads: %4d, %4d, %4d, %4d\n",
8496 + atomic_read(&cvirt->nr_threads),
8497 + atomic_read(&cvirt->nr_running),
8498 + atomic_read(&cvirt->nr_uninterruptible),
8499 + atomic_read(&cvirt->nr_onhold));
8500 + /* add rest here */
8501 + printk("\t total_forks = %d\n", atomic_read(&cvirt->total_forks));
8506 +#endif /* _VSERVER_CVIRT_DEF_H */
8507 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/cvirt.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/cvirt.h
8508 --- linux-4.9.217/include/linux/vserver/cvirt.h 1970-01-01 00:00:00.000000000 +0000
8509 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/cvirt.h 2019-10-13 16:04:08.203030205 +0000
8511 +#ifndef _VSERVER_CVIRT_H
8512 +#define _VSERVER_CVIRT_H
8516 +void vx_update_load(struct vx_info *);
8519 +int vx_do_syslog(int, char __user *, int);
8521 +#endif /* _VSERVER_CVIRT_H */
8522 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/debug_cmd.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/debug_cmd.h
8523 --- linux-4.9.217/include/linux/vserver/debug_cmd.h 1970-01-01 00:00:00.000000000 +0000
8524 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/debug_cmd.h 2018-10-20 04:58:14.000000000 +0000
8526 +#ifndef _VSERVER_DEBUG_CMD_H
8527 +#define _VSERVER_DEBUG_CMD_H
8529 +#include <uapi/vserver/debug_cmd.h>
8532 +#ifdef CONFIG_COMPAT
8534 +#include <asm/compat.h>
8536 +struct vcmd_read_history_v0_x32 {
8539 + compat_uptr_t data_ptr;
8542 +struct vcmd_read_monitor_v0_x32 {
8545 + compat_uptr_t data_ptr;
8548 +#endif /* CONFIG_COMPAT */
8550 +extern int vc_dump_history(uint32_t);
8552 +extern int vc_read_history(uint32_t, void __user *);
8553 +extern int vc_read_monitor(uint32_t, void __user *);
8555 +#ifdef CONFIG_COMPAT
8557 +extern int vc_read_history_x32(uint32_t, void __user *);
8558 +extern int vc_read_monitor_x32(uint32_t, void __user *);
8560 +#endif /* CONFIG_COMPAT */
8562 +#endif /* _VSERVER_DEBUG_CMD_H */
8563 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/debug.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/debug.h
8564 --- linux-4.9.217/include/linux/vserver/debug.h 1970-01-01 00:00:00.000000000 +0000
8565 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/debug.h 2018-10-20 04:58:14.000000000 +0000
8567 +#ifndef _VSERVER_DEBUG_H
8568 +#define _VSERVER_DEBUG_H
8571 +#define VXD_CBIT(n, m) (vs_debug_ ## n & (1 << (m)))
8572 +#define VXD_CMIN(n, m) (vs_debug_ ## n > (m))
8573 +#define VXD_MASK(n, m) (vs_debug_ ## n & (m))
8575 +#define VXD_DEV(d) (d), (d)->bd_inode->i_ino, \
8576 + imajor((d)->bd_inode), iminor((d)->bd_inode)
8577 +#define VXF_DEV "%p[%lu,%d:%d]"
8579 +#if defined(CONFIG_QUOTES_UTF8)
8580 +#define VS_Q_LQM "\xc2\xbb"
8581 +#define VS_Q_RQM "\xc2\xab"
8582 +#elif defined(CONFIG_QUOTES_ASCII)
8583 +#define VS_Q_LQM "\x27"
8584 +#define VS_Q_RQM "\x27"
8586 +#define VS_Q_LQM "\xbb"
8587 +#define VS_Q_RQM "\xab"
8590 +#define VS_Q(f) VS_Q_LQM f VS_Q_RQM
8593 +#define vxd_path(p) \
8594 + ({ static char _buffer[PATH_MAX]; \
8595 + d_path(p, _buffer, sizeof(_buffer)); }) /* evaluates to d_path()'s return value, filled via a static PATH_MAX buffer; NOTE(review): the buffer is reused by every execution of a given expansion, which looks non-reentrant -- confirm callers tolerate that */
8597 +#define vxd_cond_path(n) \
8598 + ((n) ? vxd_path(&(n)->path) : "<null>" )
8601 +#ifdef CONFIG_VSERVER_DEBUG
8603 +extern unsigned int vs_debug_switch;
8604 +extern unsigned int vs_debug_xid;
8605 +extern unsigned int vs_debug_nid;
8606 +extern unsigned int vs_debug_tag;
8607 +extern unsigned int vs_debug_net;
8608 +extern unsigned int vs_debug_limit;
8609 +extern unsigned int vs_debug_cres;
8610 +extern unsigned int vs_debug_dlim;
8611 +extern unsigned int vs_debug_quota;
8612 +extern unsigned int vs_debug_cvirt;
8613 +extern unsigned int vs_debug_space;
8614 +extern unsigned int vs_debug_perm;
8615 +extern unsigned int vs_debug_misc;
8618 +#define VX_LOGLEVEL "vxD: "
8619 +#define VX_PROC_FMT "%p: "
8620 +#define VX_PROCESS current
8622 +#define vxdprintk(c, f, x...) \
8625 + printk(VX_LOGLEVEL VX_PROC_FMT f "\n", \
8626 + VX_PROCESS , ##x); \
8629 +#define vxlprintk(c, f, x...) \
8632 + printk(VX_LOGLEVEL f " @%s:%d\n", x); \
8635 +#define vxfprintk(c, f, x...) \
8638 + printk(VX_LOGLEVEL f " %s@%s:%d\n", x); \
8644 +void dump_vx_info(struct vx_info *, int);
8645 +void dump_vx_info_inactive(int);
8647 +#else /* CONFIG_VSERVER_DEBUG */
8649 +#define vs_debug_switch 0
8650 +#define vs_debug_xid 0
8651 +#define vs_debug_nid 0
8652 +#define vs_debug_tag 0
8653 +#define vs_debug_net 0
8654 +#define vs_debug_limit 0
8655 +#define vs_debug_cres 0
8656 +#define vs_debug_dlim 0
8657 +#define vs_debug_quota 0
8658 +#define vs_debug_cvirt 0
8659 +#define vs_debug_space 0
8660 +#define vs_debug_perm 0
8661 +#define vs_debug_misc 0
8663 +#define vxdprintk(x...) do { } while (0)
8664 +#define vxlprintk(x...) do { } while (0)
8665 +#define vxfprintk(x...) do { } while (0)
8667 +#endif /* CONFIG_VSERVER_DEBUG */
8670 +#ifdef CONFIG_VSERVER_WARN
8672 +#define VX_WARNLEVEL KERN_WARNING "vxW: "
8673 +#define VX_WARN_TASK "[" VS_Q("%s") ",%u:#%u|%u|%u] "
8674 +#define VX_WARN_XID "[xid #%u] "
8675 +#define VX_WARN_NID "[nid #%u] "
8676 +#define VX_WARN_TAG "[tag #%u] "
8678 +#define vxwprintk(c, f, x...) \
8681 + printk(VX_WARNLEVEL f "\n", ##x); \
8684 +#else /* CONFIG_VSERVER_WARN */
8686 +#define vxwprintk(x...) do { } while (0)
8688 +#endif /* CONFIG_VSERVER_WARN */
8690 +#define vxwprintk_task(c, f, x...) \
8691 + vxwprintk(c, VX_WARN_TASK f, \
8692 + current->comm, current->pid, \
8693 + current->xid, current->nid, \
8694 + current->tag, ##x)
8695 +#define vxwprintk_xid(c, f, x...) \
8696 + vxwprintk(c, VX_WARN_XID f, current->xid, x)
8697 +#define vxwprintk_nid(c, f, x...) \
8698 + vxwprintk(c, VX_WARN_NID f, current->nid, x)
8699 +#define vxwprintk_tag(c, f, x...) \
8700 + vxwprintk(c, VX_WARN_TAG f, current->tag, x)
8702 +#ifdef CONFIG_VSERVER_DEBUG
8703 +#define vxd_assert_lock(l) assert_spin_locked(l)
8704 +#define vxd_assert(c, f, x...) vxlprintk(!(c), \
8705 + "assertion [" f "] failed.", ##x, __FILE__, __LINE__)
8707 +#define vxd_assert_lock(l) do { } while (0)
8708 +#define vxd_assert(c, f, x...) do { } while (0)
8712 +#endif /* _VSERVER_DEBUG_H */
8713 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/device_cmd.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/device_cmd.h
8714 --- linux-4.9.217/include/linux/vserver/device_cmd.h 1970-01-01 00:00:00.000000000 +0000
8715 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/device_cmd.h 2018-10-20 04:58:14.000000000 +0000
8717 +#ifndef _VSERVER_DEVICE_CMD_H
8718 +#define _VSERVER_DEVICE_CMD_H
8720 +#include <uapi/vserver/device_cmd.h>
8723 +#ifdef CONFIG_COMPAT
8725 +#include <asm/compat.h>
8727 +struct vcmd_set_mapping_v0_x32 {
8728 + compat_uptr_t device_ptr;
8729 + compat_uptr_t target_ptr;
8733 +#endif /* CONFIG_COMPAT */
8735 +#include <linux/compiler.h>
8737 +extern int vc_set_mapping(struct vx_info *, void __user *);
8738 +extern int vc_unset_mapping(struct vx_info *, void __user *);
8740 +#ifdef CONFIG_COMPAT
8742 +extern int vc_set_mapping_x32(struct vx_info *, void __user *);
8743 +extern int vc_unset_mapping_x32(struct vx_info *, void __user *);
8745 +#endif /* CONFIG_COMPAT */
8747 +#endif /* _VSERVER_DEVICE_CMD_H */
8748 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/device_def.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/device_def.h
8749 --- linux-4.9.217/include/linux/vserver/device_def.h 1970-01-01 00:00:00.000000000 +0000
8750 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/device_def.h 2018-10-20 04:58:14.000000000 +0000
8752 +#ifndef _VSERVER_DEVICE_DEF_H
8753 +#define _VSERVER_DEVICE_DEF_H
8755 +#include <linux/types.h>
8757 +struct vx_dmap_target {
8762 +struct _vx_device {
8763 +#ifdef CONFIG_VSERVER_DEVICE
8764 + struct vx_dmap_target targets[2];
8768 +#endif /* _VSERVER_DEVICE_DEF_H */
8769 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/device.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/device.h
8770 --- linux-4.9.217/include/linux/vserver/device.h 1970-01-01 00:00:00.000000000 +0000
8771 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/device.h 2018-10-20 04:58:14.000000000 +0000
8773 +#ifndef _VSERVER_DEVICE_H
8774 +#define _VSERVER_DEVICE_H
8777 +#include <uapi/vserver/device.h>
8779 +#else /* _VSERVER_DEVICE_H */
8780 +#warning duplicate inclusion
8781 +#endif /* _VSERVER_DEVICE_H */
8782 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/dlimit_cmd.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/dlimit_cmd.h
8783 --- linux-4.9.217/include/linux/vserver/dlimit_cmd.h 1970-01-01 00:00:00.000000000 +0000
8784 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/dlimit_cmd.h 2018-10-20 04:58:14.000000000 +0000
8786 +#ifndef _VSERVER_DLIMIT_CMD_H
8787 +#define _VSERVER_DLIMIT_CMD_H
8789 +#include <uapi/vserver/dlimit_cmd.h>
8792 +#ifdef CONFIG_COMPAT
8794 +#include <asm/compat.h>
8796 +struct vcmd_ctx_dlimit_base_v0_x32 {
8797 + compat_uptr_t name_ptr;
8801 +struct vcmd_ctx_dlimit_v0_x32 {
8802 + compat_uptr_t name_ptr;
8803 + uint32_t space_used; /* used space in kbytes */
8804 + uint32_t space_total; /* maximum space in kbytes */
8805 + uint32_t inodes_used; /* used inodes */
8806 + uint32_t inodes_total; /* maximum inodes */
8807 + uint32_t reserved; /* reserved for root in % */
8811 +#endif /* CONFIG_COMPAT */
8813 +#include <linux/compiler.h>
8815 +extern int vc_add_dlimit(uint32_t, void __user *);
8816 +extern int vc_rem_dlimit(uint32_t, void __user *);
8818 +extern int vc_set_dlimit(uint32_t, void __user *);
8819 +extern int vc_get_dlimit(uint32_t, void __user *);
8821 +#ifdef CONFIG_COMPAT
8823 +extern int vc_add_dlimit_x32(uint32_t, void __user *);
8824 +extern int vc_rem_dlimit_x32(uint32_t, void __user *);
8826 +extern int vc_set_dlimit_x32(uint32_t, void __user *);
8827 +extern int vc_get_dlimit_x32(uint32_t, void __user *);
8829 +#endif /* CONFIG_COMPAT */
8831 +#endif /* _VSERVER_DLIMIT_CMD_H */
8832 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/dlimit.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/dlimit.h
8833 --- linux-4.9.217/include/linux/vserver/dlimit.h 1970-01-01 00:00:00.000000000 +0000
8834 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/dlimit.h 2018-10-20 04:58:14.000000000 +0000
8836 +#ifndef _VSERVER_DLIMIT_H
8837 +#define _VSERVER_DLIMIT_H
8839 +#include "switch.h"
8844 +/* keep in sync with CDLIM_INFINITY */
8846 +#define DLIM_INFINITY (~0ULL)
8848 +#include <linux/spinlock.h>
8849 +#include <linux/rcupdate.h>
8851 +struct super_block;
8854 + struct hlist_node dl_hlist; /* linked list of contexts */
8855 + struct rcu_head dl_rcu; /* the rcu head */
8856 + vtag_t dl_tag; /* context tag */
8857 + atomic_t dl_usecnt; /* usage count */
8858 + atomic_t dl_refcnt; /* reference count */
8860 + struct super_block *dl_sb; /* associated superblock */
8862 + spinlock_t dl_lock; /* protect the values */
8864 + unsigned long long dl_space_used; /* used space in bytes */
8865 + unsigned long long dl_space_total; /* maximum space in bytes */
8866 + unsigned long dl_inodes_used; /* used inodes */
8867 + unsigned long dl_inodes_total; /* maximum inodes */
8869 + unsigned int dl_nrlmult; /* non root limit mult */
8874 +extern void rcu_free_dl_info(struct rcu_head *);
8875 +extern void unhash_dl_info(struct dl_info *);
8877 +extern struct dl_info *locate_dl_info(struct super_block *, vtag_t);
8882 +extern void vx_vsi_statfs(struct super_block *, struct kstatfs *);
8884 +typedef uint64_t dlsize_t;
8886 +#endif /* __KERNEL__ */
8887 +#else /* _VSERVER_DLIMIT_H */
8888 +#warning duplicate inclusion
8889 +#endif /* _VSERVER_DLIMIT_H */
8890 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/global.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/global.h
8891 --- linux-4.9.217/include/linux/vserver/global.h 1970-01-01 00:00:00.000000000 +0000
8892 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/global.h 2018-10-20 04:58:14.000000000 +0000
8894 +#ifndef _VSERVER_GLOBAL_H
8895 +#define _VSERVER_GLOBAL_H
8898 +extern atomic_t vx_global_ctotal;
8899 +extern atomic_t vx_global_cactive;
8901 +extern atomic_t nx_global_ctotal;
8902 +extern atomic_t nx_global_cactive;
8904 +extern atomic_t vs_global_nsproxy;
8905 +extern atomic_t vs_global_fs;
8906 +extern atomic_t vs_global_mnt_ns;
8907 +extern atomic_t vs_global_uts_ns;
8908 +extern atomic_t vs_global_ipc_ns;
8909 +extern atomic_t vs_global_user_ns;
8910 +extern atomic_t vs_global_pid_ns;
8913 +#endif /* _VSERVER_GLOBAL_H */
8914 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/history.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/history.h
8915 --- linux-4.9.217/include/linux/vserver/history.h 1970-01-01 00:00:00.000000000 +0000
8916 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/history.h 2018-10-20 04:58:14.000000000 +0000
8918 +#ifndef _VSERVER_HISTORY_H
8919 +#define _VSERVER_HISTORY_H
8924 + VXH_THROW_OOPS = 1,
8931 + VXH_CLAIM_VX_INFO,
8932 + VXH_RELEASE_VX_INFO,
8933 + VXH_ALLOC_VX_INFO,
8934 + VXH_DEALLOC_VX_INFO,
8936 + VXH_UNHASH_VX_INFO,
8938 + VXH_LOOKUP_VX_INFO,
8939 + VXH_CREATE_VX_INFO,
8943 + struct vx_info *ptr;
8949 +struct _vxhe_set_clr {
8953 +struct _vxhe_loc_lookup {
8957 +struct _vx_hist_entry {
8959 + unsigned short seq;
8960 + unsigned short type;
8961 + struct _vxhe_vxi vxi;
8963 + struct _vxhe_set_clr sc;
8964 + struct _vxhe_loc_lookup ll;
8968 +#ifdef CONFIG_VSERVER_HISTORY
8970 +extern unsigned volatile int vxh_active;
8972 +struct _vx_hist_entry *vxh_advance(void *loc);
8976 +void __vxh_copy_vxi(struct _vx_hist_entry *entry, struct vx_info *vxi)
8978 + entry->vxi.ptr = vxi;
8980 + entry->vxi.usecnt = atomic_read(&vxi->vx_usecnt);
8981 + entry->vxi.tasks = atomic_read(&vxi->vx_tasks);
8982 + entry->vxi.xid = vxi->vx_id;
8987 +#define __HERE__ current_text_addr()
8989 +#define __VXH_BODY(__type, __data, __here) \
8990 + struct _vx_hist_entry *entry; \
8992 + preempt_disable(); \
8993 + entry = vxh_advance(__here); \
8995 + entry->type = __type; \
8999 + /* pass vxi only */
9001 +#define __VXH_SMPL \
9002 + __vxh_copy_vxi(entry, vxi)
9005 +void __vxh_smpl(struct vx_info *vxi, int __type, void *__here)
9007 + __VXH_BODY(__type, __VXH_SMPL, __here)
9010 + /* pass vxi and data (void *) */
9012 +#define __VXH_DATA \
9013 + __vxh_copy_vxi(entry, vxi); \
9014 + entry->sc.data = data
9017 +void __vxh_data(struct vx_info *vxi, void *data,
9018 + int __type, void *__here)
9020 + __VXH_BODY(__type, __VXH_DATA, __here)
9023 + /* pass vxi and arg (long) */
9025 +#define __VXH_LONG \
9026 + __vxh_copy_vxi(entry, vxi); \
9027 + entry->ll.arg = arg
9030 +void __vxh_long(struct vx_info *vxi, long arg,
9031 + int __type, void *__here)
9033 + __VXH_BODY(__type, __VXH_LONG, __here)
9038 +void __vxh_throw_oops(void *__here)
9040 + __VXH_BODY(VXH_THROW_OOPS, {}, __here);
9041 + /* prevent further acquisition */
9046 +#define vxh_throw_oops() __vxh_throw_oops(__HERE__);
9048 +#define __vxh_get_vx_info(v, h) __vxh_smpl(v, VXH_GET_VX_INFO, h);
9049 +#define __vxh_put_vx_info(v, h) __vxh_smpl(v, VXH_PUT_VX_INFO, h);
9051 +#define __vxh_init_vx_info(v, d, h) \
9052 + __vxh_data(v, d, VXH_INIT_VX_INFO, h);
9053 +#define __vxh_set_vx_info(v, d, h) \
9054 + __vxh_data(v, d, VXH_SET_VX_INFO, h);
9055 +#define __vxh_clr_vx_info(v, d, h) \
9056 + __vxh_data(v, d, VXH_CLR_VX_INFO, h);
9058 +#define __vxh_claim_vx_info(v, d, h) \
9059 + __vxh_data(v, d, VXH_CLAIM_VX_INFO, h);
9060 +#define __vxh_release_vx_info(v, d, h) \
9061 + __vxh_data(v, d, VXH_RELEASE_VX_INFO, h);
9063 +#define vxh_alloc_vx_info(v) \
9064 + __vxh_smpl(v, VXH_ALLOC_VX_INFO, __HERE__);
9065 +#define vxh_dealloc_vx_info(v) \
9066 + __vxh_smpl(v, VXH_DEALLOC_VX_INFO, __HERE__);
9068 +#define vxh_hash_vx_info(v) \
9069 + __vxh_smpl(v, VXH_HASH_VX_INFO, __HERE__);
9070 +#define vxh_unhash_vx_info(v) \
9071 + __vxh_smpl(v, VXH_UNHASH_VX_INFO, __HERE__);
9073 +#define vxh_loc_vx_info(v, l) \
9074 + __vxh_long(v, l, VXH_LOC_VX_INFO, __HERE__);
9075 +#define vxh_lookup_vx_info(v, l) \
9076 + __vxh_long(v, l, VXH_LOOKUP_VX_INFO, __HERE__);
9077 +#define vxh_create_vx_info(v, l) \
9078 + __vxh_long(v, l, VXH_CREATE_VX_INFO, __HERE__);
9080 +extern void vxh_dump_history(void);
9083 +#else /* CONFIG_VSERVER_HISTORY */
9087 +#define vxh_throw_oops() do { } while (0)
9089 +#define __vxh_get_vx_info(v, h) do { } while (0)
9090 +#define __vxh_put_vx_info(v, h) do { } while (0)
9092 +#define __vxh_init_vx_info(v, d, h) do { } while (0)
9093 +#define __vxh_set_vx_info(v, d, h) do { } while (0)
9094 +#define __vxh_clr_vx_info(v, d, h) do { } while (0)
9096 +#define __vxh_claim_vx_info(v, d, h) do { } while (0)
9097 +#define __vxh_release_vx_info(v, d, h) do { } while (0)
9099 +#define vxh_alloc_vx_info(v) do { } while (0)
9100 +#define vxh_dealloc_vx_info(v) do { } while (0)
9102 +#define vxh_hash_vx_info(v) do { } while (0)
9103 +#define vxh_unhash_vx_info(v) do { } while (0)
9105 +#define vxh_loc_vx_info(v, l) do { } while (0)
9106 +#define vxh_lookup_vx_info(v, l) do { } while (0)
9107 +#define vxh_create_vx_info(v, l) do { } while (0)
9109 +#define vxh_dump_history() do { } while (0)
9112 +#endif /* CONFIG_VSERVER_HISTORY */
9114 +#endif /* _VSERVER_HISTORY_H */
9115 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/inode_cmd.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/inode_cmd.h
9116 --- linux-4.9.217/include/linux/vserver/inode_cmd.h 1970-01-01 00:00:00.000000000 +0000
9117 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/inode_cmd.h 2018-10-20 04:58:14.000000000 +0000
9119 +#ifndef _VSERVER_INODE_CMD_H
9120 +#define _VSERVER_INODE_CMD_H
9122 +#include <uapi/vserver/inode_cmd.h>
9126 +#ifdef CONFIG_COMPAT
9128 +#include <asm/compat.h>
9130 +struct vcmd_ctx_iattr_v1_x32 {
9131 + compat_uptr_t name_ptr;
9137 +#endif /* CONFIG_COMPAT */
9139 +#include <linux/compiler.h>
9141 +extern int vc_get_iattr(void __user *);
9142 +extern int vc_set_iattr(void __user *);
9144 +extern int vc_fget_iattr(uint32_t, void __user *);
9145 +extern int vc_fset_iattr(uint32_t, void __user *);
9147 +#ifdef CONFIG_COMPAT
9149 +extern int vc_get_iattr_x32(void __user *);
9150 +extern int vc_set_iattr_x32(void __user *);
9152 +#endif /* CONFIG_COMPAT */
9154 +#endif /* _VSERVER_INODE_CMD_H */
9155 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/inode.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/inode.h
9156 --- linux-4.9.217/include/linux/vserver/inode.h 1970-01-01 00:00:00.000000000 +0000
9157 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/inode.h 2018-10-20 04:58:14.000000000 +0000
9159 +#ifndef _VSERVER_INODE_H
9160 +#define _VSERVER_INODE_H
9162 +#include <uapi/vserver/inode.h>
9165 +#ifdef CONFIG_VSERVER_PROC_SECURE
9166 +#define IATTR_PROC_DEFAULT ( IATTR_ADMIN | IATTR_HIDE )
9167 +#define IATTR_PROC_SYMLINK ( IATTR_ADMIN )
9169 +#define IATTR_PROC_DEFAULT ( IATTR_ADMIN )
9170 +#define IATTR_PROC_SYMLINK ( IATTR_ADMIN )
9173 +#define vx_hide_check(c, m) (((m) & IATTR_HIDE) ? vx_check(c, m) : 1)
9175 +#else /* _VSERVER_INODE_H */
9176 +#warning duplicate inclusion
9177 +#endif /* _VSERVER_INODE_H */
9178 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/limit_cmd.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/limit_cmd.h
9179 --- linux-4.9.217/include/linux/vserver/limit_cmd.h 1970-01-01 00:00:00.000000000 +0000
9180 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/limit_cmd.h 2018-10-20 04:58:14.000000000 +0000
9182 +#ifndef _VSERVER_LIMIT_CMD_H
9183 +#define _VSERVER_LIMIT_CMD_H
9185 +#include <uapi/vserver/limit_cmd.h>
9188 +#ifdef CONFIG_IA32_EMULATION
9190 +struct vcmd_ctx_rlimit_v0_x32 {
9193 + uint64_t softlimit;
9195 +} __attribute__ ((packed));
9197 +#endif /* CONFIG_IA32_EMULATION */
9199 +#include <linux/compiler.h>
9201 +extern int vc_get_rlimit_mask(uint32_t, void __user *);
9202 +extern int vc_get_rlimit(struct vx_info *, void __user *);
9203 +extern int vc_set_rlimit(struct vx_info *, void __user *);
9204 +extern int vc_reset_hits(struct vx_info *, void __user *);
9205 +extern int vc_reset_minmax(struct vx_info *, void __user *);
9207 +extern int vc_rlimit_stat(struct vx_info *, void __user *);
9209 +#ifdef CONFIG_IA32_EMULATION
9211 +extern int vc_get_rlimit_x32(struct vx_info *, void __user *);
9212 +extern int vc_set_rlimit_x32(struct vx_info *, void __user *);
9214 +#endif /* CONFIG_IA32_EMULATION */
9216 +#endif /* _VSERVER_LIMIT_CMD_H */
9217 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/limit_def.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/limit_def.h
9218 --- linux-4.9.217/include/linux/vserver/limit_def.h 1970-01-01 00:00:00.000000000 +0000
9219 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/limit_def.h 2018-10-20 04:58:14.000000000 +0000
9221 +#ifndef _VSERVER_LIMIT_DEF_H
9222 +#define _VSERVER_LIMIT_DEF_H
9224 +#include <asm/atomic.h>
9225 +#include <asm/resource.h>
9230 +struct _vx_res_limit {
9231 + rlim_t soft; /* Context soft limit */
9232 + rlim_t hard; /* Context hard limit */
9234 + rlim_atomic_t rcur; /* Current value */
9235 + rlim_t rmin; /* Context minimum */
9236 + rlim_t rmax; /* Context maximum */
9238 + atomic_t lhit; /* Limit hits */
9241 +/* context sub struct */
9244 + struct _vx_res_limit res[NUM_LIMITS];
9247 +#ifdef CONFIG_VSERVER_DEBUG
9249 +static inline void __dump_vx_limit(struct _vx_limit *limit)
9253 + printk("\t_vx_limit:");
9254 + for (i = 0; i < NUM_LIMITS; i++) {
9255 + printk("\t [%2d] = %8lu %8lu/%8lu, %8ld/%8ld, %8d\n",
9256 + i, (unsigned long)__rlim_get(limit, i),
9257 + (unsigned long)__rlim_rmin(limit, i),
9258 + (unsigned long)__rlim_rmax(limit, i),
9259 + (long)__rlim_soft(limit, i),
9260 + (long)__rlim_hard(limit, i),
9261 + atomic_read(&__rlim_lhit(limit, i)));
9267 +#endif /* _VSERVER_LIMIT_DEF_H */
9268 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/limit.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/limit.h
9269 --- linux-4.9.217/include/linux/vserver/limit.h 1970-01-01 00:00:00.000000000 +0000
9270 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/limit.h 2018-10-20 04:58:14.000000000 +0000
9272 +#ifndef _VSERVER_LIMIT_H
9273 +#define _VSERVER_LIMIT_H
9275 +#include <uapi/vserver/limit.h>
9278 +#define VLIM_NOCHECK ((1L << VLIMIT_DENTRY) | (1L << RLIMIT_RSS))
9280 +/* keep in sync with CRLIM_INFINITY */
9282 +#define VLIM_INFINITY (~0ULL)
9284 +#include <asm/atomic.h>
9285 +#include <asm/resource.h>
9287 +#ifndef RLIM_INFINITY
9288 +#warning RLIM_INFINITY is undefined
9291 +#define __rlim_val(l, r, v) ((l)->res[r].v)
9293 +#define __rlim_soft(l, r) __rlim_val(l, r, soft)
9294 +#define __rlim_hard(l, r) __rlim_val(l, r, hard)
9296 +#define __rlim_rcur(l, r) __rlim_val(l, r, rcur)
9297 +#define __rlim_rmin(l, r) __rlim_val(l, r, rmin)
9298 +#define __rlim_rmax(l, r) __rlim_val(l, r, rmax)
9300 +#define __rlim_lhit(l, r) __rlim_val(l, r, lhit)
9301 +#define __rlim_hit(l, r) atomic_inc(&__rlim_lhit(l, r))
9303 +typedef atomic_long_t rlim_atomic_t;
9304 +typedef unsigned long rlim_t;
9306 +#define __rlim_get(l, r) atomic_long_read(&__rlim_rcur(l, r))
9307 +#define __rlim_set(l, r, v) atomic_long_set(&__rlim_rcur(l, r), v)
9308 +#define __rlim_inc(l, r) atomic_long_inc(&__rlim_rcur(l, r))
9309 +#define __rlim_dec(l, r) atomic_long_dec(&__rlim_rcur(l, r))
9310 +#define __rlim_add(l, r, v) atomic_long_add(v, &__rlim_rcur(l, r))
9311 +#define __rlim_sub(l, r, v) atomic_long_sub(v, &__rlim_rcur(l, r))
9314 +#if (RLIM_INFINITY == VLIM_INFINITY)
9315 +#define VX_VLIM(r) ((long long)(long)(r))
9316 +#define VX_RLIM(v) ((rlim_t)(v))
9318 +#define VX_VLIM(r) (((r) == RLIM_INFINITY) \
9319 + ? VLIM_INFINITY : (long long)(r))
9320 +#define VX_RLIM(v) (((v) == VLIM_INFINITY) \
9321 + ? RLIM_INFINITY : (rlim_t)(v))
9326 +#ifdef CONFIG_MEMCG
9327 +void vx_vsi_meminfo(struct sysinfo *);
9328 +void vx_vsi_swapinfo(struct sysinfo *);
9329 +long vx_vsi_cached(struct sysinfo *);
9330 +#else /* !CONFIG_MEMCG */
9331 +#define vx_vsi_meminfo(s) do { } while (0)
9332 +#define vx_vsi_swapinfo(s) do { } while (0)
9333 +#define vx_vsi_cached(s) (0L)
9334 +#endif /* !CONFIG_MEMCG */
9336 +#define NUM_LIMITS 24
9338 +#endif /* _VSERVER_LIMIT_H */
9339 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/limit_int.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/limit_int.h
9340 --- linux-4.9.217/include/linux/vserver/limit_int.h 1970-01-01 00:00:00.000000000 +0000
9341 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/limit_int.h 2018-10-20 04:58:14.000000000 +0000
9343 +#ifndef _VSERVER_LIMIT_INT_H
9344 +#define _VSERVER_LIMIT_INT_H
9346 +#define VXD_RCRES_COND(r) VXD_CBIT(cres, r)
9347 +#define VXD_RLIMIT_COND(r) VXD_CBIT(limit, r)
9349 +extern const char *vlimit_name[NUM_LIMITS];
9351 +static inline void __vx_acc_cres(struct vx_info *vxi,
9352 + int res, int dir, void *_data, char *_file, int _line)
9354 + if (VXD_RCRES_COND(res))
9355 + vxlprintk(1, "vx_acc_cres[%5d,%s,%2d]: %5ld%s (%p)",
9356 + (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
9357 + (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
9358 + (dir > 0) ? "++" : "--", _data, _file, _line);
9363 + __rlim_inc(&vxi->limit, res);
9365 + __rlim_dec(&vxi->limit, res);
9368 +static inline void __vx_add_cres(struct vx_info *vxi,
9369 + int res, int amount, void *_data, char *_file, int _line)
9371 + if (VXD_RCRES_COND(res))
9372 + vxlprintk(1, "vx_add_cres[%5d,%s,%2d]: %5ld += %5d (%p)",
9373 + (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
9374 + (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
9375 + amount, _data, _file, _line);
9380 + __rlim_add(&vxi->limit, res, amount);
9384 +int __vx_cres_adjust_max(struct _vx_limit *limit, int res, rlim_t value)
9386 + int cond = (value > __rlim_rmax(limit, res));
9389 + __rlim_rmax(limit, res) = value;
9394 +int __vx_cres_adjust_min(struct _vx_limit *limit, int res, rlim_t value)
9396 + int cond = (value < __rlim_rmin(limit, res));
9399 + __rlim_rmin(limit, res) = value;
9404 +void __vx_cres_fixup(struct _vx_limit *limit, int res, rlim_t value)
9406 + if (!__vx_cres_adjust_max(limit, res, value))
9407 + __vx_cres_adjust_min(limit, res, value);
9412 + +1 ... no limit hit
9413 + -1 ... over soft limit
9414 + 0 ... over hard limit */
9416 +static inline int __vx_cres_avail(struct vx_info *vxi,
9417 + int res, int num, char *_file, int _line)
9419 + struct _vx_limit *limit;
9422 + if (VXD_RLIMIT_COND(res))
9423 + vxlprintk(1, "vx_cres_avail[%5d,%s,%2d]: %5ld/%5ld > %5ld + %5d",
9424 + (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
9425 + (vxi ? (long)__rlim_soft(&vxi->limit, res) : -1),
9426 + (vxi ? (long)__rlim_hard(&vxi->limit, res) : -1),
9427 + (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
9428 + num, _file, _line);
9432 + limit = &vxi->limit;
9433 + value = __rlim_get(limit, res);
9435 + if (!__vx_cres_adjust_max(limit, res, value))
9436 + __vx_cres_adjust_min(limit, res, value);
9441 + if (__rlim_soft(limit, res) == RLIM_INFINITY)
9443 + if (value + num <= __rlim_soft(limit, res))
9446 + if (__rlim_hard(limit, res) == RLIM_INFINITY)
9448 + if (value + num <= __rlim_hard(limit, res))
9451 + __rlim_hit(limit, res);
9456 +static const int VLA_RSS[] = { RLIMIT_RSS, VLIMIT_ANON, VLIMIT_MAPPED, 0 };
9459 +rlim_t __vx_cres_array_sum(struct _vx_limit *limit, const int *array)
9461 + rlim_t value, sum = 0;
9464 + while ((res = *array++)) {
9465 + value = __rlim_get(limit, res);
9466 + __vx_cres_fixup(limit, res, value);
9473 +rlim_t __vx_cres_array_fixup(struct _vx_limit *limit, const int *array)
9475 + rlim_t value = __vx_cres_array_sum(limit, array + 1);
9478 + if (value == __rlim_get(limit, res))
9481 + __rlim_set(limit, res, value);
9482 + /* now adjust min/max */
9483 + if (!__vx_cres_adjust_max(limit, res, value))
9484 + __vx_cres_adjust_min(limit, res, value);
9489 +static inline int __vx_cres_array_avail(struct vx_info *vxi,
9490 + const int *array, int num, char *_file, int _line)
9492 + struct _vx_limit *limit;
9501 + limit = &vxi->limit;
9503 + value = __vx_cres_array_sum(limit, array + 1);
9505 + __rlim_set(limit, res, value);
9506 + __vx_cres_fixup(limit, res, value);
9508 + return __vx_cres_avail(vxi, res, num, _file, _line);
9512 +static inline void vx_limit_fixup(struct _vx_limit *limit, int id)
9517 + /* complex resources first */
9518 + if ((id < 0) || (id == RLIMIT_RSS))
9519 + __vx_cres_array_fixup(limit, VLA_RSS);
9521 + for (res = 0; res < NUM_LIMITS; res++) {
9522 + if ((id > 0) && (res != id))
9525 + value = __rlim_get(limit, res);
9526 + __vx_cres_fixup(limit, res, value);
9528 + /* not supposed to happen, maybe warn? */
9529 + if (__rlim_rmax(limit, res) > __rlim_hard(limit, res))
9530 + __rlim_rmax(limit, res) = __rlim_hard(limit, res);
9535 +#endif /* _VSERVER_LIMIT_INT_H */
9536 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/monitor.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/monitor.h
9537 --- linux-4.9.217/include/linux/vserver/monitor.h 1970-01-01 00:00:00.000000000 +0000
9538 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/monitor.h 2018-10-20 04:58:14.000000000 +0000
9540 +#ifndef _VSERVER_MONITOR_H
9541 +#define _VSERVER_MONITOR_H
9543 +#include <uapi/vserver/monitor.h>
9545 +#endif /* _VSERVER_MONITOR_H */
9546 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/network_cmd.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/network_cmd.h
9547 --- linux-4.9.217/include/linux/vserver/network_cmd.h 1970-01-01 00:00:00.000000000 +0000
9548 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/network_cmd.h 2018-10-20 04:58:14.000000000 +0000
9550 +#ifndef _VSERVER_NETWORK_CMD_H
9551 +#define _VSERVER_NETWORK_CMD_H
9553 +#include <uapi/vserver/network_cmd.h>
9555 +extern int vc_task_nid(uint32_t);
9557 +extern int vc_nx_info(struct nx_info *, void __user *);
9559 +extern int vc_net_create(uint32_t, void __user *);
9560 +extern int vc_net_migrate(struct nx_info *, void __user *);
9562 +extern int vc_net_add(struct nx_info *, void __user *);
9563 +extern int vc_net_remove(struct nx_info *, void __user *);
9565 +extern int vc_net_add_ipv4_v1(struct nx_info *, void __user *);
9566 +extern int vc_net_add_ipv4(struct nx_info *, void __user *);
9568 +extern int vc_net_rem_ipv4_v1(struct nx_info *, void __user *);
9569 +extern int vc_net_rem_ipv4(struct nx_info *, void __user *);
9571 +extern int vc_net_add_ipv6(struct nx_info *, void __user *);
9572 +extern int vc_net_remove_ipv6(struct nx_info *, void __user *);
9574 +extern int vc_add_match_ipv4(struct nx_info *, void __user *);
9575 +extern int vc_get_match_ipv4(struct nx_info *, void __user *);
9577 +extern int vc_add_match_ipv6(struct nx_info *, void __user *);
9578 +extern int vc_get_match_ipv6(struct nx_info *, void __user *);
9580 +extern int vc_get_nflags(struct nx_info *, void __user *);
9581 +extern int vc_set_nflags(struct nx_info *, void __user *);
9583 +extern int vc_get_ncaps(struct nx_info *, void __user *);
9584 +extern int vc_set_ncaps(struct nx_info *, void __user *);
9586 +#endif	/* _VSERVER_NETWORK_CMD_H */
9587 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/network.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/network.h
9588 --- linux-4.9.217/include/linux/vserver/network.h 1970-01-01 00:00:00.000000000 +0000
9589 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/network.h 2018-10-20 04:58:14.000000000 +0000
9591 +#ifndef _VSERVER_NETWORK_H
9592 +#define _VSERVER_NETWORK_H
9595 +#include <linux/list.h>
9596 +#include <linux/spinlock.h>
9597 +#include <linux/rcupdate.h>
9598 +#include <linux/in.h>
9599 +#include <linux/in6.h>
9600 +#include <asm/atomic.h>
9601 +#include <uapi/vserver/network.h>
9603 +struct nx_addr_v4 {
9604 + struct nx_addr_v4 *next;
9605 + struct in_addr ip[2];
9606 + struct in_addr mask;
9611 +struct nx_addr_v6 {
9612 + struct nx_addr_v6 *next;
9613 + struct in6_addr ip;
9614 + struct in6_addr mask;
9621 + struct hlist_node nx_hlist; /* linked list of nxinfos */
9622 + vnid_t nx_id; /* vnet id */
9623 + atomic_t nx_usecnt; /* usage count */
9624 + atomic_t nx_tasks; /* tasks count */
9625 + int nx_state; /* context state */
9627 + uint64_t nx_flags; /* network flag word */
9628 + uint64_t nx_ncaps; /* network capabilities */
9630 + spinlock_t addr_lock; /* protect address changes */
9631 + struct in_addr v4_lback; /* Loopback address */
9632 + struct in_addr v4_bcast; /* Broadcast address */
9633 + struct nx_addr_v4 v4; /* First/Single ipv4 address */
9635 + struct nx_addr_v6 v6; /* First/Single ipv6 address */
9637 + char nx_name[65]; /* network context name */
9643 +#define NXS_HASHED 0x0001
9644 +#define NXS_SHUTDOWN 0x0100
9645 +#define NXS_RELEASED 0x8000
9647 +extern struct nx_info *lookup_nx_info(int);
9649 +extern int get_nid_list(int, unsigned int *, int);
9650 +extern int nid_is_hashed(vnid_t);
9652 +extern int nx_migrate_task(struct task_struct *, struct nx_info *);
9654 +extern long vs_net_change(struct nx_info *, unsigned int);
9659 +#define NX_IPV4(n) ((n)->v4.type != NXA_TYPE_NONE)
9661 +#define NX_IPV6(n) ((n)->v6.type != NXA_TYPE_NONE)
9663 +#define NX_IPV6(n) (0)
9666 +#endif /* _VSERVER_NETWORK_H */
9667 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/percpu.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/percpu.h
9668 --- linux-4.9.217/include/linux/vserver/percpu.h 1970-01-01 00:00:00.000000000 +0000
9669 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/percpu.h 2018-10-20 04:58:14.000000000 +0000
9671 +#ifndef _VSERVER_PERCPU_H
9672 +#define _VSERVER_PERCPU_H
9674 +#include "cvirt_def.h"
9675 +#include "sched_def.h"
9677 +struct _vx_percpu {
9678 + struct _vx_cvirt_pc cvirt;
9679 + struct _vx_sched_pc sched;
9682 +#define PERCPU_PERCTX (sizeof(struct _vx_percpu))
9684 +#endif /* _VSERVER_PERCPU_H */
9685 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/pid.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/pid.h
9686 --- linux-4.9.217/include/linux/vserver/pid.h 1970-01-01 00:00:00.000000000 +0000
9687 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/pid.h 2018-10-20 04:58:14.000000000 +0000
9689 +#ifndef _VSERVER_PID_H
9690 +#define _VSERVER_PID_H
9692 +/* pid faking stuff */
9694 +#define vx_info_map_pid(v, p) \
9695 + __vx_info_map_pid((v), (p), __func__, __FILE__, __LINE__)
9696 +#define vx_info_map_tgid(v,p) vx_info_map_pid(v,p)
9697 +#define vx_map_pid(p) vx_info_map_pid(current_vx_info(), p)
9698 +#define vx_map_tgid(p) vx_map_pid(p)
9700 +static inline int __vx_info_map_pid(struct vx_info *vxi, int pid,
9701 + const char *func, const char *file, int line)
9703 + if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) {
9704 + vxfprintk(VXD_CBIT(cvirt, 2),
9705 + "vx_map_tgid: %p/%llx: %d -> %d",
9706 + vxi, (long long)vxi->vx_flags, pid,
9707 + (pid && pid == vxi->vx_initpid) ? 1 : pid,
9708 + func, file, line);
9711 + if (pid == vxi->vx_initpid)
9717 +#define vx_info_rmap_pid(v, p) \
9718 + __vx_info_rmap_pid((v), (p), __func__, __FILE__, __LINE__)
9719 +#define vx_rmap_pid(p) vx_info_rmap_pid(current_vx_info(), p)
9720 +#define vx_rmap_tgid(p) vx_rmap_pid(p)
9722 +static inline int __vx_info_rmap_pid(struct vx_info *vxi, int pid,
9723 + const char *func, const char *file, int line)
9725 + if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) {
9726 + vxfprintk(VXD_CBIT(cvirt, 2),
9727 + "vx_rmap_tgid: %p/%llx: %d -> %d",
9728 + vxi, (long long)vxi->vx_flags, pid,
9729 + (pid == 1) ? vxi->vx_initpid : pid,
9730 + func, file, line);
9731 + if ((pid == 1) && vxi->vx_initpid)
9732 + return vxi->vx_initpid;
9733 + if (pid == vxi->vx_initpid)
9740 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/sched_cmd.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/sched_cmd.h
9741 --- linux-4.9.217/include/linux/vserver/sched_cmd.h 1970-01-01 00:00:00.000000000 +0000
9742 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/sched_cmd.h 2018-10-20 04:58:14.000000000 +0000
9744 +#ifndef _VSERVER_SCHED_CMD_H
9745 +#define _VSERVER_SCHED_CMD_H
9748 +#include <linux/compiler.h>
9749 +#include <uapi/vserver/sched_cmd.h>
9751 +extern int vc_set_prio_bias(struct vx_info *, void __user *);
9752 +extern int vc_get_prio_bias(struct vx_info *, void __user *);
9754 +#endif /* _VSERVER_SCHED_CMD_H */
9755 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/sched_def.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/sched_def.h
9756 --- linux-4.9.217/include/linux/vserver/sched_def.h 1970-01-01 00:00:00.000000000 +0000
9757 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/sched_def.h 2018-10-20 04:58:14.000000000 +0000
9759 +#ifndef _VSERVER_SCHED_DEF_H
9760 +#define _VSERVER_SCHED_DEF_H
9762 +#include <linux/spinlock.h>
9763 +#include <linux/jiffies.h>
9764 +#include <linux/cpumask.h>
9765 +#include <asm/atomic.h>
9766 +#include <asm/param.h>
9769 +/* context sub struct */
9772 + int prio_bias; /* bias offset for priority */
9774 + cpumask_t update; /* CPUs which should update */
9777 +struct _vx_sched_pc {
9778 + int prio_bias; /* bias offset for priority */
9780 + uint64_t user_ticks; /* token tick events */
9781 + uint64_t sys_ticks; /* token tick events */
9782 + uint64_t hold_ticks; /* token ticks paused */
9786 +#ifdef CONFIG_VSERVER_DEBUG
9788 +static inline void __dump_vx_sched(struct _vx_sched *sched)
9790 + printk("\t_vx_sched:\n");
9791 + printk("\t priority = %4d\n", sched->prio_bias);
9796 +#endif /* _VSERVER_SCHED_DEF_H */
9797 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/sched.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/sched.h
9798 --- linux-4.9.217/include/linux/vserver/sched.h 1970-01-01 00:00:00.000000000 +0000
9799 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/sched.h 2018-10-20 04:58:14.000000000 +0000
9801 +#ifndef _VSERVER_SCHED_H
9802 +#define _VSERVER_SCHED_H
9809 +void vx_vsi_uptime(struct timespec *, struct timespec *);
9814 +void vx_update_load(struct vx_info *);
9817 +void vx_update_sched_param(struct _vx_sched *sched,
9818 + struct _vx_sched_pc *sched_pc);
9820 +#endif /* __KERNEL__ */
9821 +#else /* _VSERVER_SCHED_H */
9822 +#warning duplicate inclusion
9823 +#endif /* _VSERVER_SCHED_H */
9824 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/signal_cmd.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/signal_cmd.h
9825 --- linux-4.9.217/include/linux/vserver/signal_cmd.h 1970-01-01 00:00:00.000000000 +0000
9826 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/signal_cmd.h 2018-10-20 04:58:14.000000000 +0000
9828 +#ifndef _VSERVER_SIGNAL_CMD_H
9829 +#define _VSERVER_SIGNAL_CMD_H
9831 +#include <uapi/vserver/signal_cmd.h>
9834 +extern int vc_ctx_kill(struct vx_info *, void __user *);
9835 +extern int vc_wait_exit(struct vx_info *, void __user *);
9838 +extern int vc_get_pflags(uint32_t pid, void __user *);
9839 +extern int vc_set_pflags(uint32_t pid, void __user *);
9841 +#endif /* _VSERVER_SIGNAL_CMD_H */
9842 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/signal.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/signal.h
9843 --- linux-4.9.217/include/linux/vserver/signal.h 1970-01-01 00:00:00.000000000 +0000
9844 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/signal.h 2018-10-20 04:58:14.000000000 +0000
9846 +#ifndef _VSERVER_SIGNAL_H
9847 +#define _VSERVER_SIGNAL_H
9854 +int vx_info_kill(struct vx_info *, int, int);
9856 +#endif /* __KERNEL__ */
9857 +#else /* _VSERVER_SIGNAL_H */
9858 +#warning duplicate inclusion
9859 +#endif /* _VSERVER_SIGNAL_H */
9860 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/space_cmd.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/space_cmd.h
9861 --- linux-4.9.217/include/linux/vserver/space_cmd.h 1970-01-01 00:00:00.000000000 +0000
9862 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/space_cmd.h 2018-10-20 04:58:14.000000000 +0000
9864 +#ifndef _VSERVER_SPACE_CMD_H
9865 +#define _VSERVER_SPACE_CMD_H
9867 +#include <uapi/vserver/space_cmd.h>
9870 +extern int vc_enter_space_v1(struct vx_info *, void __user *);
9871 +extern int vc_set_space_v1(struct vx_info *, void __user *);
9872 +extern int vc_enter_space(struct vx_info *, void __user *);
9873 +extern int vc_set_space(struct vx_info *, void __user *);
9874 +extern int vc_get_space_mask(void __user *, int);
9876 +#endif /* _VSERVER_SPACE_CMD_H */
9877 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/space.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/space.h
9878 --- linux-4.9.217/include/linux/vserver/space.h 1970-01-01 00:00:00.000000000 +0000
9879 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/space.h 2018-10-20 04:58:14.000000000 +0000
9881 +#ifndef _VSERVER_SPACE_H
9882 +#define _VSERVER_SPACE_H
9884 +#include <linux/types.h>
9888 +int vx_set_space(struct vx_info *vxi, unsigned long mask, unsigned index);
9890 +#else /* _VSERVER_SPACE_H */
9891 +#warning duplicate inclusion
9892 +#endif /* _VSERVER_SPACE_H */
9893 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/switch.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/switch.h
9894 --- linux-4.9.217/include/linux/vserver/switch.h 1970-01-01 00:00:00.000000000 +0000
9895 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/switch.h 2018-10-20 04:58:14.000000000 +0000
9897 +#ifndef _VSERVER_SWITCH_H
9898 +#define _VSERVER_SWITCH_H
9901 +#include <linux/errno.h>
9902 +#include <uapi/vserver/switch.h>
9904 +#endif /* _VSERVER_SWITCH_H */
9905 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/tag_cmd.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/tag_cmd.h
9906 --- linux-4.9.217/include/linux/vserver/tag_cmd.h 1970-01-01 00:00:00.000000000 +0000
9907 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/tag_cmd.h 2018-10-20 04:58:14.000000000 +0000
9909 +#ifndef _VSERVER_TAG_CMD_H
9910 +#define _VSERVER_TAG_CMD_H
9912 +#include <uapi/vserver/tag_cmd.h>
9914 +extern int vc_task_tag(uint32_t);
9916 +extern int vc_tag_migrate(uint32_t);
9918 +#endif /* _VSERVER_TAG_CMD_H */
9919 diff -NurpP --minimal linux-4.9.217/include/linux/vserver/tag.h linux-4.9.217-vs2.3.9.12/include/linux/vserver/tag.h
9920 --- linux-4.9.217/include/linux/vserver/tag.h 1970-01-01 00:00:00.000000000 +0000
9921 +++ linux-4.9.217-vs2.3.9.12/include/linux/vserver/tag.h 2018-10-20 04:58:14.000000000 +0000
9926 +#include <linux/types.h>
9927 +#include <linux/uidgid.h>
9930 +#define DX_TAG(in) (IS_TAGGED(in))
9933 +#ifdef CONFIG_TAG_NFSD
9934 +#define DX_TAG_NFSD 1
9936 +#define DX_TAG_NFSD 0
9940 +#ifdef CONFIG_TAGGING_NONE
9942 +#define MAX_UID 0xFFFFFFFF
9943 +#define MAX_GID 0xFFFFFFFF
9945 +#define INOTAG_TAG(cond, uid, gid, tag) (0)
9947 +#define TAGINO_UID(cond, uid, tag) (uid)
9948 +#define TAGINO_GID(cond, gid, tag) (gid)
9953 +#ifdef CONFIG_TAGGING_GID16
9955 +#define MAX_UID 0xFFFFFFFF
9956 +#define MAX_GID 0x0000FFFF
9958 +#define INOTAG_TAG(cond, uid, gid, tag) \
9959 + ((cond) ? (((gid) >> 16) & 0xFFFF) : 0)
9961 +#define TAGINO_UID(cond, uid, tag) (uid)
9962 +#define TAGINO_GID(cond, gid, tag) \
9963 + ((cond) ? (((gid) & 0xFFFF) | ((tag) << 16)) : (gid))
9968 +#ifdef CONFIG_TAGGING_ID24
9970 +#define MAX_UID 0x00FFFFFF
9971 +#define MAX_GID 0x00FFFFFF
9973 +#define INOTAG_TAG(cond, uid, gid, tag) \
9974 + ((cond) ? ((((uid) >> 16) & 0xFF00) | (((gid) >> 24) & 0xFF)) : 0)
9976 +#define TAGINO_UID(cond, uid, tag) \
9977 + ((cond) ? (((uid) & 0xFFFFFF) | (((tag) & 0xFF00) << 16)) : (uid))
9978 +#define TAGINO_GID(cond, gid, tag) \
9979 + ((cond) ? (((gid) & 0xFFFFFF) | (((tag) & 0x00FF) << 24)) : (gid))
9984 +#ifdef CONFIG_TAGGING_UID16
9986 +#define MAX_UID 0x0000FFFF
9987 +#define MAX_GID 0xFFFFFFFF
9989 +#define INOTAG_TAG(cond, uid, gid, tag) \
9990 + ((cond) ? (((uid) >> 16) & 0xFFFF) : 0)
9992 +#define TAGINO_UID(cond, uid, tag) \
9993 + ((cond) ? (((uid) & 0xFFFF) | ((tag) << 16)) : (uid))
9994 +#define TAGINO_GID(cond, gid, tag) (gid)
9999 +#ifdef CONFIG_TAGGING_INTERN
10001 +#define MAX_UID 0xFFFFFFFF
10002 +#define MAX_GID 0xFFFFFFFF
10004 +#define INOTAG_TAG(cond, uid, gid, tag) \
10005 + ((cond) ? (tag) : 0)
10007 +#define TAGINO_UID(cond, uid, tag) (uid)
10008 +#define TAGINO_GID(cond, gid, tag) (gid)
10013 +#ifndef CONFIG_TAGGING_NONE
10014 +#define dx_current_fstag(sb) \
10015 + ((sb)->s_flags & MS_TAGGED ? dx_current_tag() : 0)
10017 +#define dx_current_fstag(sb) (0)
10020 +#ifndef CONFIG_TAGGING_INTERN
10021 +#define TAGINO_TAG(cond, tag) (0)
10023 +#define TAGINO_TAG(cond, tag) ((cond) ? (tag) : 0)
10026 +#define TAGINO_KUID(cond, kuid, ktag) \
10027 + KUIDT_INIT(TAGINO_UID(cond, __kuid_val(kuid), __ktag_val(ktag)))
10028 +#define TAGINO_KGID(cond, kgid, ktag) \
10029 + KGIDT_INIT(TAGINO_GID(cond, __kgid_val(kgid), __ktag_val(ktag)))
10030 +#define TAGINO_KTAG(cond, ktag) \
10031 + KTAGT_INIT(TAGINO_TAG(cond, __ktag_val(ktag)))
10034 +#define INOTAG_UID(cond, uid, gid) \
10035 + ((cond) ? ((uid) & MAX_UID) : (uid))
10036 +#define INOTAG_GID(cond, uid, gid) \
10037 + ((cond) ? ((gid) & MAX_GID) : (gid))
10039 +#define INOTAG_KUID(cond, kuid, kgid) \
10040 + KUIDT_INIT(INOTAG_UID(cond, __kuid_val(kuid), __kgid_val(kgid)))
10041 +#define INOTAG_KGID(cond, kuid, kgid) \
10042 + KGIDT_INIT(INOTAG_GID(cond, __kuid_val(kuid), __kgid_val(kgid)))
10043 +#define INOTAG_KTAG(cond, kuid, kgid, ktag) \
10044 + KTAGT_INIT(INOTAG_TAG(cond, \
10045 + __kuid_val(kuid), __kgid_val(kgid), __ktag_val(ktag)))
10048 +static inline uid_t dx_map_uid(uid_t uid)
10050 + if ((uid > MAX_UID) && (uid != -1))
10052 + return (uid & MAX_UID);
10055 +static inline gid_t dx_map_gid(gid_t gid)
10057 + if ((gid > MAX_GID) && (gid != -1))
10059 + return (gid & MAX_GID);
10067 +#define dx_notagcheck(sb) ((sb) && ((sb)->s_flags & MS_NOTAGCHECK))
10069 +int dx_parse_tag(char *string, vtag_t *tag, int remove, int *mnt_flags,
10070 + unsigned long *flags);
10072 +#ifdef CONFIG_PROPAGATE
10074 +void __dx_propagate_tag(struct nameidata *nd, struct inode *inode);
10076 +#define dx_propagate_tag(n, i) __dx_propagate_tag(n, i)
10079 +#define dx_propagate_tag(n, i) do { } while (0)
10082 +#endif /* _DX_TAG_H */
10083 diff -NurpP --minimal linux-4.9.217/include/linux/vs_inet6.h linux-4.9.217-vs2.3.9.12/include/linux/vs_inet6.h
10084 --- linux-4.9.217/include/linux/vs_inet6.h 1970-01-01 00:00:00.000000000 +0000
10085 +++ linux-4.9.217-vs2.3.9.12/include/linux/vs_inet6.h 2018-10-20 04:58:14.000000000 +0000
10087 +#ifndef _VS_INET6_H
10088 +#define _VS_INET6_H
10090 +#include "vserver/base.h"
10091 +#include "vserver/network.h"
10092 +#include "vserver/debug.h"
10094 +#include <net/ipv6.h>
10096 +#define NXAV6(a) &(a)->ip, &(a)->mask, (a)->prefix, (a)->type
10097 +#define NXAV6_FMT "[%pI6/%pI6/%d:%04x]"
10100 +#ifdef CONFIG_IPV6
10103 +int v6_addr_match(struct nx_addr_v6 *nxa,
10104 + const struct in6_addr *addr, uint16_t mask)
10108 + switch (nxa->type & mask) {
10109 + case NXA_TYPE_MASK:
10110 + ret = ipv6_masked_addr_cmp(&nxa->ip, &nxa->mask, addr);
10112 + case NXA_TYPE_ADDR:
10113 + ret = ipv6_addr_equal(&nxa->ip, addr);
10115 + case NXA_TYPE_ANY:
10119 + vxdprintk(VXD_CBIT(net, 0),
10120 + "v6_addr_match(%p" NXAV6_FMT ",%pI6,%04x) = %d",
10121 + nxa, NXAV6(nxa), addr, mask, ret);
10126 +int v6_addr_in_nx_info(struct nx_info *nxi,
10127 + const struct in6_addr *addr, uint16_t mask)
10129 + struct nx_addr_v6 *nxa;
10130 + unsigned long irqflags;
10136 + spin_lock_irqsave(&nxi->addr_lock, irqflags);
10137 + for (nxa = &nxi->v6; nxa; nxa = nxa->next)
10138 + if (v6_addr_match(nxa, addr, mask))
10142 + spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
10144 + vxdprintk(VXD_CBIT(net, 0),
10145 + "v6_addr_in_nx_info(%p[#%u],%pI6,%04x) = %d",
10146 + nxi, nxi ? nxi->nx_id : 0, addr, mask, ret);
10151 +int v6_nx_addr_match(struct nx_addr_v6 *nxa, struct nx_addr_v6 *addr, uint16_t mask)
10153 + /* FIXME: needs full range checks */
10154 + return v6_addr_match(nxa, &addr->ip, mask);
10158 +int v6_nx_addr_in_nx_info(struct nx_info *nxi, struct nx_addr_v6 *nxa, uint16_t mask)
10160 + struct nx_addr_v6 *ptr;
10161 + unsigned long irqflags;
10164 + spin_lock_irqsave(&nxi->addr_lock, irqflags);
10165 + for (ptr = &nxi->v6; ptr; ptr = ptr->next)
10166 + if (v6_nx_addr_match(ptr, nxa, mask))
10170 + spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
10176 + * Check if a given address matches for a socket
10178 + * nxi: the socket's nx_info if any
10179 + * addr: to be verified address
10182 +int v6_sock_addr_match (
10183 + struct nx_info *nxi,
10184 + struct inet_sock *inet,
10185 + struct in6_addr *addr)
10187 + struct sock *sk = &inet->sk;
10188 + const struct in6_addr *saddr = inet6_rcv_saddr(sk);
10190 + if (!ipv6_addr_any(addr) &&
10191 + ipv6_addr_equal(saddr, addr))
10193 + if (ipv6_addr_any(saddr))
10194 + return v6_addr_in_nx_info(nxi, addr, -1);
10199 + * check if address is covered by socket
10201 + * sk: the socket to check against
10202 + * addr: the address in question (must be != 0)
10206 +int __v6_addr_match_socket(const struct sock *sk, struct nx_addr_v6 *nxa)
10208 + struct nx_info *nxi = sk->sk_nx_info;
10209 + const struct in6_addr *saddr = inet6_rcv_saddr(sk);
10211 + vxdprintk(VXD_CBIT(net, 5),
10212 + "__v6_addr_in_socket(%p," NXAV6_FMT ") %p:%pI6 %p;%lx",
10213 + sk, NXAV6(nxa), nxi, saddr, sk->sk_socket,
10214 + (sk->sk_socket?sk->sk_socket->flags:0));
10216 + if (!ipv6_addr_any(saddr)) { /* direct address match */
10217 + return v6_addr_match(nxa, saddr, -1);
10218 + } else if (nxi) { /* match against nx_info */
10219 + return v6_nx_addr_in_nx_info(nxi, nxa, -1);
10220 + } else { /* unrestricted any socket */
10226 +/* inet related checks and helpers */
10230 +struct net_device;
10234 +#include <linux/netdevice.h>
10235 +#include <linux/inetdevice.h>
10236 +#include <net/inet_timewait_sock.h>
10239 +int dev_in_nx_info(struct net_device *, struct nx_info *);
10240 +int v6_dev_in_nx_info(struct net_device *, struct nx_info *);
10241 +int nx_v6_addr_conflict(struct nx_info *, struct nx_info *);
10246 +int v6_ifa_in_nx_info(struct inet6_ifaddr *ifa, struct nx_info *nxi)
10252 + return v6_addr_in_nx_info(nxi, &ifa->addr, -1);
10256 +int nx_v6_ifa_visible(struct nx_info *nxi, struct inet6_ifaddr *ifa)
10258 + vxdprintk(VXD_CBIT(net, 1), "nx_v6_ifa_visible(%p[#%u],%p) %d",
10259 + nxi, nxi ? nxi->nx_id : 0, ifa,
10260 + nxi ? v6_ifa_in_nx_info(ifa, nxi) : 0);
10262 + if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0))
10264 + if (v6_ifa_in_nx_info(ifa, nxi))
10270 +struct nx_v6_sock_addr {
10271 + struct in6_addr saddr; /* Address used for validation */
10272 + struct in6_addr baddr; /* Address used for socket bind */
10276 +int v6_map_sock_addr(struct inet_sock *inet, struct sockaddr_in6 *addr,
10277 + struct nx_v6_sock_addr *nsa)
10279 + // struct sock *sk = &inet->sk;
10280 + // struct nx_info *nxi = sk->sk_nx_info;
10281 + struct in6_addr saddr = addr->sin6_addr;
10282 + struct in6_addr baddr = saddr;
10284 + nsa->saddr = saddr;
10285 + nsa->baddr = baddr;
10290 +void v6_set_sock_addr(struct inet_sock *inet, struct nx_v6_sock_addr *nsa)
10292 + // struct sock *sk = &inet->sk;
10293 + // struct in6_addr *saddr = inet6_rcv_saddr(sk);
10295 + // *saddr = nsa->baddr;
10296 + // inet->inet_saddr = nsa->baddr;
10300 +int nx_info_has_v6(struct nx_info *nxi)
10304 + if (NX_IPV6(nxi))
10309 +#else /* CONFIG_IPV6 */
10312 +int nx_v6_dev_visible(struct nx_info *n, struct net_device *d)
10319 +int nx_v6_addr_conflict(struct nx_info *n, uint32_t a, const struct sock *s)
10325 +int v6_ifa_in_nx_info(struct in_ifaddr *a, struct nx_info *n)
10331 +int nx_info_has_v6(struct nx_info *nxi)
10337 +int v6_addr_in_nx_info(struct nx_info *nxi,
10338 + const struct in6_addr *addr, uint16_t mask)
10343 +#endif /* CONFIG_IPV6 */
10345 +#define current_nx_info_has_v6() \
10346 + nx_info_has_v6(current_nx_info())
10349 +#warning duplicate inclusion
10351 diff -NurpP --minimal linux-4.9.217/include/linux/vs_inet.h linux-4.9.217-vs2.3.9.12/include/linux/vs_inet.h
10352 --- linux-4.9.217/include/linux/vs_inet.h 1970-01-01 00:00:00.000000000 +0000
10353 +++ linux-4.9.217-vs2.3.9.12/include/linux/vs_inet.h 2018-10-20 04:58:14.000000000 +0000
10355 +#ifndef _VS_INET_H
10356 +#define _VS_INET_H
10358 +#include "vserver/base.h"
10359 +#include "vserver/network.h"
10360 +#include "vserver/debug.h"
10362 +#define IPI_LOOPBACK htonl(INADDR_LOOPBACK)
10364 +#define NXAV4(a) NIPQUAD((a)->ip[0]), NIPQUAD((a)->ip[1]), \
10365 + NIPQUAD((a)->mask), (a)->type
10366 +#define NXAV4_FMT "[" NIPQUAD_FMT "-" NIPQUAD_FMT "/" NIPQUAD_FMT ":%04x]"
10368 +#define NIPQUAD(addr) \
10369 + ((unsigned char *)&addr)[0], \
10370 + ((unsigned char *)&addr)[1], \
10371 + ((unsigned char *)&addr)[2], \
10372 + ((unsigned char *)&addr)[3]
10374 +#define NIPQUAD_FMT "%u.%u.%u.%u"
10378 +int v4_addr_match(struct nx_addr_v4 *nxa, __be32 addr, uint16_t tmask) /* match addr against one nx_addr_v4 entry */
10380 + __be32 ip = nxa->ip[0].s_addr;
10381 + __be32 mask = nxa->mask.s_addr;
10382 + __be32 bcast = ip | ~mask; /* all host bits set within ip/mask */
10385 + switch (nxa->type & tmask) { /* only entry types enabled in tmask are considered */
10386 + case NXA_TYPE_MASK:
10387 + ret = (ip == (addr & mask));
10389 + case NXA_TYPE_ADDR:
10393 + /* fall through to broadcast */
10394 + case NXA_MOD_BCAST:
10395 + ret = ((tmask & NXA_MOD_BCAST) && (addr == bcast));
10397 + case NXA_TYPE_RANGE: /* half-open range: ip[0] <= addr < ip[1] */
10398 + ret = ((ntohl(nxa->ip[0].s_addr) <= ntohl(addr)) && /* order in host byte order, not raw __be32 */
10399 + (ntohl(nxa->ip[1].s_addr) > ntohl(addr)));
10401 + case NXA_TYPE_ANY:
10406 + vxdprintk(VXD_CBIT(net, 0),
10407 + "v4_addr_match(%p" NXAV4_FMT "," NIPQUAD_FMT ",%04x) = %d",
10408 + nxa, NXAV4(nxa), NIPQUAD(addr), tmask, ret);
10413 +int v4_addr_in_nx_info(struct nx_info *nxi, __be32 addr, uint16_t tmask)
10415 + struct nx_addr_v4 *nxa;
10416 + unsigned long irqflags;
10423 + /* allow 127.0.0.1 when remapping lback */
10424 + if ((tmask & NXA_LOOPBACK) &&
10425 + (addr == IPI_LOOPBACK) &&
10426 + nx_info_flags(nxi, NXF_LBACK_REMAP, 0))
10429 + /* check for lback address */
10430 + if ((tmask & NXA_MOD_LBACK) &&
10431 + (nxi->v4_lback.s_addr == addr))
10434 + /* check for broadcast address */
10435 + if ((tmask & NXA_MOD_BCAST) &&
10436 + (nxi->v4_bcast.s_addr == addr))
10440 + /* check for v4 addresses */
10441 + spin_lock_irqsave(&nxi->addr_lock, irqflags);
10442 + for (nxa = &nxi->v4; nxa; nxa = nxa->next)
10443 + if (v4_addr_match(nxa, addr, tmask))
10447 + spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
10449 + vxdprintk(VXD_CBIT(net, 0),
10450 + "v4_addr_in_nx_info(%p[#%u]," NIPQUAD_FMT ",%04x) = %d",
10451 + nxi, nxi ? nxi->nx_id : 0, NIPQUAD(addr), tmask, ret);
10456 +int v4_nx_addr_match(struct nx_addr_v4 *nxa, struct nx_addr_v4 *addr, uint16_t mask)
10458 + /* FIXME: needs full range checks */
10459 + return v4_addr_match(nxa, addr->ip[0].s_addr, mask);
10463 +int v4_nx_addr_in_nx_info(struct nx_info *nxi, struct nx_addr_v4 *nxa, uint16_t mask)
10465 + struct nx_addr_v4 *ptr;
10466 + unsigned long irqflags;
10469 + spin_lock_irqsave(&nxi->addr_lock, irqflags);
10470 + for (ptr = &nxi->v4; ptr; ptr = ptr->next)
10471 + if (v4_nx_addr_match(ptr, nxa, mask))
10475 + spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
10479 +#include <net/inet_sock.h>
10482 + * Check if a given address matches for a socket
10484 + * nxi: the socket's nx_info if any
10485 + * addr: to be verified address
10488 +int v4_sock_addr_match (
10489 + struct nx_info *nxi,
10490 + struct inet_sock *inet,
10493 + __be32 saddr = inet->inet_rcv_saddr;
10494 + __be32 bcast = nxi ? nxi->v4_bcast.s_addr : INADDR_BROADCAST;
10496 + if (addr && (saddr == addr || bcast == addr))
10499 + return v4_addr_in_nx_info(nxi, addr, NXA_MASK_BIND);
10504 +/* inet related checks and helpers */
10508 +struct net_device;
10511 +#ifdef CONFIG_INET
10513 +#include <linux/netdevice.h>
10514 +#include <linux/inetdevice.h>
10515 +#include <net/inet_sock.h>
10516 +#include <net/inet_timewait_sock.h>
10519 +int dev_in_nx_info(struct net_device *, struct nx_info *);
10520 +int v4_dev_in_nx_info(struct net_device *, struct nx_info *);
10521 +int nx_v4_addr_conflict(struct nx_info *, struct nx_info *);
10525 + * check if address is covered by socket
10527 + * sk: the socket to check against
10528 + * nxa: the address entry in question (must not be NULL)
10532 +int __v4_addr_match_socket(const struct sock *sk, struct nx_addr_v4 *nxa)
10534 + struct nx_info *nxi = sk->sk_nx_info;
10535 + __be32 saddr = sk->sk_rcv_saddr;
10537 + vxdprintk(VXD_CBIT(net, 5),
10538 + "__v4_addr_in_socket(%p," NXAV4_FMT ") %p:" NIPQUAD_FMT " %p;%lx",
10539 + sk, NXAV4(nxa), nxi, NIPQUAD(saddr), sk->sk_socket,
10540 + (sk->sk_socket?sk->sk_socket->flags:0));
10542 + if (saddr) { /* direct address match */
10543 + return v4_addr_match(nxa, saddr, -1);
10544 + } else if (nxi) { /* match against nx_info */
10545 + return v4_nx_addr_in_nx_info(nxi, nxa, -1);
10546 + } else { /* unrestricted any socket */
10554 +int nx_dev_visible(struct nx_info *nxi, struct net_device *dev)
10556 + vxdprintk(VXD_CBIT(net, 1),
10557 + "nx_dev_visible(%p[#%u],%p " VS_Q("%s") ") %d",
10558 + nxi, nxi ? nxi->nx_id : 0, dev, dev->name,
10559 + nxi ? dev_in_nx_info(dev, nxi) : 0);
10561 + if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0))
10563 + if (dev_in_nx_info(dev, nxi))
10570 +int v4_ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi)
10576 + return v4_addr_in_nx_info(nxi, ifa->ifa_local, NXA_MASK_SHOW);
10580 +int nx_v4_ifa_visible(struct nx_info *nxi, struct in_ifaddr *ifa)
10582 + vxdprintk(VXD_CBIT(net, 1), "nx_v4_ifa_visible(%p[#%u],%p) %d",
10583 + nxi, nxi ? nxi->nx_id : 0, ifa,
10584 + nxi ? v4_ifa_in_nx_info(ifa, nxi) : 0);
10586 + if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0))
10588 + if (v4_ifa_in_nx_info(ifa, nxi))
10594 +struct nx_v4_sock_addr {
10595 + __be32 saddr; /* Address used for validation */
10596 + __be32 baddr; /* Address used for socket bind */
10600 +int v4_map_sock_addr(struct inet_sock *inet, struct sockaddr_in *addr,
10601 + struct nx_v4_sock_addr *nsa)
10603 + struct sock *sk = &inet->sk;
10604 + struct nx_info *nxi = sk->sk_nx_info;
10605 + __be32 saddr = addr->sin_addr.s_addr;
10606 + __be32 baddr = saddr;
10608 + vxdprintk(VXD_CBIT(net, 3),
10609 + "inet_bind(%p)* %p,%p;%lx " NIPQUAD_FMT,
10610 + sk, sk->sk_nx_info, sk->sk_socket,
10611 + (sk->sk_socket ? sk->sk_socket->flags : 0),
10615 + if (saddr == INADDR_ANY) {
10616 + if (nx_info_flags(nxi, NXF_SINGLE_IP, 0))
10617 + baddr = nxi->v4.ip[0].s_addr;
10618 + } else if (saddr == IPI_LOOPBACK) {
10619 + if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0))
10620 + baddr = nxi->v4_lback.s_addr;
10621 + } else if (!ipv4_is_multicast(saddr) ||
10622 + !nx_info_ncaps(nxi, NXC_MULTICAST)) {
10623 + /* normal address bind */
10624 + if (!v4_addr_in_nx_info(nxi, saddr, NXA_MASK_BIND))
10625 + return -EADDRNOTAVAIL;
10629 + vxdprintk(VXD_CBIT(net, 3),
10630 + "inet_bind(%p) " NIPQUAD_FMT ", " NIPQUAD_FMT,
10631 + sk, NIPQUAD(saddr), NIPQUAD(baddr));
10633 + nsa->saddr = saddr;
10634 + nsa->baddr = baddr;
10639 +void v4_set_sock_addr(struct inet_sock *inet, struct nx_v4_sock_addr *nsa)
10641 + inet->inet_saddr = nsa->baddr;
10642 + inet->inet_rcv_saddr = nsa->baddr;
10647 + * helper to simplify inet_lookup_listener
10649 + * nxi: the socket's nx_info if any
10650 + * addr: to be verified address
10651 + * saddr: socket address
10653 +static inline int v4_inet_addr_match (
10654 + struct nx_info *nxi,
10658 + if (addr && (saddr == addr))
10661 + return nxi ? v4_addr_in_nx_info(nxi, addr, NXA_MASK_BIND) : 1;
10665 +static inline __be32 nx_map_sock_lback(struct nx_info *nxi, __be32 addr)
10667 + if (nx_info_flags(nxi, NXF_HIDE_LBACK, 0) &&
10668 + (addr == nxi->v4_lback.s_addr))
10669 + return IPI_LOOPBACK;
10674 +int nx_info_has_v4(struct nx_info *nxi)
10678 + if (NX_IPV4(nxi))
10680 + if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0))
10685 +#else /* CONFIG_INET */
10688 +int nx_dev_visible(struct nx_info *n, struct net_device *d)
10694 +int nx_v4_addr_conflict(struct nx_info *n, uint32_t a, const struct sock *s)
10700 +int v4_ifa_in_nx_info(struct in_ifaddr *a, struct nx_info *n)
10706 +int nx_info_has_v4(struct nx_info *nxi)
10711 +#endif /* CONFIG_INET */
10713 +#define current_nx_info_has_v4() \
10714 + nx_info_has_v4(current_nx_info())
10717 +// #warning duplicate inclusion
10719 diff -NurpP --minimal linux-4.9.217/include/linux/vs_limit.h linux-4.9.217-vs2.3.9.12/include/linux/vs_limit.h
10720 --- linux-4.9.217/include/linux/vs_limit.h 1970-01-01 00:00:00.000000000 +0000
10721 +++ linux-4.9.217-vs2.3.9.12/include/linux/vs_limit.h 2018-10-20 04:58:14.000000000 +0000
10723 +#ifndef _VS_LIMIT_H
10724 +#define _VS_LIMIT_H
10726 +#include "vserver/limit.h"
10727 +#include "vserver/base.h"
10728 +#include "vserver/context.h"
10729 +#include "vserver/debug.h"
10730 +#include "vserver/context.h"
10731 +#include "vserver/limit_int.h"
10734 +#define vx_acc_cres(v, d, p, r) \
10735 + __vx_acc_cres(v, r, d, p, __FILE__, __LINE__)
10737 +#define vx_acc_cres_cond(x, d, p, r) \
10738 + __vx_acc_cres(((x) == vx_current_xid()) ? current_vx_info() : 0, \
10739 + r, d, p, __FILE__, __LINE__)
10742 +#define vx_add_cres(v, a, p, r) \
10743 + __vx_add_cres(v, r, a, p, __FILE__, __LINE__)
10744 +#define vx_sub_cres(v, a, p, r) vx_add_cres(v, -(a), p, r)
10746 +#define vx_add_cres_cond(x, a, p, r) \
10747 + __vx_add_cres(((x) == vx_current_xid()) ? current_vx_info() : 0, \
10748 + r, a, p, __FILE__, __LINE__)
10749 +#define vx_sub_cres_cond(x, a, p, r) vx_add_cres_cond(x, -(a), p, r)
10752 +/* process and file limits */
10754 +#define vx_nproc_inc(p) \
10755 + vx_acc_cres((p)->vx_info, 1, p, RLIMIT_NPROC)
10757 +#define vx_nproc_dec(p) \
10758 + vx_acc_cres((p)->vx_info,-1, p, RLIMIT_NPROC)
10760 +#define vx_files_inc(f) \
10761 + vx_acc_cres_cond((f)->f_xid, 1, f, RLIMIT_NOFILE)
10763 +#define vx_files_dec(f) \
10764 + vx_acc_cres_cond((f)->f_xid,-1, f, RLIMIT_NOFILE)
10766 +#define vx_locks_inc(l) \
10767 + vx_acc_cres_cond((l)->fl_xid, 1, l, RLIMIT_LOCKS)
10769 +#define vx_locks_dec(l) \
10770 + vx_acc_cres_cond((l)->fl_xid,-1, l, RLIMIT_LOCKS)
10772 +#define vx_openfd_inc(f) \
10773 + vx_acc_cres(current_vx_info(), 1, (void *)(long)(f), VLIMIT_OPENFD)
10775 +#define vx_openfd_dec(f) \
10776 + vx_acc_cres(current_vx_info(),-1, (void *)(long)(f), VLIMIT_OPENFD)
10779 +#define vx_cres_avail(v, n, r) \
10780 + __vx_cres_avail(v, r, n, __FILE__, __LINE__)
10783 +#define vx_nproc_avail(n) \
10784 + vx_cres_avail(current_vx_info(), n, RLIMIT_NPROC)
10786 +#define vx_files_avail(n) \
10787 + vx_cres_avail(current_vx_info(), n, RLIMIT_NOFILE)
10789 +#define vx_locks_avail(n) \
10790 + vx_cres_avail(current_vx_info(), n, RLIMIT_LOCKS)
10792 +#define vx_openfd_avail(n) \
10793 + vx_cres_avail(current_vx_info(), n, VLIMIT_OPENFD)
10796 +/* dentry limits */
10798 +#define vx_dentry_inc(d) do { \
10799 + if (d_count(d) == 1) \
10800 + vx_acc_cres(current_vx_info(), 1, d, VLIMIT_DENTRY); \
10803 +#define vx_dentry_dec(d) do { \
10804 + if (d_count(d) == 0) \
10805 + vx_acc_cres(current_vx_info(),-1, d, VLIMIT_DENTRY); \
10808 +#define vx_dentry_avail(n) \
10809 + vx_cres_avail(current_vx_info(), n, VLIMIT_DENTRY)
10812 +/* socket limits */
10814 +#define vx_sock_inc(s) \
10815 + vx_acc_cres((s)->sk_vx_info, 1, s, VLIMIT_NSOCK)
10817 +#define vx_sock_dec(s) \
10818 + vx_acc_cres((s)->sk_vx_info,-1, s, VLIMIT_NSOCK)
10820 +#define vx_sock_avail(n) \
10821 + vx_cres_avail(current_vx_info(), n, VLIMIT_NSOCK)
10824 +/* ipc resource limits */
10826 +#define vx_ipcmsg_add(v, u, a) \
10827 + vx_add_cres(v, a, u, RLIMIT_MSGQUEUE)
10829 +#define vx_ipcmsg_sub(v, u, a) \
10830 + vx_sub_cres(v, a, u, RLIMIT_MSGQUEUE)
10832 +#define vx_ipcmsg_avail(v, a) \
10833 + vx_cres_avail(v, a, RLIMIT_MSGQUEUE)
10836 +#define vx_ipcshm_add(v, k, a) \
10837 + vx_add_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM)
10839 +#define vx_ipcshm_sub(v, k, a) \
10840 + vx_sub_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM)
10842 +#define vx_ipcshm_avail(v, a) \
10843 + vx_cres_avail(v, a, VLIMIT_SHMEM)
10846 +#define vx_semary_inc(a) \
10847 + vx_acc_cres(current_vx_info(), 1, a, VLIMIT_SEMARY)
10849 +#define vx_semary_dec(a) \
10850 + vx_acc_cres(current_vx_info(), -1, a, VLIMIT_SEMARY)
10853 +#define vx_nsems_add(a,n) \
10854 + vx_add_cres(current_vx_info(), n, a, VLIMIT_NSEMS)
10856 +#define vx_nsems_sub(a,n) \
10857 + vx_sub_cres(current_vx_info(), n, a, VLIMIT_NSEMS)
10861 +#warning duplicate inclusion
10863 diff -NurpP --minimal linux-4.9.217/include/linux/vs_network.h linux-4.9.217-vs2.3.9.12/include/linux/vs_network.h
10864 --- linux-4.9.217/include/linux/vs_network.h 1970-01-01 00:00:00.000000000 +0000
10865 +++ linux-4.9.217-vs2.3.9.12/include/linux/vs_network.h 2018-10-20 04:58:14.000000000 +0000
10867 +#ifndef _NX_VS_NETWORK_H
10868 +#define _NX_VS_NETWORK_H
10870 +#include "vserver/context.h"
10871 +#include "vserver/network.h"
10872 +#include "vserver/base.h"
10873 +#include "vserver/check.h"
10874 +#include "vserver/debug.h"
10876 +#include <linux/sched.h>
10879 +#define get_nx_info(i) __get_nx_info(i, __FILE__, __LINE__)
10881 +static inline struct nx_info *__get_nx_info(struct nx_info *nxi,
10882 + const char *_file, int _line)
10887 + vxlprintk(VXD_CBIT(nid, 2), "get_nx_info(%p[#%d.%d])",
10888 + nxi, nxi ? nxi->nx_id : 0,
10889 + nxi ? atomic_read(&nxi->nx_usecnt) : 0,
10892 + atomic_inc(&nxi->nx_usecnt);
10897 +extern void free_nx_info(struct nx_info *);
10899 +#define put_nx_info(i) __put_nx_info(i, __FILE__, __LINE__)
10901 +static inline void __put_nx_info(struct nx_info *nxi, const char *_file, int _line)
10906 + vxlprintk(VXD_CBIT(nid, 2), "put_nx_info(%p[#%d.%d])",
10907 + nxi, nxi ? nxi->nx_id : 0,
10908 + nxi ? atomic_read(&nxi->nx_usecnt) : 0,
10911 + if (atomic_dec_and_test(&nxi->nx_usecnt))
10912 + free_nx_info(nxi);
10916 +#define init_nx_info(p, i) __init_nx_info(p, i, __FILE__, __LINE__)
10918 +static inline void __init_nx_info(struct nx_info **nxp, struct nx_info *nxi,
10919 + const char *_file, int _line)
10922 + vxlprintk(VXD_CBIT(nid, 3),
10923 + "init_nx_info(%p[#%d.%d])",
10924 + nxi, nxi ? nxi->nx_id : 0,
10925 + nxi ? atomic_read(&nxi->nx_usecnt) : 0,
10928 + atomic_inc(&nxi->nx_usecnt);
10934 +#define set_nx_info(p, i) __set_nx_info(p, i, __FILE__, __LINE__)
10936 +static inline void __set_nx_info(struct nx_info **nxp, struct nx_info *nxi,
10937 + const char *_file, int _line)
10939 + struct nx_info *nxo;
10944 + vxlprintk(VXD_CBIT(nid, 3), "set_nx_info(%p[#%d.%d])",
10945 + nxi, nxi ? nxi->nx_id : 0,
10946 + nxi ? atomic_read(&nxi->nx_usecnt) : 0,
10949 + atomic_inc(&nxi->nx_usecnt);
10950 + nxo = xchg(nxp, nxi);
10954 +#define clr_nx_info(p) __clr_nx_info(p, __FILE__, __LINE__)
10956 +static inline void __clr_nx_info(struct nx_info **nxp,
10957 + const char *_file, int _line)
10959 + struct nx_info *nxo;
10961 + nxo = xchg(nxp, NULL);
10965 + vxlprintk(VXD_CBIT(nid, 3), "clr_nx_info(%p[#%d.%d])",
10966 + nxo, nxo ? nxo->nx_id : 0,
10967 + nxo ? atomic_read(&nxo->nx_usecnt) : 0,
10970 + if (atomic_dec_and_test(&nxo->nx_usecnt))
10971 + free_nx_info(nxo);
10975 +#define claim_nx_info(v, p) __claim_nx_info(v, p, __FILE__, __LINE__)
10977 +static inline void __claim_nx_info(struct nx_info *nxi,
10978 + struct task_struct *task, const char *_file, int _line)
10980 + vxlprintk(VXD_CBIT(nid, 3), "claim_nx_info(%p[#%d.%d.%d]) %p",
10981 + nxi, nxi ? nxi->nx_id : 0,
10982 + nxi?atomic_read(&nxi->nx_usecnt):0,
10983 + nxi?atomic_read(&nxi->nx_tasks):0,
10984 + task, _file, _line);
10986 + atomic_inc(&nxi->nx_tasks);
10990 +extern void unhash_nx_info(struct nx_info *);
10992 +#define release_nx_info(v, p) __release_nx_info(v, p, __FILE__, __LINE__)
10994 +static inline void __release_nx_info(struct nx_info *nxi,
10995 + struct task_struct *task, const char *_file, int _line)
10997 + vxlprintk(VXD_CBIT(nid, 3), "release_nx_info(%p[#%d.%d.%d]) %p",
10998 + nxi, nxi ? nxi->nx_id : 0,
10999 + nxi ? atomic_read(&nxi->nx_usecnt) : 0,
11000 + nxi ? atomic_read(&nxi->nx_tasks) : 0,
11001 + task, _file, _line);
11005 + if (atomic_dec_and_test(&nxi->nx_tasks))
11006 + unhash_nx_info(nxi);
11010 +#define task_get_nx_info(i) __task_get_nx_info(i, __FILE__, __LINE__)
11012 +static __inline__ struct nx_info *__task_get_nx_info(struct task_struct *p,
11013 + const char *_file, int _line)
11015 + struct nx_info *nxi;
11018 + vxlprintk(VXD_CBIT(nid, 5), "task_get_nx_info(%p)",
11019 + p, _file, _line);
11020 + nxi = __get_nx_info(p->nx_info, _file, _line);
11026 +static inline void exit_nx_info(struct task_struct *p)
11029 + release_nx_info(p->nx_info, p);
11034 +#warning duplicate inclusion
11036 diff -NurpP --minimal linux-4.9.217/include/linux/vs_pid.h linux-4.9.217-vs2.3.9.12/include/linux/vs_pid.h
11037 --- linux-4.9.217/include/linux/vs_pid.h 1970-01-01 00:00:00.000000000 +0000
11038 +++ linux-4.9.217-vs2.3.9.12/include/linux/vs_pid.h 2018-10-20 04:58:14.000000000 +0000
11043 +#include "vserver/base.h"
11044 +#include "vserver/check.h"
11045 +#include "vserver/context.h"
11046 +#include "vserver/debug.h"
11047 +#include "vserver/pid.h"
11048 +#include <linux/pid_namespace.h>
11051 +#define VXF_FAKE_INIT (VXF_INFO_INIT | VXF_STATE_INIT)
11054 +int vx_proc_task_visible(struct task_struct *task)
11056 + if ((task->pid == 1) &&
11057 + !vx_flags(VXF_FAKE_INIT, VXF_FAKE_INIT))
11058 + /* show a blend through init */
11060 + if (vx_check(vx_task_xid(task), VS_WATCH | VS_IDENT))
11067 +#define find_task_by_real_pid(pid) find_task_by_pid_ns(pid, &init_pid_ns)
11071 +struct task_struct *vx_get_proc_task(struct inode *inode, struct pid *pid)
11073 + struct task_struct *task = get_pid_task(pid, PIDTYPE_PID);
11075 + if (task && !vx_proc_task_visible(task)) {
11076 + vxdprintk(VXD_CBIT(misc, 6),
11077 + "dropping task (get) %p[#%u,%u] for %p[#%u,%u]",
11078 + task, task->xid, task->pid,
11079 + current, current->xid, current->pid);
11080 + put_task_struct(task);
11088 +#warning duplicate inclusion
11090 diff -NurpP --minimal linux-4.9.217/include/linux/vs_sched.h linux-4.9.217-vs2.3.9.12/include/linux/vs_sched.h
11091 --- linux-4.9.217/include/linux/vs_sched.h 1970-01-01 00:00:00.000000000 +0000
11092 +++ linux-4.9.217-vs2.3.9.12/include/linux/vs_sched.h 2018-10-20 04:58:14.000000000 +0000
11094 +#ifndef _VS_SCHED_H
11095 +#define _VS_SCHED_H
11097 +#include "vserver/base.h"
11098 +#include "vserver/context.h"
11099 +#include "vserver/sched.h"
11102 +#define MAX_PRIO_BIAS 20
11103 +#define MIN_PRIO_BIAS -20
11106 +int vx_adjust_prio(struct task_struct *p, int prio, int max_user)
11108 + struct vx_info *vxi = p->vx_info;
11111 + prio += vx_cpu(vxi, sched_pc).prio_bias;
11115 +static inline void vx_account_user(struct vx_info *vxi,
11116 + cputime_t cputime, int nice)
11120 + vx_cpu(vxi, sched_pc).user_ticks += cputime;
11123 +static inline void vx_account_system(struct vx_info *vxi,
11124 + cputime_t cputime, int idle)
11128 + vx_cpu(vxi, sched_pc).sys_ticks += cputime;
11132 +#warning duplicate inclusion
11134 diff -NurpP --minimal linux-4.9.217/include/linux/vs_socket.h linux-4.9.217-vs2.3.9.12/include/linux/vs_socket.h
11135 --- linux-4.9.217/include/linux/vs_socket.h 1970-01-01 00:00:00.000000000 +0000
11136 +++ linux-4.9.217-vs2.3.9.12/include/linux/vs_socket.h 2018-10-20 04:58:14.000000000 +0000
11138 +#ifndef _VS_SOCKET_H
11139 +#define _VS_SOCKET_H
11141 +#include "vserver/debug.h"
11142 +#include "vserver/base.h"
11143 +#include "vserver/cacct.h"
11144 +#include "vserver/context.h"
11145 +#include "vserver/tag.h"
11148 +/* socket accounting */
11150 +#include <linux/socket.h>
11152 +static inline int vx_sock_type(int family)
11154 + switch (family) {
11156 + return VXA_SOCK_UNSPEC;
11158 + return VXA_SOCK_UNIX;
11160 + return VXA_SOCK_INET;
11162 + return VXA_SOCK_INET6;
11164 + return VXA_SOCK_PACKET;
11166 + return VXA_SOCK_OTHER;
11170 +#define vx_acc_sock(v, f, p, s) \
11171 + __vx_acc_sock(v, f, p, s, __FILE__, __LINE__)
11173 +static inline void __vx_acc_sock(struct vx_info *vxi,
11174 + int family, int pos, int size, char *file, int line)
11177 + int type = vx_sock_type(family);
11179 + atomic_long_inc(&vxi->cacct.sock[type][pos].count);
11180 + atomic_long_add(size, &vxi->cacct.sock[type][pos].total);
11184 +#define vx_sock_recv(sk, s) \
11185 + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 0, s)
11186 +#define vx_sock_send(sk, s) \
11187 + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 1, s)
11188 +#define vx_sock_fail(sk, s) \
11189 + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 2, s)
11192 +#define sock_vx_init(s) do { \
11193 + (s)->sk_xid = 0; \
11194 + (s)->sk_vx_info = NULL; \
11197 +#define sock_nx_init(s) do { \
11198 + (s)->sk_nid = 0; \
11199 + (s)->sk_nx_info = NULL; \
11203 +#warning duplicate inclusion
11205 diff -NurpP --minimal linux-4.9.217/include/linux/vs_tag.h linux-4.9.217-vs2.3.9.12/include/linux/vs_tag.h
11206 --- linux-4.9.217/include/linux/vs_tag.h 1970-01-01 00:00:00.000000000 +0000
11207 +++ linux-4.9.217-vs2.3.9.12/include/linux/vs_tag.h 2018-10-20 04:58:14.000000000 +0000
11212 +#include <linux/vserver/tag.h>
11214 +/* check conditions */
11216 +#define DX_ADMIN 0x0001
11217 +#define DX_WATCH 0x0002
11218 +#define DX_HOSTID 0x0008
11220 +#define DX_IDENT 0x0010
11222 +#define DX_ARG_MASK 0x0010
11225 +#define dx_task_tag(t) ((t)->tag)
11227 +#define dx_current_tag() dx_task_tag(current)
11229 +#define dx_check(c, m) __dx_check(dx_current_tag(), c, m)
11231 +#define dx_weak_check(c, m) ((m) ? dx_check(c, m) : 1)
11235 + * check current context for ADMIN/WATCH and
11236 + * optionally against supplied argument
11238 +static inline int __dx_check(vtag_t cid, vtag_t id, unsigned int mode)
11240 + if (mode & DX_ARG_MASK) {
11241 + if ((mode & DX_IDENT) && (id == cid))
11244 + return (((mode & DX_ADMIN) && (cid == 0)) ||
11245 + ((mode & DX_WATCH) && (cid == 1)) ||
11246 + ((mode & DX_HOSTID) && (id == 0)));
11250 +int dx_permission(const struct inode *inode, int mask);
11254 +#warning duplicate inclusion
11256 diff -NurpP --minimal linux-4.9.217/include/linux/vs_time.h linux-4.9.217-vs2.3.9.12/include/linux/vs_time.h
11257 --- linux-4.9.217/include/linux/vs_time.h 1970-01-01 00:00:00.000000000 +0000
11258 +++ linux-4.9.217-vs2.3.9.12/include/linux/vs_time.h 2018-10-20 04:58:14.000000000 +0000
11260 +#ifndef _VS_TIME_H
11261 +#define _VS_TIME_H
11264 +/* time faking stuff */
11266 +#ifdef CONFIG_VSERVER_VTIME
11268 +extern void vx_adjust_timespec(struct timespec *ts);
11269 +extern int vx_settimeofday(const struct timespec *ts);
11270 +extern int vx_settimeofday64(const struct timespec64 *ts);
11273 +#define vx_adjust_timespec(t) do { } while (0)
11274 +#define vx_settimeofday(t) do_settimeofday(t)
11275 +#define vx_settimeofday64(t) do_settimeofday64(t)
11279 +#warning duplicate inclusion
11281 diff -NurpP --minimal linux-4.9.217/include/net/addrconf.h linux-4.9.217-vs2.3.9.12/include/net/addrconf.h
11282 --- linux-4.9.217/include/net/addrconf.h 2020-03-27 00:51:41.560167763 +0000
11283 +++ linux-4.9.217-vs2.3.9.12/include/net/addrconf.h 2018-10-20 04:58:14.000000000 +0000
11284 @@ -85,7 +85,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(str
11286 int ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
11287 const struct in6_addr *daddr, unsigned int srcprefs,
11288 - struct in6_addr *saddr);
11289 + struct in6_addr *saddr, struct nx_info *nxi);
11290 int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
11292 int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
11293 diff -NurpP --minimal linux-4.9.217/include/net/af_unix.h linux-4.9.217-vs2.3.9.12/include/net/af_unix.h
11294 --- linux-4.9.217/include/net/af_unix.h 2016-12-11 19:17:54.000000000 +0000
11295 +++ linux-4.9.217-vs2.3.9.12/include/net/af_unix.h 2018-10-20 04:58:14.000000000 +0000
11297 #include <linux/socket.h>
11298 #include <linux/un.h>
11299 #include <linux/mutex.h>
11300 +// #include <linux/vs_base.h>
11301 #include <net/sock.h>
11303 void unix_inflight(struct user_struct *user, struct file *fp);
11304 diff -NurpP --minimal linux-4.9.217/include/net/inet_timewait_sock.h linux-4.9.217-vs2.3.9.12/include/net/inet_timewait_sock.h
11305 --- linux-4.9.217/include/net/inet_timewait_sock.h 2020-03-27 00:51:41.750164768 +0000
11306 +++ linux-4.9.217-vs2.3.9.12/include/net/inet_timewait_sock.h 2018-10-20 04:58:14.000000000 +0000
11307 @@ -72,6 +72,10 @@ struct inet_timewait_sock {
11308 #define tw_num __tw_common.skc_num
11309 #define tw_cookie __tw_common.skc_cookie
11310 #define tw_dr __tw_common.skc_tw_dr
11311 +#define tw_xid __tw_common.skc_xid
11312 +#define tw_vx_info __tw_common.skc_vx_info
11313 +#define tw_nid __tw_common.skc_nid
11314 +#define tw_nx_info __tw_common.skc_nx_info
11317 volatile unsigned char tw_substate;
11318 diff -NurpP --minimal linux-4.9.217/include/net/ip6_route.h linux-4.9.217-vs2.3.9.12/include/net/ip6_route.h
11319 --- linux-4.9.217/include/net/ip6_route.h 2020-03-27 00:51:41.750164768 +0000
11320 +++ linux-4.9.217-vs2.3.9.12/include/net/ip6_route.h 2018-10-20 04:58:14.000000000 +0000
11321 @@ -26,6 +26,7 @@ struct route_info {
11322 #include <linux/ip.h>
11323 #include <linux/ipv6.h>
11324 #include <linux/route.h>
11325 +#include <linux/vs_inet6.h>
11327 #define RT6_LOOKUP_F_IFACE 0x00000001
11328 #define RT6_LOOKUP_F_REACHABLE 0x00000002
11329 @@ -98,17 +99,19 @@ int ip6_del_rt(struct rt6_info *);
11330 static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
11331 const struct in6_addr *daddr,
11332 unsigned int prefs,
11333 - struct in6_addr *saddr)
11334 + struct in6_addr *saddr,
11335 + struct nx_info *nxi)
11337 struct inet6_dev *idev =
11338 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
11341 - if (rt && rt->rt6i_prefsrc.plen)
11342 + if (rt && rt->rt6i_prefsrc.plen && (!nxi ||
11343 + v6_addr_in_nx_info(nxi, &rt->rt6i_prefsrc.addr, NXA_TYPE_ADDR)))
11344 *saddr = rt->rt6i_prefsrc.addr;
11346 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
11347 - daddr, prefs, saddr);
11348 + daddr, prefs, saddr, nxi);
11352 diff -NurpP --minimal linux-4.9.217/include/net/route.h linux-4.9.217-vs2.3.9.12/include/net/route.h
11353 --- linux-4.9.217/include/net/route.h 2020-03-27 00:51:41.900162405 +0000
11354 +++ linux-4.9.217-vs2.3.9.12/include/net/route.h 2018-10-20 04:58:14.000000000 +0000
11355 @@ -226,6 +226,9 @@ static inline void ip_rt_put(struct rtab
11356 dst_release(&rt->dst);
11359 +#include <linux/vs_base.h>
11360 +#include <linux/vs_inet.h>
11362 #define IPTOS_RT_MASK (IPTOS_TOS_MASK & ~3)
11364 extern const __u8 ip_tos2prio[16];
11365 @@ -273,6 +276,9 @@ static inline void ip_route_connect_init
11366 protocol, flow_flags, dst, src, dport, sport);
11369 +extern struct rtable *ip_v4_find_src(struct net *net, struct nx_info *,
11370 + struct flowi4 *);
11372 static inline struct rtable *ip_route_connect(struct flowi4 *fl4,
11373 __be32 dst, __be32 src, u32 tos,
11374 int oif, u8 protocol,
11375 @@ -281,11 +287,25 @@ static inline struct rtable *ip_route_co
11377 struct net *net = sock_net(sk);
11379 + struct nx_info *nx_info = current_nx_info();
11381 ip_route_connect_init(fl4, dst, src, tos, oif, protocol,
11384 - if (!dst || !src) {
11386 + nx_info = sk->sk_nx_info;
11388 + vxdprintk(VXD_CBIT(net, 4),
11389 + "ip_route_connect(%p) %p,%p;%lx",
11390 + sk, nx_info, sk->sk_socket,
11391 + (sk->sk_socket?sk->sk_socket->flags:0));
11393 + rt = ip_v4_find_src(net, nx_info, fl4);
11398 + if (!fl4->daddr || !fl4->saddr) {
11399 rt = __ip_route_output_key(net, fl4);
11402 diff -NurpP --minimal linux-4.9.217/include/net/sock.h linux-4.9.217-vs2.3.9.12/include/net/sock.h
11403 --- linux-4.9.217/include/net/sock.h 2020-03-27 00:51:41.960161456 +0000
11404 +++ linux-4.9.217-vs2.3.9.12/include/net/sock.h 2020-04-01 09:40:32.325403988 +0000
11405 @@ -187,6 +187,10 @@ struct sock_common {
11406 struct in6_addr skc_v6_daddr;
11407 struct in6_addr skc_v6_rcv_saddr;
11410 + struct vx_info *skc_vx_info;
11412 + struct nx_info *skc_nx_info;
11414 atomic64_t skc_cookie;
11416 @@ -337,8 +341,12 @@ struct sock {
11417 #define sk_prot __sk_common.skc_prot
11418 #define sk_net __sk_common.skc_net
11419 #define sk_v6_daddr __sk_common.skc_v6_daddr
11420 -#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
11421 +#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
11422 #define sk_cookie __sk_common.skc_cookie
11423 +#define sk_xid __sk_common.skc_xid
11424 +#define sk_vx_info __sk_common.skc_vx_info
11425 +#define sk_nid __sk_common.skc_nid
11426 +#define sk_nx_info __sk_common.skc_nx_info
11427 #define sk_incoming_cpu __sk_common.skc_incoming_cpu
11428 #define sk_flags __sk_common.skc_flags
11429 #define sk_rxhash __sk_common.skc_rxhash
11430 diff -NurpP --minimal linux-4.9.217/include/uapi/Kbuild linux-4.9.217-vs2.3.9.12/include/uapi/Kbuild
11431 --- linux-4.9.217/include/uapi/Kbuild 2016-12-11 19:17:54.000000000 +0000
11432 +++ linux-4.9.217-vs2.3.9.12/include/uapi/Kbuild 2018-10-20 04:58:14.000000000 +0000
11433 @@ -13,3 +13,4 @@ header-y += drm/
11437 +header-y += vserver/
11438 diff -NurpP --minimal linux-4.9.217/include/uapi/linux/btrfs_tree.h linux-4.9.217-vs2.3.9.12/include/uapi/linux/btrfs_tree.h
11439 --- linux-4.9.217/include/uapi/linux/btrfs_tree.h 2020-03-27 00:51:42.430154051 +0000
11440 +++ linux-4.9.217-vs2.3.9.12/include/uapi/linux/btrfs_tree.h 2019-02-22 08:37:55.713049558 +0000
11441 @@ -564,11 +564,14 @@ struct btrfs_inode_item {
11442 /* modification sequence number for NFS */
11447 * a little future expansion, for more than this we can
11448 * just grow the inode item and version it
11450 - __le64 reserved[4];
11451 + __le16 reserved16;
11452 + __le32 reserved32;
11453 + __le64 reserved[3];
11454 struct btrfs_timespec atime;
11455 struct btrfs_timespec ctime;
11456 struct btrfs_timespec mtime;
11457 diff -NurpP --minimal linux-4.9.217/include/uapi/linux/capability.h linux-4.9.217-vs2.3.9.12/include/uapi/linux/capability.h
11458 --- linux-4.9.217/include/uapi/linux/capability.h 2016-12-11 19:17:54.000000000 +0000
11459 +++ linux-4.9.217-vs2.3.9.12/include/uapi/linux/capability.h 2018-10-20 04:58:14.000000000 +0000
11460 @@ -257,6 +257,7 @@ struct vfs_cap_data {
11461 arbitrary SCSI commands */
11462 /* Allow setting encryption key on loopback filesystem */
11463 /* Allow setting zone reclaim policy */
11464 +/* Allow the selection of a security context */
11466 #define CAP_SYS_ADMIN 21
11468 @@ -352,7 +353,12 @@ struct vfs_cap_data {
11470 #define CAP_LAST_CAP CAP_AUDIT_READ
11472 -#define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP)
11473 +/* Allow context manipulations */
11474 +/* Allow changing context info on files */
11476 +#define CAP_CONTEXT 63
11478 +#define cap_valid(x) ((x) >= 0 && ((x) <= CAP_LAST_CAP || (x) == CAP_CONTEXT))
11481 * Bit location of each capability (used by user-space library and kernel)
11482 diff -NurpP --minimal linux-4.9.217/include/uapi/linux/fs.h linux-4.9.217-vs2.3.9.12/include/uapi/linux/fs.h
11483 --- linux-4.9.217/include/uapi/linux/fs.h 2020-03-27 00:51:42.450153736 +0000
11484 +++ linux-4.9.217-vs2.3.9.12/include/uapi/linux/fs.h 2018-10-20 04:58:14.000000000 +0000
11485 @@ -130,6 +130,9 @@ struct inodes_stat_t {
11486 #define MS_I_VERSION (1<<23) /* Update inode I_version field */
11487 #define MS_STRICTATIME (1<<24) /* Always perform atime updates */
11488 #define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
11489 +#define MS_TAGGED (1<<8) /* use generic inode tagging */
11490 +#define MS_NOTAGCHECK (1<<9) /* don't check tags */
11491 +#define MS_TAGID (1<<26) /* use specific tag for this mount; NOTE(review): same bit as MS_SUBMOUNT (1<<26) below - confirm this overlap is intended */
11493 /* These sb flags are internal to the kernel */
11494 #define MS_SUBMOUNT (1<<26)
11495 @@ -313,13 +316,16 @@ struct fscrypt_policy {
11496 #define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */
11497 #define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */
11498 #define FS_NOCOW_FL 0x00800000 /* Do not cow file */
11499 +#define FS_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */
11500 #define FS_INLINE_DATA_FL 0x10000000 /* Reserved for ext4 */
11501 #define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
11502 #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */
11504 -#define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
11505 -#define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
11506 +#define FS_BARRIER_FL 0x04000000 /* Barrier for chroot() */
11507 +#define FS_COW_FL 0x20000000 /* Copy on Write marker */
11509 +#define FS_FL_USER_VISIBLE 0x0103DFFF /* User visible flags */
11510 +#define FS_FL_USER_MODIFIABLE 0x010380FF /* User modifiable flags */
11512 #define SYNC_FILE_RANGE_WAIT_BEFORE 1
11513 #define SYNC_FILE_RANGE_WRITE 2
11514 diff -NurpP --minimal linux-4.9.217/include/uapi/linux/gfs2_ondisk.h linux-4.9.217-vs2.3.9.12/include/uapi/linux/gfs2_ondisk.h
11515 --- linux-4.9.217/include/uapi/linux/gfs2_ondisk.h 2016-12-11 19:17:54.000000000 +0000
11516 +++ linux-4.9.217-vs2.3.9.12/include/uapi/linux/gfs2_ondisk.h 2018-10-20 04:58:14.000000000 +0000
11517 @@ -225,6 +225,9 @@ enum {
11520 gfs2fl_TopLevel = 10,
11521 + gfs2fl_IXUnlink = 16,
11522 + gfs2fl_Barrier = 17,
11524 gfs2fl_TruncInProg = 29,
11525 gfs2fl_InheritDirectio = 30,
11526 gfs2fl_InheritJdata = 31,
11527 @@ -242,6 +245,9 @@ enum {
11528 #define GFS2_DIF_SYNC 0x00000100
11529 #define GFS2_DIF_SYSTEM 0x00000200 /* New in gfs2 */
11530 #define GFS2_DIF_TOPDIR 0x00000400 /* New in gfs2 */
11531 +#define GFS2_DIF_IXUNLINK 0x00010000
11532 +#define GFS2_DIF_BARRIER 0x00020000
11533 +#define GFS2_DIF_COW 0x00040000
11534 #define GFS2_DIF_TRUNC_IN_PROG 0x20000000 /* New in gfs2 */
11535 #define GFS2_DIF_INHERIT_DIRECTIO 0x40000000 /* only in gfs1 */
11536 #define GFS2_DIF_INHERIT_JDATA 0x80000000
11537 diff -NurpP --minimal linux-4.9.217/include/uapi/linux/if_tun.h linux-4.9.217-vs2.3.9.12/include/uapi/linux/if_tun.h
11538 --- linux-4.9.217/include/uapi/linux/if_tun.h 2016-12-11 19:17:54.000000000 +0000
11539 +++ linux-4.9.217-vs2.3.9.12/include/uapi/linux/if_tun.h 2018-10-20 04:58:14.000000000 +0000
11542 #define TUNSETVNETBE _IOW('T', 222, int)
11543 #define TUNGETVNETBE _IOR('T', 223, int)
11544 +#define TUNSETNID _IOW('T', 224, int)
11546 /* TUNSETIFF ifr flags */
11547 #define IFF_TUN 0x0001
11548 diff -NurpP --minimal linux-4.9.217/include/uapi/linux/major.h linux-4.9.217-vs2.3.9.12/include/uapi/linux/major.h
11549 --- linux-4.9.217/include/uapi/linux/major.h 2016-12-11 19:17:54.000000000 +0000
11550 +++ linux-4.9.217-vs2.3.9.12/include/uapi/linux/major.h 2018-10-20 04:58:14.000000000 +0000
11552 #define HD_MAJOR IDE0_MAJOR
11553 #define PTY_SLAVE_MAJOR 3
11554 #define TTY_MAJOR 4
11555 +#define VROOT_MAJOR 4
11556 #define TTYAUX_MAJOR 5
11558 #define VCS_MAJOR 7
11559 diff -NurpP --minimal linux-4.9.217/include/uapi/linux/nfs_mount.h linux-4.9.217-vs2.3.9.12/include/uapi/linux/nfs_mount.h
11560 --- linux-4.9.217/include/uapi/linux/nfs_mount.h 2016-12-11 19:17:54.000000000 +0000
11561 +++ linux-4.9.217-vs2.3.9.12/include/uapi/linux/nfs_mount.h 2018-10-20 04:58:14.000000000 +0000
11562 @@ -63,7 +63,8 @@ struct nfs_mount_data {
11563 #define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 non-text parsed mount data only */
11564 #define NFS_MOUNT_NORDIRPLUS 0x4000 /* 5 */
11565 #define NFS_MOUNT_UNSHARED 0x8000 /* 5 */
11566 -#define NFS_MOUNT_FLAGMASK 0xFFFF
11567 +#define NFS_MOUNT_TAGGED 0x10000 /* context tagging */
11568 +#define NFS_MOUNT_FLAGMASK 0x1FFFF
11570 /* The following are for internal use only */
11571 #define NFS_MOUNT_LOOKUP_CACHE_NONEG 0x10000
11572 diff -NurpP --minimal linux-4.9.217/include/uapi/linux/reboot.h linux-4.9.217-vs2.3.9.12/include/uapi/linux/reboot.h
11573 --- linux-4.9.217/include/uapi/linux/reboot.h 2016-12-11 19:17:54.000000000 +0000
11574 +++ linux-4.9.217-vs2.3.9.12/include/uapi/linux/reboot.h 2018-10-20 04:58:14.000000000 +0000
11576 #define LINUX_REBOOT_CMD_RESTART2 0xA1B2C3D4
11577 #define LINUX_REBOOT_CMD_SW_SUSPEND 0xD000FCE2
11578 #define LINUX_REBOOT_CMD_KEXEC 0x45584543
11580 +#define LINUX_REBOOT_CMD_OOM 0xDEADBEEF
11583 #endif /* _UAPI_LINUX_REBOOT_H */
11584 diff -NurpP --minimal linux-4.9.217/include/uapi/linux/sysctl.h linux-4.9.217-vs2.3.9.12/include/uapi/linux/sysctl.h
11585 --- linux-4.9.217/include/uapi/linux/sysctl.h 2016-12-11 19:17:54.000000000 +0000
11586 +++ linux-4.9.217-vs2.3.9.12/include/uapi/linux/sysctl.h 2018-10-20 04:58:14.000000000 +0000
11587 @@ -58,6 +58,7 @@ enum
11588 CTL_ABI=9, /* Binary emulation */
11589 CTL_CPU=10, /* CPU stuff (speed scaling, etc) */
11590 CTL_ARLAN=254, /* arlan wireless driver */
11591 + CTL_VSERVER=4242, /* Linux-VServer debug */
11592 CTL_S390DBF=5677, /* s390 debug */
11593 CTL_SUNRPC=7249, /* sunrpc debug */
11594 CTL_PM=9899, /* frv power management */
11595 @@ -92,6 +93,7 @@ enum
11597 KERN_PANIC=15, /* int: panic timeout */
11598 KERN_REALROOTDEV=16, /* real root device to mount after initrd */
11599 + KERN_VSHELPER=17, /* string: path to vshelper policy agent */
11601 KERN_SPARC_REBOOT=21, /* reboot command on Sparc */
11602 KERN_CTLALTDEL=22, /* int: allow ctl-alt-del to reboot */
11603 diff -NurpP --minimal linux-4.9.217/include/uapi/vserver/cacct_cmd.h linux-4.9.217-vs2.3.9.12/include/uapi/vserver/cacct_cmd.h
11604 --- linux-4.9.217/include/uapi/vserver/cacct_cmd.h 1970-01-01 00:00:00.000000000 +0000
11605 +++ linux-4.9.217-vs2.3.9.12/include/uapi/vserver/cacct_cmd.h 2018-10-20 04:58:14.000000000 +0000
11607 +#ifndef _UAPI_VS_CACCT_CMD_H
11608 +#define _UAPI_VS_CACCT_CMD_H
11611 +/* socket statistics commands */
11613 +#define VCMD_sock_stat VC_CMD(VSTAT, 5, 0)
11615 +struct vcmd_sock_stat_v0 {
11617 + uint32_t count[3];
11618 + uint64_t total[3];
11621 +#endif /* _UAPI_VS_CACCT_CMD_H */
11622 diff -NurpP --minimal linux-4.9.217/include/uapi/vserver/context_cmd.h linux-4.9.217-vs2.3.9.12/include/uapi/vserver/context_cmd.h
11623 --- linux-4.9.217/include/uapi/vserver/context_cmd.h 1970-01-01 00:00:00.000000000 +0000
11624 +++ linux-4.9.217-vs2.3.9.12/include/uapi/vserver/context_cmd.h 2018-10-20 04:58:14.000000000 +0000
11626 +#ifndef _UAPI_VS_CONTEXT_CMD_H
11627 +#define _UAPI_VS_CONTEXT_CMD_H
11630 +/* vinfo commands */
11632 +#define VCMD_task_xid VC_CMD(VINFO, 1, 0)
11635 +#define VCMD_vx_info VC_CMD(VINFO, 5, 0)
11637 +struct vcmd_vx_info_v0 {
11639 + uint32_t initpid;
11640 + /* more to come */
11644 +#define VCMD_ctx_stat VC_CMD(VSTAT, 0, 0)
11646 +struct vcmd_ctx_stat_v0 {
11649 + /* more to come */
11653 +/* context commands */
11655 +#define VCMD_ctx_create_v0 VC_CMD(VPROC, 1, 0)
11656 +#define VCMD_ctx_create VC_CMD(VPROC, 1, 1)
11658 +struct vcmd_ctx_create {
11659 + uint64_t flagword;
11662 +#define VCMD_ctx_migrate_v0 VC_CMD(PROCMIG, 1, 0)
11663 +#define VCMD_ctx_migrate VC_CMD(PROCMIG, 1, 1)
11665 +struct vcmd_ctx_migrate {
11666 + uint64_t flagword;
11671 +/* flag commands */
11673 +#define VCMD_get_cflags VC_CMD(FLAGS, 1, 0)
11674 +#define VCMD_set_cflags VC_CMD(FLAGS, 2, 0)
11676 +struct vcmd_ctx_flags_v0 {
11677 + uint64_t flagword;
11683 +/* context caps commands */
11685 +#define VCMD_get_ccaps VC_CMD(FLAGS, 3, 1)
11686 +#define VCMD_set_ccaps VC_CMD(FLAGS, 4, 1)
11688 +struct vcmd_ctx_caps_v1 {
11695 +/* bcaps commands */
11697 +#define VCMD_get_bcaps VC_CMD(FLAGS, 9, 0)
11698 +#define VCMD_set_bcaps VC_CMD(FLAGS, 10, 0)
11700 +struct vcmd_bcaps {
11707 +/* umask commands */
11709 +#define VCMD_get_umask VC_CMD(FLAGS, 13, 0)
11710 +#define VCMD_set_umask VC_CMD(FLAGS, 14, 0)
11712 +struct vcmd_umask {
11719 +/* wmask commands */
11721 +#define VCMD_get_wmask VC_CMD(FLAGS, 15, 0)
11722 +#define VCMD_set_wmask VC_CMD(FLAGS, 16, 0)
11724 +struct vcmd_wmask {
11733 +#define VCMD_get_badness VC_CMD(MEMCTRL, 5, 0)
11734 +#define VCMD_set_badness VC_CMD(MEMCTRL, 6, 0)
11736 +struct vcmd_badness_v0 {
11740 +#endif /* _UAPI_VS_CONTEXT_CMD_H */
11741 diff -NurpP --minimal linux-4.9.217/include/uapi/vserver/context.h linux-4.9.217-vs2.3.9.12/include/uapi/vserver/context.h
11742 --- linux-4.9.217/include/uapi/vserver/context.h 1970-01-01 00:00:00.000000000 +0000
11743 +++ linux-4.9.217-vs2.3.9.12/include/uapi/vserver/context.h 2018-10-20 04:58:14.000000000 +0000
11745 +#ifndef _UAPI_VS_CONTEXT_H
11746 +#define _UAPI_VS_CONTEXT_H
11748 +#include <linux/types.h>
11749 +#include <linux/capability.h>
11752 +/* context flags */
11754 +#define VXF_INFO_SCHED 0x00000002
11755 +#define VXF_INFO_NPROC 0x00000004
11756 +#define VXF_INFO_PRIVATE 0x00000008
11758 +#define VXF_INFO_INIT 0x00000010
11759 +#define VXF_INFO_HIDE 0x00000020
11760 +#define VXF_INFO_ULIMIT 0x00000040
11761 +#define VXF_INFO_NSPACE 0x00000080
11763 +#define VXF_SCHED_HARD 0x00000100
11764 +#define VXF_SCHED_PRIO 0x00000200
11765 +#define VXF_SCHED_PAUSE 0x00000400
11767 +#define VXF_VIRT_MEM 0x00010000
11768 +#define VXF_VIRT_UPTIME 0x00020000
11769 +#define VXF_VIRT_CPU 0x00040000
11770 +#define VXF_VIRT_LOAD 0x00080000
11771 +#define VXF_VIRT_TIME 0x00100000
11773 +#define VXF_HIDE_MOUNT 0x01000000
11774 +/* was VXF_HIDE_NETIF 0x02000000 */
11775 +#define VXF_HIDE_VINFO 0x04000000
11777 +#define VXF_STATE_SETUP (1ULL << 32)
11778 +#define VXF_STATE_INIT (1ULL << 33)
11779 +#define VXF_STATE_ADMIN (1ULL << 34)
11781 +#define VXF_SC_HELPER (1ULL << 36)
11782 +#define VXF_REBOOT_KILL (1ULL << 37)
11783 +#define VXF_PERSISTENT (1ULL << 38)
11785 +#define VXF_FORK_RSS (1ULL << 48)
11786 +#define VXF_PROLIFIC (1ULL << 49)
11788 +#define VXF_IGNEG_NICE (1ULL << 52)
11790 +#define VXF_ONE_TIME (0x0007ULL << 32)
11792 +#define VXF_INIT_SET (VXF_STATE_SETUP | VXF_STATE_INIT | VXF_STATE_ADMIN)
11795 +/* context migration */
11797 +#define VXM_SET_INIT 0x00000001
11798 +#define VXM_SET_REAPER 0x00000002
11800 +/* context caps */
11802 +#define VXC_SET_UTSNAME 0x00000001
11803 +#define VXC_SET_RLIMIT 0x00000002
11804 +#define VXC_FS_SECURITY 0x00000004
11805 +#define VXC_FS_TRUSTED 0x00000008
11806 +#define VXC_TIOCSTI 0x00000010
11808 +/* was VXC_RAW_ICMP 0x00000100 */
11809 +#define VXC_SYSLOG 0x00001000
11810 +#define VXC_OOM_ADJUST 0x00002000
11811 +#define VXC_AUDIT_CONTROL 0x00004000
11813 +#define VXC_SECURE_MOUNT 0x00010000
11814 +/* #define VXC_SECURE_REMOUNT 0x00020000 */
11815 +#define VXC_BINARY_MOUNT 0x00040000
11816 +#define VXC_DEV_MOUNT 0x00080000
11818 +#define VXC_QUOTA_CTL 0x00100000
11819 +#define VXC_ADMIN_MAPPER 0x00200000
11820 +#define VXC_ADMIN_CLOOP 0x00400000
11822 +#define VXC_KTHREAD 0x01000000
11823 +#define VXC_NAMESPACE 0x02000000
11825 +#endif /* _UAPI_VS_CONTEXT_H */
11826 diff -NurpP --minimal linux-4.9.217/include/uapi/vserver/cvirt_cmd.h linux-4.9.217-vs2.3.9.12/include/uapi/vserver/cvirt_cmd.h
11827 --- linux-4.9.217/include/uapi/vserver/cvirt_cmd.h 1970-01-01 00:00:00.000000000 +0000
11828 +++ linux-4.9.217-vs2.3.9.12/include/uapi/vserver/cvirt_cmd.h 2018-10-20 04:58:14.000000000 +0000
11830 +#ifndef _UAPI_VS_CVIRT_CMD_H
11831 +#define _UAPI_VS_CVIRT_CMD_H
11834 +/* virtual host info name commands */
11836 +#define VCMD_set_vhi_name VC_CMD(VHOST, 1, 0)
11837 +#define VCMD_get_vhi_name VC_CMD(VHOST, 2, 0)
11839 +struct vcmd_vhi_name_v0 {
11845 +enum vhi_name_field {
11846 + VHIN_CONTEXT = 0,
11857 +#define VCMD_virt_stat VC_CMD(VSTAT, 3, 0)
11859 +struct vcmd_virt_stat_v0 {
11862 + uint32_t nr_threads;
11863 + uint32_t nr_running;
11864 + uint32_t nr_uninterruptible;
11865 + uint32_t nr_onhold;
11866 + uint32_t nr_forks;
11867 + uint32_t load[3];
11870 +#endif /* _UAPI_VS_CVIRT_CMD_H */
11871 diff -NurpP --minimal linux-4.9.217/include/uapi/vserver/debug_cmd.h linux-4.9.217-vs2.3.9.12/include/uapi/vserver/debug_cmd.h
11872 --- linux-4.9.217/include/uapi/vserver/debug_cmd.h 1970-01-01 00:00:00.000000000 +0000
11873 +++ linux-4.9.217-vs2.3.9.12/include/uapi/vserver/debug_cmd.h 2018-10-20 04:58:14.000000000 +0000
11875 +#ifndef _UAPI_VS_DEBUG_CMD_H
11876 +#define _UAPI_VS_DEBUG_CMD_H
11879 +/* debug commands */
11881 +#define VCMD_dump_history VC_CMD(DEBUG, 1, 0)
11883 +#define VCMD_read_history VC_CMD(DEBUG, 5, 0)
11884 +#define VCMD_read_monitor VC_CMD(DEBUG, 6, 0)
11886 +struct vcmd_read_history_v0 {
11889 + char __user *data;
11892 +struct vcmd_read_monitor_v0 {
11895 + char __user *data;
11898 +#endif /* _UAPI_VS_DEBUG_CMD_H */
11899 diff -NurpP --minimal linux-4.9.217/include/uapi/vserver/device_cmd.h linux-4.9.217-vs2.3.9.12/include/uapi/vserver/device_cmd.h
11900 --- linux-4.9.217/include/uapi/vserver/device_cmd.h 1970-01-01 00:00:00.000000000 +0000
11901 +++ linux-4.9.217-vs2.3.9.12/include/uapi/vserver/device_cmd.h 2018-10-20 04:58:14.000000000 +0000
11903 +#ifndef _UAPI_VS_DEVICE_CMD_H
11904 +#define _UAPI_VS_DEVICE_CMD_H
11907 +/* device vserver commands */
11909 +#define VCMD_set_mapping VC_CMD(DEVICE, 1, 0)
11910 +#define VCMD_unset_mapping VC_CMD(DEVICE, 2, 0)
11912 +struct vcmd_set_mapping_v0 {
11913 + const char __user *device;
11914 + const char __user *target;
11918 +#endif /* _UAPI_VS_DEVICE_CMD_H */
11919 diff -NurpP --minimal linux-4.9.217/include/uapi/vserver/device.h linux-4.9.217-vs2.3.9.12/include/uapi/vserver/device.h
11920 --- linux-4.9.217/include/uapi/vserver/device.h 1970-01-01 00:00:00.000000000 +0000
11921 +++ linux-4.9.217-vs2.3.9.12/include/uapi/vserver/device.h 2018-10-20 04:58:14.000000000 +0000
11923 +#ifndef _UAPI_VS_DEVICE_H
11924 +#define _UAPI_VS_DEVICE_H
11927 +#define DATTR_CREATE 0x00000001
11928 +#define DATTR_OPEN 0x00000002
11930 +#define DATTR_REMAP 0x00000010
11932 +#define DATTR_MASK 0x00000013
11934 +#endif /* _UAPI_VS_DEVICE_H */
11935 diff -NurpP --minimal linux-4.9.217/include/uapi/vserver/dlimit_cmd.h linux-4.9.217-vs2.3.9.12/include/uapi/vserver/dlimit_cmd.h
11936 --- linux-4.9.217/include/uapi/vserver/dlimit_cmd.h 1970-01-01 00:00:00.000000000 +0000
11937 +++ linux-4.9.217-vs2.3.9.12/include/uapi/vserver/dlimit_cmd.h 2018-10-20 04:58:14.000000000 +0000
11939 +#ifndef _UAPI_VS_DLIMIT_CMD_H
11940 +#define _UAPI_VS_DLIMIT_CMD_H
11943 +/* dlimit vserver commands */
11945 +#define VCMD_add_dlimit VC_CMD(DLIMIT, 1, 0)
11946 +#define VCMD_rem_dlimit VC_CMD(DLIMIT, 2, 0)
11948 +#define VCMD_set_dlimit VC_CMD(DLIMIT, 5, 0)
11949 +#define VCMD_get_dlimit VC_CMD(DLIMIT, 6, 0)
11951 +struct vcmd_ctx_dlimit_base_v0 {
11952 + const char __user *name;
11956 +struct vcmd_ctx_dlimit_v0 {
11957 + const char __user *name;
11958 + uint32_t space_used; /* used space in kbytes */
11959 + uint32_t space_total; /* maximum space in kbytes */
11960 + uint32_t inodes_used; /* used inodes */
11961 + uint32_t inodes_total; /* maximum inodes */
11962 + uint32_t reserved; /* reserved for root in % */
11966 +#define CDLIM_UNSET ((uint32_t)0UL)
11967 +#define CDLIM_INFINITY ((uint32_t)~0UL)
11968 +#define CDLIM_KEEP ((uint32_t)~1UL)
11970 +#define DLIME_UNIT 0
11971 +#define DLIME_KILO 1
11972 +#define DLIME_MEGA 2
11973 +#define DLIME_GIGA 3
11975 +#define DLIMF_SHIFT 0x10
11977 +#define DLIMS_USED 0
11978 +#define DLIMS_TOTAL 2
11981 +uint64_t dlimit_space_32to64(uint32_t val, uint32_t flags, int shift)
11983 + int exp = (flags & DLIMF_SHIFT) ?
11984 + (flags >> shift) & DLIME_GIGA : DLIME_KILO;
11985 + return ((uint64_t)val) << (10 * exp);
11989 +uint32_t dlimit_space_64to32(uint64_t val, uint32_t *flags, int shift)
11993 + if (*flags & DLIMF_SHIFT) {
11994 + while (val > (1LL << 32) && (exp < 3)) {
11998 + *flags &= ~(DLIME_GIGA << shift);
11999 + *flags |= exp << shift;
12005 +#endif /* _UAPI_VS_DLIMIT_CMD_H */
12006 diff -NurpP --minimal linux-4.9.217/include/uapi/vserver/inode_cmd.h linux-4.9.217-vs2.3.9.12/include/uapi/vserver/inode_cmd.h
12007 --- linux-4.9.217/include/uapi/vserver/inode_cmd.h 1970-01-01 00:00:00.000000000 +0000
12008 +++ linux-4.9.217-vs2.3.9.12/include/uapi/vserver/inode_cmd.h 2018-10-20 04:58:14.000000000 +0000
12010 +#ifndef _UAPI_VS_INODE_CMD_H
12011 +#define _UAPI_VS_INODE_CMD_H
12014 +/* inode vserver commands */
12016 +#define VCMD_get_iattr VC_CMD(INODE, 1, 1)
12017 +#define VCMD_set_iattr VC_CMD(INODE, 2, 1)
12019 +#define VCMD_fget_iattr VC_CMD(INODE, 3, 0)
12020 +#define VCMD_fset_iattr VC_CMD(INODE, 4, 0)
12022 +struct vcmd_ctx_iattr_v1 {
12023 + const char __user *name;
12029 +struct vcmd_ctx_fiattr_v0 {
12035 +#endif /* _UAPI_VS_INODE_CMD_H */
12036 diff -NurpP --minimal linux-4.9.217/include/uapi/vserver/inode.h linux-4.9.217-vs2.3.9.12/include/uapi/vserver/inode.h
12037 --- linux-4.9.217/include/uapi/vserver/inode.h 1970-01-01 00:00:00.000000000 +0000
12038 +++ linux-4.9.217-vs2.3.9.12/include/uapi/vserver/inode.h 2018-10-20 04:58:14.000000000 +0000
12040 +#ifndef _UAPI_VS_INODE_H
12041 +#define _UAPI_VS_INODE_H
12044 +#define IATTR_TAG 0x01000000
12046 +#define IATTR_ADMIN 0x00000001
12047 +#define IATTR_WATCH 0x00000002
12048 +#define IATTR_HIDE 0x00000004
12049 +#define IATTR_FLAGS 0x00000007
12051 +#define IATTR_BARRIER 0x00010000
12052 +#define IATTR_IXUNLINK 0x00020000
12053 +#define IATTR_IMMUTABLE 0x00040000
12054 +#define IATTR_COW 0x00080000
12057 +/* inode ioctls */
12059 +#define FIOC_GETXFLG _IOR('x', 5, long)
12060 +#define FIOC_SETXFLG _IOW('x', 6, long)
12062 +#endif /* _UAPI_VS_INODE_H */
12063 diff -NurpP --minimal linux-4.9.217/include/uapi/vserver/Kbuild linux-4.9.217-vs2.3.9.12/include/uapi/vserver/Kbuild
12064 --- linux-4.9.217/include/uapi/vserver/Kbuild 1970-01-01 00:00:00.000000000 +0000
12065 +++ linux-4.9.217-vs2.3.9.12/include/uapi/vserver/Kbuild 2018-10-20 04:58:14.000000000 +0000
12068 +header-y += context_cmd.h network_cmd.h space_cmd.h \
12069 + cacct_cmd.h cvirt_cmd.h limit_cmd.h dlimit_cmd.h \
12070 + inode_cmd.h tag_cmd.h sched_cmd.h signal_cmd.h \
12071 + debug_cmd.h device_cmd.h
12073 +header-y += switch.h context.h network.h monitor.h \
12074 + limit.h inode.h device.h
12076 diff -NurpP --minimal linux-4.9.217/include/uapi/vserver/limit_cmd.h linux-4.9.217-vs2.3.9.12/include/uapi/vserver/limit_cmd.h
12077 --- linux-4.9.217/include/uapi/vserver/limit_cmd.h 1970-01-01 00:00:00.000000000 +0000
12078 +++ linux-4.9.217-vs2.3.9.12/include/uapi/vserver/limit_cmd.h 2018-10-20 04:58:14.000000000 +0000
12080 +#ifndef _UAPI_VS_LIMIT_CMD_H
12081 +#define _UAPI_VS_LIMIT_CMD_H
12084 +/* rlimit vserver commands */
12086 +#define VCMD_get_rlimit VC_CMD(RLIMIT, 1, 0)
12087 +#define VCMD_set_rlimit VC_CMD(RLIMIT, 2, 0)
12088 +#define VCMD_get_rlimit_mask VC_CMD(RLIMIT, 3, 0)
12089 +#define VCMD_reset_hits VC_CMD(RLIMIT, 7, 0)
12090 +#define VCMD_reset_minmax VC_CMD(RLIMIT, 9, 0)
12092 +struct vcmd_ctx_rlimit_v0 {
12094 + uint64_t minimum;
12095 + uint64_t softlimit;
12096 + uint64_t maximum;
12099 +struct vcmd_ctx_rlimit_mask_v0 {
12100 + uint32_t minimum;
12101 + uint32_t softlimit;
12102 + uint32_t maximum;
12105 +#define VCMD_rlimit_stat VC_CMD(VSTAT, 1, 0)
12107 +struct vcmd_rlimit_stat_v0 {
12111 + uint64_t minimum;
12112 + uint64_t maximum;
12115 +#define CRLIM_UNSET (0ULL)
12116 +#define CRLIM_INFINITY (~0ULL)
12117 +#define CRLIM_KEEP (~1ULL)
12119 +#endif /* _UAPI_VS_LIMIT_CMD_H */
12120 diff -NurpP --minimal linux-4.9.217/include/uapi/vserver/limit.h linux-4.9.217-vs2.3.9.12/include/uapi/vserver/limit.h
12121 --- linux-4.9.217/include/uapi/vserver/limit.h 1970-01-01 00:00:00.000000000 +0000
12122 +++ linux-4.9.217-vs2.3.9.12/include/uapi/vserver/limit.h 2018-10-20 04:58:14.000000000 +0000
12124 +#ifndef _UAPI_VS_LIMIT_H
12125 +#define _UAPI_VS_LIMIT_H
12128 +#define VLIMIT_NSOCK 16
12129 +#define VLIMIT_OPENFD 17
12130 +#define VLIMIT_ANON 18
12131 +#define VLIMIT_SHMEM 19
12132 +#define VLIMIT_SEMARY 20
12133 +#define VLIMIT_NSEMS 21
12134 +#define VLIMIT_DENTRY 22
12135 +#define VLIMIT_MAPPED 23
12137 +#endif /* _UAPI_VS_LIMIT_H */
12138 diff -NurpP --minimal linux-4.9.217/include/uapi/vserver/monitor.h linux-4.9.217-vs2.3.9.12/include/uapi/vserver/monitor.h
12139 --- linux-4.9.217/include/uapi/vserver/monitor.h 1970-01-01 00:00:00.000000000 +0000
12140 +++ linux-4.9.217-vs2.3.9.12/include/uapi/vserver/monitor.h 2018-10-20 04:58:14.000000000 +0000
12142 +#ifndef _UAPI_VS_MONITOR_H
12143 +#define _UAPI_VS_MONITOR_H
12145 +#include <linux/types.h>
12153 + VXM_UPDATE = 0x20,
12157 + VXM_RQINFO_1 = 0x24,
12160 + VXM_ACTIVATE = 0x40,
12167 + VXM_MIGRATE = 0x48,
12170 + /* all other bits are flags */
12171 + VXM_SCHED = 0x80,
12174 +struct _vxm_update_1 {
12175 + uint32_t tokens_max;
12176 + uint32_t fill_rate;
12177 + uint32_t interval;
12180 +struct _vxm_update_2 {
12181 + uint32_t tokens_min;
12182 + uint32_t fill_rate;
12183 + uint32_t interval;
12186 +struct _vxm_rqinfo_1 {
12187 + uint16_t running;
12191 + uint32_t idle_tokens;
12194 +struct _vxm_rqinfo_2 {
12195 + uint32_t norm_time;
12196 + uint32_t idle_time;
12197 + uint32_t idle_skip;
12200 +struct _vxm_sched {
12202 + uint32_t norm_time;
12203 + uint32_t idle_time;
12206 +struct _vxm_task {
12211 +struct _vxm_event {
12220 + struct _vxm_task tsk;
12224 +struct _vx_mon_entry {
12228 + struct _vxm_event ev;
12229 + struct _vxm_sched sd;
12230 + struct _vxm_update_1 u1;
12231 + struct _vxm_update_2 u2;
12232 + struct _vxm_rqinfo_1 q1;
12233 + struct _vxm_rqinfo_2 q2;
12237 +#endif /* _UAPI_VS_MONITOR_H */
12238 diff -NurpP --minimal linux-4.9.217/include/uapi/vserver/network_cmd.h linux-4.9.217-vs2.3.9.12/include/uapi/vserver/network_cmd.h
12239 --- linux-4.9.217/include/uapi/vserver/network_cmd.h 1970-01-01 00:00:00.000000000 +0000
12240 +++ linux-4.9.217-vs2.3.9.12/include/uapi/vserver/network_cmd.h 2018-10-20 04:58:14.000000000 +0000
12242 +#ifndef _UAPI_VS_NETWORK_CMD_H
12243 +#define _UAPI_VS_NETWORK_CMD_H
12246 +/* vinfo commands */
12248 +#define VCMD_task_nid VC_CMD(VINFO, 2, 0)
12251 +#define VCMD_nx_info VC_CMD(VINFO, 6, 0)
12253 +struct vcmd_nx_info_v0 {
12255 + /* more to come */
12259 +#include <linux/in.h>
12260 +#include <linux/in6.h>
12262 +#define VCMD_net_create_v0 VC_CMD(VNET, 1, 0)
12263 +#define VCMD_net_create VC_CMD(VNET, 1, 1)
12265 +struct vcmd_net_create {
12266 + uint64_t flagword;
12269 +#define VCMD_net_migrate VC_CMD(NETMIG, 1, 0)
12271 +#define VCMD_net_add VC_CMD(NETALT, 1, 0)
12272 +#define VCMD_net_remove VC_CMD(NETALT, 2, 0)
12274 +struct vcmd_net_addr_v0 {
12277 + struct in_addr ip[4];
12278 + struct in_addr mask[4];
12281 +#define VCMD_net_add_ipv4_v1 VC_CMD(NETALT, 1, 1)
12282 +#define VCMD_net_rem_ipv4_v1 VC_CMD(NETALT, 2, 1)
12284 +struct vcmd_net_addr_ipv4_v1 {
12287 + struct in_addr ip;
12288 + struct in_addr mask;
12291 +#define VCMD_net_add_ipv4 VC_CMD(NETALT, 1, 2)
12292 +#define VCMD_net_rem_ipv4 VC_CMD(NETALT, 2, 2)
12294 +struct vcmd_net_addr_ipv4_v2 {
12297 + struct in_addr ip;
12298 + struct in_addr ip2;
12299 + struct in_addr mask;
12302 +#define VCMD_net_add_ipv6 VC_CMD(NETALT, 3, 1)
12303 +#define VCMD_net_remove_ipv6 VC_CMD(NETALT, 4, 1)
12305 +struct vcmd_net_addr_ipv6_v1 {
12309 + struct in6_addr ip;
12310 + struct in6_addr mask;
12313 +#define VCMD_add_match_ipv4 VC_CMD(NETALT, 5, 0)
12314 +#define VCMD_get_match_ipv4 VC_CMD(NETALT, 6, 0)
12316 +struct vcmd_match_ipv4_v0 {
12321 + struct in_addr ip;
12322 + struct in_addr ip2;
12323 + struct in_addr mask;
12326 +#define VCMD_add_match_ipv6 VC_CMD(NETALT, 7, 0)
12327 +#define VCMD_get_match_ipv6 VC_CMD(NETALT, 8, 0)
12329 +struct vcmd_match_ipv6_v0 {
12334 + struct in6_addr ip;
12335 + struct in6_addr ip2;
12336 + struct in6_addr mask;
12342 +/* flag commands */
12344 +#define VCMD_get_nflags VC_CMD(FLAGS, 5, 0)
12345 +#define VCMD_set_nflags VC_CMD(FLAGS, 6, 0)
12347 +struct vcmd_net_flags_v0 {
12348 + uint64_t flagword;
12354 +/* network caps commands */
12356 +#define VCMD_get_ncaps VC_CMD(FLAGS, 7, 0)
12357 +#define VCMD_set_ncaps VC_CMD(FLAGS, 8, 0)
12359 +struct vcmd_net_caps_v0 {
12364 +#endif /* _UAPI_VS_NETWORK_CMD_H */
12365 diff -NurpP --minimal linux-4.9.217/include/uapi/vserver/network.h linux-4.9.217-vs2.3.9.12/include/uapi/vserver/network.h
12366 --- linux-4.9.217/include/uapi/vserver/network.h 1970-01-01 00:00:00.000000000 +0000
12367 +++ linux-4.9.217-vs2.3.9.12/include/uapi/vserver/network.h 2018-10-20 04:58:14.000000000 +0000
12369 +#ifndef _UAPI_VS_NETWORK_H
12370 +#define _UAPI_VS_NETWORK_H
12372 +#include <linux/types.h>
12375 +#define MAX_N_CONTEXT 65535 /* Arbitrary limit */
12378 +/* network flags */
12380 +#define NXF_INFO_PRIVATE 0x00000008
12382 +#define NXF_SINGLE_IP 0x00000100
12383 +#define NXF_LBACK_REMAP 0x00000200
12384 +#define NXF_LBACK_ALLOW 0x00000400
12386 +#define NXF_HIDE_NETIF 0x02000000
12387 +#define NXF_HIDE_LBACK 0x04000000
12389 +#define NXF_STATE_SETUP (1ULL << 32)
12390 +#define NXF_STATE_ADMIN (1ULL << 34)
12392 +#define NXF_SC_HELPER (1ULL << 36)
12393 +#define NXF_PERSISTENT (1ULL << 38)
12395 +#define NXF_ONE_TIME (0x0005ULL << 32)
12398 +#define NXF_INIT_SET (__nxf_init_set())
12400 +static inline uint64_t __nxf_init_set(void) {
12401 + return NXF_STATE_ADMIN
12402 +#ifdef CONFIG_VSERVER_AUTO_LBACK
12403 + | NXF_LBACK_REMAP
12406 +#ifdef CONFIG_VSERVER_AUTO_SINGLE
12409 + | NXF_HIDE_NETIF;
12413 +/* network caps */
12415 +#define NXC_TUN_CREATE 0x00000001
12417 +#define NXC_RAW_ICMP 0x00000100
12419 +#define NXC_MULTICAST 0x00001000
12422 +/* address types */
12424 +#define NXA_TYPE_IPV4 0x0001
12425 +#define NXA_TYPE_IPV6 0x0002
12427 +#define NXA_TYPE_NONE 0x0000
12428 +#define NXA_TYPE_ANY 0x00FF
12430 +#define NXA_TYPE_ADDR 0x0010
12431 +#define NXA_TYPE_MASK 0x0020
12432 +#define NXA_TYPE_RANGE 0x0040
12434 +#define NXA_MASK_ALL (NXA_TYPE_ADDR | NXA_TYPE_MASK | NXA_TYPE_RANGE)
12436 +#define NXA_MOD_BCAST 0x0100
12437 +#define NXA_MOD_LBACK 0x0200
12439 +#define NXA_LOOPBACK 0x1000
12441 +#define NXA_MASK_BIND (NXA_MASK_ALL | NXA_MOD_BCAST | NXA_MOD_LBACK)
12442 +#define NXA_MASK_SHOW (NXA_MASK_ALL | NXA_LOOPBACK)
12444 +#endif /* _UAPI_VS_NETWORK_H */
12445 diff -NurpP --minimal linux-4.9.217/include/uapi/vserver/sched_cmd.h linux-4.9.217-vs2.3.9.12/include/uapi/vserver/sched_cmd.h
12446 --- linux-4.9.217/include/uapi/vserver/sched_cmd.h 1970-01-01 00:00:00.000000000 +0000
12447 +++ linux-4.9.217-vs2.3.9.12/include/uapi/vserver/sched_cmd.h 2018-10-20 04:58:14.000000000 +0000
12449 +#ifndef _UAPI_VS_SCHED_CMD_H
12450 +#define _UAPI_VS_SCHED_CMD_H
12453 +struct vcmd_prio_bias {
12455 + int32_t prio_bias;
12458 +#define VCMD_set_prio_bias VC_CMD(SCHED, 4, 0)
12459 +#define VCMD_get_prio_bias VC_CMD(SCHED, 5, 0)
12461 +#endif /* _UAPI_VS_SCHED_CMD_H */
12462 diff -NurpP --minimal linux-4.9.217/include/uapi/vserver/signal_cmd.h linux-4.9.217-vs2.3.9.12/include/uapi/vserver/signal_cmd.h
12463 --- linux-4.9.217/include/uapi/vserver/signal_cmd.h 1970-01-01 00:00:00.000000000 +0000
12464 +++ linux-4.9.217-vs2.3.9.12/include/uapi/vserver/signal_cmd.h 2018-10-20 04:58:14.000000000 +0000
12466 +#ifndef _UAPI_VS_SIGNAL_CMD_H
12467 +#define _UAPI_VS_SIGNAL_CMD_H
12470 +/* signalling vserver commands */
12472 +#define VCMD_ctx_kill VC_CMD(PROCTRL, 1, 0)
12473 +#define VCMD_wait_exit VC_CMD(EVENT, 99, 0)
12475 +struct vcmd_ctx_kill_v0 {
12480 +struct vcmd_wait_exit_v0 {
12481 + int32_t reboot_cmd;
12482 + int32_t exit_code;
12486 +/* process alteration commands */
12488 +#define VCMD_get_pflags VC_CMD(PROCALT, 5, 0)
12489 +#define VCMD_set_pflags VC_CMD(PROCALT, 6, 0)
12491 +struct vcmd_pflags_v0 {
12492 + uint32_t flagword;
12496 +#endif /* _UAPI_VS_SIGNAL_CMD_H */
12497 diff -NurpP --minimal linux-4.9.217/include/uapi/vserver/space_cmd.h linux-4.9.217-vs2.3.9.12/include/uapi/vserver/space_cmd.h
12498 --- linux-4.9.217/include/uapi/vserver/space_cmd.h 1970-01-01 00:00:00.000000000 +0000
12499 +++ linux-4.9.217-vs2.3.9.12/include/uapi/vserver/space_cmd.h 2018-10-20 04:58:14.000000000 +0000
12501 +#ifndef _UAPI_VS_SPACE_CMD_H
12502 +#define _UAPI_VS_SPACE_CMD_H
12505 +#define VCMD_enter_space_v0 VC_CMD(PROCALT, 1, 0)
12506 +#define VCMD_enter_space_v1 VC_CMD(PROCALT, 1, 1)
12507 +#define VCMD_enter_space VC_CMD(PROCALT, 1, 2)
12509 +#define VCMD_set_space_v0 VC_CMD(PROCALT, 3, 0)
12510 +#define VCMD_set_space_v1 VC_CMD(PROCALT, 3, 1)
12511 +#define VCMD_set_space VC_CMD(PROCALT, 3, 2)
12513 +#define VCMD_get_space_mask_v0 VC_CMD(PROCALT, 4, 0)
12515 +#define VCMD_get_space_mask VC_CMD(VSPACE, 0, 1)
12516 +#define VCMD_get_space_default VC_CMD(VSPACE, 1, 0)
12519 +struct vcmd_space_mask_v1 {
12523 +struct vcmd_space_mask_v2 {
12528 +#endif /* _UAPI_VS_SPACE_CMD_H */
12529 diff -NurpP --minimal linux-4.9.217/include/uapi/vserver/switch.h linux-4.9.217-vs2.3.9.12/include/uapi/vserver/switch.h
12530 --- linux-4.9.217/include/uapi/vserver/switch.h 1970-01-01 00:00:00.000000000 +0000
12531 +++ linux-4.9.217-vs2.3.9.12/include/uapi/vserver/switch.h 2018-10-20 04:58:14.000000000 +0000
12533 +#ifndef _UAPI_VS_SWITCH_H
12534 +#define _UAPI_VS_SWITCH_H
12536 +#include <linux/types.h>
12539 +#define VC_CATEGORY(c) (((c) >> 24) & 0x3F)
12540 +#define VC_COMMAND(c) (((c) >> 16) & 0xFF)
12541 +#define VC_VERSION(c) ((c) & 0xFFF)
12543 +#define VC_CMD(c, i, v) ((((VC_CAT_ ## c) & 0x3F) << 24) \
12544 + | (((i) & 0xFF) << 16) | ((v) & 0xFFF))
12548 + Syscall Matrix V2.8
12550 + |VERSION|CREATE |MODIFY |MIGRATE|CONTROL|EXPERIM| |SPECIAL|SPECIAL|
12551 + |STATS |DESTROY|ALTER |CHANGE |LIMIT |TEST | | | |
12552 + |INFO |SETUP | |MOVE | | | | | |
12553 + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12554 + SYSTEM |VERSION|VSETUP |VHOST | | | | |DEVICE | |
12555 + HOST | 00| 01| 02| 03| 04| 05| | 06| 07|
12556 + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12557 + CPU | |VPROC |PROCALT|PROCMIG|PROCTRL| | |SCHED. | |
12558 + PROCESS| 08| 09| 10| 11| 12| 13| | 14| 15|
12559 + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12560 + MEMORY | | | | |MEMCTRL| | |SWAP | |
12561 + | 16| 17| 18| 19| 20| 21| | 22| 23|
12562 + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12563 + NETWORK| |VNET |NETALT |NETMIG |NETCTRL| | |SERIAL | |
12564 + | 24| 25| 26| 27| 28| 29| | 30| 31|
12565 + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12566 + DISK | | | |TAGMIG |DLIMIT | | |INODE | |
12567 + VFS | 32| 33| 34| 35| 36| 37| | 38| 39|
12568 + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12569 + OTHER |VSTAT | | | | | | |VINFO | |
12570 + | 40| 41| 42| 43| 44| 45| | 46| 47|
12571 + =======+=======+=======+=======+=======+=======+=======+ +=======+=======+
12572 + SPECIAL|EVENT | | | |FLAGS | | |VSPACE | |
12573 + | 48| 49| 50| 51| 52| 53| | 54| 55|
12574 + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12575 + SPECIAL|DEBUG | | | |RLIMIT |SYSCALL| | |COMPAT |
12576 + | 56| 57| 58| 59| 60|TEST 61| | 62| 63|
12577 + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12581 +#define VC_CAT_VERSION 0
12583 +#define VC_CAT_VSETUP 1
12584 +#define VC_CAT_VHOST 2
12586 +#define VC_CAT_DEVICE 6
12588 +#define VC_CAT_VPROC 9
12589 +#define VC_CAT_PROCALT 10
12590 +#define VC_CAT_PROCMIG 11
12591 +#define VC_CAT_PROCTRL 12
12593 +#define VC_CAT_SCHED 14
12594 +#define VC_CAT_MEMCTRL 20
12596 +#define VC_CAT_VNET 25
12597 +#define VC_CAT_NETALT 26
12598 +#define VC_CAT_NETMIG 27
12599 +#define VC_CAT_NETCTRL 28
12601 +#define VC_CAT_TAGMIG 35
12602 +#define VC_CAT_DLIMIT 36
12603 +#define VC_CAT_INODE 38
12605 +#define VC_CAT_VSTAT 40
12606 +#define VC_CAT_VINFO 46
12607 +#define VC_CAT_EVENT 48
12609 +#define VC_CAT_FLAGS 52
12610 +#define VC_CAT_VSPACE 54
12611 +#define VC_CAT_DEBUG 56
12612 +#define VC_CAT_RLIMIT 60
12614 +#define VC_CAT_SYSTEST 61
12615 +#define VC_CAT_COMPAT 63
12617 +/* query version */
12619 +#define VCMD_get_version VC_CMD(VERSION, 0, 0)
12620 +#define VCMD_get_vci VC_CMD(VERSION, 1, 0)
12622 +#endif /* _UAPI_VS_SWITCH_H */
12623 diff -NurpP --minimal linux-4.9.217/include/uapi/vserver/tag_cmd.h linux-4.9.217-vs2.3.9.12/include/uapi/vserver/tag_cmd.h
12624 --- linux-4.9.217/include/uapi/vserver/tag_cmd.h 1970-01-01 00:00:00.000000000 +0000
12625 +++ linux-4.9.217-vs2.3.9.12/include/uapi/vserver/tag_cmd.h 2018-10-20 04:58:14.000000000 +0000
12627 +#ifndef _UAPI_VS_TAG_CMD_H
12628 +#define _UAPI_VS_TAG_CMD_H
12631 +/* vinfo commands */
12633 +#define VCMD_task_tag VC_CMD(VINFO, 3, 0)
12636 +/* tag commands */
12638 +#define VCMD_tag_migrate VC_CMD(TAGMIG, 1, 0)
12640 +#endif /* _UAPI_VS_TAG_CMD_H */
12641 diff -NurpP --minimal linux-4.9.217/init/Kconfig linux-4.9.217-vs2.3.9.12/init/Kconfig
12642 --- linux-4.9.217/init/Kconfig 2020-03-27 00:51:42.790148377 +0000
12643 +++ linux-4.9.217-vs2.3.9.12/init/Kconfig 2018-10-20 04:58:14.000000000 +0000
12644 @@ -958,6 +958,7 @@ config NUMA_BALANCING_DEFAULT_ENABLED
12646 bool "Control Group support"
12650 This option adds support for grouping sets of processes together, for
12651 use with process control subsystems such as Cpusets, CFS, memory
12652 diff -NurpP --minimal linux-4.9.217/init/main.c linux-4.9.217-vs2.3.9.12/init/main.c
12653 --- linux-4.9.217/init/main.c 2020-03-27 00:51:42.800148216 +0000
12654 +++ linux-4.9.217-vs2.3.9.12/init/main.c 2019-10-05 14:58:45.760306119 +0000
12656 #include <linux/io.h>
12657 #include <linux/kaiser.h>
12658 #include <linux/cache.h>
12659 +#include <linux/vserver/percpu.h>
12661 #include <asm/io.h>
12662 #include <asm/bugs.h>
12663 diff -NurpP --minimal linux-4.9.217/ipc/mqueue.c linux-4.9.217-vs2.3.9.12/ipc/mqueue.c
12664 --- linux-4.9.217/ipc/mqueue.c 2020-03-27 00:51:42.820147901 +0000
12665 +++ linux-4.9.217-vs2.3.9.12/ipc/mqueue.c 2019-10-05 15:01:19.537840241 +0000
12667 #include <linux/ipc_namespace.h>
12668 #include <linux/user_namespace.h>
12669 #include <linux/slab.h>
12670 +#include <linux/vs_context.h>
12671 +#include <linux/vs_limit.h>
12673 #include <net/sock.h>
12675 @@ -75,6 +77,7 @@ struct mqueue_inode_info {
12676 struct pid *notify_owner;
12677 struct user_namespace *notify_user_ns;
12678 struct user_struct *user; /* user who created, for accounting */
12679 + struct vx_info *vxi;
12680 struct sock *notify_sock;
12681 struct sk_buff *notify_cookie;
12683 @@ -230,6 +233,7 @@ static struct inode *mqueue_get_inode(st
12684 if (S_ISREG(mode)) {
12685 struct mqueue_inode_info *info;
12686 unsigned long mq_bytes, mq_treesize;
12687 + struct vx_info *vxi = current_vx_info();
12689 inode->i_fop = &mqueue_file_operations;
12690 inode->i_size = FILENT_SIZE;
12691 @@ -243,6 +247,7 @@ static struct inode *mqueue_get_inode(st
12692 info->notify_user_ns = NULL;
12694 info->user = NULL; /* set when all is ok */
12695 + info->vxi = NULL;
12696 info->msg_tree = RB_ROOT;
12697 info->node_cache = NULL;
12698 memset(&info->attr, 0, sizeof(info->attr));
12699 @@ -276,17 +281,20 @@ static struct inode *mqueue_get_inode(st
12701 spin_lock(&mq_lock);
12702 if (u->mq_bytes + mq_bytes < u->mq_bytes ||
12703 - u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE)) {
12704 + u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE) ||
12705 + !vx_ipcmsg_avail(vxi, mq_bytes)) {
12706 spin_unlock(&mq_lock);
12707 /* mqueue_evict_inode() releases info->messages */
12711 u->mq_bytes += mq_bytes;
12712 + vx_ipcmsg_add(vxi, u, mq_bytes);
12713 spin_unlock(&mq_lock);
12716 info->user = get_uid(u);
12717 + info->vxi = get_vx_info(vxi);
12718 } else if (S_ISDIR(mode)) {
12720 /* Some things misbehave if size == 0 on a directory */
12721 @@ -393,6 +401,7 @@ static void mqueue_evict_inode(struct in
12725 + struct vx_info *vxi = info->vxi;
12726 unsigned long mq_bytes, mq_treesize;
12728 /* Total amount of bytes accounted for the mqueue */
12729 @@ -405,6 +414,7 @@ static void mqueue_evict_inode(struct in
12731 spin_lock(&mq_lock);
12732 user->mq_bytes -= mq_bytes;
12733 + vx_ipcmsg_sub(vxi, user, mq_bytes);
12735 * get_ns_from_inode() ensures that the
12736 * (ipc_ns = sb->s_fs_info) is either a valid ipc_ns
12737 @@ -414,6 +424,7 @@ static void mqueue_evict_inode(struct in
12739 ipc_ns->mq_queues_count--;
12740 spin_unlock(&mq_lock);
12741 + put_vx_info(vxi);
12745 diff -NurpP --minimal linux-4.9.217/ipc/msg.c linux-4.9.217-vs2.3.9.12/ipc/msg.c
12746 --- linux-4.9.217/ipc/msg.c 2020-03-27 00:51:42.850147429 +0000
12747 +++ linux-4.9.217-vs2.3.9.12/ipc/msg.c 2018-10-20 04:58:14.000000000 +0000
12749 #include <linux/rwsem.h>
12750 #include <linux/nsproxy.h>
12751 #include <linux/ipc_namespace.h>
12752 +#include <linux/vs_base.h>
12754 #include <asm/current.h>
12755 #include <linux/uaccess.h>
12756 @@ -124,6 +125,7 @@ static int newque(struct ipc_namespace *
12758 msq->q_perm.mode = msgflg & S_IRWXUGO;
12759 msq->q_perm.key = key;
12760 + msq->q_perm.xid = vx_current_xid();
12762 msq->q_perm.security = NULL;
12763 retval = security_msg_queue_alloc(msq);
12764 diff -NurpP --minimal linux-4.9.217/ipc/namespace.c linux-4.9.217-vs2.3.9.12/ipc/namespace.c
12765 --- linux-4.9.217/ipc/namespace.c 2016-12-11 19:17:54.000000000 +0000
12766 +++ linux-4.9.217-vs2.3.9.12/ipc/namespace.c 2018-10-20 04:58:14.000000000 +0000
12768 #include <linux/mount.h>
12769 #include <linux/user_namespace.h>
12770 #include <linux/proc_ns.h>
12771 +#include <linux/vserver/global.h>
12775 @@ -59,6 +60,7 @@ static struct ipc_namespace *create_ipc_
12779 + atomic_inc(&vs_global_ipc_ns);
12783 @@ -121,6 +123,7 @@ static void free_ipc_ns(struct ipc_names
12784 dec_ipc_namespaces(ns->ucounts);
12785 put_user_ns(ns->user_ns);
12786 ns_free_inum(&ns->ns);
12787 + atomic_dec(&vs_global_ipc_ns);
12791 diff -NurpP --minimal linux-4.9.217/ipc/sem.c linux-4.9.217-vs2.3.9.12/ipc/sem.c
12792 --- linux-4.9.217/ipc/sem.c 2020-03-27 00:51:42.870147113 +0000
12793 +++ linux-4.9.217-vs2.3.9.12/ipc/sem.c 2020-04-01 09:40:32.455401832 +0000
12795 #include <linux/rwsem.h>
12796 #include <linux/nsproxy.h>
12797 #include <linux/ipc_namespace.h>
12798 +#include <linux/vs_base.h>
12799 +#include <linux/vs_limit.h>
12801 #include <linux/uaccess.h>
12803 @@ -537,6 +539,7 @@ static int newary(struct ipc_namespace *
12805 sma->sem_perm.mode = (semflg & S_IRWXUGO);
12806 sma->sem_perm.key = key;
12807 + sma->sem_perm.xid = vx_current_xid();
12809 sma->sem_perm.security = NULL;
12810 retval = security_sem_alloc(sma);
12811 @@ -567,6 +570,9 @@ static int newary(struct ipc_namespace *
12814 ns->used_sems += nsems;
12815 + /* FIXME: obsoleted? */
12816 + vx_semary_inc(sma);
12817 + vx_nsems_add(sma, nsems);
12819 sem_unlock(sma, -1);
12821 @@ -1155,6 +1161,9 @@ static void freeary(struct ipc_namespace
12823 wake_up_sem_queue_do(&tasks);
12824 ns->used_sems -= sma->sem_nsems;
12825 + /* FIXME: obsoleted? */
12826 + vx_nsems_sub(sma, sma->sem_nsems);
12827 + vx_semary_dec(sma);
12828 ipc_rcu_putref(sma, sem_rcu_free);
12831 diff -NurpP --minimal linux-4.9.217/ipc/shm.c linux-4.9.217-vs2.3.9.12/ipc/shm.c
12832 --- linux-4.9.217/ipc/shm.c 2020-03-27 00:51:42.870147113 +0000
12833 +++ linux-4.9.217-vs2.3.9.12/ipc/shm.c 2018-10-20 04:58:14.000000000 +0000
12835 #include <linux/nsproxy.h>
12836 #include <linux/mount.h>
12837 #include <linux/ipc_namespace.h>
12838 +#include <linux/vs_context.h>
12839 +#include <linux/vs_limit.h>
12841 #include <linux/uaccess.h>
12843 @@ -234,10 +236,14 @@ static void shm_open(struct vm_area_stru
12844 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
12846 struct file *shm_file;
12847 + struct vx_info *vxi = lookup_vx_info(shp->shm_perm.xid);
12848 + int numpages = (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
12850 shm_file = shp->shm_file;
12851 shp->shm_file = NULL;
12852 - ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
12853 + vx_ipcshm_sub(vxi, shp, numpages);
12854 + ns->shm_tot -= numpages;
12858 if (!is_file_hugepages(shm_file))
12859 @@ -246,6 +252,7 @@ static void shm_destroy(struct ipc_names
12860 user_shm_unlock(i_size_read(file_inode(shm_file)),
12863 + put_vx_info(vxi);
12864 ipc_rcu_putref(shp, shm_rcu_free);
12867 @@ -559,11 +566,15 @@ static int newseg(struct ipc_namespace *
12868 ns->shm_tot + numpages > ns->shm_ctlall)
12871 + if (!vx_ipcshm_avail(current_vx_info(), numpages))
12874 shp = ipc_rcu_alloc(sizeof(*shp));
12878 shp->shm_perm.key = key;
12879 + shp->shm_perm.xid = vx_current_xid();
12880 shp->shm_perm.mode = (shmflg & S_IRWXUGO);
12881 shp->mlock_user = NULL;
12883 @@ -634,6 +645,7 @@ static int newseg(struct ipc_namespace *
12885 ipc_unlock_object(&shp->shm_perm);
12887 + vx_ipcshm_add(current_vx_info(), key, numpages);
12891 diff -NurpP --minimal linux-4.9.217/kernel/auditsc.c linux-4.9.217-vs2.3.9.12/kernel/auditsc.c
12892 --- linux-4.9.217/kernel/auditsc.c 2020-03-27 00:51:42.950145853 +0000
12893 +++ linux-4.9.217-vs2.3.9.12/kernel/auditsc.c 2019-12-25 15:37:52.348423568 +0000
12894 @@ -1967,7 +1967,7 @@ static int audit_set_loginuid_perm(kuid_
12895 if (is_audit_feature_set(AUDIT_FEATURE_LOGINUID_IMMUTABLE))
12897 /* it is set, you need permission */
12898 - if (!capable(CAP_AUDIT_CONTROL))
12899 + if (!vx_capable(CAP_AUDIT_CONTROL, VXC_AUDIT_CONTROL))
12901 /* reject if this is not an unset and we don't allow that */
12902 if (is_audit_feature_set(AUDIT_FEATURE_ONLY_UNSET_LOGINUID) && uid_valid(loginuid))
12903 diff -NurpP --minimal linux-4.9.217/kernel/capability.c linux-4.9.217-vs2.3.9.12/kernel/capability.c
12904 --- linux-4.9.217/kernel/capability.c 2020-03-27 00:51:43.080143807 +0000
12905 +++ linux-4.9.217-vs2.3.9.12/kernel/capability.c 2018-10-20 04:58:14.000000000 +0000
12907 #include <linux/syscalls.h>
12908 #include <linux/pid_namespace.h>
12909 #include <linux/user_namespace.h>
12910 +#include <linux/vs_context.h>
12911 #include <asm/uaccess.h>
12914 @@ -107,6 +108,7 @@ static int cap_validate_magic(cap_user_h
12920 * The only thing that can change the capabilities of the current
12921 * process is the current process. As such, we can't be in this code
12922 @@ -344,6 +346,8 @@ bool has_ns_capability_noaudit(struct ta
12926 +#include <linux/vserver/base.h>
12929 * has_capability_noaudit - Does a task have a capability (unaudited) in the
12931 diff -NurpP --minimal linux-4.9.217/kernel/compat.c linux-4.9.217-vs2.3.9.12/kernel/compat.c
12932 --- linux-4.9.217/kernel/compat.c 2016-12-11 19:17:54.000000000 +0000
12933 +++ linux-4.9.217-vs2.3.9.12/kernel/compat.c 2018-10-20 04:58:14.000000000 +0000
12935 #include <linux/times.h>
12936 #include <linux/ptrace.h>
12937 #include <linux/gfp.h>
12938 +#include <linux/vs_time.h>
12940 #include <asm/uaccess.h>
12942 @@ -1059,7 +1060,7 @@ COMPAT_SYSCALL_DEFINE1(stime, compat_tim
12946 - do_settimeofday(&tv);
12947 + vx_settimeofday(&tv);
12951 diff -NurpP --minimal linux-4.9.217/kernel/cred.c linux-4.9.217-vs2.3.9.12/kernel/cred.c
12952 --- linux-4.9.217/kernel/cred.c 2020-03-27 00:51:43.110143330 +0000
12953 +++ linux-4.9.217-vs2.3.9.12/kernel/cred.c 2020-04-01 09:40:32.475401502 +0000
12954 @@ -64,31 +64,6 @@ struct cred init_cred = {
12955 .group_info = &init_groups,
12958 -static inline void set_cred_subscribers(struct cred *cred, int n)
12960 -#ifdef CONFIG_DEBUG_CREDENTIALS
12961 - atomic_set(&cred->subscribers, n);
12965 -static inline int read_cred_subscribers(const struct cred *cred)
12967 -#ifdef CONFIG_DEBUG_CREDENTIALS
12968 - return atomic_read(&cred->subscribers);
12974 -static inline void alter_cred_subscribers(const struct cred *_cred, int n)
12976 -#ifdef CONFIG_DEBUG_CREDENTIALS
12977 - struct cred *cred = (struct cred *) _cred;
12979 - atomic_add(n, &cred->subscribers);
12984 * The RCU callback to actually dispose of a set of credentials
12986 @@ -243,21 +218,16 @@ error:
12988 * Call commit_creds() or abort_creds() to clean up.
12990 -struct cred *prepare_creds(void)
12991 +struct cred *__prepare_creds(const struct cred *old)
12993 - struct task_struct *task = current;
12994 - const struct cred *old;
12997 - validate_process_creds();
12999 new = kmem_cache_alloc(cred_jar, GFP_KERNEL);
13003 kdebug("prepare_creds() alloc %p", new);
13005 - old = task->cred;
13006 memcpy(new, old, sizeof(struct cred));
13009 @@ -287,6 +257,13 @@ error:
13014 +struct cred *prepare_creds(void)
13016 + validate_process_creds();
13018 + return __prepare_creds(current->cred);
13020 EXPORT_SYMBOL(prepare_creds);
13023 diff -NurpP --minimal linux-4.9.217/kernel/exit.c linux-4.9.217-vs2.3.9.12/kernel/exit.c
13024 --- linux-4.9.217/kernel/exit.c 2020-03-27 00:51:43.250141127 +0000
13025 +++ linux-4.9.217-vs2.3.9.12/kernel/exit.c 2019-02-24 12:44:58.353750946 +0000
13027 #include <linux/fs_struct.h>
13028 #include <linux/init_task.h>
13029 #include <linux/perf_event.h>
13030 +#include <linux/vs_limit.h>
13031 +#include <linux/vs_context.h>
13032 +#include <linux/vs_network.h>
13033 +#include <linux/vs_pid.h>
13034 #include <trace/events/sched.h>
13035 #include <linux/hw_breakpoint.h>
13036 #include <linux/oom.h>
13037 @@ -532,15 +536,25 @@ static struct task_struct *find_child_re
13039 struct pid_namespace *pid_ns = task_active_pid_ns(father);
13040 struct task_struct *reaper = pid_ns->child_reaper;
13041 + struct vx_info *vxi = task_get_vx_info(father);
13042 struct task_struct *p, *n;
13045 + BUG_ON(!vxi->vx_reaper);
13046 + if (vxi->vx_reaper != init_pid_ns.child_reaper &&
13047 + vxi->vx_reaper != father) {
13048 + reaper = vxi->vx_reaper;
13053 if (likely(reaper != father))
13057 reaper = find_alive_thread(father);
13059 pid_ns->child_reaper = reaper;
13064 write_unlock_irq(&tasklist_lock);
13065 @@ -557,7 +571,10 @@ static struct task_struct *find_child_re
13066 zap_pid_ns_processes(pid_ns);
13067 write_lock_irq(&tasklist_lock);
13072 + put_vx_info(vxi);
13077 @@ -645,9 +662,13 @@ static void forget_original_parent(struc
13080 reaper = find_new_reaper(father, reaper);
13081 - list_for_each_entry(p, &father->children, sibling) {
13082 + for (p = list_first_entry(&father->children, struct task_struct, sibling);
13083 + &p->sibling != &father->children; ) {
13084 + struct task_struct *next, *this_reaper = reaper;
13086 + this_reaper = task_active_pid_ns(reaper)->child_reaper;
13087 for_each_thread(p, t) {
13088 - t->real_parent = reaper;
13089 + t->real_parent = this_reaper;
13090 BUG_ON((!t->ptrace) != (t->parent == father));
13091 if (likely(!t->ptrace))
13092 t->parent = t->real_parent;
13093 @@ -659,10 +680,13 @@ static void forget_original_parent(struc
13094 * If this is a threaded reparent there is no need to
13095 * notify anyone anything has happened.
13097 - if (!same_thread_group(reaper, father))
13098 + if (!same_thread_group(this_reaper, father))
13099 reparent_leader(father, p, dead);
13100 + next = list_next_entry(p, sibling);
13101 + list_add(&p->sibling, &this_reaper->children);
13104 - list_splice_tail_init(&father->children, &reaper->children);
13105 + INIT_LIST_HEAD(&father->children);
13109 @@ -852,6 +876,9 @@ void __noreturn do_exit(long code)
13111 flush_ptrace_hw_breakpoint(tsk);
13113 + /* needs to stay before exit_notify() */
13114 + exit_vx_info_early(tsk, code);
13116 TASKS_RCU(preempt_disable());
13117 TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu));
13118 TASKS_RCU(preempt_enable());
13119 @@ -884,6 +911,10 @@ void __noreturn do_exit(long code)
13121 validate_creds_for_do_exit(tsk);
13123 + /* needs to stay after exit_notify() and before preempt_disable() */
13124 + exit_vx_info(tsk, code);
13125 + exit_nx_info(tsk);
13127 check_stack_usage();
13129 if (tsk->nr_dirtied)
13130 diff -NurpP --minimal linux-4.9.217/kernel/fork.c linux-4.9.217-vs2.3.9.12/kernel/fork.c
13131 --- linux-4.9.217/kernel/fork.c 2020-03-27 00:51:43.250141127 +0000
13132 +++ linux-4.9.217-vs2.3.9.12/kernel/fork.c 2019-10-22 13:47:05.619629084 +0000
13134 #include <linux/compiler.h>
13135 #include <linux/sysctl.h>
13136 #include <linux/kcov.h>
13137 +#include <linux/vs_context.h>
13138 +#include <linux/vs_network.h>
13139 +#include <linux/vs_limit.h>
13141 #include <asm/pgtable.h>
13142 #include <asm/pgalloc.h>
13143 @@ -356,6 +359,8 @@ void free_task(struct task_struct *tsk)
13144 WARN_ON_ONCE(atomic_read(&tsk->stack_refcount) != 0);
13146 rt_mutex_debug_task_free(tsk);
13147 + clr_vx_info(&tsk->vx_info);
13148 + clr_nx_info(&tsk->nx_info);
13149 ftrace_graph_exit_task(tsk);
13150 put_seccomp_filter(tsk);
13151 arch_release_task_struct(tsk);
13152 @@ -1480,6 +1485,8 @@ static __latent_entropy struct task_stru
13155 struct task_struct *p;
13156 + struct vx_info *vxi;
13157 + struct nx_info *nxi;
13159 if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
13160 return ERR_PTR(-EINVAL);
13161 @@ -1552,7 +1559,12 @@ static __latent_entropy struct task_stru
13162 DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
13163 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
13165 + init_vx_info(&p->vx_info, current_vx_info());
13166 + init_nx_info(&p->nx_info, current_nx_info());
13169 + if (!vx_nproc_avail(1))
13170 + goto bad_fork_free;
13171 if (atomic_read(&p->real_cred->user->processes) >=
13172 task_rlimit(p, RLIMIT_NPROC)) {
13173 if (p->real_cred->user != INIT_USER &&
13174 @@ -1853,6 +1865,18 @@ static __latent_entropy struct task_stru
13176 spin_unlock(&current->sighand->siglock);
13177 syscall_tracepoint_update(p);
13179 + /* p is a copy of current */
13180 + vxi = p->vx_info;
13182 + claim_vx_info(vxi, p);
13183 + atomic_inc(&vxi->cvirt.nr_threads);
13184 + atomic_inc(&vxi->cvirt.total_forks);
13187 + nxi = p->nx_info;
13189 + claim_nx_info(nxi, p);
13190 write_unlock_irq(&tasklist_lock);
13192 proc_fork_connector(p);
13193 diff -NurpP --minimal linux-4.9.217/kernel/kthread.c linux-4.9.217-vs2.3.9.12/kernel/kthread.c
13194 --- linux-4.9.217/kernel/kthread.c 2020-03-27 00:51:43.440138133 +0000
13195 +++ linux-4.9.217-vs2.3.9.12/kernel/kthread.c 2018-10-20 05:55:43.000000000 +0000
13197 #include <linux/ptrace.h>
13198 #include <linux/uaccess.h>
13199 #include <linux/cgroup.h>
13200 +#include <linux/vs_pid.h>
13201 #include <trace/events/sched.h>
13203 static DEFINE_SPINLOCK(kthread_create_lock);
13204 diff -NurpP --minimal linux-4.9.217/kernel/Makefile linux-4.9.217-vs2.3.9.12/kernel/Makefile
13205 --- linux-4.9.217/kernel/Makefile 2020-03-27 00:51:42.890146797 +0000
13206 +++ linux-4.9.217-vs2.3.9.12/kernel/Makefile 2019-10-05 14:58:46.030301805 +0000
13207 @@ -40,6 +40,7 @@ obj-y += printk/
13210 obj-y += livepatch/
13213 obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
13214 obj-$(CONFIG_FREEZER) += freezer.o
13215 diff -NurpP --minimal linux-4.9.217/kernel/nsproxy.c linux-4.9.217-vs2.3.9.12/kernel/nsproxy.c
13216 --- linux-4.9.217/kernel/nsproxy.c 2016-12-11 19:17:54.000000000 +0000
13217 +++ linux-4.9.217-vs2.3.9.12/kernel/nsproxy.c 2018-10-20 04:58:14.000000000 +0000
13218 @@ -20,12 +20,15 @@
13219 #include <linux/mnt_namespace.h>
13220 #include <linux/utsname.h>
13221 #include <linux/pid_namespace.h>
13222 +#include <linux/vserver/global.h>
13223 +#include <linux/vserver/debug.h>
13224 #include <net/net_namespace.h>
13225 #include <linux/ipc_namespace.h>
13226 #include <linux/proc_ns.h>
13227 #include <linux/file.h>
13228 #include <linux/syscalls.h>
13229 #include <linux/cgroup.h>
13230 +#include "../fs/mount.h"
13232 static struct kmem_cache *nsproxy_cachep;
13234 @@ -50,8 +53,11 @@ static inline struct nsproxy *create_nsp
13235 struct nsproxy *nsproxy;
13237 nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL);
13240 atomic_set(&nsproxy->count, 1);
13241 + atomic_inc(&vs_global_nsproxy);
13243 + vxdprintk(VXD_CBIT(space, 2), "create_nsproxy = %p[1]", nsproxy);
13247 @@ -60,9 +66,12 @@ static inline struct nsproxy *create_nsp
13248 * Return the newly created nsproxy. Do not attach this to the task,
13249 * leave it to the caller to do proper locking and attach it to task.
13251 -static struct nsproxy *create_new_namespaces(unsigned long flags,
13252 - struct task_struct *tsk, struct user_namespace *user_ns,
13253 - struct fs_struct *new_fs)
13254 +static struct nsproxy *unshare_namespaces(
13255 + unsigned long flags,
13256 + struct nsproxy *orig,
13257 + struct fs_struct *new_fs,
13258 + struct user_namespace *new_user,
13259 + struct pid_namespace *new_pid)
13261 struct nsproxy *new_nsp;
13263 @@ -71,39 +80,37 @@ static struct nsproxy *create_new_namesp
13265 return ERR_PTR(-ENOMEM);
13267 - new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, user_ns, new_fs);
13268 + new_nsp->mnt_ns = copy_mnt_ns(flags, orig->mnt_ns, new_user, new_fs);
13269 if (IS_ERR(new_nsp->mnt_ns)) {
13270 err = PTR_ERR(new_nsp->mnt_ns);
13274 - new_nsp->uts_ns = copy_utsname(flags, user_ns, tsk->nsproxy->uts_ns);
13275 + new_nsp->uts_ns = copy_utsname(flags, new_user, orig->uts_ns);
13276 if (IS_ERR(new_nsp->uts_ns)) {
13277 err = PTR_ERR(new_nsp->uts_ns);
13281 - new_nsp->ipc_ns = copy_ipcs(flags, user_ns, tsk->nsproxy->ipc_ns);
13282 + new_nsp->ipc_ns = copy_ipcs(flags, new_user, orig->ipc_ns);
13283 if (IS_ERR(new_nsp->ipc_ns)) {
13284 err = PTR_ERR(new_nsp->ipc_ns);
13288 - new_nsp->pid_ns_for_children =
13289 - copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns_for_children);
13290 + new_nsp->pid_ns_for_children = copy_pid_ns(flags, new_user, new_pid);
13291 if (IS_ERR(new_nsp->pid_ns_for_children)) {
13292 err = PTR_ERR(new_nsp->pid_ns_for_children);
13296 - new_nsp->cgroup_ns = copy_cgroup_ns(flags, user_ns,
13297 - tsk->nsproxy->cgroup_ns);
13298 + new_nsp->cgroup_ns = copy_cgroup_ns(flags, new_user, orig->cgroup_ns);
13299 if (IS_ERR(new_nsp->cgroup_ns)) {
13300 err = PTR_ERR(new_nsp->cgroup_ns);
13304 - new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns);
13305 + new_nsp->net_ns = copy_net_ns(flags, new_user, orig->net_ns);
13306 if (IS_ERR(new_nsp->net_ns)) {
13307 err = PTR_ERR(new_nsp->net_ns);
13309 @@ -130,6 +137,43 @@ out_ns:
13310 return ERR_PTR(err);
13313 +static struct nsproxy *create_new_namespaces(unsigned long flags,
13314 + struct task_struct *tsk, struct user_namespace *user_ns,
13315 + struct fs_struct *new_fs)
13318 + return unshare_namespaces(flags, tsk->nsproxy,
13319 + new_fs, user_ns, task_active_pid_ns(tsk));
13323 + * copies the nsproxy, setting refcount to 1, and grabbing a
13324 + * reference to all contained namespaces.
13326 +struct nsproxy *copy_nsproxy(struct nsproxy *orig)
13328 + struct nsproxy *ns = create_nsproxy();
13331 + memcpy(ns, orig, sizeof(struct nsproxy));
13332 + atomic_set(&ns->count, 1);
13335 + get_mnt_ns(ns->mnt_ns);
13337 + get_uts_ns(ns->uts_ns);
13339 + get_ipc_ns(ns->ipc_ns);
13340 + if (ns->pid_ns_for_children)
13341 + get_pid_ns(ns->pid_ns_for_children);
13343 + get_net(ns->net_ns);
13344 + if (ns->cgroup_ns)
13345 + get_cgroup_ns(ns->cgroup_ns);
13351 * called from clone. This now handles copy for nsproxy and all
13352 * namespaces therein.
13353 @@ -138,7 +182,10 @@ int copy_namespaces(unsigned long flags,
13355 struct nsproxy *old_ns = tsk->nsproxy;
13356 struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
13357 - struct nsproxy *new_ns;
13358 + struct nsproxy *new_ns = NULL;
13360 + vxdprintk(VXD_CBIT(space, 7), "copy_namespaces(0x%08lx,%p[%p])",
13361 + flags, tsk, old_ns);
13363 if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
13364 CLONE_NEWPID | CLONE_NEWNET |
13365 @@ -147,7 +194,7 @@ int copy_namespaces(unsigned long flags,
13369 - if (!ns_capable(user_ns, CAP_SYS_ADMIN))
13370 + if (!vx_ns_can_unshare(user_ns, CAP_SYS_ADMIN, flags))
13374 @@ -166,6 +213,9 @@ int copy_namespaces(unsigned long flags,
13375 return PTR_ERR(new_ns);
13377 tsk->nsproxy = new_ns;
13378 + vxdprintk(VXD_CBIT(space, 3),
13379 + "copy_namespaces(0x%08lx,%p[%p]) = [%p]",
13380 + flags, tsk, old_ns, new_ns);
13384 @@ -179,8 +229,10 @@ void free_nsproxy(struct nsproxy *ns)
13385 put_ipc_ns(ns->ipc_ns);
13386 if (ns->pid_ns_for_children)
13387 put_pid_ns(ns->pid_ns_for_children);
13389 + put_net(ns->net_ns);
13390 put_cgroup_ns(ns->cgroup_ns);
13391 - put_net(ns->net_ns);
13392 + atomic_dec(&vs_global_nsproxy);
13393 kmem_cache_free(nsproxy_cachep, ns);
13396 @@ -194,12 +246,16 @@ int unshare_nsproxy_namespaces(unsigned
13397 struct user_namespace *user_ns;
13400 + vxdprintk(VXD_CBIT(space, 4),
13401 + "unshare_nsproxy_namespaces(0x%08lx,[%p])",
13402 + unshare_flags, current->nsproxy);
13404 if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
13405 CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP)))
13408 user_ns = new_cred ? new_cred->user_ns : current_user_ns();
13409 - if (!ns_capable(user_ns, CAP_SYS_ADMIN))
13410 + if (!vx_ns_can_unshare(user_ns, CAP_SYS_ADMIN, unshare_flags))
13413 *new_nsp = create_new_namespaces(unshare_flags, current, user_ns,
13414 diff -NurpP --minimal linux-4.9.217/kernel/pid.c linux-4.9.217-vs2.3.9.12/kernel/pid.c
13415 --- linux-4.9.217/kernel/pid.c 2020-03-27 00:51:43.530136712 +0000
13416 +++ linux-4.9.217-vs2.3.9.12/kernel/pid.c 2018-10-20 04:58:14.000000000 +0000
13418 #include <linux/syscalls.h>
13419 #include <linux/proc_ns.h>
13420 #include <linux/proc_fs.h>
13421 +#include <linux/vs_pid.h>
13423 #define pid_hashfn(nr, ns) \
13424 hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
13425 @@ -381,7 +382,7 @@ EXPORT_SYMBOL_GPL(find_pid_ns);
13427 struct pid *find_vpid(int nr)
13429 - return find_pid_ns(nr, task_active_pid_ns(current));
13430 + return find_pid_ns(vx_rmap_pid(nr), task_active_pid_ns(current));
13432 EXPORT_SYMBOL_GPL(find_vpid);
13434 @@ -437,6 +438,9 @@ void transfer_pid(struct task_struct *ol
13435 struct task_struct *pid_task(struct pid *pid, enum pid_type type)
13437 struct task_struct *result = NULL;
13439 + if (type == __PIDTYPE_REALPID)
13440 + type = PIDTYPE_PID;
13442 struct hlist_node *first;
13443 first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
13444 @@ -455,7 +459,7 @@ struct task_struct *find_task_by_pid_ns(
13446 RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
13447 "find_task_by_pid_ns() needs rcu_read_lock() protection");
13448 - return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID);
13449 + return pid_task(find_pid_ns(vx_rmap_pid(nr), ns), PIDTYPE_PID);
13452 struct task_struct *find_task_by_vpid(pid_t vnr)
13453 @@ -499,7 +503,7 @@ struct pid *find_get_pid(pid_t nr)
13455 EXPORT_SYMBOL_GPL(find_get_pid);
13457 -pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
13458 +pid_t pid_unmapped_nr_ns(struct pid *pid, struct pid_namespace *ns)
13462 @@ -513,6 +517,11 @@ pid_t pid_nr_ns(struct pid *pid, struct
13464 EXPORT_SYMBOL_GPL(pid_nr_ns);
13466 +pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
13468 + return vx_map_pid(pid_unmapped_nr_ns(pid, ns));
13471 pid_t pid_vnr(struct pid *pid)
13473 return pid_nr_ns(pid, task_active_pid_ns(current));
13474 diff -NurpP --minimal linux-4.9.217/kernel/pid_namespace.c linux-4.9.217-vs2.3.9.12/kernel/pid_namespace.c
13475 --- linux-4.9.217/kernel/pid_namespace.c 2020-03-27 00:51:43.530136712 +0000
13476 +++ linux-4.9.217-vs2.3.9.12/kernel/pid_namespace.c 2019-10-05 14:58:46.030301805 +0000
13478 #include <linux/proc_ns.h>
13479 #include <linux/reboot.h>
13480 #include <linux/export.h>
13481 +#include <linux/vserver/global.h>
13485 @@ -124,6 +125,7 @@ static struct pid_namespace *create_pid_
13486 ns->ns.ops = &pidns_operations;
13488 kref_init(&ns->kref);
13489 + atomic_inc(&vs_global_pid_ns);
13491 ns->parent = get_pid_ns(parent_pid_ns);
13492 ns->user_ns = get_user_ns(user_ns);
13493 @@ -142,6 +144,7 @@ static struct pid_namespace *create_pid_
13495 kfree(ns->pidmap[0].page);
13497 + atomic_dec(&vs_global_pid_ns);
13498 kmem_cache_free(pid_ns_cachep, ns);
13500 dec_pid_namespaces(ucounts);
13501 diff -NurpP --minimal linux-4.9.217/kernel/printk/printk.c linux-4.9.217-vs2.3.9.12/kernel/printk/printk.c
13502 --- linux-4.9.217/kernel/printk/printk.c 2020-03-27 00:51:43.630135138 +0000
13503 +++ linux-4.9.217-vs2.3.9.12/kernel/printk/printk.c 2019-12-25 15:37:52.568420017 +0000
13505 #include <linux/utsname.h>
13506 #include <linux/ctype.h>
13507 #include <linux/uio.h>
13508 +#include <linux/vs_cvirt.h>
13510 #include <asm/uaccess.h>
13511 #include <asm/sections.h>
13512 @@ -611,7 +612,7 @@ int check_syslog_permissions(int type, i
13515 if (syslog_action_restricted(type)) {
13516 - if (capable(CAP_SYSLOG))
13517 + if (vx_capable(CAP_SYSLOG, VXC_SYSLOG))
13520 * For historical reasons, accept CAP_SYS_ADMIN too, with
13521 @@ -1398,12 +1399,9 @@ int do_syslog(int type, char __user *buf
13526 - case SYSLOG_ACTION_CLOSE: /* Close log */
13528 - case SYSLOG_ACTION_OPEN: /* Open log */
13530 - case SYSLOG_ACTION_READ: /* Read from log */
13531 + if ((type == SYSLOG_ACTION_READ) ||
13532 + (type == SYSLOG_ACTION_READ_ALL) ||
13533 + (type == SYSLOG_ACTION_READ_CLEAR)) {
13535 if (!buf || len < 0)
13537 @@ -1414,6 +1412,16 @@ int do_syslog(int type, char __user *buf
13542 + if (!vx_check(0, VS_ADMIN|VS_WATCH))
13543 + return vx_do_syslog(type, buf, len);
13546 + case SYSLOG_ACTION_CLOSE: /* Close log */
13548 + case SYSLOG_ACTION_OPEN: /* Open log */
13550 + case SYSLOG_ACTION_READ: /* Read from log */
13551 error = wait_event_interruptible(log_wait,
13552 syslog_seq != log_next_seq);
13554 @@ -1426,16 +1434,6 @@ int do_syslog(int type, char __user *buf
13556 /* Read last kernel messages */
13557 case SYSLOG_ACTION_READ_ALL:
13559 - if (!buf || len < 0)
13564 - if (!access_ok(VERIFY_WRITE, buf, len)) {
13568 error = syslog_print_all(buf, len, clear);
13570 /* Clear ring buffer */
13571 diff -NurpP --minimal linux-4.9.217/kernel/ptrace.c linux-4.9.217-vs2.3.9.12/kernel/ptrace.c
13572 --- linux-4.9.217/kernel/ptrace.c 2020-03-27 00:51:43.630135138 +0000
13573 +++ linux-4.9.217-vs2.3.9.12/kernel/ptrace.c 2019-10-05 14:59:56.929168738 +0000
13575 #include <linux/syscalls.h>
13576 #include <linux/uaccess.h>
13577 #include <linux/regset.h>
13578 +#include <linux/vs_context.h>
13579 #include <linux/hw_breakpoint.h>
13580 #include <linux/cn_proc.h>
13581 #include <linux/compat.h>
13582 @@ -336,6 +337,11 @@ ok:
13583 !ptrace_has_cap(mm->user_ns, mode)))
13586 + if (!vx_check(task->xid, VS_ADMIN_P|VS_WATCH_P|VS_IDENT))
13588 + if (!vx_check(task->xid, VS_IDENT) &&
13589 + !task_vx_flags(task, VXF_STATE_ADMIN, 0))
13591 if (mode & PTRACE_MODE_SCHED)
13593 return security_ptrace_access_check(task, mode);
13594 diff -NurpP --minimal linux-4.9.217/kernel/reboot.c linux-4.9.217-vs2.3.9.12/kernel/reboot.c
13595 --- linux-4.9.217/kernel/reboot.c 2016-12-11 19:17:54.000000000 +0000
13596 +++ linux-4.9.217-vs2.3.9.12/kernel/reboot.c 2018-10-20 04:58:15.000000000 +0000
13598 #include <linux/syscalls.h>
13599 #include <linux/syscore_ops.h>
13600 #include <linux/uaccess.h>
13601 +#include <linux/vs_pid.h>
13604 * this indicates whether you can reboot with ctrl-alt-del: the default is yes
13605 @@ -269,6 +270,8 @@ EXPORT_SYMBOL_GPL(kernel_power_off);
13607 static DEFINE_MUTEX(reboot_mutex);
13609 +long vs_reboot(unsigned int, void __user *);
13612 * Reboot system call: for obvious reasons only root may call it,
13613 * and even root needs to set up some magic numbers in the registers
13614 @@ -311,6 +314,9 @@ SYSCALL_DEFINE4(reboot, int, magic1, int
13615 if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
13616 cmd = LINUX_REBOOT_CMD_HALT;
13618 + if (!vx_check(0, VS_ADMIN|VS_WATCH))
13619 + return vs_reboot(cmd, arg);
13621 mutex_lock(&reboot_mutex);
13623 case LINUX_REBOOT_CMD_RESTART:
13624 diff -NurpP --minimal linux-4.9.217/kernel/sched/core.c linux-4.9.217-vs2.3.9.12/kernel/sched/core.c
13625 --- linux-4.9.217/kernel/sched/core.c 2020-03-27 00:51:43.660134665 +0000
13626 +++ linux-4.9.217-vs2.3.9.12/kernel/sched/core.c 2019-10-22 13:47:05.639628767 +0000
13628 #include <linux/compiler.h>
13629 #include <linux/frame.h>
13630 #include <linux/prefetch.h>
13631 +#include <linux/vs_sched.h>
13632 +#include <linux/vs_cvirt.h>
13634 #include <asm/switch_to.h>
13635 #include <asm/tlb.h>
13636 @@ -3431,6 +3433,7 @@ void __noreturn do_task_dead(void)
13637 __set_current_state(TASK_DEAD);
13638 current->flags |= PF_NOFREEZE; /* tell freezer to ignore us */
13640 + printk("bad task: %p [%lx]\n", current, current->state);
13642 /* Avoid "noreturn function does return". */
13644 @@ -3824,7 +3827,7 @@ SYSCALL_DEFINE1(nice, int, increment)
13646 nice = clamp_val(nice, MIN_NICE, MAX_NICE);
13647 if (increment < 0 && !can_nice(current, nice))
13649 + return vx_flags(VXF_IGNEG_NICE, 0) ? 0 : -EPERM;
13651 retval = security_task_setnice(current, nice);
13653 diff -NurpP --minimal linux-4.9.217/kernel/sched/cputime.c linux-4.9.217-vs2.3.9.12/kernel/sched/cputime.c
13654 --- linux-4.9.217/kernel/sched/cputime.c 2020-03-27 00:51:43.690134189 +0000
13655 +++ linux-4.9.217-vs2.3.9.12/kernel/sched/cputime.c 2018-10-20 11:46:17.000000000 +0000
13657 #include <linux/kernel_stat.h>
13658 #include <linux/static_key.h>
13659 #include <linux/context_tracking.h>
13660 +#include <linux/vs_sched.h>
13662 #ifdef CONFIG_PARAVIRT
13663 #include <asm/paravirt.h>
13664 @@ -125,14 +126,17 @@ static inline void task_group_account_fi
13665 void account_user_time(struct task_struct *p, cputime_t cputime,
13666 cputime_t cputime_scaled)
13668 + struct vx_info *vxi = p->vx_info; /* p is _always_ current */
13669 + int nice = (task_nice(p) > 0);
13672 /* Add user time to process. */
13673 p->utime += cputime;
13674 p->utimescaled += cputime_scaled;
13675 + vx_account_user(vxi, cputime, nice);
13676 account_group_user_time(p, cputime);
13678 - index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
13679 + index = (nice) ? CPUTIME_NICE : CPUTIME_USER;
13681 /* Add user time to cpustat. */
13682 task_group_account_field(p, index, cputime_to_nsecs(cputime));
13683 @@ -179,9 +183,12 @@ static inline
13684 void __account_system_time(struct task_struct *p, cputime_t cputime,
13685 cputime_t cputime_scaled, int index)
13687 + struct vx_info *vxi = p->vx_info; /* p is _always_ current */
13689 /* Add system time to process. */
13690 p->stime += cputime;
13691 p->stimescaled += cputime_scaled;
13692 + vx_account_system(vxi, cputime, 0 /* do we have idle time? */);
13693 account_group_system_time(p, cputime);
13695 /* Add system time to cpustat. */
13696 diff -NurpP --minimal linux-4.9.217/kernel/sched/fair.c linux-4.9.217-vs2.3.9.12/kernel/sched/fair.c
13697 --- linux-4.9.217/kernel/sched/fair.c 2020-03-27 00:51:43.690134189 +0000
13698 +++ linux-4.9.217-vs2.3.9.12/kernel/sched/fair.c 2019-12-25 15:37:52.578419856 +0000
13700 #include <linux/mempolicy.h>
13701 #include <linux/migrate.h>
13702 #include <linux/task_work.h>
13703 +#include <linux/vs_cvirt.h>
13705 #include <trace/events/sched.h>
13707 @@ -3431,6 +3432,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, st
13708 __enqueue_entity(cfs_rq, se);
13711 + if (entity_is_task(se))
13712 + vx_activate_task(task_of(se));
13713 if (cfs_rq->nr_running == 1) {
13714 list_add_leaf_cfs_rq(cfs_rq);
13715 check_enqueue_throttle(cfs_rq);
13716 @@ -3500,6 +3503,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, st
13717 if (se != cfs_rq->curr)
13718 __dequeue_entity(cfs_rq, se);
13720 + if (entity_is_task(se))
13721 + vx_deactivate_task(task_of(se));
13722 account_entity_dequeue(cfs_rq, se);
13725 diff -NurpP --minimal linux-4.9.217/kernel/sched/loadavg.c linux-4.9.217-vs2.3.9.12/kernel/sched/loadavg.c
13726 --- linux-4.9.217/kernel/sched/loadavg.c 2020-03-27 00:51:43.690134189 +0000
13727 +++ linux-4.9.217-vs2.3.9.12/kernel/sched/loadavg.c 2018-10-20 04:58:15.000000000 +0000
13728 @@ -73,9 +73,16 @@ EXPORT_SYMBOL(avenrun); /* should be rem
13730 void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
13732 - loads[0] = (avenrun[0] + offset) << shift;
13733 - loads[1] = (avenrun[1] + offset) << shift;
13734 - loads[2] = (avenrun[2] + offset) << shift;
13735 + if (vx_flags(VXF_VIRT_LOAD, 0)) {
13736 + struct vx_info *vxi = current_vx_info();
13737 + loads[0] = (vxi->cvirt.load[0] + offset) << shift;
13738 + loads[1] = (vxi->cvirt.load[1] + offset) << shift;
13739 + loads[2] = (vxi->cvirt.load[2] + offset) << shift;
13741 + loads[0] = (avenrun[0] + offset) << shift;
13742 + loads[1] = (avenrun[1] + offset) << shift;
13743 + loads[2] = (avenrun[2] + offset) << shift;
13747 long calc_load_fold_active(struct rq *this_rq, long adjust)
13748 diff -NurpP --minimal linux-4.9.217/kernel/signal.c linux-4.9.217-vs2.3.9.12/kernel/signal.c
13749 --- linux-4.9.217/kernel/signal.c 2020-03-27 00:51:43.700134034 +0000
13750 +++ linux-4.9.217-vs2.3.9.12/kernel/signal.c 2020-04-01 09:40:32.625399016 +0000
13752 #include <linux/compat.h>
13753 #include <linux/cn_proc.h>
13754 #include <linux/compiler.h>
13755 +#include <linux/vs_context.h>
13756 +#include <linux/vs_pid.h>
13758 #define CREATE_TRACE_POINTS
13759 #include <trace/events/signal.h>
13760 @@ -782,9 +784,18 @@ static int check_kill_permission(int sig
13764 + vxdprintk(VXD_CBIT(misc, 7),
13765 + "check_kill_permission(%d,%p,%p[#%u,%u])",
13766 + sig, info, t, vx_task_xid(t), t->pid);
13768 if (!valid_signal(sig))
13771 +/* FIXME: needed? if so, why?
13772 + if ((info != SEND_SIG_NOINFO) &&
13773 + (is_si_special(info) || !si_fromuser(info)))
13776 if (!si_fromuser(info))
13779 @@ -808,6 +819,20 @@ static int check_kill_permission(int sig
13784 + if (t->pid == 1 && current->xid)
13788 + /* FIXME: we shouldn't return ESRCH ever, to avoid
13789 + loops, maybe ENOENT or EACCES? */
13790 + if (!vx_check(vx_task_xid(t), VS_WATCH_P | VS_IDENT)) {
13791 + vxdprintk(current->xid || VXD_CBIT(misc, 7),
13792 + "signal %d[%p] xid mismatch %p[#%u,%u] xid=#%u",
13793 + sig, info, t, vx_task_xid(t), t->pid, current->xid);
13797 return security_task_kill(t, info, sig, 0);
13800 @@ -1359,8 +1384,14 @@ int kill_pid_info(int sig, struct siginf
13803 p = pid_task(pid, PIDTYPE_PID);
13805 - error = group_send_sig_info(sig, info, p);
13807 + if (vx_check(vx_task_xid(p), VS_IDENT))
13808 + error = group_send_sig_info(sig, info, p);
13810 + rcu_read_unlock();
13815 if (likely(!p || error != -ESRCH))
13817 @@ -1405,7 +1436,7 @@ int kill_pid_info_as_cred(int sig, struc
13820 p = pid_task(pid, PIDTYPE_PID);
13822 + if (!p || !vx_check(vx_task_xid(p), VS_IDENT)) {
13826 @@ -1461,8 +1492,10 @@ static int kill_something_info(int sig,
13827 struct task_struct * p;
13829 for_each_process(p) {
13830 - if (task_pid_vnr(p) > 1 &&
13831 - !same_thread_group(p, current)) {
13832 + if (vx_check(vx_task_xid(p), VS_ADMIN|VS_IDENT) &&
13833 + task_pid_vnr(p) > 1 &&
13834 + !same_thread_group(p, current) &&
13835 + !vx_current_initpid(p->pid)) {
13836 int err = group_send_sig_info(sig, info, p);
13839 @@ -2333,6 +2366,11 @@ relock:
13840 !sig_kernel_only(signr))
13843 + /* virtual init is protected against user signals */
13844 + if ((ksig->info.si_code == SI_USER) &&
13845 + vx_current_initpid(current->pid))
13848 if (sig_kernel_stop(signr)) {
13850 * The default action is to stop all threads in
13851 diff -NurpP --minimal linux-4.9.217/kernel/softirq.c linux-4.9.217-vs2.3.9.12/kernel/softirq.c
13852 --- linux-4.9.217/kernel/softirq.c 2020-03-27 00:51:43.710133873 +0000
13853 +++ linux-4.9.217-vs2.3.9.12/kernel/softirq.c 2018-10-20 05:55:43.000000000 +0000
13855 #include <linux/smpboot.h>
13856 #include <linux/tick.h>
13857 #include <linux/irq.h>
13858 +#include <linux/vs_context.h>
13860 #define CREATE_TRACE_POINTS
13861 #include <trace/events/irq.h>
13862 diff -NurpP --minimal linux-4.9.217/kernel/sys.c linux-4.9.217-vs2.3.9.12/kernel/sys.c
13863 --- linux-4.9.217/kernel/sys.c 2020-03-27 00:51:43.730133562 +0000
13864 +++ linux-4.9.217-vs2.3.9.12/kernel/sys.c 2019-10-13 16:09:18.098106817 +0000
13866 #include <linux/nospec.h>
13868 #include <linux/kmsg_dump.h>
13869 +#include <linux/vs_pid.h>
13870 /* Move somewhere else to avoid recompiling? */
13871 #include <generated/utsrelease.h>
13873 @@ -159,7 +160,10 @@ static int set_one_prio(struct task_stru
13876 if (niceval < task_nice(p) && !can_nice(p, niceval)) {
13878 + if (vx_flags(VXF_IGNEG_NICE, 0))
13884 no_nice = security_task_setnice(p, niceval);
13885 @@ -210,6 +214,8 @@ SYSCALL_DEFINE3(setpriority, int, which,
13887 pgrp = task_pgrp(current);
13888 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
13889 + if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
13891 error = set_one_prio(p, niceval, error);
13892 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
13894 @@ -276,6 +282,8 @@ SYSCALL_DEFINE2(getpriority, int, which,
13896 pgrp = task_pgrp(current);
13897 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
13898 + if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
13900 niceval = nice_to_rlimit(task_nice(p));
13901 if (niceval > retval)
13903 @@ -292,6 +300,8 @@ SYSCALL_DEFINE2(getpriority, int, which,
13904 goto out_unlock; /* No processes for this user */
13906 do_each_thread(g, p) {
13907 + if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
13909 if (uid_eq(task_uid(p), uid) && task_pid_vnr(p)) {
13910 niceval = nice_to_rlimit(task_nice(p));
13911 if (niceval > retval)
13912 @@ -1211,7 +1221,8 @@ SYSCALL_DEFINE2(sethostname, char __user
13914 char tmp[__NEW_UTS_LEN];
13916 - if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
13917 + if (!vx_ns_capable(current->nsproxy->uts_ns->user_ns,
13918 + CAP_SYS_ADMIN, VXC_SET_UTSNAME))
13921 if (len < 0 || len > __NEW_UTS_LEN)
13922 @@ -1264,7 +1275,8 @@ SYSCALL_DEFINE2(setdomainname, char __us
13924 char tmp[__NEW_UTS_LEN];
13926 - if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
13927 + if (!vx_ns_capable(current->nsproxy->uts_ns->user_ns,
13928 + CAP_SYS_ADMIN, VXC_SET_UTSNAME))
13930 if (len < 0 || len > __NEW_UTS_LEN)
13932 @@ -1384,7 +1396,7 @@ int do_prlimit(struct task_struct *tsk,
13933 /* Keep the capable check against init_user_ns until
13934 cgroups can contain all limits */
13935 if (new_rlim->rlim_max > rlim->rlim_max &&
13936 - !capable(CAP_SYS_RESOURCE))
13937 + !vx_capable(CAP_SYS_RESOURCE, VXC_SET_RLIMIT))
13940 retval = security_task_setrlimit(tsk->group_leader,
13941 @@ -1437,7 +1449,8 @@ static int check_prlimit_permission(stru
13942 gid_eq(cred->gid, tcred->sgid) &&
13943 gid_eq(cred->gid, tcred->gid))
13945 - if (ns_capable(tcred->user_ns, CAP_SYS_RESOURCE))
13946 + if (vx_ns_capable(tcred->user_ns,
13947 + CAP_SYS_RESOURCE, VXC_SET_RLIMIT))
13951 @@ -2326,7 +2339,12 @@ static int do_sysinfo(struct sysinfo *in
13953 get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
13955 - info->procs = nr_threads;
13956 + if (vx_flags(VXF_VIRT_LOAD, 0)) {
13957 + struct vx_info *vxi = current_vx_info();
13958 + info->procs = atomic_read(&vxi->cvirt.nr_threads);
13960 + info->procs = nr_threads;
13965 diff -NurpP --minimal linux-4.9.217/kernel/sysctl_binary.c linux-4.9.217-vs2.3.9.12/kernel/sysctl_binary.c
13966 --- linux-4.9.217/kernel/sysctl_binary.c 2016-12-11 19:17:54.000000000 +0000
13967 +++ linux-4.9.217-vs2.3.9.12/kernel/sysctl_binary.c 2018-10-20 04:58:15.000000000 +0000
13968 @@ -74,6 +74,7 @@ static const struct bin_table bin_kern_t
13970 { CTL_INT, KERN_PANIC, "panic" },
13971 { CTL_INT, KERN_REALROOTDEV, "real-root-dev" },
13972 + { CTL_STR, KERN_VSHELPER, "vshelper" },
13974 { CTL_STR, KERN_SPARC_REBOOT, "reboot-cmd" },
13975 { CTL_INT, KERN_CTLALTDEL, "ctrl-alt-del" },
13976 diff -NurpP --minimal linux-4.9.217/kernel/sysctl.c linux-4.9.217-vs2.3.9.12/kernel/sysctl.c
13977 --- linux-4.9.217/kernel/sysctl.c 2020-03-27 00:51:43.730133562 +0000
13978 +++ linux-4.9.217-vs2.3.9.12/kernel/sysctl.c 2020-04-01 09:40:32.625399016 +0000
13980 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
13981 #include <linux/lockdep.h>
13983 +extern char vshelper_path[];
13984 #ifdef CONFIG_CHR_DEV_SG
13985 #include <scsi/sg.h>
13987 @@ -284,6 +285,13 @@ static int max_extfrag_threshold = 1000;
13989 static struct ctl_table kern_table[] = {
13991 + .procname = "vshelper",
13992 + .data = &vshelper_path,
13995 + .proc_handler = proc_dostring,
13998 .procname = "sched_child_runs_first",
13999 .data = &sysctl_sched_child_runs_first,
14000 .maxlen = sizeof(unsigned int),
14001 @@ -1429,7 +1437,6 @@ static struct ctl_table vm_table[] = {
14006 #endif /* CONFIG_COMPACTION */
14008 .procname = "min_free_kbytes",
14009 diff -NurpP --minimal linux-4.9.217/kernel/time/posix-timers.c linux-4.9.217-vs2.3.9.12/kernel/time/posix-timers.c
14010 --- linux-4.9.217/kernel/time/posix-timers.c 2020-03-27 00:51:43.770132929 +0000
14011 +++ linux-4.9.217-vs2.3.9.12/kernel/time/posix-timers.c 2019-02-22 08:37:55.983044969 +0000
14013 #include <linux/workqueue.h>
14014 #include <linux/export.h>
14015 #include <linux/hashtable.h>
14016 +#include <linux/vs_context.h>
14018 #include "timekeeping.h"
14020 @@ -417,6 +418,7 @@ int posix_timer_event(struct k_itimer *t
14022 struct task_struct *task;
14023 int shared, ret = -1;
14026 * FIXME: if ->sigq is queued we can race with
14027 * dequeue_signal()->do_schedule_next_timer().
14028 @@ -433,10 +435,18 @@ int posix_timer_event(struct k_itimer *t
14030 task = pid_task(timr->it_pid, PIDTYPE_PID);
14032 + struct vx_info_save vxis;
14033 + struct vx_info *vxi;
14035 + vxi = get_vx_info(task->vx_info);
14036 + enter_vx_info(vxi, &vxis);
14037 shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
14038 ret = send_sigqueue(timr->sigq, task, shared);
14039 + leave_vx_info(&vxis);
14040 + put_vx_info(vxi);
14044 /* If we failed to send the signal the timer stops. */
14047 diff -NurpP --minimal linux-4.9.217/kernel/time/time.c linux-4.9.217-vs2.3.9.12/kernel/time/time.c
14048 --- linux-4.9.217/kernel/time/time.c 2020-03-27 00:51:43.780132774 +0000
14049 +++ linux-4.9.217-vs2.3.9.12/kernel/time/time.c 2018-10-20 05:55:43.000000000 +0000
14051 #include <linux/fs.h>
14052 #include <linux/math64.h>
14053 #include <linux/ptrace.h>
14054 +#include <linux/vs_time.h>
14056 #include <asm/uaccess.h>
14057 #include <asm/unistd.h>
14058 @@ -94,7 +95,7 @@ SYSCALL_DEFINE1(stime, time_t __user *,
14062 - do_settimeofday(&tv);
14063 + vx_settimeofday(&tv);
14067 @@ -187,7 +188,7 @@ int do_sys_settimeofday64(const struct t
14071 - return do_settimeofday64(tv);
14072 + return vx_settimeofday64(tv);
14076 diff -NurpP --minimal linux-4.9.217/kernel/time/timekeeping.c linux-4.9.217-vs2.3.9.12/kernel/time/timekeeping.c
14077 --- linux-4.9.217/kernel/time/timekeeping.c 2020-03-27 00:51:43.790132613 +0000
14078 +++ linux-4.9.217-vs2.3.9.12/kernel/time/timekeeping.c 2019-12-25 15:09:47.185439847 +0000
14080 #include <linux/stop_machine.h>
14081 #include <linux/pvclock_gtod.h>
14082 #include <linux/compiler.h>
14083 +#include <linux/vs_time.h>
14084 +#include <linux/vs_base.h>
14086 #include "tick-internal.h"
14087 #include "ntp_internal.h"
14088 @@ -768,6 +770,15 @@ ktime_t ktime_get_with_offset(enum tk_of
14090 } while (read_seqcount_retry(&tk_core.seq, seq));
14092 +#ifdef CONFIG_VSERVER_VTIME
14093 + if ((offs == TK_OFFS_BOOT) &&
14094 + vx_flags(VXF_VIRT_UPTIME, 0) &&
14095 + !vx_check(0, VS_ADMIN|VS_WATCH)) {
14096 + struct vx_info *vxi = current_vx_info();
14097 + ktime_t bias_uptime = timespec64_to_ktime(vxi->cvirt.bias_uptime);
14098 + base = ktime_sub(base, bias_uptime);
14101 return ktime_add_ns(base, nsecs);
14104 diff -NurpP --minimal linux-4.9.217/kernel/time/timer.c linux-4.9.217-vs2.3.9.12/kernel/time/timer.c
14105 --- linux-4.9.217/kernel/time/timer.c 2020-03-27 00:51:43.790132613 +0000
14106 +++ linux-4.9.217-vs2.3.9.12/kernel/time/timer.c 2019-10-22 13:47:05.659628449 +0000
14108 #include <linux/slab.h>
14109 #include <linux/compat.h>
14110 #include <linux/random.h>
14111 +#include <linux/vs_base.h>
14112 +#include <linux/vs_cvirt.h>
14113 +#include <linux/vs_pid.h>
14114 +#include <linux/vserver/sched.h>
14116 #include <asm/uaccess.h>
14117 #include <asm/unistd.h>
14118 diff -NurpP --minimal linux-4.9.217/kernel/user_namespace.c linux-4.9.217-vs2.3.9.12/kernel/user_namespace.c
14119 --- linux-4.9.217/kernel/user_namespace.c 2020-03-27 00:51:43.850131671 +0000
14120 +++ linux-4.9.217-vs2.3.9.12/kernel/user_namespace.c 2018-10-20 05:55:43.000000000 +0000
14122 #include <linux/ctype.h>
14123 #include <linux/projid.h>
14124 #include <linux/fs_struct.h>
14125 +#include <linux/vserver/global.h>
14127 static struct kmem_cache *user_ns_cachep __read_mostly;
14128 static DEFINE_MUTEX(userns_state_mutex);
14129 @@ -115,6 +116,7 @@ int create_user_ns(struct cred *new)
14131 atomic_set(&ns->count, 1);
14132 /* Leave the new->user_ns reference with the new user namespace. */
14133 + atomic_inc(&vs_global_user_ns);
14134 ns->parent = parent_ns;
14135 ns->level = parent_ns->level + 1;
14137 @@ -185,6 +187,7 @@ static void free_user_ns(struct work_str
14138 key_put(ns->persistent_keyring_register);
14140 ns_free_inum(&ns->ns);
14141 + atomic_dec(&vs_global_user_ns);
14142 kmem_cache_free(user_ns_cachep, ns);
14143 dec_user_namespaces(ucounts);
14145 @@ -404,6 +407,18 @@ gid_t from_kgid_munged(struct user_names
14147 EXPORT_SYMBOL(from_kgid_munged);
14149 +ktag_t make_ktag(struct user_namespace *from, vtag_t tag)
14151 + return KTAGT_INIT(tag);
14153 +EXPORT_SYMBOL(make_ktag);
14155 +vtag_t from_ktag(struct user_namespace *to, ktag_t tag)
14157 + return __ktag_val(tag);
14159 +EXPORT_SYMBOL(from_ktag);
14162 * make_kprojid - Map a user-namespace projid pair into a kprojid.
14163 * @ns: User namespace that the projid is in
14164 diff -NurpP --minimal linux-4.9.217/kernel/utsname.c linux-4.9.217-vs2.3.9.12/kernel/utsname.c
14165 --- linux-4.9.217/kernel/utsname.c 2016-12-11 19:17:54.000000000 +0000
14166 +++ linux-4.9.217-vs2.3.9.12/kernel/utsname.c 2018-10-20 04:58:15.000000000 +0000
14168 #include <linux/slab.h>
14169 #include <linux/user_namespace.h>
14170 #include <linux/proc_ns.h>
14171 +#include <linux/vserver/global.h>
14173 static struct ucounts *inc_uts_namespaces(struct user_namespace *ns)
14175 @@ -32,8 +33,10 @@ static struct uts_namespace *create_uts_
14176 struct uts_namespace *uts_ns;
14178 uts_ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL);
14181 kref_init(&uts_ns->kref);
14182 + atomic_inc(&vs_global_uts_ns);
14187 @@ -111,6 +114,7 @@ void free_uts_ns(struct kref *kref)
14188 dec_uts_namespaces(ns->ucounts);
14189 put_user_ns(ns->user_ns);
14190 ns_free_inum(&ns->ns);
14191 + atomic_dec(&vs_global_uts_ns);
14195 diff -NurpP --minimal linux-4.9.217/kernel/vserver/cacct.c linux-4.9.217-vs2.3.9.12/kernel/vserver/cacct.c
14196 --- linux-4.9.217/kernel/vserver/cacct.c 1970-01-01 00:00:00.000000000 +0000
14197 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/cacct.c 2018-10-20 04:58:15.000000000 +0000
14200 + * linux/kernel/vserver/cacct.c
14202 + * Virtual Server: Context Accounting
14204 + * Copyright (C) 2006-2007 Herbert Pötzl
14206 + * V0.01 added accounting stats
14210 +#include <linux/types.h>
14211 +#include <linux/vs_context.h>
14212 +#include <linux/vserver/cacct_cmd.h>
14213 +#include <linux/vserver/cacct_int.h>
14215 +#include <asm/errno.h>
14216 +#include <asm/uaccess.h>
14219 +int vc_sock_stat(struct vx_info *vxi, void __user *data)
14221 + struct vcmd_sock_stat_v0 vc_data;
14224 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
14227 + field = vc_data.field;
14228 + if ((field < 0) || (field >= VXA_SOCK_SIZE))
14231 + for (j = 0; j < 3; j++) {
14232 + vc_data.count[j] = vx_sock_count(&vxi->cacct, field, j);
14233 + vc_data.total[j] = vx_sock_total(&vxi->cacct, field, j);
14236 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
14241 diff -NurpP --minimal linux-4.9.217/kernel/vserver/cacct_init.h linux-4.9.217-vs2.3.9.12/kernel/vserver/cacct_init.h
14242 --- linux-4.9.217/kernel/vserver/cacct_init.h 1970-01-01 00:00:00.000000000 +0000
14243 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/cacct_init.h 2018-10-20 04:58:15.000000000 +0000
14247 +static inline void vx_info_init_cacct(struct _vx_cacct *cacct)
14252 + for (i = 0; i < VXA_SOCK_SIZE; i++) {
14253 + for (j = 0; j < 3; j++) {
14254 + atomic_long_set(&cacct->sock[i][j].count, 0);
14255 + atomic_long_set(&cacct->sock[i][j].total, 0);
14258 + for (i = 0; i < 8; i++)
14259 + atomic_set(&cacct->slab[i], 0);
14260 + for (i = 0; i < 5; i++)
14261 + for (j = 0; j < 4; j++)
14262 + atomic_set(&cacct->page[i][j], 0);
14265 +static inline void vx_info_exit_cacct(struct _vx_cacct *cacct)
14270 diff -NurpP --minimal linux-4.9.217/kernel/vserver/cacct_proc.h linux-4.9.217-vs2.3.9.12/kernel/vserver/cacct_proc.h
14271 --- linux-4.9.217/kernel/vserver/cacct_proc.h 1970-01-01 00:00:00.000000000 +0000
14272 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/cacct_proc.h 2018-10-20 04:58:15.000000000 +0000
14274 +#ifndef _VX_CACCT_PROC_H
14275 +#define _VX_CACCT_PROC_H
14277 +#include <linux/vserver/cacct_int.h>
14280 +#define VX_SOCKA_TOP \
14281 + "Type\t recv #/bytes\t\t send #/bytes\t\t fail #/bytes\n"
14283 +static inline int vx_info_proc_cacct(struct _vx_cacct *cacct, char *buffer)
14285 + int i, j, length = 0;
14286 + static char *type[VXA_SOCK_SIZE] = {
14287 + "UNSPEC", "UNIX", "INET", "INET6", "PACKET", "OTHER"
14290 + length += sprintf(buffer + length, VX_SOCKA_TOP);
14291 + for (i = 0; i < VXA_SOCK_SIZE; i++) {
14292 + length += sprintf(buffer + length, "%s:", type[i]);
14293 + for (j = 0; j < 3; j++) {
14294 + length += sprintf(buffer + length,
14295 + "\t%10lu/%-10lu",
14296 + vx_sock_count(cacct, i, j),
14297 + vx_sock_total(cacct, i, j));
14299 + buffer[length++] = '\n';
14302 + length += sprintf(buffer + length, "\n");
14303 + length += sprintf(buffer + length,
14304 + "slab:\t %8u %8u %8u %8u\n",
14305 + atomic_read(&cacct->slab[1]),
14306 + atomic_read(&cacct->slab[4]),
14307 + atomic_read(&cacct->slab[0]),
14308 + atomic_read(&cacct->slab[2]));
14310 + length += sprintf(buffer + length, "\n");
14311 + for (i = 0; i < 5; i++) {
14312 + length += sprintf(buffer + length,
14313 + "page[%d]: %8u %8u %8u %8u\t %8u %8u %8u %8u\n", i,
14314 + atomic_read(&cacct->page[i][0]),
14315 + atomic_read(&cacct->page[i][1]),
14316 + atomic_read(&cacct->page[i][2]),
14317 + atomic_read(&cacct->page[i][3]),
14318 + atomic_read(&cacct->page[i][4]),
14319 + atomic_read(&cacct->page[i][5]),
14320 + atomic_read(&cacct->page[i][6]),
14321 + atomic_read(&cacct->page[i][7]));
14326 +#endif /* _VX_CACCT_PROC_H */
14327 diff -NurpP --minimal linux-4.9.217/kernel/vserver/context.c linux-4.9.217-vs2.3.9.12/kernel/vserver/context.c
14328 --- linux-4.9.217/kernel/vserver/context.c 1970-01-01 00:00:00.000000000 +0000
14329 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/context.c 2018-10-20 04:58:15.000000000 +0000
14332 + * linux/kernel/vserver/context.c
14334 + * Virtual Server: Context Support
14336 + * Copyright (C) 2003-2011 Herbert Pötzl
14338 + * V0.01 context helper
14339 + * V0.02 vx_ctx_kill syscall command
14340 + * V0.03 replaced context_info calls
14341 + * V0.04 redesign of struct (de)alloc
14342 + * V0.05 rlimit basic implementation
14343 + * V0.06 task_xid and info commands
14344 + * V0.07 context flags and caps
14345 + * V0.08 switch to RCU based hash
14346 + * V0.09 revert to non RCU for now
14347 + * V0.10 and back to working RCU hash
14348 + * V0.11 and back to locking again
14349 + * V0.12 referenced context store
14350 + * V0.13 separate per cpu data
14351 + * V0.14 changed vcmds to vxi arg
14352 + * V0.15 added context stat
14353 + * V0.16 have __create claim() the vxi
14354 + * V0.17 removed older and legacy stuff
14355 + * V0.18 added user credentials
14356 + * V0.19 added warn mask
14360 +#include <linux/slab.h>
14361 +#include <linux/types.h>
14362 +#include <linux/security.h>
14363 +#include <linux/pid_namespace.h>
14364 +#include <linux/capability.h>
14366 +#include <linux/vserver/context.h>
14367 +#include <linux/vserver/network.h>
14368 +#include <linux/vserver/debug.h>
14369 +#include <linux/vserver/limit.h>
14370 +#include <linux/vserver/limit_int.h>
14371 +#include <linux/vserver/space.h>
14372 +#include <linux/init_task.h>
14373 +#include <linux/fs_struct.h>
14374 +#include <linux/cred.h>
14376 +#include <linux/vs_context.h>
14377 +#include <linux/vs_limit.h>
14378 +#include <linux/vs_pid.h>
14379 +#include <linux/vserver/context_cmd.h>
14381 +#include "cvirt_init.h"
14382 +#include "cacct_init.h"
14383 +#include "limit_init.h"
14384 +#include "sched_init.h"
14387 +atomic_t vx_global_ctotal = ATOMIC_INIT(0);
14388 +atomic_t vx_global_cactive = ATOMIC_INIT(0);
14391 +/* now inactive context structures */
14393 +static struct hlist_head vx_info_inactive = HLIST_HEAD_INIT;
14395 +static DEFINE_SPINLOCK(vx_info_inactive_lock);
14398 +/* __alloc_vx_info()
14400 + * allocate an initialized vx_info struct
14401 + * doesn't make it visible (hash) */
14403 +static struct vx_info *__alloc_vx_info(vxid_t xid)
14405 + struct vx_info *new = NULL;
14408 + vxdprintk(VXD_CBIT(xid, 0), "alloc_vx_info(%d)*", xid);
14410 + /* would this benefit from a slab cache? */
14411 + new = kmalloc(sizeof(struct vx_info), GFP_KERNEL);
14415 + memset(new, 0, sizeof(struct vx_info));
14417 + new->ptr_pc = alloc_percpu(struct _vx_info_pc);
14418 + if (!new->ptr_pc)
14421 + new->vx_id = xid;
14422 + INIT_HLIST_NODE(&new->vx_hlist);
14423 + atomic_set(&new->vx_usecnt, 0);
14424 + atomic_set(&new->vx_tasks, 0);
14425 + new->vx_parent = NULL;
14426 + new->vx_state = 0;
14427 + init_waitqueue_head(&new->vx_wait);
14429 + /* prepare reaper */
14430 + get_task_struct(init_pid_ns.child_reaper);
14431 + new->vx_reaper = init_pid_ns.child_reaper;
14432 + new->vx_badness_bias = 0;
14434 + /* rest of init goes here */
14435 + vx_info_init_limit(&new->limit);
14436 + vx_info_init_sched(&new->sched);
14437 + vx_info_init_cvirt(&new->cvirt);
14438 + vx_info_init_cacct(&new->cacct);
14440 + /* per cpu data structures */
14441 + for_each_possible_cpu(cpu) {
14442 + vx_info_init_sched_pc(
14443 + &vx_per_cpu(new, sched_pc, cpu), cpu);
14444 + vx_info_init_cvirt_pc(
14445 + &vx_per_cpu(new, cvirt_pc, cpu), cpu);
14448 + new->vx_flags = VXF_INIT_SET;
14449 + new->vx_bcaps = CAP_FULL_SET; // maybe ~CAP_SETPCAP
14450 + new->vx_ccaps = 0;
14451 + new->vx_umask = 0;
14452 + new->vx_wmask = 0;
14454 + new->reboot_cmd = 0;
14455 + new->exit_code = 0;
14457 + // preconfig spaces
14458 + for (index = 0; index < VX_SPACES; index++) {
14459 + struct _vx_space *space = &new->space[index];
14462 + spin_lock(&init_fs.lock);
14464 + spin_unlock(&init_fs.lock);
14465 + space->vx_fs = &init_fs;
14467 + /* FIXME: do we want defaults? */
14468 + // space->vx_real_cred = 0;
14469 + // space->vx_cred = 0;
14473 + vxdprintk(VXD_CBIT(xid, 0),
14474 + "alloc_vx_info(%d) = %p", xid, new);
14475 + vxh_alloc_vx_info(new);
14476 + atomic_inc(&vx_global_ctotal);
14485 +/* __dealloc_vx_info()
14487 + * final disposal of vx_info */
14489 +static void __dealloc_vx_info(struct vx_info *vxi)
14491 +#ifdef CONFIG_VSERVER_WARN
14492 + struct vx_info_save vxis;
14495 + vxdprintk(VXD_CBIT(xid, 0),
14496 + "dealloc_vx_info(%p)", vxi);
14497 + vxh_dealloc_vx_info(vxi);
14499 +#ifdef CONFIG_VSERVER_WARN
14500 + enter_vx_info(vxi, &vxis);
14501 + vx_info_exit_limit(&vxi->limit);
14502 + vx_info_exit_sched(&vxi->sched);
14503 + vx_info_exit_cvirt(&vxi->cvirt);
14504 + vx_info_exit_cacct(&vxi->cacct);
14506 + for_each_possible_cpu(cpu) {
14507 + vx_info_exit_sched_pc(
14508 + &vx_per_cpu(vxi, sched_pc, cpu), cpu);
14509 + vx_info_exit_cvirt_pc(
14510 + &vx_per_cpu(vxi, cvirt_pc, cpu), cpu);
14512 + leave_vx_info(&vxis);
14516 + vxi->vx_state |= VXS_RELEASED;
14519 + free_percpu(vxi->ptr_pc);
14522 + atomic_dec(&vx_global_ctotal);
14525 +static void __shutdown_vx_info(struct vx_info *vxi)
14527 + struct nsproxy *nsproxy;
14528 + struct fs_struct *fs;
14529 + struct cred *cred;
14534 + vxi->vx_state |= VXS_SHUTDOWN;
14535 + vs_state_change(vxi, VSC_SHUTDOWN);
14537 + for (index = 0; index < VX_SPACES; index++) {
14538 + struct _vx_space *space = &vxi->space[index];
14540 + nsproxy = xchg(&space->vx_nsproxy, NULL);
14542 + put_nsproxy(nsproxy);
14544 + fs = xchg(&space->vx_fs, NULL);
14545 + spin_lock(&fs->lock);
14546 + kill = !--fs->users;
14547 + spin_unlock(&fs->lock);
14549 + free_fs_struct(fs);
14551 + cred = (struct cred *)xchg(&space->vx_cred, NULL);
14553 + abort_creds(cred);
14557 +/* exported stuff */
14559 +void free_vx_info(struct vx_info *vxi)
14561 + unsigned long flags;
14564 + /* check for reference counts first */
14565 + BUG_ON(atomic_read(&vxi->vx_usecnt));
14566 + BUG_ON(atomic_read(&vxi->vx_tasks));
14568 + /* context must not be hashed */
14569 + BUG_ON(vx_info_state(vxi, VXS_HASHED));
14571 + /* context shutdown is mandatory */
14572 + BUG_ON(!vx_info_state(vxi, VXS_SHUTDOWN));
14574 + /* spaces check */
14575 + for (index = 0; index < VX_SPACES; index++) {
14576 + struct _vx_space *space = &vxi->space[index];
14578 + BUG_ON(space->vx_nsproxy);
14579 + BUG_ON(space->vx_fs);
14580 + // BUG_ON(space->vx_real_cred);
14581 + // BUG_ON(space->vx_cred);
14584 + spin_lock_irqsave(&vx_info_inactive_lock, flags);
14585 + hlist_del(&vxi->vx_hlist);
14586 + spin_unlock_irqrestore(&vx_info_inactive_lock, flags);
14588 + __dealloc_vx_info(vxi);
14592 +/* hash table for vx_info hash */
14594 +#define VX_HASH_SIZE 13
14596 +static struct hlist_head vx_info_hash[VX_HASH_SIZE] =
14597 + { [0 ... VX_HASH_SIZE-1] = HLIST_HEAD_INIT };
14599 +static DEFINE_SPINLOCK(vx_info_hash_lock);
14602 +static inline unsigned int __hashval(vxid_t xid)
14604 + return (xid % VX_HASH_SIZE);
14609 +/* __hash_vx_info()
14611 + * add the vxi to the global hash table
14612 + * requires the hash_lock to be held */
14614 +static inline void __hash_vx_info(struct vx_info *vxi)
14616 + struct hlist_head *head;
14618 + vxd_assert_lock(&vx_info_hash_lock);
14619 + vxdprintk(VXD_CBIT(xid, 4),
14620 + "__hash_vx_info: %p[#%d]", vxi, vxi->vx_id);
14621 + vxh_hash_vx_info(vxi);
14623 + /* context must not be hashed */
14624 + BUG_ON(vx_info_state(vxi, VXS_HASHED));
14626 + vxi->vx_state |= VXS_HASHED;
14627 + head = &vx_info_hash[__hashval(vxi->vx_id)];
14628 + hlist_add_head(&vxi->vx_hlist, head);
14629 + atomic_inc(&vx_global_cactive);
14632 +/* __unhash_vx_info()
14634 + * remove the vxi from the global hash table
14635 + * requires the hash_lock to be held */
14637 +static inline void __unhash_vx_info(struct vx_info *vxi)
14639 + unsigned long flags;
14641 + vxd_assert_lock(&vx_info_hash_lock);
14642 + vxdprintk(VXD_CBIT(xid, 4),
14643 + "__unhash_vx_info: %p[#%d.%d.%d]", vxi, vxi->vx_id,
14644 + atomic_read(&vxi->vx_usecnt), atomic_read(&vxi->vx_tasks));
14645 + vxh_unhash_vx_info(vxi);
14647 + /* context must be hashed */
14648 + BUG_ON(!vx_info_state(vxi, VXS_HASHED));
14649 + /* but without tasks */
14650 + BUG_ON(atomic_read(&vxi->vx_tasks));
14652 + vxi->vx_state &= ~VXS_HASHED;
14653 + hlist_del_init(&vxi->vx_hlist);
14654 + spin_lock_irqsave(&vx_info_inactive_lock, flags);
14655 + hlist_add_head(&vxi->vx_hlist, &vx_info_inactive);
14656 + spin_unlock_irqrestore(&vx_info_inactive_lock, flags);
14657 + atomic_dec(&vx_global_cactive);
14661 +/* __lookup_vx_info()
14663 + * requires the hash_lock to be held
14664 + * doesn't increment the vx_usecnt */
14666 +static inline struct vx_info *__lookup_vx_info(vxid_t xid)
14668 + struct hlist_head *head = &vx_info_hash[__hashval(xid)];
14669 + struct hlist_node *pos;
14670 + struct vx_info *vxi;
14672 + vxd_assert_lock(&vx_info_hash_lock);
14673 + hlist_for_each(pos, head) {
14674 + vxi = hlist_entry(pos, struct vx_info, vx_hlist);
14676 + if (vxi->vx_id == xid)
14681 + vxdprintk(VXD_CBIT(xid, 0),
14682 + "__lookup_vx_info(#%u): %p[#%u]",
14683 + xid, vxi, vxi ? vxi->vx_id : 0);
14684 + vxh_lookup_vx_info(vxi, xid);
14689 +/* __create_vx_info()
14691 + * create the requested context
14692 + * get(), claim() and hash it */
14694 +static struct vx_info *__create_vx_info(int id)
14696 + struct vx_info *new, *vxi = NULL;
14698 + vxdprintk(VXD_CBIT(xid, 1), "create_vx_info(%d)*", id);
14700 + if (!(new = __alloc_vx_info(id)))
14701 + return ERR_PTR(-ENOMEM);
14703 + /* required to make dynamic xids unique */
14704 + spin_lock(&vx_info_hash_lock);
14706 + /* static context requested */
14707 + if ((vxi = __lookup_vx_info(id))) {
14708 + vxdprintk(VXD_CBIT(xid, 0),
14709 + "create_vx_info(%d) = %p (already there)", id, vxi);
14710 + if (vx_info_flags(vxi, VXF_STATE_SETUP, 0))
14711 + vxi = ERR_PTR(-EBUSY);
14713 + vxi = ERR_PTR(-EEXIST);
14716 + /* new context */
14717 + vxdprintk(VXD_CBIT(xid, 0),
14718 + "create_vx_info(%d) = %p (new)", id, new);
14719 + claim_vx_info(new, NULL);
14720 + __hash_vx_info(get_vx_info(new));
14721 + vxi = new, new = NULL;
14724 + spin_unlock(&vx_info_hash_lock);
14725 + vxh_create_vx_info(IS_ERR(vxi) ? NULL : vxi, id);
14727 + __dealloc_vx_info(new);
14732 +/* exported stuff */
14735 +void unhash_vx_info(struct vx_info *vxi)
14737 + spin_lock(&vx_info_hash_lock);
14738 + __unhash_vx_info(vxi);
14739 + spin_unlock(&vx_info_hash_lock);
14740 + __shutdown_vx_info(vxi);
14741 + __wakeup_vx_info(vxi);
14745 +/* lookup_vx_info()
14747 + * search for a vx_info and get() it
14748 + * negative id means current */
14750 +struct vx_info *lookup_vx_info(int id)
14752 + struct vx_info *vxi = NULL;
14755 + vxi = get_vx_info(current_vx_info());
14756 + } else if (id > 1) {
14757 + spin_lock(&vx_info_hash_lock);
14758 + vxi = get_vx_info(__lookup_vx_info(id));
14759 + spin_unlock(&vx_info_hash_lock);
14764 +/* xid_is_hashed()
14766 + * verify that xid is still hashed */
14768 +int xid_is_hashed(vxid_t xid)
14772 + spin_lock(&vx_info_hash_lock);
14773 + hashed = (__lookup_vx_info(xid) != NULL);
14774 + spin_unlock(&vx_info_hash_lock);
14778 +#ifdef CONFIG_PROC_FS
14782 + * get a subset of hashed xids for proc
14783 + * assumes size is at least one */
14785 +int get_xid_list(int index, unsigned int *xids, int size)
14787 + int hindex, nr_xids = 0;
14789 + /* only show current and children */
14790 + if (!vx_check(0, VS_ADMIN | VS_WATCH)) {
14793 + xids[nr_xids] = vx_current_xid();
14797 + for (hindex = 0; hindex < VX_HASH_SIZE; hindex++) {
14798 + struct hlist_head *head = &vx_info_hash[hindex];
14799 + struct hlist_node *pos;
14801 + spin_lock(&vx_info_hash_lock);
14802 + hlist_for_each(pos, head) {
14803 + struct vx_info *vxi;
14808 + vxi = hlist_entry(pos, struct vx_info, vx_hlist);
14809 + xids[nr_xids] = vxi->vx_id;
14810 + if (++nr_xids >= size) {
14811 + spin_unlock(&vx_info_hash_lock);
14815 + /* keep the lock time short */
14816 + spin_unlock(&vx_info_hash_lock);
14823 +#ifdef CONFIG_VSERVER_DEBUG
14825 +void dump_vx_info_inactive(int level)
14827 + struct hlist_node *entry, *next;
14829 + hlist_for_each_safe(entry, next, &vx_info_inactive) {
14830 + struct vx_info *vxi =
14831 + list_entry(entry, struct vx_info, vx_hlist);
14833 + dump_vx_info(vxi, level);
14840 +int vx_migrate_user(struct task_struct *p, struct vx_info *vxi)
14842 + struct user_struct *new_user, *old_user;
14847 + if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0))
14850 + new_user = alloc_uid(vxi->vx_id, p->uid);
14854 + old_user = p->user;
14855 + if (new_user != old_user) {
14856 + atomic_inc(&new_user->processes);
14857 + atomic_dec(&old_user->processes);
14858 + p->user = new_user;
14860 + free_uid(old_user);
14866 +void vx_mask_cap_bset(struct vx_info *vxi, struct task_struct *p)
14868 + // p->cap_effective &= vxi->vx_cap_bset;
14869 + p->cap_effective =
14870 + cap_intersect(p->cap_effective, vxi->cap_bset);
14871 + // p->cap_inheritable &= vxi->vx_cap_bset;
14872 + p->cap_inheritable =
14873 + cap_intersect(p->cap_inheritable, vxi->cap_bset);
14874 + // p->cap_permitted &= vxi->vx_cap_bset;
14875 + p->cap_permitted =
14876 + cap_intersect(p->cap_permitted, vxi->cap_bset);
14881 +#include <linux/file.h>
14882 +#include <linux/fdtable.h>
14884 +static int vx_openfd_task(struct task_struct *tsk)
14886 + struct files_struct *files = tsk->files;
14887 + struct fdtable *fdt;
14888 + const unsigned long *bptr;
14889 + int count, total;
14891 + /* no rcu_read_lock() because of spin_lock() */
14892 + spin_lock(&files->file_lock);
14893 + fdt = files_fdtable(files);
14894 + bptr = fdt->open_fds;
14895 + count = fdt->max_fds / (sizeof(unsigned long) * 8);
14896 + for (total = 0; count > 0; count--) {
14898 + total += hweight_long(*bptr);
14901 + spin_unlock(&files->file_lock);
14906 +/* for *space compatibility */
14908 +asmlinkage long sys_unshare(unsigned long);
14911 + * migrate task to new context
14912 + * gets vxi, puts old_vxi on change
14913 + * optionally unshares namespaces (hack)
14916 +int vx_migrate_task(struct task_struct *p, struct vx_info *vxi, int unshare)
14918 + struct vx_info *old_vxi;
14924 + vxdprintk(VXD_CBIT(xid, 5),
14925 + "vx_migrate_task(%p,%p[#%d.%d])", p, vxi,
14926 + vxi->vx_id, atomic_read(&vxi->vx_usecnt));
14928 + if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0) &&
14929 + !vx_info_flags(vxi, VXF_STATE_SETUP, 0))
14932 + if (vx_info_state(vxi, VXS_SHUTDOWN))
14935 + old_vxi = task_get_vx_info(p);
14936 + if (old_vxi == vxi)
14939 +// if (!(ret = vx_migrate_user(p, vxi))) {
14944 + openfd = vx_openfd_task(p);
14947 + atomic_dec(&old_vxi->cvirt.nr_threads);
14948 + atomic_dec(&old_vxi->cvirt.nr_running);
14949 + __rlim_dec(&old_vxi->limit, RLIMIT_NPROC);
14950 + /* FIXME: what about the struct files here? */
14951 + __rlim_sub(&old_vxi->limit, VLIMIT_OPENFD, openfd);
14952 + /* account for the executable */
14953 + __rlim_dec(&old_vxi->limit, VLIMIT_DENTRY);
14955 + atomic_inc(&vxi->cvirt.nr_threads);
14956 + atomic_inc(&vxi->cvirt.nr_running);
14957 + __rlim_inc(&vxi->limit, RLIMIT_NPROC);
14958 + /* FIXME: what about the struct files here? */
14959 + __rlim_add(&vxi->limit, VLIMIT_OPENFD, openfd);
14960 + /* account for the executable */
14961 + __rlim_inc(&vxi->limit, VLIMIT_DENTRY);
14964 + release_vx_info(old_vxi, p);
14965 + clr_vx_info(&p->vx_info);
14967 + claim_vx_info(vxi, p);
14968 + set_vx_info(&p->vx_info, vxi);
14969 + p->xid = vxi->vx_id;
14971 + vxdprintk(VXD_CBIT(xid, 5),
14972 + "moved task %p into vxi:%p[#%d]",
14973 + p, vxi, vxi->vx_id);
14975 + // vx_mask_cap_bset(vxi, p);
14978 + /* hack for *spaces to provide compatibility */
14980 + struct nsproxy *old_nsp, *new_nsp;
14982 + ret = unshare_nsproxy_namespaces(
14983 + CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER,
14984 + &new_nsp, NULL, NULL);
14988 + old_nsp = xchg(&p->nsproxy, new_nsp);
14989 + vx_set_space(vxi,
14990 + CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER, 0);
14991 + put_nsproxy(old_nsp);
14995 + put_vx_info(old_vxi);
14999 +int vx_set_reaper(struct vx_info *vxi, struct task_struct *p)
15001 + struct task_struct *old_reaper;
15002 + struct vx_info *reaper_vxi;
15007 + vxdprintk(VXD_CBIT(xid, 6),
15008 + "vx_set_reaper(%p[#%d],%p[#%d,%d])",
15009 + vxi, vxi->vx_id, p, p->xid, p->pid);
15011 + old_reaper = vxi->vx_reaper;
15012 + if (old_reaper == p)
15015 + reaper_vxi = task_get_vx_info(p);
15016 + if (reaper_vxi && reaper_vxi != vxi) {
15018 + "Unsuitable reaper [" VS_Q("%s") ",%u:#%u] "
15020 + p->comm, p->pid, p->xid, vx_current_xid());
15024 + /* set new child reaper */
15025 + get_task_struct(p);
15026 + vxi->vx_reaper = p;
15027 + put_task_struct(old_reaper);
15029 + put_vx_info(reaper_vxi);
15033 +int vx_set_init(struct vx_info *vxi, struct task_struct *p)
15038 + vxdprintk(VXD_CBIT(xid, 6),
15039 + "vx_set_init(%p[#%d],%p[#%d,%d,%d])",
15040 + vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);
15042 + vxi->vx_flags &= ~VXF_STATE_INIT;
15043 + // vxi->vx_initpid = p->tgid;
15044 + vxi->vx_initpid = p->pid;
15048 +void vx_exit_init(struct vx_info *vxi, struct task_struct *p, int code)
15050 + vxdprintk(VXD_CBIT(xid, 6),
15051 + "vx_exit_init(%p[#%d],%p[#%d,%d,%d])",
15052 + vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);
15054 + vxi->exit_code = code;
15055 + vxi->vx_initpid = 0;
15059 +void vx_set_persistent(struct vx_info *vxi)
15061 + vxdprintk(VXD_CBIT(xid, 6),
15062 + "vx_set_persistent(%p[#%d])", vxi, vxi->vx_id);
15064 + get_vx_info(vxi);
15065 + claim_vx_info(vxi, NULL);
15068 +void vx_clear_persistent(struct vx_info *vxi)
15070 + vxdprintk(VXD_CBIT(xid, 6),
15071 + "vx_clear_persistent(%p[#%d])", vxi, vxi->vx_id);
15073 + release_vx_info(vxi, NULL);
15074 + put_vx_info(vxi);
15077 +void vx_update_persistent(struct vx_info *vxi)
15079 + if (vx_info_flags(vxi, VXF_PERSISTENT, 0))
15080 + vx_set_persistent(vxi);
15082 + vx_clear_persistent(vxi);
15086 +/* task must be current or locked */
15088 +void exit_vx_info(struct task_struct *p, int code)
15090 + struct vx_info *vxi = p->vx_info;
15093 + atomic_dec(&vxi->cvirt.nr_threads);
15096 + vxi->exit_code = code;
15097 + release_vx_info(vxi, p);
15101 +void exit_vx_info_early(struct task_struct *p, int code)
15103 + struct vx_info *vxi = p->vx_info;
15106 + if (vxi->vx_initpid == p->pid)
15107 + vx_exit_init(vxi, p, code);
15108 + if (vxi->vx_reaper == p)
15109 + vx_set_reaper(vxi, init_pid_ns.child_reaper);
15114 +/* vserver syscall commands below here */
15116 +/* task xid and vx_info functions */
15118 +#include <asm/uaccess.h>
15121 +int vc_task_xid(uint32_t id)
15126 + struct task_struct *tsk;
15129 + tsk = find_task_by_real_pid(id);
15130 + xid = (tsk) ? tsk->xid : -ESRCH;
15131 + rcu_read_unlock();
15133 + xid = vx_current_xid();
15138 +int vc_vx_info(struct vx_info *vxi, void __user *data)
15140 + struct vcmd_vx_info_v0 vc_data;
15142 + vc_data.xid = vxi->vx_id;
15143 + vc_data.initpid = vxi->vx_initpid;
15145 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15151 +int vc_ctx_stat(struct vx_info *vxi, void __user *data)
15153 + struct vcmd_ctx_stat_v0 vc_data;
15155 + vc_data.usecnt = atomic_read(&vxi->vx_usecnt);
15156 + vc_data.tasks = atomic_read(&vxi->vx_tasks);
15158 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15164 +/* context functions */
15166 +int vc_ctx_create(uint32_t xid, void __user *data)
15168 + struct vcmd_ctx_create vc_data = { .flagword = VXF_INIT_SET };
15169 + struct vx_info *new_vxi;
15172 + if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
15175 + if ((xid > MAX_S_CONTEXT) || (xid < 2))
15178 + new_vxi = __create_vx_info(xid);
15179 + if (IS_ERR(new_vxi))
15180 + return PTR_ERR(new_vxi);
15182 + /* initial flags */
15183 + new_vxi->vx_flags = vc_data.flagword;
15186 + if (vs_state_change(new_vxi, VSC_STARTUP))
15189 + ret = vx_migrate_task(current, new_vxi, (!data));
15193 + /* return context id on success */
15194 + ret = new_vxi->vx_id;
15196 + /* get a reference for persistent contexts */
15197 + if ((vc_data.flagword & VXF_PERSISTENT))
15198 + vx_set_persistent(new_vxi);
15200 + release_vx_info(new_vxi, NULL);
15201 + put_vx_info(new_vxi);
15206 +int vc_ctx_migrate(struct vx_info *vxi, void __user *data)
15208 + struct vcmd_ctx_migrate vc_data = { .flagword = 0 };
15211 + if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
15214 + ret = vx_migrate_task(current, vxi, 0);
15217 + if (vc_data.flagword & VXM_SET_INIT)
15218 + ret = vx_set_init(vxi, current);
15221 + if (vc_data.flagword & VXM_SET_REAPER)
15222 + ret = vx_set_reaper(vxi, current);
15227 +int vc_get_cflags(struct vx_info *vxi, void __user *data)
15229 + struct vcmd_ctx_flags_v0 vc_data;
15231 + vc_data.flagword = vxi->vx_flags;
15233 + /* special STATE flag handling */
15234 + vc_data.mask = vs_mask_flags(~0ULL, vxi->vx_flags, VXF_ONE_TIME);
15236 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15241 +int vc_set_cflags(struct vx_info *vxi, void __user *data)
15243 + struct vcmd_ctx_flags_v0 vc_data;
15244 + uint64_t mask, trigger;
15246 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
15249 + /* special STATE flag handling */
15250 + mask = vs_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME);
15251 + trigger = (mask & vxi->vx_flags) ^ (mask & vc_data.flagword);
15253 + if (vxi == current_vx_info()) {
15254 + /* if (trigger & VXF_STATE_SETUP)
15255 + vx_mask_cap_bset(vxi, current); */
15256 + if (trigger & VXF_STATE_INIT) {
15259 + ret = vx_set_init(vxi, current);
15262 + ret = vx_set_reaper(vxi, current);
15268 + vxi->vx_flags = vs_mask_flags(vxi->vx_flags,
15269 + vc_data.flagword, mask);
15270 + if (trigger & VXF_PERSISTENT)
15271 + vx_update_persistent(vxi);
15277 +static inline uint64_t caps_from_cap_t(kernel_cap_t c)
15279 + uint64_t v = c.cap[0] | ((uint64_t)c.cap[1] << 32);
15281 + // printk("caps_from_cap_t(%08x:%08x) = %016llx\n", c.cap[1], c.cap[0], v);
15285 +static inline kernel_cap_t cap_t_from_caps(uint64_t v)
15287 + kernel_cap_t c = __cap_empty_set;
15289 + c.cap[0] = v & 0xFFFFFFFF;
15290 + c.cap[1] = (v >> 32) & 0xFFFFFFFF;
15292 + // printk("cap_t_from_caps(%016llx) = %08x:%08x\n", v, c.cap[1], c.cap[0]);
15297 +static int do_get_caps(struct vx_info *vxi, uint64_t *bcaps, uint64_t *ccaps)
15300 + *bcaps = caps_from_cap_t(vxi->vx_bcaps);
15302 + *ccaps = vxi->vx_ccaps;
15307 +int vc_get_ccaps(struct vx_info *vxi, void __user *data)
15309 + struct vcmd_ctx_caps_v1 vc_data;
15312 + ret = do_get_caps(vxi, NULL, &vc_data.ccaps);
15315 + vc_data.cmask = ~0ULL;
15317 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15322 +static int do_set_caps(struct vx_info *vxi,
15323 + uint64_t bcaps, uint64_t bmask, uint64_t ccaps, uint64_t cmask)
15325 + uint64_t bcold = caps_from_cap_t(vxi->vx_bcaps);
15328 + printk("do_set_caps(%16llx, %16llx, %16llx, %16llx)\n",
15329 + bcaps, bmask, ccaps, cmask);
15331 + vxi->vx_bcaps = cap_t_from_caps(
15332 + vs_mask_flags(bcold, bcaps, bmask));
15333 + vxi->vx_ccaps = vs_mask_flags(vxi->vx_ccaps, ccaps, cmask);
15338 +int vc_set_ccaps(struct vx_info *vxi, void __user *data)
15340 + struct vcmd_ctx_caps_v1 vc_data;
15342 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
15345 + return do_set_caps(vxi, 0, 0, vc_data.ccaps, vc_data.cmask);
15348 +int vc_get_bcaps(struct vx_info *vxi, void __user *data)
15350 + struct vcmd_bcaps vc_data;
15353 + ret = do_get_caps(vxi, &vc_data.bcaps, NULL);
15356 + vc_data.bmask = ~0ULL;
15358 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15363 +int vc_set_bcaps(struct vx_info *vxi, void __user *data)
15365 + struct vcmd_bcaps vc_data;
15367 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
15370 + return do_set_caps(vxi, vc_data.bcaps, vc_data.bmask, 0, 0);
15374 +int vc_get_umask(struct vx_info *vxi, void __user *data)
15376 + struct vcmd_umask vc_data;
15378 + vc_data.umask = vxi->vx_umask;
15379 + vc_data.mask = ~0ULL;
15381 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15386 +int vc_set_umask(struct vx_info *vxi, void __user *data)
15388 + struct vcmd_umask vc_data;
15390 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
15393 + vxi->vx_umask = vs_mask_flags(vxi->vx_umask,
15394 + vc_data.umask, vc_data.mask);
15399 +int vc_get_wmask(struct vx_info *vxi, void __user *data)
15401 + struct vcmd_wmask vc_data;
15403 + vc_data.wmask = vxi->vx_wmask;
15404 + vc_data.mask = ~0ULL;
15406 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15411 +int vc_set_wmask(struct vx_info *vxi, void __user *data)
15413 + struct vcmd_wmask vc_data;
15415 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
15418 + vxi->vx_wmask = vs_mask_flags(vxi->vx_wmask,
15419 + vc_data.wmask, vc_data.mask);
15424 +int vc_get_badness(struct vx_info *vxi, void __user *data)
15426 + struct vcmd_badness_v0 vc_data;
15428 + vc_data.bias = vxi->vx_badness_bias;
15430 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15435 +int vc_set_badness(struct vx_info *vxi, void __user *data)
15437 + struct vcmd_badness_v0 vc_data;
15439 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
15442 + vxi->vx_badness_bias = vc_data.bias;
15446 +#include <linux/module.h>
15448 +EXPORT_SYMBOL_GPL(free_vx_info);
15450 diff -NurpP --minimal linux-4.9.217/kernel/vserver/cvirt.c linux-4.9.217-vs2.3.9.12/kernel/vserver/cvirt.c
15451 --- linux-4.9.217/kernel/vserver/cvirt.c 1970-01-01 00:00:00.000000000 +0000
15452 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/cvirt.c 2018-10-20 04:58:15.000000000 +0000
15455 + * linux/kernel/vserver/cvirt.c
15457 + * Virtual Server: Context Virtualization
15459 + * Copyright (C) 2004-2007 Herbert Pötzl
15461 + * V0.01 broken out from limit.c
15462 + * V0.02 added utsname stuff
15463 + * V0.03 changed vcmds to vxi arg
15467 +#include <linux/types.h>
15468 +#include <linux/utsname.h>
15469 +#include <linux/vs_cvirt.h>
15470 +#include <linux/vserver/switch.h>
15471 +#include <linux/vserver/cvirt_cmd.h>
15473 +#include <asm/uaccess.h>
15476 +void vx_vsi_boottime64(struct timespec64 *boottime)
15478 + struct vx_info *vxi = current_vx_info();
15480 + set_normalized_timespec64(boottime,
15481 + boottime->tv_sec + vxi->cvirt.bias_uptime.tv_sec,
15482 + boottime->tv_nsec + vxi->cvirt.bias_uptime.tv_nsec);
15486 +void vx_vsi_uptime(struct timespec *uptime, struct timespec *idle)
15488 + struct vx_info *vxi = current_vx_info();
15490 + set_normalized_timespec(uptime,
15491 + uptime->tv_sec - vxi->cvirt.bias_uptime.tv_sec,
15492 + uptime->tv_nsec - vxi->cvirt.bias_uptime.tv_nsec);
15495 + set_normalized_timespec(idle,
15496 + idle->tv_sec - vxi->cvirt.bias_idle.tv_sec,
15497 + idle->tv_nsec - vxi->cvirt.bias_idle.tv_nsec);
15501 +uint64_t vx_idle_jiffies(void)
15503 + return init_task.utime + init_task.stime;
15508 +static inline uint32_t __update_loadavg(uint32_t load,
15509 + int wsize, int delta, int n)
15511 + unsigned long long calc, prev;
15513 + /* just set it to n */
15514 + if (unlikely(delta >= wsize))
15515 + return (n << FSHIFT);
15517 + calc = delta * n;
15519 + prev = (wsize - delta);
15522 + do_div(calc, wsize);
15527 +void vx_update_load(struct vx_info *vxi)
15529 + uint32_t now, last, delta;
15530 + unsigned int nr_running, nr_uninterruptible;
15531 + unsigned int total;
15532 + unsigned long flags;
15534 + spin_lock_irqsave(&vxi->cvirt.load_lock, flags);
15537 + last = vxi->cvirt.load_last;
15538 + delta = now - last;
15540 + if (delta < 5*HZ)
15543 + nr_running = atomic_read(&vxi->cvirt.nr_running);
15544 + nr_uninterruptible = atomic_read(&vxi->cvirt.nr_uninterruptible);
15545 + total = nr_running + nr_uninterruptible;
15547 + vxi->cvirt.load[0] = __update_loadavg(vxi->cvirt.load[0],
15548 + 60*HZ, delta, total);
15549 + vxi->cvirt.load[1] = __update_loadavg(vxi->cvirt.load[1],
15550 + 5*60*HZ, delta, total);
15551 + vxi->cvirt.load[2] = __update_loadavg(vxi->cvirt.load[2],
15552 + 15*60*HZ, delta, total);
15554 + vxi->cvirt.load_last = now;
15556 + atomic_inc(&vxi->cvirt.load_updates);
15557 + spin_unlock_irqrestore(&vxi->cvirt.load_lock, flags);
15562 + * Commands to do_syslog:
15564 + * 0 -- Close the log. Currently a NOP.
15565 + * 1 -- Open the log. Currently a NOP.
15566 + * 2 -- Read from the log.
15567 + * 3 -- Read all messages remaining in the ring buffer.
15568 + * 4 -- Read and clear all messages remaining in the ring buffer
15569 + * 5 -- Clear ring buffer.
15570 + * 6 -- Disable printk's to console
15571 + * 7 -- Enable printk's to console
15572 + * 8 -- Set level of messages printed to console
15573 + * 9 -- Return number of unread characters in the log buffer
15574 + * 10 -- Return size of the log buffer
15576 +int vx_do_syslog(int type, char __user *buf, int len)
15579 + int do_clear = 0;
15580 + struct vx_info *vxi = current_vx_info();
15581 + struct _vx_syslog *log;
15585 + log = &vxi->cvirt.syslog;
15588 + case 0: /* Close log */
15589 + case 1: /* Open log */
15591 + case 2: /* Read from log */
15592 + error = wait_event_interruptible(log->log_wait,
15593 + (log->log_start - log->log_end));
15596 + spin_lock_irq(&log->logbuf_lock);
15597 + spin_unlock_irq(&log->logbuf_lock);
15599 + case 4: /* Read/clear last kernel messages */
15601 + /* fall through */
15602 + case 3: /* Read last kernel messages */
15605 + case 5: /* Clear ring buffer */
15608 + case 6: /* Disable logging to console */
15609 + case 7: /* Enable logging to console */
15610 + case 8: /* Set level of messages printed to console */
15613 + case 9: /* Number of chars in the log buffer */
15615 + case 10: /* Size of the log buffer */
15625 +/* virtual host info names */
15627 +static char *vx_vhi_name(struct vx_info *vxi, int id)
15629 + struct nsproxy *nsproxy;
15630 + struct uts_namespace *uts;
15632 + if (id == VHIN_CONTEXT)
15633 + return vxi->vx_name;
15635 + nsproxy = vxi->space[0].vx_nsproxy;
15639 + uts = nsproxy->uts_ns;
15644 + case VHIN_SYSNAME:
15645 + return uts->name.sysname;
15646 + case VHIN_NODENAME:
15647 + return uts->name.nodename;
15648 + case VHIN_RELEASE:
15649 + return uts->name.release;
15650 + case VHIN_VERSION:
15651 + return uts->name.version;
15652 + case VHIN_MACHINE:
15653 + return uts->name.machine;
15654 + case VHIN_DOMAINNAME:
15655 + return uts->name.domainname;
15662 +int vc_set_vhi_name(struct vx_info *vxi, void __user *data)
15664 + struct vcmd_vhi_name_v0 vc_data;
15667 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
15670 + name = vx_vhi_name(vxi, vc_data.field);
15674 + memcpy(name, vc_data.name, 65);
15678 +int vc_get_vhi_name(struct vx_info *vxi, void __user *data)
15680 + struct vcmd_vhi_name_v0 vc_data;
15683 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
15686 + name = vx_vhi_name(vxi, vc_data.field);
15690 + memcpy(vc_data.name, name, 65);
15691 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15697 +int vc_virt_stat(struct vx_info *vxi, void __user *data)
15699 + struct vcmd_virt_stat_v0 vc_data;
15700 + struct _vx_cvirt *cvirt = &vxi->cvirt;
15701 + struct timespec64 uptime;
15703 + ktime_get_ts64(&uptime);
15704 + set_normalized_timespec64(&uptime,
15705 + uptime.tv_sec - cvirt->bias_uptime.tv_sec,
15706 + uptime.tv_nsec - cvirt->bias_uptime.tv_nsec);
15708 + vc_data.offset = timespec64_to_ns(&cvirt->bias_ts);
15709 + vc_data.uptime = timespec64_to_ns(&uptime);
15710 + vc_data.nr_threads = atomic_read(&cvirt->nr_threads);
15711 + vc_data.nr_running = atomic_read(&cvirt->nr_running);
15712 + vc_data.nr_uninterruptible = atomic_read(&cvirt->nr_uninterruptible);
15713 + vc_data.nr_onhold = atomic_read(&cvirt->nr_onhold);
15714 + vc_data.nr_forks = atomic_read(&cvirt->total_forks);
15715 + vc_data.load[0] = cvirt->load[0];
15716 + vc_data.load[1] = cvirt->load[1];
15717 + vc_data.load[2] = cvirt->load[2];
15719 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15725 +#ifdef CONFIG_VSERVER_VTIME
15727 +/* virtualized time base */
15729 +void vx_adjust_timespec(struct timespec *ts)
15731 + struct vx_info *vxi;
15733 + if (!vx_flags(VXF_VIRT_TIME, 0))
15736 + vxi = current_vx_info();
15737 + ts->tv_sec += vxi->cvirt.bias_ts.tv_sec;
15738 + ts->tv_nsec += vxi->cvirt.bias_ts.tv_nsec;
15740 + if (ts->tv_nsec >= NSEC_PER_SEC) {
15742 + ts->tv_nsec -= NSEC_PER_SEC;
15743 + } else if (ts->tv_nsec < 0) {
15745 + ts->tv_nsec += NSEC_PER_SEC;
15749 +void vx_adjust_timespec64(struct timespec64 *ts)
15751 + struct vx_info *vxi;
15753 + if (!vx_flags(VXF_VIRT_TIME, 0))
15756 + vxi = current_vx_info();
15757 + ts->tv_sec += vxi->cvirt.bias_ts.tv_sec;
15758 + ts->tv_nsec += vxi->cvirt.bias_ts.tv_nsec;
15760 + if (ts->tv_nsec >= NSEC_PER_SEC) {
15762 + ts->tv_nsec -= NSEC_PER_SEC;
15763 + } else if (ts->tv_nsec < 0) {
15765 + ts->tv_nsec += NSEC_PER_SEC;
15769 +int vx_settimeofday(const struct timespec *ts)
15771 + struct timespec ats, delta;
15772 + struct vx_info *vxi;
15774 + if (!vx_flags(VXF_VIRT_TIME, 0))
15775 + return do_settimeofday(ts);
15777 + getnstimeofday(&ats);
15778 + delta = timespec_sub(*ts, ats);
15780 + vxi = current_vx_info();
15781 + vxi->cvirt.bias_ts = timespec64_add(vxi->cvirt.bias_ts,
15782 + timespec_to_timespec64(delta));
15786 +int vx_settimeofday64(const struct timespec64 *ts)
15788 + struct timespec64 ats, delta;
15789 + struct vx_info *vxi;
15791 + if (!vx_flags(VXF_VIRT_TIME, 0))
15792 + return do_settimeofday64(ts);
15794 + getnstimeofday64(&ats);
15795 + delta = timespec64_sub(*ts, ats);
15797 + vxi = current_vx_info();
15798 + vxi->cvirt.bias_ts = timespec64_add(vxi->cvirt.bias_ts, delta);
15804 diff -NurpP --minimal linux-4.9.217/kernel/vserver/cvirt_init.h linux-4.9.217-vs2.3.9.12/kernel/vserver/cvirt_init.h
15805 --- linux-4.9.217/kernel/vserver/cvirt_init.h 1970-01-01 00:00:00.000000000 +0000
15806 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/cvirt_init.h 2018-10-20 04:58:15.000000000 +0000
15810 +extern uint64_t vx_idle_jiffies(void);
15812 +static inline void vx_info_init_cvirt(struct _vx_cvirt *cvirt)
15814 + uint64_t idle_jiffies = vx_idle_jiffies();
15815 + uint64_t nsuptime;
15817 + ktime_get_ts64(&cvirt->bias_uptime);
15818 + nsuptime = (unsigned long long)cvirt->bias_uptime.tv_sec
15819 + * NSEC_PER_SEC + cvirt->bias_uptime.tv_nsec;
15820 + cvirt->bias_clock = nsec_to_clock_t(nsuptime);
15821 + cvirt->bias_ts.tv_sec = 0;
15822 + cvirt->bias_ts.tv_nsec = 0;
15824 + jiffies_to_timespec64(idle_jiffies, &cvirt->bias_idle);
15825 + atomic_set(&cvirt->nr_threads, 0);
15826 + atomic_set(&cvirt->nr_running, 0);
15827 + atomic_set(&cvirt->nr_uninterruptible, 0);
15828 + atomic_set(&cvirt->nr_onhold, 0);
15830 + spin_lock_init(&cvirt->load_lock);
15831 + cvirt->load_last = jiffies;
15832 + atomic_set(&cvirt->load_updates, 0);
15833 + cvirt->load[0] = 0;
15834 + cvirt->load[1] = 0;
15835 + cvirt->load[2] = 0;
15836 + atomic_set(&cvirt->total_forks, 0);
15838 + spin_lock_init(&cvirt->syslog.logbuf_lock);
15839 + init_waitqueue_head(&cvirt->syslog.log_wait);
15840 + cvirt->syslog.log_start = 0;
15841 + cvirt->syslog.log_end = 0;
15842 + cvirt->syslog.con_start = 0;
15843 + cvirt->syslog.logged_chars = 0;
15847 +void vx_info_init_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc, int cpu)
15849 + // cvirt_pc->cpustat = { 0 };
15852 +static inline void vx_info_exit_cvirt(struct _vx_cvirt *cvirt)
15854 +#ifdef CONFIG_VSERVER_WARN
15857 + vxwprintk_xid((value = atomic_read(&cvirt->nr_threads)),
15858 + "!!! cvirt: %p[nr_threads] = %d on exit.",
15860 + vxwprintk_xid((value = atomic_read(&cvirt->nr_running)),
15861 + "!!! cvirt: %p[nr_running] = %d on exit.",
15863 + vxwprintk_xid((value = atomic_read(&cvirt->nr_uninterruptible)),
15864 + "!!! cvirt: %p[nr_uninterruptible] = %d on exit.",
15866 + vxwprintk_xid((value = atomic_read(&cvirt->nr_onhold)),
15867 + "!!! cvirt: %p[nr_onhold] = %d on exit.",
15873 +void vx_info_exit_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc, int cpu)
15878 diff -NurpP --minimal linux-4.9.217/kernel/vserver/cvirt_proc.h linux-4.9.217-vs2.3.9.12/kernel/vserver/cvirt_proc.h
15879 --- linux-4.9.217/kernel/vserver/cvirt_proc.h 1970-01-01 00:00:00.000000000 +0000
15880 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/cvirt_proc.h 2018-10-20 04:58:15.000000000 +0000
15882 +#ifndef _VX_CVIRT_PROC_H
15883 +#define _VX_CVIRT_PROC_H
15885 +#include <linux/nsproxy.h>
15886 +#include <linux/mnt_namespace.h>
15887 +#include <linux/ipc_namespace.h>
15888 +#include <linux/utsname.h>
15889 +#include <linux/ipc.h>
15891 +extern int vx_info_mnt_namespace(struct mnt_namespace *, char *);
15894 +int vx_info_proc_nsproxy(struct nsproxy *nsproxy, char *buffer)
15896 + struct mnt_namespace *ns;
15897 + struct uts_namespace *uts;
15898 + struct ipc_namespace *ipc;
15904 + length += sprintf(buffer + length,
15905 + "NSProxy:\t%p [%p,%p,%p]\n",
15906 + nsproxy, nsproxy->mnt_ns,
15907 + nsproxy->uts_ns, nsproxy->ipc_ns);
15909 + ns = nsproxy->mnt_ns;
15913 + length += vx_info_mnt_namespace(ns, buffer + length);
15917 + uts = nsproxy->uts_ns;
15921 + length += sprintf(buffer + length,
15922 + "SysName:\t%.*s\n"
15923 + "NodeName:\t%.*s\n"
15924 + "Release:\t%.*s\n"
15925 + "Version:\t%.*s\n"
15926 + "Machine:\t%.*s\n"
15927 + "DomainName:\t%.*s\n",
15928 + __NEW_UTS_LEN, uts->name.sysname,
15929 + __NEW_UTS_LEN, uts->name.nodename,
15930 + __NEW_UTS_LEN, uts->name.release,
15931 + __NEW_UTS_LEN, uts->name.version,
15932 + __NEW_UTS_LEN, uts->name.machine,
15933 + __NEW_UTS_LEN, uts->name.domainname);
15936 + ipc = nsproxy->ipc_ns;
15940 + length += sprintf(buffer + length,
15941 + "SEMS:\t\t%d %d %d %d %d\n"
15942 + "MSG:\t\t%d %d %d\n"
15943 + "SHM:\t\t%lu %lu %d %ld\n",
15944 + ipc->sem_ctls[0], ipc->sem_ctls[1],
15945 + ipc->sem_ctls[2], ipc->sem_ctls[3],
15947 + ipc->msg_ctlmax, ipc->msg_ctlmnb, ipc->msg_ctlmni,
15948 + (unsigned long)ipc->shm_ctlmax,
15949 + (unsigned long)ipc->shm_ctlall,
15950 + ipc->shm_ctlmni, ipc->shm_tot);
15957 +#include <linux/sched.h>
15959 +#define LOAD_INT(x) ((x) >> FSHIFT)
15960 +#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1 - 1)) * 100)
15963 +int vx_info_proc_cvirt(struct _vx_cvirt *cvirt, char *buffer)
15968 + length += sprintf(buffer + length,
15969 + "BiasUptime:\t%llu.%02lu\n",
15970 + (unsigned long long)cvirt->bias_uptime.tv_sec,
15971 + (cvirt->bias_uptime.tv_nsec / (NSEC_PER_SEC / 100)));
15973 + a = cvirt->load[0] + (FIXED_1 / 200);
15974 + b = cvirt->load[1] + (FIXED_1 / 200);
15975 + c = cvirt->load[2] + (FIXED_1 / 200);
15976 + length += sprintf(buffer + length,
15977 + "nr_threads:\t%d\n"
15978 + "nr_running:\t%d\n"
15979 + "nr_unintr:\t%d\n"
15980 + "nr_onhold:\t%d\n"
15981 + "load_updates:\t%d\n"
15982 + "loadavg:\t%d.%02d %d.%02d %d.%02d\n"
15983 + "total_forks:\t%d\n",
15984 + atomic_read(&cvirt->nr_threads),
15985 + atomic_read(&cvirt->nr_running),
15986 + atomic_read(&cvirt->nr_uninterruptible),
15987 + atomic_read(&cvirt->nr_onhold),
15988 + atomic_read(&cvirt->load_updates),
15989 + LOAD_INT(a), LOAD_FRAC(a),
15990 + LOAD_INT(b), LOAD_FRAC(b),
15991 + LOAD_INT(c), LOAD_FRAC(c),
15992 + atomic_read(&cvirt->total_forks));
15997 +int vx_info_proc_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc,
15998 + char *buffer, int cpu)
16004 +#endif /* _VX_CVIRT_PROC_H */
16005 diff -NurpP --minimal linux-4.9.217/kernel/vserver/debug.c linux-4.9.217-vs2.3.9.12/kernel/vserver/debug.c
16006 --- linux-4.9.217/kernel/vserver/debug.c 1970-01-01 00:00:00.000000000 +0000
16007 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/debug.c 2018-10-20 04:58:15.000000000 +0000
16010 + * kernel/vserver/debug.c
16012 + * Copyright (C) 2005-2007 Herbert Pötzl
16014 + * V0.01 vx_info dump support
16018 +#include <linux/module.h>
16020 +#include <linux/vserver/context.h>
16023 +void dump_vx_info(struct vx_info *vxi, int level)
16025 + printk("vx_info %p[#%d, %d.%d, %4x]\n", vxi, vxi->vx_id,
16026 + atomic_read(&vxi->vx_usecnt),
16027 + atomic_read(&vxi->vx_tasks),
16030 + __dump_vx_limit(&vxi->limit);
16031 + __dump_vx_sched(&vxi->sched);
16032 + __dump_vx_cvirt(&vxi->cvirt);
16033 + __dump_vx_cacct(&vxi->cacct);
16039 +EXPORT_SYMBOL_GPL(dump_vx_info);
16041 diff -NurpP --minimal linux-4.9.217/kernel/vserver/device.c linux-4.9.217-vs2.3.9.12/kernel/vserver/device.c
16042 --- linux-4.9.217/kernel/vserver/device.c 1970-01-01 00:00:00.000000000 +0000
16043 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/device.c 2018-10-20 04:58:15.000000000 +0000
16046 + * linux/kernel/vserver/device.c
16048 + * Linux-VServer: Device Support
16050 + * Copyright (C) 2006 Herbert Pötzl
16051 + * Copyright (C) 2007 Daniel Hokka Zakrisson
16053 + * V0.01 device mapping basics
16054 + * V0.02 added defaults
16058 +#include <linux/slab.h>
16059 +#include <linux/rcupdate.h>
16060 +#include <linux/fs.h>
16061 +#include <linux/namei.h>
16062 +#include <linux/hash.h>
16064 +#include <asm/errno.h>
16065 +#include <asm/uaccess.h>
16066 +#include <linux/vserver/base.h>
16067 +#include <linux/vserver/debug.h>
16068 +#include <linux/vserver/context.h>
16069 +#include <linux/vserver/device.h>
16070 +#include <linux/vserver/device_cmd.h>
16073 +#define DMAP_HASH_BITS 4
16076 +struct vs_mapping {
16078 + struct hlist_node hlist;
16079 + struct list_head list;
16081 +#define dm_hlist u.hlist
16082 +#define dm_list u.list
16085 + struct vx_dmap_target target;
16089 +static struct hlist_head dmap_main_hash[1 << DMAP_HASH_BITS];
16091 +static DEFINE_SPINLOCK(dmap_main_hash_lock);
16093 +static struct vx_dmap_target dmap_defaults[2] = {
16094 + { .flags = DATTR_OPEN },
16095 + { .flags = DATTR_OPEN },
16099 +struct kmem_cache *dmap_cachep __read_mostly;
16101 +int __init dmap_cache_init(void)
16103 + dmap_cachep = kmem_cache_create("dmap_cache",
16104 + sizeof(struct vs_mapping), 0,
16105 + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
16109 +__initcall(dmap_cache_init);
16112 +static inline unsigned int __hashval(dev_t dev, int bits)
16114 + return hash_long((unsigned long)dev, bits);
16118 +/* __hash_mapping()
16119 + * add the mapping to the hash table
16121 +static inline void __hash_mapping(struct vx_info *vxi, struct vs_mapping *vdm)
16123 + spinlock_t *hash_lock = &dmap_main_hash_lock;
16124 + struct hlist_head *head, *hash = dmap_main_hash;
16125 + int device = vdm->device;
16127 + spin_lock(hash_lock);
16128 + vxdprintk(VXD_CBIT(misc, 8), "__hash_mapping: %p[#%d] %08x:%08x",
16129 + vxi, vxi ? vxi->vx_id : 0, device, vdm->target.target);
16131 + head = &hash[__hashval(device, DMAP_HASH_BITS)];
16132 + hlist_add_head(&vdm->dm_hlist, head);
16133 + spin_unlock(hash_lock);
16137 +static inline int __mode_to_default(umode_t mode)
16150 +/* __set_default()
16153 +static inline void __set_default(struct vx_info *vxi, umode_t mode,
16154 + struct vx_dmap_target *vdmt)
16156 + spinlock_t *hash_lock = &dmap_main_hash_lock;
16157 + spin_lock(hash_lock);
16160 + vxi->dmap.targets[__mode_to_default(mode)] = *vdmt;
16162 + dmap_defaults[__mode_to_default(mode)] = *vdmt;
16165 + spin_unlock(hash_lock);
16167 + vxdprintk(VXD_CBIT(misc, 8), "__set_default: %p[#%u] %08x %04x",
16168 + vxi, vxi ? vxi->vx_id : 0, vdmt->target, vdmt->flags);
16172 +/* __remove_default()
16173 + * remove a default
16175 +static inline int __remove_default(struct vx_info *vxi, umode_t mode)
16177 + spinlock_t *hash_lock = &dmap_main_hash_lock;
16178 + spin_lock(hash_lock);
16181 + vxi->dmap.targets[__mode_to_default(mode)].flags = 0;
16182 + else /* remove == reset */
16183 + dmap_defaults[__mode_to_default(mode)].flags = DATTR_OPEN | mode;
16185 + spin_unlock(hash_lock);
16190 +/* __find_mapping()
16191 + * find a mapping in the hash table
16193 + * caller must hold hash_lock
16195 +static inline int __find_mapping(vxid_t xid, dev_t device, umode_t mode,
16196 + struct vs_mapping **local, struct vs_mapping **global)
16198 + struct hlist_head *hash = dmap_main_hash;
16199 + struct hlist_head *head = &hash[__hashval(device, DMAP_HASH_BITS)];
16200 + struct hlist_node *pos;
16201 + struct vs_mapping *vdm;
16207 + hlist_for_each(pos, head) {
16208 + vdm = hlist_entry(pos, struct vs_mapping, dm_hlist);
16210 + if ((vdm->device == device) &&
16211 + !((vdm->target.flags ^ mode) & S_IFMT)) {
16212 + if (vdm->xid == xid) {
16215 + } else if (global && vdm->xid == 0)
16220 + if (global && *global)
16227 +/* __lookup_mapping()
16228 + * find a mapping and store the result in target and flags
16230 +static inline int __lookup_mapping(struct vx_info *vxi,
16231 + dev_t device, dev_t *target, int *flags, umode_t mode)
16233 + spinlock_t *hash_lock = &dmap_main_hash_lock;
16234 + struct vs_mapping *vdm, *global;
16235 + struct vx_dmap_target *vdmt;
16237 + vxid_t xid = vxi ? vxi->vx_id : 0; /* vxi is NULL-checked below, so don't deref it blindly */
16240 + spin_lock(hash_lock);
16241 + if (__find_mapping(xid, device, mode, &vdm, &global) > 0) {
16243 + vdmt = &vdm->target;
16247 + index = __mode_to_default(mode);
16248 + if (vxi && vxi->dmap.targets[index].flags) {
16250 + vdmt = &vxi->dmap.targets[index];
16251 + } else if (global) {
16253 + vdmt = &global->target;
16257 + vdmt = &dmap_defaults[index];
16261 + if (target && (vdmt->flags & DATTR_REMAP))
16262 + *target = vdmt->target;
16264 + *target = device;
16266 + *flags = vdmt->flags;
16268 + spin_unlock(hash_lock);
16274 +/* __remove_mapping()
16275 + * remove a mapping from the hash table
16277 +static inline int __remove_mapping(struct vx_info *vxi, dev_t device,
16280 + spinlock_t *hash_lock = &dmap_main_hash_lock;
16281 + struct vs_mapping *vdm = NULL;
16284 + spin_lock(hash_lock);
16286 + ret = __find_mapping((vxi ? vxi->vx_id : 0), device, mode, &vdm,
16288 + vxdprintk(VXD_CBIT(misc, 8), "__remove_mapping: %p[#%d] %08x %04x",
16289 + vxi, vxi ? vxi->vx_id : 0, device, mode);
16292 + hlist_del(&vdm->dm_hlist);
16295 + spin_unlock(hash_lock);
16297 + kmem_cache_free(dmap_cachep, vdm);
16303 +int vs_map_device(struct vx_info *vxi,
16304 + dev_t device, dev_t *target, umode_t mode)
16306 + int ret, flags = DATTR_MASK;
16310 + *target = device;
16313 + ret = __lookup_mapping(vxi, device, target, &flags, mode);
16314 + vxdprintk(VXD_CBIT(misc, 8), "vs_map_device: %08x target: %08x flags: %04x mode: %04x mapped=%d",
16315 + device, target ? *target : 0, flags, mode, ret);
16317 + return (flags & DATTR_MASK);
16322 +static int do_set_mapping(struct vx_info *vxi,
16323 + dev_t device, dev_t target, int flags, umode_t mode)
16326 + struct vs_mapping *new;
16328 + new = kmem_cache_alloc(dmap_cachep, GFP_KERNEL);
16332 + INIT_HLIST_NODE(&new->dm_hlist);
16333 + new->device = device;
16334 + new->target.target = target;
16335 + new->target.flags = flags | mode;
16336 + new->xid = (vxi ? vxi->vx_id : 0);
16338 + vxdprintk(VXD_CBIT(misc, 8), "do_set_mapping: %08x target: %08x flags: %04x", device, target, flags);
16339 + __hash_mapping(vxi, new);
16341 + struct vx_dmap_target new = {
16342 + .target = target,
16343 + .flags = flags | mode,
16345 + __set_default(vxi, mode, &new);
16351 +static int do_unset_mapping(struct vx_info *vxi,
16352 + dev_t device, dev_t target, int flags, umode_t mode)
16354 + int ret = -EINVAL;
16357 + ret = __remove_mapping(vxi, device, mode);
16361 + ret = __remove_default(vxi, mode);
16371 +static inline int __user_device(const char __user *name, dev_t *dev,
16374 + struct path path;
16381 + ret = user_lpath(name, &path);
16384 + if (path.dentry->d_inode) {
16385 + *dev = path.dentry->d_inode->i_rdev;
16386 + *mode = path.dentry->d_inode->i_mode;
16392 +static inline int __mapping_mode(dev_t device, dev_t target,
16393 + umode_t device_mode, umode_t target_mode, umode_t *mode)
16396 + *mode = device_mode & S_IFMT;
16398 + *mode = target_mode & S_IFMT;
16402 + /* if both given, device and target mode have to match */
16403 + if (device && target &&
16404 + ((device_mode ^ target_mode) & S_IFMT))
16410 +static inline int do_mapping(struct vx_info *vxi, const char __user *device_path,
16411 + const char __user *target_path, int flags, int set)
16413 + dev_t device = ~0, target = ~0;
16414 + umode_t device_mode = 0, target_mode = 0, mode;
16417 + ret = __user_device(device_path, &device, &device_mode);
16420 + ret = __user_device(target_path, &target, &target_mode);
16424 + ret = __mapping_mode(device, target,
16425 + device_mode, target_mode, &mode);
16430 + return do_set_mapping(vxi, device, target,
16433 + return do_unset_mapping(vxi, device, target,
16438 +int vc_set_mapping(struct vx_info *vxi, void __user *data)
16440 + struct vcmd_set_mapping_v0 vc_data;
16442 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16445 + return do_mapping(vxi, vc_data.device, vc_data.target,
16446 + vc_data.flags, 1);
16449 +int vc_unset_mapping(struct vx_info *vxi, void __user *data)
16451 + struct vcmd_set_mapping_v0 vc_data;
16453 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16456 + return do_mapping(vxi, vc_data.device, vc_data.target,
16457 + vc_data.flags, 0);
16461 +#ifdef CONFIG_COMPAT
16463 +int vc_set_mapping_x32(struct vx_info *vxi, void __user *data)
16465 + struct vcmd_set_mapping_v0_x32 vc_data;
16467 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16470 + return do_mapping(vxi, compat_ptr(vc_data.device_ptr),
16471 + compat_ptr(vc_data.target_ptr), vc_data.flags, 1);
16474 +int vc_unset_mapping_x32(struct vx_info *vxi, void __user *data)
16476 + struct vcmd_set_mapping_v0_x32 vc_data;
16478 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16481 + return do_mapping(vxi, compat_ptr(vc_data.device_ptr),
16482 + compat_ptr(vc_data.target_ptr), vc_data.flags, 0);
16485 +#endif /* CONFIG_COMPAT */
16488 diff -NurpP --minimal linux-4.9.217/kernel/vserver/dlimit.c linux-4.9.217-vs2.3.9.12/kernel/vserver/dlimit.c
16489 --- linux-4.9.217/kernel/vserver/dlimit.c 1970-01-01 00:00:00.000000000 +0000
16490 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/dlimit.c 2018-10-20 04:58:15.000000000 +0000
16493 + * linux/kernel/vserver/dlimit.c
16495 + * Virtual Server: Context Disk Limits
16497 + * Copyright (C) 2004-2009 Herbert Pötzl
16499 + * V0.01 initial version
16500 + * V0.02 compat32 splitup
16501 + * V0.03 extended interface
16505 +#include <linux/statfs.h>
16506 +#include <linux/sched.h>
16507 +#include <linux/namei.h>
16508 +#include <linux/vs_tag.h>
16509 +#include <linux/vs_dlimit.h>
16510 +#include <linux/vserver/dlimit_cmd.h>
16511 +#include <linux/slab.h>
16512 +// #include <linux/gfp.h>
16514 +#include <asm/uaccess.h>
16516 +/* __alloc_dl_info()
16518 + * allocate an initialized dl_info struct
16519 + * doesn't make it visible (hash) */
16521 +static struct dl_info *__alloc_dl_info(struct super_block *sb, vtag_t tag)
16523 + struct dl_info *new = NULL;
16525 + vxdprintk(VXD_CBIT(dlim, 5),
16526 + "alloc_dl_info(%p,%d)*", sb, tag);
16528 + /* would this benefit from a slab cache? */
16529 + new = kmalloc(sizeof(struct dl_info), GFP_KERNEL);
16533 + memset(new, 0, sizeof(struct dl_info));
16534 + new->dl_tag = tag;
16536 + // INIT_RCU_HEAD(&new->dl_rcu);
16537 + INIT_HLIST_NODE(&new->dl_hlist);
16538 + spin_lock_init(&new->dl_lock);
16539 + atomic_set(&new->dl_refcnt, 0);
16540 + atomic_set(&new->dl_usecnt, 0);
16542 + /* rest of init goes here */
16544 + vxdprintk(VXD_CBIT(dlim, 4),
16545 + "alloc_dl_info(%p,%d) = %p", sb, tag, new);
16549 +/* __dealloc_dl_info()
16551 + * final disposal of dl_info */
16553 +static void __dealloc_dl_info(struct dl_info *dli)
16555 + vxdprintk(VXD_CBIT(dlim, 4),
16556 + "dealloc_dl_info(%p)", dli);
16558 + dli->dl_hlist.next = LIST_POISON1;
16559 + dli->dl_tag = -1;
16562 + BUG_ON(atomic_read(&dli->dl_usecnt));
16563 + BUG_ON(atomic_read(&dli->dl_refcnt));
16569 +/* hash table for dl_info hash */
16571 +#define DL_HASH_SIZE 13
16573 +struct hlist_head dl_info_hash[DL_HASH_SIZE];
16575 +static DEFINE_SPINLOCK(dl_info_hash_lock);
16578 +static inline unsigned int __hashval(struct super_block *sb, vtag_t tag)
16580 + return ((tag ^ (unsigned long)sb) % DL_HASH_SIZE);
16585 +/* __hash_dl_info()
16587 + * add the dli to the global hash table
16588 + * requires the hash_lock to be held */
16590 +static inline void __hash_dl_info(struct dl_info *dli)
16592 + struct hlist_head *head;
16594 + vxdprintk(VXD_CBIT(dlim, 6),
16595 + "__hash_dl_info: %p[#%d]", dli, dli->dl_tag);
16596 + get_dl_info(dli);
16597 + head = &dl_info_hash[__hashval(dli->dl_sb, dli->dl_tag)];
16598 + hlist_add_head_rcu(&dli->dl_hlist, head);
16601 +/* __unhash_dl_info()
16603 + * remove the dli from the global hash table
16604 + * requires the hash_lock to be held */
16606 +static inline void __unhash_dl_info(struct dl_info *dli)
16608 + vxdprintk(VXD_CBIT(dlim, 6),
16609 + "__unhash_dl_info: %p[#%d]", dli, dli->dl_tag);
16610 + hlist_del_rcu(&dli->dl_hlist);
16611 + put_dl_info(dli);
16615 +/* __lookup_dl_info()
16617 + * requires the rcu_read_lock()
16618 + * doesn't increment the dl_refcnt */
16620 +static inline struct dl_info *__lookup_dl_info(struct super_block *sb, vtag_t tag)
16622 + struct hlist_head *head = &dl_info_hash[__hashval(sb, tag)];
16623 + struct dl_info *dli;
16625 + hlist_for_each_entry_rcu(dli, head, dl_hlist) {
16626 + if (dli->dl_tag == tag && dli->dl_sb == sb)
16633 +struct dl_info *locate_dl_info(struct super_block *sb, vtag_t tag)
16635 + struct dl_info *dli;
16638 + dli = get_dl_info(__lookup_dl_info(sb, tag));
16639 + vxdprintk(VXD_CBIT(dlim, 7),
16640 + "locate_dl_info(%p,#%d) = %p", sb, tag, dli);
16641 + rcu_read_unlock();
16645 +void rcu_free_dl_info(struct rcu_head *head)
16647 + struct dl_info *dli = container_of(head, struct dl_info, dl_rcu);
16648 + int usecnt, refcnt;
16650 + BUG_ON(!dli || !head);
16652 + usecnt = atomic_read(&dli->dl_usecnt);
16653 + BUG_ON(usecnt < 0);
16655 + refcnt = atomic_read(&dli->dl_refcnt);
16656 + BUG_ON(refcnt < 0);
16658 + vxdprintk(VXD_CBIT(dlim, 3),
16659 + "rcu_free_dl_info(%p)", dli);
16661 + __dealloc_dl_info(dli);
16663 + printk("!!! rcu didn't free\n");
16669 +static int do_addrem_dlimit(uint32_t id, const char __user *name,
16670 + uint32_t flags, int add)
16672 + struct path path;
16675 + ret = user_lpath(name, &path);
16677 + struct super_block *sb;
16678 + struct dl_info *dli;
16681 + if (!path.dentry->d_inode)
16682 + goto out_release;
16683 + if (!(sb = path.dentry->d_inode->i_sb))
16684 + goto out_release;
16687 + dli = __alloc_dl_info(sb, id);
16688 + spin_lock(&dl_info_hash_lock);
16691 + if (__lookup_dl_info(sb, id))
16693 + __hash_dl_info(dli);
16696 + spin_lock(&dl_info_hash_lock);
16697 + dli = __lookup_dl_info(sb, id);
16702 + __unhash_dl_info(dli);
16706 + spin_unlock(&dl_info_hash_lock);
16708 + __dealloc_dl_info(dli);
16715 +int vc_add_dlimit(uint32_t id, void __user *data)
16717 + struct vcmd_ctx_dlimit_base_v0 vc_data;
16719 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16722 + return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 1);
16725 +int vc_rem_dlimit(uint32_t id, void __user *data)
16727 + struct vcmd_ctx_dlimit_base_v0 vc_data;
16729 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16732 + return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 0);
16735 +#ifdef CONFIG_COMPAT
16737 +int vc_add_dlimit_x32(uint32_t id, void __user *data)
16739 + struct vcmd_ctx_dlimit_base_v0_x32 vc_data;
16741 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16744 + return do_addrem_dlimit(id,
16745 + compat_ptr(vc_data.name_ptr), vc_data.flags, 1);
16748 +int vc_rem_dlimit_x32(uint32_t id, void __user *data)
16750 + struct vcmd_ctx_dlimit_base_v0_x32 vc_data;
16752 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16755 + return do_addrem_dlimit(id,
16756 + compat_ptr(vc_data.name_ptr), vc_data.flags, 0);
16759 +#endif /* CONFIG_COMPAT */
16763 +int do_set_dlimit(uint32_t id, const char __user *name,
16764 + uint32_t space_used, uint32_t space_total,
16765 + uint32_t inodes_used, uint32_t inodes_total,
16766 + uint32_t reserved, uint32_t flags)
16768 + struct path path;
16771 + ret = user_lpath(name, &path);
16773 + struct super_block *sb;
16774 + struct dl_info *dli;
16777 + if (!path.dentry->d_inode)
16778 + goto out_release;
16779 + if (!(sb = path.dentry->d_inode->i_sb))
16780 + goto out_release;
16782 + /* sanity checks */
16783 + if ((reserved != CDLIM_KEEP &&
16784 + reserved > 100) ||
16785 + (inodes_used != CDLIM_KEEP &&
16786 + inodes_used > inodes_total) ||
16787 + (space_used != CDLIM_KEEP &&
16788 + space_used > space_total))
16789 + goto out_release;
16792 + dli = locate_dl_info(sb, id);
16794 + goto out_release;
16796 + spin_lock(&dli->dl_lock);
16798 + if (inodes_used != CDLIM_KEEP)
16799 + dli->dl_inodes_used = inodes_used;
16800 + if (inodes_total != CDLIM_KEEP)
16801 + dli->dl_inodes_total = inodes_total;
16802 + if (space_used != CDLIM_KEEP)
16803 + dli->dl_space_used = dlimit_space_32to64(
16804 + space_used, flags, DLIMS_USED);
16806 + if (space_total == CDLIM_INFINITY)
16807 + dli->dl_space_total = DLIM_INFINITY;
16808 + else if (space_total != CDLIM_KEEP)
16809 + dli->dl_space_total = dlimit_space_32to64(
16810 + space_total, flags, DLIMS_TOTAL);
16812 + if (reserved != CDLIM_KEEP)
16813 + dli->dl_nrlmult = (1 << 10) * (100 - reserved) / 100;
16815 + spin_unlock(&dli->dl_lock);
16817 + put_dl_info(dli);
16826 +int vc_set_dlimit(uint32_t id, void __user *data)
16828 + struct vcmd_ctx_dlimit_v0 vc_data;
16830 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16833 + return do_set_dlimit(id, vc_data.name,
16834 + vc_data.space_used, vc_data.space_total,
16835 + vc_data.inodes_used, vc_data.inodes_total,
16836 + vc_data.reserved, vc_data.flags);
16839 +#ifdef CONFIG_COMPAT
16841 +int vc_set_dlimit_x32(uint32_t id, void __user *data)
16843 + struct vcmd_ctx_dlimit_v0_x32 vc_data;
16845 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16848 + return do_set_dlimit(id, compat_ptr(vc_data.name_ptr),
16849 + vc_data.space_used, vc_data.space_total,
16850 + vc_data.inodes_used, vc_data.inodes_total,
16851 + vc_data.reserved, vc_data.flags);
16854 +#endif /* CONFIG_COMPAT */
16858 +int do_get_dlimit(uint32_t id, const char __user *name,
16859 + uint32_t *space_used, uint32_t *space_total,
16860 + uint32_t *inodes_used, uint32_t *inodes_total,
16861 + uint32_t *reserved, uint32_t *flags)
16863 + struct path path;
16866 + ret = user_lpath(name, &path);
16868 + struct super_block *sb;
16869 + struct dl_info *dli;
16872 + if (!path.dentry->d_inode)
16873 + goto out_release;
16874 + if (!(sb = path.dentry->d_inode->i_sb))
16875 + goto out_release;
16878 + dli = locate_dl_info(sb, id);
16880 + goto out_release;
16882 + spin_lock(&dli->dl_lock);
16883 + *inodes_used = dli->dl_inodes_used;
16884 + *inodes_total = dli->dl_inodes_total;
16886 + *space_used = dlimit_space_64to32(
16887 + dli->dl_space_used, flags, DLIMS_USED);
16889 + if (dli->dl_space_total == DLIM_INFINITY)
16890 + *space_total = CDLIM_INFINITY;
16892 + *space_total = dlimit_space_64to32(
16893 + dli->dl_space_total, flags, DLIMS_TOTAL);
16895 + *reserved = 100 - ((dli->dl_nrlmult * 100 + 512) >> 10);
16896 + spin_unlock(&dli->dl_lock);
16898 + put_dl_info(dli);
16909 +int vc_get_dlimit(uint32_t id, void __user *data)
16911 + struct vcmd_ctx_dlimit_v0 vc_data;
16914 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16917 + ret = do_get_dlimit(id, vc_data.name,
16918 + &vc_data.space_used, &vc_data.space_total,
16919 + &vc_data.inodes_used, &vc_data.inodes_total,
16920 + &vc_data.reserved, &vc_data.flags);
16924 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
16929 +#ifdef CONFIG_COMPAT
16931 +int vc_get_dlimit_x32(uint32_t id, void __user *data)
16933 + struct vcmd_ctx_dlimit_v0_x32 vc_data;
16936 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16939 + ret = do_get_dlimit(id, compat_ptr(vc_data.name_ptr),
16940 + &vc_data.space_used, &vc_data.space_total,
16941 + &vc_data.inodes_used, &vc_data.inodes_total,
16942 + &vc_data.reserved, &vc_data.flags);
16946 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
16951 +#endif /* CONFIG_COMPAT */
16954 +void vx_vsi_statfs(struct super_block *sb, struct kstatfs *buf)
16956 + struct dl_info *dli;
16957 + __u64 blimit, bfree, bavail;
16960 + dli = locate_dl_info(sb, dx_current_tag());
16964 + spin_lock(&dli->dl_lock);
16965 + if (dli->dl_inodes_total == (unsigned long)DLIM_INFINITY)
16968 + /* reduce max inodes available to limit */
16969 + if (buf->f_files > dli->dl_inodes_total)
16970 + buf->f_files = dli->dl_inodes_total;
16972 + ifree = dli->dl_inodes_total - dli->dl_inodes_used;
16973 + /* reduce free inodes to min */
16974 + if (ifree < buf->f_ffree)
16975 + buf->f_ffree = ifree;
16978 + if (dli->dl_space_total == DLIM_INFINITY)
16981 + blimit = dli->dl_space_total >> sb->s_blocksize_bits;
16983 + if (dli->dl_space_total < dli->dl_space_used)
16986 + bfree = (dli->dl_space_total - dli->dl_space_used)
16987 + >> sb->s_blocksize_bits;
16989 + bavail = ((dli->dl_space_total >> 10) * dli->dl_nrlmult);
16990 + if (bavail < dli->dl_space_used)
16993 + bavail = (bavail - dli->dl_space_used)
16994 + >> sb->s_blocksize_bits;
16996 + /* reduce max space available to limit */
16997 + if (buf->f_blocks > blimit)
16998 + buf->f_blocks = blimit;
17000 + /* reduce free space to min */
17001 + if (bfree < buf->f_bfree)
17002 + buf->f_bfree = bfree;
17004 + /* reduce avail space to min */
17005 + if (bavail < buf->f_bavail)
17006 + buf->f_bavail = bavail;
17009 + spin_unlock(&dli->dl_lock);
17010 + put_dl_info(dli);
17015 +#include <linux/module.h>
17017 +EXPORT_SYMBOL_GPL(locate_dl_info);
17018 +EXPORT_SYMBOL_GPL(rcu_free_dl_info);
17020 diff -NurpP --minimal linux-4.9.217/kernel/vserver/helper.c linux-4.9.217-vs2.3.9.12/kernel/vserver/helper.c
17021 --- linux-4.9.217/kernel/vserver/helper.c 1970-01-01 00:00:00.000000000 +0000
17022 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/helper.c 2018-10-20 04:58:15.000000000 +0000
17025 + * linux/kernel/vserver/helper.c
17027 + * Virtual Context Support
17029 + * Copyright (C) 2004-2007 Herbert Pötzl
17031 + * V0.01 basic helper
17035 +#include <linux/kmod.h>
17036 +#include <linux/reboot.h>
17037 +#include <linux/vs_context.h>
17038 +#include <linux/vs_network.h>
17039 +#include <linux/vserver/signal.h>
17042 +char vshelper_path[255] = "/sbin/vshelper";
17044 +static int vshelper_init(struct subprocess_info *info, struct cred *new_cred)
17046 + current->flags &= ~PF_NO_SETAFFINITY;
17050 +static int vs_call_usermodehelper(char *path, char **argv, char **envp, int wait)
17052 + struct subprocess_info *info;
17053 + gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
17055 + info = call_usermodehelper_setup(path, argv, envp, gfp_mask,
17056 + vshelper_init, NULL, NULL);
17057 + if (info == NULL)
17060 + return call_usermodehelper_exec(info, wait);
17063 +static int do_vshelper(char *name, char *argv[], char *envp[], int sync)
17067 + if ((ret = vs_call_usermodehelper(name, argv, envp,
17068 + sync ? UMH_WAIT_PROC : UMH_WAIT_EXEC))) {
17069 + printk(KERN_WARNING "%s: (%s %s) returned %s with %d\n",
17070 + name, argv[1], argv[2],
17071 + sync ? "sync" : "async", ret);
17073 + vxdprintk(VXD_CBIT(switch, 4),
17074 + "%s: (%s %s) returned %s with %d",
17075 + name, argv[1], argv[2], sync ? "sync" : "async", ret);
17080 + * vshelper path is set via /proc/sys
17081 + * invoked by vserver sys_reboot(), with
17082 + * the following arguments
17084 + * argv [0] = vshelper_path;
17085 + * argv [1] = action: "restart", "halt", "poweroff", ...
17086 + * argv [2] = context identifier
17088 + * envp [*] = type-specific parameters
17091 +long vs_reboot_helper(struct vx_info *vxi, int cmd, void __user *arg)
17093 + char id_buf[8], cmd_buf[16];
17094 + char uid_buf[24], pid_buf[16]; /* "VS_UID=" + 10 digit uid + NUL needs 18 bytes */
17097 + char *argv[] = {vshelper_path, NULL, id_buf, 0};
17098 + char *envp[] = {"HOME=/", "TERM=linux",
17099 + "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
17100 + uid_buf, pid_buf, cmd_buf, 0};
17102 + if (vx_info_state(vxi, VXS_HELPER))
17104 + vxi->vx_state |= VXS_HELPER;
17106 + snprintf(id_buf, sizeof(id_buf), "%d", vxi->vx_id);
17108 + snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd);
17109 + snprintf(uid_buf, sizeof(uid_buf), "VS_UID=%u",
17110 + from_kuid(&init_user_ns, current_uid()));
17111 + snprintf(pid_buf, sizeof(pid_buf), "VS_PID=%d", current->pid);
17114 + case LINUX_REBOOT_CMD_RESTART:
17115 + argv[1] = "restart";
17118 + case LINUX_REBOOT_CMD_HALT:
17119 + argv[1] = "halt";
17122 + case LINUX_REBOOT_CMD_POWER_OFF:
17123 + argv[1] = "poweroff";
17126 + case LINUX_REBOOT_CMD_SW_SUSPEND:
17127 + argv[1] = "swsusp";
17130 + case LINUX_REBOOT_CMD_OOM:
17135 + vxi->vx_state &= ~VXS_HELPER;
17139 + ret = do_vshelper(vshelper_path, argv, envp, 0);
17140 + vxi->vx_state &= ~VXS_HELPER;
17141 + __wakeup_vx_info(vxi);
17142 + return (ret) ? -EPERM : 0;
17146 +long vs_reboot(unsigned int cmd, void __user *arg)
17148 + struct vx_info *vxi = current_vx_info();
17151 + vxdprintk(VXD_CBIT(misc, 5),
17152 + "vs_reboot(%p[#%d],%u)",
17153 + vxi, vxi ? vxi->vx_id : 0, cmd);
17155 + ret = vs_reboot_helper(vxi, cmd, arg);
17159 + vxi->reboot_cmd = cmd;
17160 + if (vx_info_flags(vxi, VXF_REBOOT_KILL, 0)) {
17162 + case LINUX_REBOOT_CMD_RESTART:
17163 + case LINUX_REBOOT_CMD_HALT:
17164 + case LINUX_REBOOT_CMD_POWER_OFF:
17165 + vx_info_kill(vxi, 0, SIGKILL);
17166 + vx_info_kill(vxi, 1, SIGKILL);
17174 +long vs_oom_action(unsigned int cmd)
17176 + struct vx_info *vxi = current_vx_info();
17179 + vxdprintk(VXD_CBIT(misc, 5),
17180 + "vs_oom_action(%p[#%d],%u)",
17181 + vxi, vxi ? vxi->vx_id : 0, cmd);
17183 + ret = vs_reboot_helper(vxi, cmd, NULL);
17187 + vxi->reboot_cmd = cmd;
17188 + if (vx_info_flags(vxi, VXF_REBOOT_KILL, 0)) {
17189 + vx_info_kill(vxi, 0, SIGKILL);
17190 + vx_info_kill(vxi, 1, SIGKILL);
17196 + * argv [0] = vshelper_path;
17197 + * argv [1] = action: "startup", "shutdown"
17198 + * argv [2] = context identifier
17200 + * envp [*] = type-specific parameters
17203 +long vs_state_change(struct vx_info *vxi, unsigned int cmd)
17205 + char id_buf[8], cmd_buf[16];
17206 + char *argv[] = {vshelper_path, NULL, id_buf, 0};
17207 + char *envp[] = {"HOME=/", "TERM=linux",
17208 + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0};
17210 + if (!vx_info_flags(vxi, VXF_SC_HELPER, 0))
17213 + snprintf(id_buf, sizeof(id_buf), "%d", vxi->vx_id);
17214 + snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd);
17217 + case VSC_STARTUP:
17218 + argv[1] = "startup";
17220 + case VSC_SHUTDOWN:
17221 + argv[1] = "shutdown";
17227 + return do_vshelper(vshelper_path, argv, envp, 1);
17232 + * argv [0] = vshelper_path;
17233 + * argv [1] = action: "netup", "netdown"
17234 + * argv [2] = context identifier
17236 + * envp [*] = type-specific parameters
17239 +long vs_net_change(struct nx_info *nxi, unsigned int cmd)
17241 + char id_buf[8], cmd_buf[16];
17242 + char *argv[] = {vshelper_path, NULL, id_buf, 0};
17243 + char *envp[] = {"HOME=/", "TERM=linux",
17244 + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0};
17246 + if (!nx_info_flags(nxi, NXF_SC_HELPER, 0))
17249 + snprintf(id_buf, sizeof(id_buf), "%d", nxi->nx_id);
17250 + snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd);
17254 + argv[1] = "netup";
17256 + case VSC_NETDOWN:
17257 + argv[1] = "netdown";
17263 + return do_vshelper(vshelper_path, argv, envp, 1);
17266 diff -NurpP --minimal linux-4.9.217/kernel/vserver/history.c linux-4.9.217-vs2.3.9.12/kernel/vserver/history.c
17267 --- linux-4.9.217/kernel/vserver/history.c 1970-01-01 00:00:00.000000000 +0000
17268 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/history.c 2018-10-20 04:58:15.000000000 +0000
17271 + * kernel/vserver/history.c
17273 + * Virtual Context History Backtrace
17275 + * Copyright (C) 2004-2007 Herbert Pötzl
17277 + * V0.01 basic structure
17278 + * V0.02 hash/unhash and trace
17279 + * V0.03 preemption fixes
17283 +#include <linux/module.h>
17284 +#include <asm/uaccess.h>
17286 +#include <linux/vserver/context.h>
17287 +#include <linux/vserver/debug.h>
17288 +#include <linux/vserver/debug_cmd.h>
17289 +#include <linux/vserver/history.h>
17292 +#ifdef CONFIG_VSERVER_HISTORY
17293 +#define VXH_SIZE CONFIG_VSERVER_HISTORY_SIZE
17295 +#define VXH_SIZE 64
17298 +struct _vx_history {
17299 + unsigned int counter;
17301 + struct _vx_hist_entry entry[VXH_SIZE + 1];
17305 +DEFINE_PER_CPU(struct _vx_history, vx_history_buffer);
17307 +unsigned volatile int vxh_active = 1;
17309 +static atomic_t sequence = ATOMIC_INIT(0);
17314 + * requires disabled preemption */
17316 +struct _vx_hist_entry *vxh_advance(void *loc)
17318 + unsigned int cpu = smp_processor_id();
17319 + struct _vx_history *hist = &per_cpu(vx_history_buffer, cpu);
17320 + struct _vx_hist_entry *entry;
17321 + unsigned int index;
17323 + index = vxh_active ? (hist->counter++ % VXH_SIZE) : VXH_SIZE;
17324 + entry = &hist->entry[index];
17326 + entry->seq = atomic_inc_return(&sequence);
17327 + entry->loc = loc;
17331 +EXPORT_SYMBOL_GPL(vxh_advance);
17334 +#define VXH_LOC_FMTS "(#%04x,*%d):%p"
17336 +#define VXH_LOC_ARGS(e) (e)->seq, cpu, (e)->loc
17339 +#define VXH_VXI_FMTS "%p[#%d,%d.%d]"
17341 +#define VXH_VXI_ARGS(e) (e)->vxi.ptr, \
17342 + (e)->vxi.ptr ? (e)->vxi.xid : 0, \
17343 + (e)->vxi.ptr ? (e)->vxi.usecnt : 0, \
17344 + (e)->vxi.ptr ? (e)->vxi.tasks : 0
17346 +void vxh_dump_entry(struct _vx_hist_entry *e, unsigned cpu)
17348 + switch (e->type) {
17349 + case VXH_THROW_OOPS:
17350 + printk( VXH_LOC_FMTS " oops \n", VXH_LOC_ARGS(e));
17353 + case VXH_GET_VX_INFO:
17354 + case VXH_PUT_VX_INFO:
17355 + printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n",
17357 + (e->type == VXH_GET_VX_INFO) ? "get" : "put",
17358 + VXH_VXI_ARGS(e));
17361 + case VXH_INIT_VX_INFO:
17362 + case VXH_SET_VX_INFO:
17363 + case VXH_CLR_VX_INFO:
17364 + printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n",
17366 + (e->type == VXH_INIT_VX_INFO) ? "init" :
17367 + ((e->type == VXH_SET_VX_INFO) ? "set" : "clr"),
17368 + VXH_VXI_ARGS(e), e->sc.data);
17371 + case VXH_CLAIM_VX_INFO:
17372 + case VXH_RELEASE_VX_INFO:
17373 + printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n",
17375 + (e->type == VXH_CLAIM_VX_INFO) ? "claim" : "release",
17376 + VXH_VXI_ARGS(e), e->sc.data);
17379 + case VXH_ALLOC_VX_INFO:
17380 + case VXH_DEALLOC_VX_INFO:
17381 + printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n",
17383 + (e->type == VXH_ALLOC_VX_INFO) ? "alloc" : "dealloc",
17384 + VXH_VXI_ARGS(e));
17387 + case VXH_HASH_VX_INFO:
17388 + case VXH_UNHASH_VX_INFO:
17389 + printk( VXH_LOC_FMTS " __%s_vx_info " VXH_VXI_FMTS "\n",
17391 + (e->type == VXH_HASH_VX_INFO) ? "hash" : "unhash",
17392 + VXH_VXI_ARGS(e));
17395 + case VXH_LOC_VX_INFO:
17396 + case VXH_LOOKUP_VX_INFO:
17397 + case VXH_CREATE_VX_INFO:
17398 + printk( VXH_LOC_FMTS " __%s_vx_info [#%d] -> " VXH_VXI_FMTS "\n",
17400 + (e->type == VXH_CREATE_VX_INFO) ? "create" :
17401 + ((e->type == VXH_LOC_VX_INFO) ? "loc" : "lookup"),
17402 + e->ll.arg, VXH_VXI_ARGS(e));
17407 +static void __vxh_dump_history(void)
17409 + unsigned int i, cpu;
17411 + printk("History:\tSEQ: %8x\tNR_CPUS: %d\n",
17412 + atomic_read(&sequence), NR_CPUS);
17414 + for (i = 0; i < VXH_SIZE; i++) {
17415 + for_each_online_cpu(cpu) {
17416 + struct _vx_history *hist =
17417 + &per_cpu(vx_history_buffer, cpu);
17418 + unsigned int index = (hist->counter - i) % VXH_SIZE;
17419 + struct _vx_hist_entry *entry = &hist->entry[index];
17421 + vxh_dump_entry(entry, cpu);
17426 +void vxh_dump_history(void)
17430 + local_irq_enable();
17432 + local_irq_disable();
17434 + __vxh_dump_history();
17438 +/* vserver syscall commands below here */
17441 +int vc_dump_history(uint32_t id)
17444 + __vxh_dump_history();
17451 +int do_read_history(struct __user _vx_hist_entry *data,
17452 + int cpu, uint32_t *index, uint32_t *count)
17454 + int pos, ret = 0;
17455 + struct _vx_history *hist = &per_cpu(vx_history_buffer, cpu);
17456 + int end = hist->counter;
17457 + int start = end - VXH_SIZE + 2;
17458 + int idx = *index;
17460 + /* special case: get current pos */
17466 + /* have we lost some data? */
17470 + for (pos = 0; (pos < *count) && (idx < end); pos++, idx++) {
17471 + struct _vx_hist_entry *entry =
17472 + &hist->entry[idx % VXH_SIZE];
17474 + /* send entry to userspace; copy_to_user() yields bytes left, so map failure to -EFAULT */
17475 + ret = copy_to_user(&data[pos], entry, sizeof(*entry)) ? -EFAULT : 0;
17479 + /* save new index and count */
17482 + return ret ? ret : (*index < end);
17485 +int vc_read_history(uint32_t id, void __user *data)
17487 + struct vcmd_read_history_v0 vc_data;
17490 + if (id >= NR_CPUS)
17493 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17496 + ret = do_read_history((struct __user _vx_hist_entry *)vc_data.data,
17497 + id, &vc_data.index, &vc_data.count);
17499 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
17504 +#ifdef CONFIG_COMPAT
17506 +int vc_read_history_x32(uint32_t id, void __user *data)
17508 + struct vcmd_read_history_v0_x32 vc_data;
17511 + if (id >= NR_CPUS)
17514 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17517 + ret = do_read_history((struct __user _vx_hist_entry *)
17518 + compat_ptr(vc_data.data_ptr),
17519 + id, &vc_data.index, &vc_data.count);
17521 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
17526 +#endif /* CONFIG_COMPAT */
17528 diff -NurpP --minimal linux-4.9.217/kernel/vserver/inet.c linux-4.9.217-vs2.3.9.12/kernel/vserver/inet.c
17529 --- linux-4.9.217/kernel/vserver/inet.c 1970-01-01 00:00:00.000000000 +0000
17530 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/inet.c 2018-10-20 04:58:15.000000000 +0000
17533 +#include <linux/in.h>
17534 +#include <linux/inetdevice.h>
17535 +#include <linux/export.h>
17536 +#include <linux/vs_inet.h>
17537 +#include <linux/vs_inet6.h>
17538 +#include <linux/vserver/debug.h>
17539 +#include <net/route.h>
17540 +#include <net/addrconf.h>
17543 +int nx_v4_addr_conflict(struct nx_info *nxi1, struct nx_info *nxi2)
17547 + if (!nxi1 || !nxi2 || nxi1 == nxi2)
17550 + struct nx_addr_v4 *ptr;
17551 + unsigned long irqflags;
17553 + spin_lock_irqsave(&nxi1->addr_lock, irqflags);
17554 + for (ptr = &nxi1->v4; ptr; ptr = ptr->next) {
17555 + if (v4_nx_addr_in_nx_info(nxi2, ptr, -1)) {
17560 + spin_unlock_irqrestore(&nxi1->addr_lock, irqflags);
17563 + vxdprintk(VXD_CBIT(net, 2),
17564 + "nx_v4_addr_conflict(%p,%p): %d",
17565 + nxi1, nxi2, ret);
17571 +#ifdef CONFIG_IPV6
17573 +int nx_v6_addr_conflict(struct nx_info *nxi1, struct nx_info *nxi2)
17577 + if (!nxi1 || !nxi2 || nxi1 == nxi2)
17580 + struct nx_addr_v6 *ptr;
17581 + unsigned long irqflags;
17583 + spin_lock_irqsave(&nxi1->addr_lock, irqflags);
17584 + for (ptr = &nxi1->v6; ptr; ptr = ptr->next) {
17585 + if (v6_nx_addr_in_nx_info(nxi2, ptr, -1)) {
17590 + spin_unlock_irqrestore(&nxi1->addr_lock, irqflags);
17593 + vxdprintk(VXD_CBIT(net, 2),
17594 + "nx_v6_addr_conflict(%p,%p): %d",
17595 + nxi1, nxi2, ret);
17602 +int v4_dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
17604 + struct in_device *in_dev;
17605 + struct in_ifaddr **ifap;
17606 + struct in_ifaddr *ifa;
17611 + in_dev = in_dev_get(dev);
17615 + for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
17616 + ifap = &ifa->ifa_next) {
17617 + if (v4_addr_in_nx_info(nxi, ifa->ifa_local, NXA_MASK_SHOW)) {
17622 + in_dev_put(in_dev);
17628 +#ifdef CONFIG_IPV6
17630 +int v6_dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
17632 + struct inet6_dev *in_dev;
17633 + struct inet6_ifaddr *ifa;
17638 + in_dev = in6_dev_get(dev);
17642 + // for (ifap = &in_dev->addr_list; (ifa = *ifap) != NULL;
17643 + list_for_each_entry(ifa, &in_dev->addr_list, if_list) {
17644 + if (v6_addr_in_nx_info(nxi, &ifa->addr, -1)) {
17649 + in6_dev_put(in_dev);
17656 +int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
17662 + if (nxi->v4.type && v4_dev_in_nx_info(dev, nxi))
17664 +#ifdef CONFIG_IPV6
17666 + if (nxi->v6.type && v6_dev_in_nx_info(dev, nxi))
17671 + vxdprintk(VXD_CBIT(net, 3),
17672 + "dev_in_nx_info(%p,%p[#%d]) = %d",
17673 + dev, nxi, nxi ? nxi->nx_id : 0, ret);
17677 +struct rtable *ip_v4_find_src(struct net *net, struct nx_info *nxi,
17678 + struct flowi4 *fl4)
17680 + struct rtable *rt;
17685 + /* FIXME: handle lback only case */
17686 + if (!NX_IPV4(nxi))
17687 + return ERR_PTR(-EPERM);
17689 + vxdprintk(VXD_CBIT(net, 4),
17690 + "ip_v4_find_src(%p[#%u]) " NIPQUAD_FMT " -> " NIPQUAD_FMT,
17691 + nxi, nxi ? nxi->nx_id : 0,
17692 + NIPQUAD(fl4->saddr), NIPQUAD(fl4->daddr));
17694 + /* single IP is unconditional */
17695 + if (nx_info_flags(nxi, NXF_SINGLE_IP, 0) &&
17696 + (fl4->saddr == INADDR_ANY))
17697 + fl4->saddr = nxi->v4.ip[0].s_addr;
17699 + if (fl4->saddr == INADDR_ANY) {
17700 + struct nx_addr_v4 *ptr;
17701 + __be32 found = 0;
17703 + rt = __ip_route_output_key(net, fl4);
17704 + if (!IS_ERR(rt)) {
17705 + found = fl4->saddr;
17707 + vxdprintk(VXD_CBIT(net, 4),
17708 + "ip_v4_find_src(%p[#%u]) rok[%u]: " NIPQUAD_FMT,
17709 + nxi, nxi ? nxi->nx_id : 0, fl4->flowi4_oif, NIPQUAD(found));
17710 + if (v4_addr_in_nx_info(nxi, found, NXA_MASK_BIND))
17714 + WARN_ON_ONCE(in_irq());
17715 + spin_lock_bh(&nxi->addr_lock);
17716 + for (ptr = &nxi->v4; ptr; ptr = ptr->next) {
17717 + __be32 primary = ptr->ip[0].s_addr;
17718 + __be32 mask = ptr->mask.s_addr;
17719 + __be32 neta = primary & mask;
17721 + vxdprintk(VXD_CBIT(net, 4), "ip_v4_find_src(%p[#%u]) chk: "
17722 + NIPQUAD_FMT "/" NIPQUAD_FMT "/" NIPQUAD_FMT,
17723 + nxi, nxi ? nxi->nx_id : 0, NIPQUAD(primary),
17724 + NIPQUAD(mask), NIPQUAD(neta));
17725 + if ((found & mask) != neta)
17728 + fl4->saddr = primary;
17729 + rt = __ip_route_output_key(net, fl4);
17730 + vxdprintk(VXD_CBIT(net, 4),
17731 + "ip_v4_find_src(%p[#%u]) rok[%u]: " NIPQUAD_FMT,
17732 + nxi, nxi ? nxi->nx_id : 0, fl4->flowi4_oif, NIPQUAD(primary));
17733 + if (!IS_ERR(rt)) {
17734 + found = fl4->saddr;
17736 + if (found == primary)
17737 + goto found_unlock;
17740 + /* still no source ip? */
17741 + found = ipv4_is_loopback(fl4->daddr)
17742 + ? IPI_LOOPBACK : nxi->v4.ip[0].s_addr;
17744 + spin_unlock_bh(&nxi->addr_lock);
17746 + /* assign src ip to flow */
17747 + fl4->saddr = found;
17750 + if (!v4_addr_in_nx_info(nxi, fl4->saddr, NXA_MASK_BIND))
17751 + return ERR_PTR(-EPERM);
17754 + if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0)) {
17755 + if (ipv4_is_loopback(fl4->daddr))
17756 + fl4->daddr = nxi->v4_lback.s_addr;
17757 + if (ipv4_is_loopback(fl4->saddr))
17758 + fl4->saddr = nxi->v4_lback.s_addr;
17759 + } else if (ipv4_is_loopback(fl4->daddr) &&
17760 + !nx_info_flags(nxi, NXF_LBACK_ALLOW, 0))
17761 + return ERR_PTR(-EPERM);
17766 +EXPORT_SYMBOL_GPL(ip_v4_find_src);
17768 diff -NurpP --minimal linux-4.9.217/kernel/vserver/init.c linux-4.9.217-vs2.3.9.12/kernel/vserver/init.c
17769 --- linux-4.9.217/kernel/vserver/init.c 1970-01-01 00:00:00.000000000 +0000
17770 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/init.c 2018-10-20 04:58:15.000000000 +0000
17773 + * linux/kernel/vserver/init.c
17775 + * Virtual Server Init
17777 + * Copyright (C) 2004-2007 Herbert Pötzl
17779 + * V0.01 basic structure
17783 +#include <linux/init.h>
17784 +#include <linux/module.h>
17786 +int vserver_register_sysctl(void);
17787 +void vserver_unregister_sysctl(void);
17790 +static int __init init_vserver(void)
17794 +#ifdef CONFIG_VSERVER_DEBUG
17795 + vserver_register_sysctl();
17801 +static void __exit exit_vserver(void)
17804 +#ifdef CONFIG_VSERVER_DEBUG
17805 + vserver_unregister_sysctl();
17810 +/* FIXME: GFP_ZONETYPES gone
17811 +long vx_slab[GFP_ZONETYPES]; */
17815 +module_init(init_vserver);
17816 +module_exit(exit_vserver);
17818 diff -NurpP --minimal linux-4.9.217/kernel/vserver/inode.c linux-4.9.217-vs2.3.9.12/kernel/vserver/inode.c
17819 --- linux-4.9.217/kernel/vserver/inode.c 1970-01-01 00:00:00.000000000 +0000
17820 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/inode.c 2018-10-20 04:58:15.000000000 +0000
17823 + * linux/kernel/vserver/inode.c
17825 + * Virtual Server: File System Support
17827 + * Copyright (C) 2004-2007 Herbert Pötzl
17829 + * V0.01 separated from vcontext V0.05
17830 + * V0.02 moved to tag (instead of xid)
17834 +#include <linux/tty.h>
17835 +#include <linux/proc_fs.h>
17836 +#include <linux/devpts_fs.h>
17837 +#include <linux/fs.h>
17838 +#include <linux/file.h>
17839 +#include <linux/mount.h>
17840 +#include <linux/parser.h>
17841 +#include <linux/namei.h>
17842 +#include <linux/magic.h>
17843 +#include <linux/slab.h>
17844 +#include <linux/vserver/inode.h>
17845 +#include <linux/vserver/inode_cmd.h>
17846 +#include <linux/vs_base.h>
17847 +#include <linux/vs_tag.h>
17849 +#include <asm/uaccess.h>
17850 +#include <../../fs/proc/internal.h>
17853 +static int __vc_get_iattr(struct inode *in, uint32_t *tag, uint32_t *flags, uint32_t *mask)
17855 + struct proc_dir_entry *entry;
17857 + if (!in || !in->i_sb)
17860 + *flags = IATTR_TAG
17861 + | (IS_IMMUTABLE(in) ? IATTR_IMMUTABLE : 0)
17862 + | (IS_IXUNLINK(in) ? IATTR_IXUNLINK : 0)
17863 + | (IS_BARRIER(in) ? IATTR_BARRIER : 0)
17864 + | (IS_COW(in) ? IATTR_COW : 0);
17865 + *mask = IATTR_IXUNLINK | IATTR_IMMUTABLE | IATTR_COW;
17867 + if (S_ISDIR(in->i_mode))
17868 + *mask |= IATTR_BARRIER;
17870 + if (IS_TAGGED(in)) {
17871 + *tag = i_tag_read(in);
17872 + *mask |= IATTR_TAG;
17875 + switch (in->i_sb->s_magic) {
17876 + case PROC_SUPER_MAGIC:
17877 + entry = PROC_I(in)->pde;
17879 + /* check for specific inodes? */
17881 + *mask |= IATTR_FLAGS;
17883 + *flags |= (entry->vx_flags & IATTR_FLAGS);
17885 + *flags |= (PROC_I(in)->vx_flags & IATTR_FLAGS);
17888 + case DEVPTS_SUPER_MAGIC:
17889 + *tag = i_tag_read(in);
17890 + *mask |= IATTR_TAG;
17899 +int vc_get_iattr(void __user *data)
17901 + struct path path;
17902 + struct vcmd_ctx_iattr_v1 vc_data = { .tag = -1 };
17905 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17908 + ret = user_lpath(vc_data.name, &path);
17910 + ret = __vc_get_iattr(path.dentry->d_inode,
17911 + &vc_data.tag, &vc_data.flags, &vc_data.mask);
17917 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
17922 +#ifdef CONFIG_COMPAT
17924 +int vc_get_iattr_x32(void __user *data)
17926 + struct path path;
17927 + struct vcmd_ctx_iattr_v1_x32 vc_data = { .tag = -1 };
17930 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17933 + ret = user_lpath(compat_ptr(vc_data.name_ptr), &path);
17935 + ret = __vc_get_iattr(path.dentry->d_inode,
17936 + &vc_data.tag, &vc_data.flags, &vc_data.mask);
17942 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
17947 +#endif /* CONFIG_COMPAT */
17950 +int vc_fget_iattr(uint32_t fd, void __user *data)
17952 + struct file *filp;
17953 + struct vcmd_ctx_fiattr_v0 vc_data = { .tag = -1 };
17956 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17960 + if (!filp || !filp->f_path.dentry || !filp->f_path.dentry->d_inode)
17963 + ret = __vc_get_iattr(filp->f_path.dentry->d_inode,
17964 + &vc_data.tag, &vc_data.flags, &vc_data.mask);
17968 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
17974 +static int __vc_set_iattr(struct dentry *de, uint32_t *tag, uint32_t *flags, uint32_t *mask)
17976 + struct inode *in = de->d_inode;
17977 + int error = 0, is_proc = 0, has_tag = 0;
17978 + struct iattr attr = { 0 };
17980 + if (!in || !in->i_sb)
17983 + is_proc = (in->i_sb->s_magic == PROC_SUPER_MAGIC);
17984 + if ((*mask & IATTR_FLAGS) && !is_proc)
17987 + has_tag = IS_TAGGED(in) ||
17988 + (in->i_sb->s_magic == DEVPTS_SUPER_MAGIC);
17989 + if ((*mask & IATTR_TAG) && !has_tag)
17993 + if (*mask & IATTR_TAG) {
17994 + attr.ia_tag = make_ktag(&init_user_ns, *tag);
17995 + attr.ia_valid |= ATTR_TAG;
17998 + if (*mask & IATTR_FLAGS) {
17999 + struct proc_dir_entry *entry = PROC_I(in)->pde;
18000 + unsigned int iflags = PROC_I(in)->vx_flags;
18002 + iflags = (iflags & ~(*mask & IATTR_FLAGS))
18003 + | (*flags & IATTR_FLAGS);
18004 + PROC_I(in)->vx_flags = iflags;
18006 + entry->vx_flags = iflags;
18009 + if (*mask & (IATTR_IMMUTABLE | IATTR_IXUNLINK |
18010 + IATTR_BARRIER | IATTR_COW)) {
18011 + int iflags = in->i_flags;
18012 + int vflags = in->i_vflags;
18014 + if (*mask & IATTR_IMMUTABLE) {
18015 + if (*flags & IATTR_IMMUTABLE)
18016 + iflags |= S_IMMUTABLE;
18018 + iflags &= ~S_IMMUTABLE;
18020 + if (*mask & IATTR_IXUNLINK) {
18021 + if (*flags & IATTR_IXUNLINK)
18022 + iflags |= S_IXUNLINK;
18024 + iflags &= ~S_IXUNLINK;
18026 + if (S_ISDIR(in->i_mode) && (*mask & IATTR_BARRIER)) {
18027 + if (*flags & IATTR_BARRIER)
18028 + vflags |= V_BARRIER;
18030 + vflags &= ~V_BARRIER;
18032 + if (S_ISREG(in->i_mode) && (*mask & IATTR_COW)) {
18033 + if (*flags & IATTR_COW)
18036 + vflags &= ~V_COW;
18038 + if (in->i_op && in->i_op->sync_flags) {
18039 + error = in->i_op->sync_flags(in, iflags, vflags);
18045 + if (attr.ia_valid) {
18046 + if (in->i_op && in->i_op->setattr)
18047 + error = in->i_op->setattr(de, &attr);
18049 + error = setattr_prepare(de, &attr);
18051 + setattr_copy(in, &attr);
18052 + mark_inode_dirty(in);
18058 + inode_unlock(in);
18062 +int vc_set_iattr(void __user *data)
18064 + struct path path;
18065 + struct vcmd_ctx_iattr_v1 vc_data;
18068 + if (!capable(CAP_LINUX_IMMUTABLE))
18070 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18073 + ret = user_lpath(vc_data.name, &path);
18075 + ret = __vc_set_iattr(path.dentry,
18076 + &vc_data.tag, &vc_data.flags, &vc_data.mask);
18080 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18085 +#ifdef CONFIG_COMPAT
18087 +int vc_set_iattr_x32(void __user *data)
18089 + struct path path;
18090 + struct vcmd_ctx_iattr_v1_x32 vc_data;
18093 + if (!capable(CAP_LINUX_IMMUTABLE))
18095 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18098 + ret = user_lpath(compat_ptr(vc_data.name_ptr), &path);
18100 + ret = __vc_set_iattr(path.dentry,
18101 + &vc_data.tag, &vc_data.flags, &vc_data.mask);
18105 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18110 +#endif /* CONFIG_COMPAT */
18112 +int vc_fset_iattr(uint32_t fd, void __user *data)
18114 + struct file *filp;
18115 + struct vcmd_ctx_fiattr_v0 vc_data;
18118 + if (!capable(CAP_LINUX_IMMUTABLE))
18120 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18124 + if (!filp || !filp->f_path.dentry || !filp->f_path.dentry->d_inode)
18127 + ret = __vc_set_iattr(filp->f_path.dentry, &vc_data.tag,
18128 + &vc_data.flags, &vc_data.mask);
18132 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18138 +enum { Opt_notagcheck, Opt_tag, Opt_notag, Opt_tagid, Opt_err };
18140 +static match_table_t tokens = {
18141 + {Opt_notagcheck, "notagcheck"},
18142 +#ifdef CONFIG_PROPAGATE
18143 + {Opt_notag, "notag"},
18144 + {Opt_tag, "tag"},
18145 + {Opt_tagid, "tagid=%u"},
18151 +static void __dx_parse_remove(char *string, char *opt)
18153 + char *p = strstr(string, opt);
18157 + while (*q != '\0' && *q != ',')
18166 +int dx_parse_tag(char *string, vtag_t *tag, int remove, int *mnt_flags,
18167 + unsigned long *flags)
18170 + substring_t args[MAX_OPT_ARGS];
18172 + char *s, *p, *opts;
18173 +#if defined(CONFIG_PROPAGATE) || defined(CONFIG_VSERVER_DEBUG)
18179 + s = kstrdup(string, GFP_KERNEL | GFP_ATOMIC);
18184 + while ((p = strsep(&opts, ",")) != NULL) {
18185 + token = match_token(p, tokens, args);
18188 +#ifdef CONFIG_PROPAGATE
18193 + __dx_parse_remove(s, "tag");
18194 + *mnt_flags |= MNT_TAGID;
18195 + set |= MNT_TAGID;
18199 + __dx_parse_remove(s, "notag");
18200 + *mnt_flags |= MNT_NOTAG;
18201 + set |= MNT_NOTAG;
18204 + if (tag && !match_int(args, &option))
18207 + __dx_parse_remove(s, "tagid");
18208 + *mnt_flags |= MNT_TAGID;
18209 + set |= MNT_TAGID;
18211 +#endif /* CONFIG_PROPAGATE */
18212 + case Opt_notagcheck:
18214 + __dx_parse_remove(s, "notagcheck");
18215 + *flags |= MS_NOTAGCHECK;
18216 + set |= MS_NOTAGCHECK;
18219 + vxdprintk(VXD_CBIT(tag, 7),
18220 + "dx_parse_tag(" VS_Q("%s") "): %d:#%d",
18221 + p, token, option);
18224 + strcpy(string, s);
18229 +#ifdef CONFIG_PROPAGATE
18231 +void __dx_propagate_tag(struct nameidata *nd, struct inode *inode)
18233 + vtag_t new_tag = 0;
18234 + struct vfsmount *mnt;
18239 + mnt = nd->path.mnt;
18243 + propagate = (mnt->mnt_flags & MNT_TAGID);
18245 + new_tag = mnt->mnt_tag;
18247 + vxdprintk(VXD_CBIT(tag, 7),
18248 + "dx_propagate_tag(%p[#%lu.%d]): %d,%d",
18249 + inode, inode->i_ino, inode->i_tag,
18250 + new_tag, (propagate) ? 1 : 0);
18253 + i_tag_write(inode, new_tag);
18256 +#include <linux/module.h>
18258 +EXPORT_SYMBOL_GPL(__dx_propagate_tag);
18260 +#endif /* CONFIG_PROPAGATE */
18262 diff -NurpP --minimal linux-4.9.217/kernel/vserver/Kconfig linux-4.9.217-vs2.3.9.12/kernel/vserver/Kconfig
18263 --- linux-4.9.217/kernel/vserver/Kconfig 1970-01-01 00:00:00.000000000 +0000
18264 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/Kconfig 2018-10-20 04:58:15.000000000 +0000
18267 +# Linux VServer configuration
18270 +menu "Linux VServer"
18272 +config VSERVER_AUTO_LBACK
18273 + bool "Automatically Assign Loopback IP"
18276 + Automatically assign a guest specific loopback
18277 + IP and add it to the kernel network stack on
18280 +config VSERVER_AUTO_SINGLE
18281 + bool "Automatic Single IP Special Casing"
18284 + This allows network contexts with a single IP to
18285 + automatically remap 0.0.0.0 bindings to that IP,
18286 + avoiding further network checks and improving
18289 + (note: such guests do not allow changing the IP
18290 + on the fly and do not show loopback addresses)
18292 +config VSERVER_COWBL
18293 + bool "Enable COW Immutable Link Breaking"
18296 + This enables the COW (Copy-On-Write) link break code.
18297 + It allows you to treat unified files like normal files
18298 + when writing to them (which will implicitly break the
18299 + link and create a copy of the unified file)
18301 +config VSERVER_VTIME
18302 + bool "Enable Virtualized Guest Time (EXPERIMENTAL)"
18305 + This enables per guest time offsets to allow for
18306 + adjusting the system clock individually per guest.
18307 + This adds some overhead to the time functions and
18308 + therefore should not be enabled without good reason.
18310 +config VSERVER_DEVICE
18311 + bool "Enable Guest Device Mapping (EXPERIMENTAL)"
18314 + This enables generic device remapping.
18316 +config VSERVER_PROC_SECURE
18317 + bool "Enable Proc Security"
18318 + depends on PROC_FS
18321 + This configures ProcFS security to initially hide
18322 + non-process entries for all contexts except the main and
18323 + spectator context (i.e. for all guests), which is a secure
18326 + (note: on 1.2x the entries were visible by default)
18329 + prompt "Persistent Inode Tagging"
18330 + default TAGGING_ID24
18332 + This adds persistent context information to filesystems
18333 + mounted with the tagxid option. Tagging is a requirement
18334 + for per-context disk limits and per-context quota.
18337 +config TAGGING_NONE
18340 + do not store per-context information in inodes.
18342 +config TAGGING_UID16
18343 + bool "UID16/GID32"
18345 + reduces UID to 16 bit, but leaves GID at 32 bit.
18347 +config TAGGING_GID16
18348 + bool "UID32/GID16"
18350 + reduces GID to 16 bit, but leaves UID at 32 bit.
18352 +config TAGGING_ID24
18353 + bool "UID24/GID24"
18355 + uses the upper 8bit from UID and GID for XID tagging
18356 + which leaves 24bit for UID/GID each, which should be
18357 + more than sufficient for normal use.
18359 +config TAGGING_INTERN
18360 + bool "UID32/GID32"
18362 + this uses otherwise reserved inode fields in the on
18363 + disk representation, which limits the use to a few
18364 + filesystems (currently ext2 and ext3)
18369 + bool "Tag NFSD User Auth and Files"
18372 + Enable this if you do want the in-kernel NFS
18373 + Server to use the tagging specified above.
18374 + (will require patched clients too)
18376 +config VSERVER_PRIVACY
18377 + bool "Honor Privacy Aspects of Guests"
18380 + When enabled, most context checks will disallow
18381 + access to structures assigned to a specific context,
18382 + like ptys or loop devices.
18384 +config VSERVER_CONTEXTS
18385 + int "Maximum number of Contexts (1-65533)" if EMBEDDED
18387 + default "768" if 64BIT
18390 + This setting will optimize certain data structures
18391 + and memory allocations according to the expected
18394 + note: this is not a strict upper limit.
18396 +config VSERVER_WARN
18397 + bool "VServer Warnings"
18400 + This enables various runtime warnings, which will
18401 + notify about potential manipulation attempts or
18402 + resource shortage. It is generally considered to
18403 + be a good idea to have that enabled.
18405 +config VSERVER_WARN_DEVPTS
18406 + bool "VServer DevPTS Warnings"
18407 + depends on VSERVER_WARN
18410 + This enables DevPTS related warnings, issued when a
18411 + process inside a context tries to lookup or access
18412 + a dynamic pts from the host or a different context.
18414 +config VSERVER_DEBUG
18415 + bool "VServer Debugging Code"
18418 + Set this to yes if you want to be able to activate
18419 + debugging output at runtime. It adds a very small
18420 + overhead to all vserver related functions and
18421 + increases the kernel size by about 20k.
18423 +config VSERVER_HISTORY
18424 + bool "VServer History Tracing"
18425 + depends on VSERVER_DEBUG
18428 + Set this to yes if you want to record the history of
18429 + linux-vserver activities, so they can be replayed in
18430 + the event of a kernel panic or oops.
18432 +config VSERVER_HISTORY_SIZE
18433 + int "Per-CPU History Size (32-65536)"
18434 + depends on VSERVER_HISTORY
18438 + This allows you to specify the number of entries in
18439 + the per-CPU history buffer.
18441 +config VSERVER_EXTRA_MNT_CHECK
18442 + bool "Extra Checks for Reachability"
18445 + Set this to yes if you want to do extra checks for
18446 + vfsmount reachability in the proc filesystem code.
18447 + This shouldn't be required on any setup utilizing
18451 + prompt "Quotes used in debug and warn messages"
18452 + default QUOTES_ISO8859
18454 +config QUOTES_ISO8859
18455 + bool "Extended ASCII (ISO 8859) angle quotes"
18457 + This uses the extended ASCII characters \xbb
18458 + and \xab for quoting file and process names.
18460 +config QUOTES_UTF8
18461 + bool "UTF-8 angle quotes"
18463 + This uses the UTF-8 sequences for angle
18464 + quotes to quote file and process names.
18466 +config QUOTES_ASCII
18467 + bool "ASCII single quotes"
18469 + This uses the ASCII single quote character
18470 + (\x27) to quote file and process names.
18480 + select NAMESPACES
18486 +config VSERVER_SECURITY
18488 + depends on SECURITY
18490 + select SECURITY_CAPABILITIES
18492 +config VSERVER_DISABLED
18496 diff -NurpP --minimal linux-4.9.217/kernel/vserver/limit.c linux-4.9.217-vs2.3.9.12/kernel/vserver/limit.c
18497 --- linux-4.9.217/kernel/vserver/limit.c 1970-01-01 00:00:00.000000000 +0000
18498 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/limit.c 2018-10-20 04:58:15.000000000 +0000
18501 + * linux/kernel/vserver/limit.c
18503 + * Virtual Server: Context Limits
18505 + * Copyright (C) 2004-2010 Herbert Pötzl
18507 + * V0.01 broken out from vcontext V0.05
18508 + * V0.02 changed vcmds to vxi arg
18509 + * V0.03 added memory cgroup support
18513 +#include <linux/sched.h>
18514 +#include <linux/module.h>
18515 +#include <linux/memcontrol.h>
18516 +#include <linux/page_counter.h>
18517 +#include <linux/vs_limit.h>
18518 +#include <linux/vserver/limit.h>
18519 +#include <linux/vserver/limit_cmd.h>
18521 +#include <asm/uaccess.h>
18524 +const char *vlimit_name[NUM_LIMITS] = {
18525 + [RLIMIT_CPU] = "CPU",
18526 + [RLIMIT_NPROC] = "NPROC",
18527 + [RLIMIT_NOFILE] = "NOFILE",
18528 + [RLIMIT_LOCKS] = "LOCKS",
18529 + [RLIMIT_SIGPENDING] = "SIGP",
18530 + [RLIMIT_MSGQUEUE] = "MSGQ",
18532 + [VLIMIT_NSOCK] = "NSOCK",
18533 + [VLIMIT_OPENFD] = "OPENFD",
18534 + [VLIMIT_SHMEM] = "SHMEM",
18535 + [VLIMIT_DENTRY] = "DENTRY",
18538 +EXPORT_SYMBOL_GPL(vlimit_name);
18540 +#define MASK_ENTRY(x) (1 << (x))
18542 +const struct vcmd_ctx_rlimit_mask_v0 vlimit_mask = {
18545 + , /* softlimit */
18548 + MASK_ENTRY( RLIMIT_NPROC ) |
18549 + MASK_ENTRY( RLIMIT_NOFILE ) |
18550 + MASK_ENTRY( RLIMIT_LOCKS ) |
18551 + MASK_ENTRY( RLIMIT_MSGQUEUE ) |
18553 + MASK_ENTRY( VLIMIT_NSOCK ) |
18554 + MASK_ENTRY( VLIMIT_OPENFD ) |
18555 + MASK_ENTRY( VLIMIT_SHMEM ) |
18556 + MASK_ENTRY( VLIMIT_DENTRY ) |
18559 + /* accounting only */
18560 +uint32_t account_mask =
18561 + MASK_ENTRY( VLIMIT_SEMARY ) |
18562 + MASK_ENTRY( VLIMIT_NSEMS ) |
18563 + MASK_ENTRY( VLIMIT_MAPPED ) |
18567 +static int is_valid_vlimit(int id)
18569 + uint32_t mask = vlimit_mask.minimum |
18570 + vlimit_mask.softlimit | vlimit_mask.maximum;
18571 + return mask & (1 << id);
18574 +static int is_accounted_vlimit(int id)
18576 + if (is_valid_vlimit(id))
18578 + return account_mask & (1 << id);
18582 +static inline uint64_t vc_get_soft(struct vx_info *vxi, int id)
18584 + rlim_t limit = __rlim_soft(&vxi->limit, id);
18585 + return VX_VLIM(limit);
18588 +static inline uint64_t vc_get_hard(struct vx_info *vxi, int id)
18590 + rlim_t limit = __rlim_hard(&vxi->limit, id);
18591 + return VX_VLIM(limit);
18594 +static int do_get_rlimit(struct vx_info *vxi, uint32_t id,
18595 + uint64_t *minimum, uint64_t *softlimit, uint64_t *maximum)
18597 + if (!is_valid_vlimit(id))
18601 + *minimum = CRLIM_UNSET;
18603 + *softlimit = vc_get_soft(vxi, id);
18605 + *maximum = vc_get_hard(vxi, id);
18609 +int vc_get_rlimit(struct vx_info *vxi, void __user *data)
18611 + struct vcmd_ctx_rlimit_v0 vc_data;
18614 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18617 + ret = do_get_rlimit(vxi, vc_data.id,
18618 + &vc_data.minimum, &vc_data.softlimit, &vc_data.maximum);
18622 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18627 +static int do_set_rlimit(struct vx_info *vxi, uint32_t id,
18628 + uint64_t minimum, uint64_t softlimit, uint64_t maximum)
18630 + if (!is_valid_vlimit(id))
18633 + if (maximum != CRLIM_KEEP)
18634 + __rlim_hard(&vxi->limit, id) = VX_RLIM(maximum);
18635 + if (softlimit != CRLIM_KEEP)
18636 + __rlim_soft(&vxi->limit, id) = VX_RLIM(softlimit);
18638 + /* clamp soft limit */
18639 + if (__rlim_soft(&vxi->limit, id) > __rlim_hard(&vxi->limit, id))
18640 + __rlim_soft(&vxi->limit, id) = __rlim_hard(&vxi->limit, id);
18645 +int vc_set_rlimit(struct vx_info *vxi, void __user *data)
18647 + struct vcmd_ctx_rlimit_v0 vc_data;
18649 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18652 + return do_set_rlimit(vxi, vc_data.id,
18653 + vc_data.minimum, vc_data.softlimit, vc_data.maximum);
18656 +#ifdef CONFIG_IA32_EMULATION
18658 +int vc_set_rlimit_x32(struct vx_info *vxi, void __user *data)
18660 + struct vcmd_ctx_rlimit_v0_x32 vc_data;
18662 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18665 + return do_set_rlimit(vxi, vc_data.id,
18666 + vc_data.minimum, vc_data.softlimit, vc_data.maximum);
18669 +int vc_get_rlimit_x32(struct vx_info *vxi, void __user *data)
18671 + struct vcmd_ctx_rlimit_v0_x32 vc_data;
18674 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18677 + ret = do_get_rlimit(vxi, vc_data.id,
18678 + &vc_data.minimum, &vc_data.softlimit, &vc_data.maximum);
18682 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18687 +#endif /* CONFIG_IA32_EMULATION */
18690 +int vc_get_rlimit_mask(uint32_t id, void __user *data)
18692 + if (copy_to_user(data, &vlimit_mask, sizeof(vlimit_mask)))
18698 +static inline void vx_reset_hits(struct _vx_limit *limit)
18702 + for (lim = 0; lim < NUM_LIMITS; lim++) {
18703 + atomic_set(&__rlim_lhit(limit, lim), 0);
18707 +int vc_reset_hits(struct vx_info *vxi, void __user *data)
18709 + vx_reset_hits(&vxi->limit);
18713 +static inline void vx_reset_minmax(struct _vx_limit *limit)
18718 + for (lim = 0; lim < NUM_LIMITS; lim++) {
18719 + value = __rlim_get(limit, lim);
18720 + __rlim_rmax(limit, lim) = value;
18721 + __rlim_rmin(limit, lim) = value;
18725 +int vc_reset_minmax(struct vx_info *vxi, void __user *data)
18727 + vx_reset_minmax(&vxi->limit);
18732 +int vc_rlimit_stat(struct vx_info *vxi, void __user *data)
18734 + struct vcmd_rlimit_stat_v0 vc_data;
18735 + struct _vx_limit *limit = &vxi->limit;
18738 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18742 + if (!is_accounted_vlimit(id))
18745 + vx_limit_fixup(limit, id);
18746 + vc_data.hits = atomic_read(&__rlim_lhit(limit, id));
18747 + vc_data.value = __rlim_get(limit, id);
18748 + vc_data.minimum = __rlim_rmin(limit, id);
18749 + vc_data.maximum = __rlim_rmax(limit, id);
18751 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18757 +#ifdef CONFIG_MEMCG
18759 +void dump_sysinfo(struct sysinfo *si)
18761 + printk(KERN_INFO "sysinfo: memunit=%u\n"
18762 + "\ttotalram:\t%lu\n"
18763 + "\tfreeram:\t%lu\n"
18764 + "\tsharedram:\t%lu\n"
18765 + "\tbufferram:\t%lu\n"
18766 + "\ttotalswap:\t%lu\n"
18767 + "\tfreeswap:\t%lu\n"
18768 + "\ttotalhigh:\t%lu\n"
18769 + "\tfreehigh:\t%lu\n",
18781 +void vx_vsi_meminfo(struct sysinfo *val)
18783 + struct mem_cgroup *mcg;
18784 + unsigned long res_limit, res_usage;
18787 + if (VXD_CBIT(cvirt, 4))
18788 + dump_sysinfo(val);
18791 + mcg = mem_cgroup_from_task(current);
18792 + if (VXD_CBIT(cvirt, 5))
18793 + dump_mem_cgroup(mcg);
18794 + rcu_read_unlock();
18798 + res_limit = mem_cgroup_mem_limit_pages(mcg);
18799 + res_usage = mem_cgroup_mem_usage_pages(mcg);
18800 + shift = val->mem_unit == 1 ? PAGE_SHIFT : 0;
18802 + if (res_limit != PAGE_COUNTER_MAX)
18803 + val->totalram = res_limit << shift;
18804 + val->freeram = val->totalram - (res_usage << shift);
18805 + val->bufferram = 0;
18806 + val->totalhigh = 0;
18807 + val->freehigh = 0;
18812 +void vx_vsi_swapinfo(struct sysinfo *val)
18814 +#ifdef CONFIG_MEMCG_SWAP
18815 + struct mem_cgroup *mcg;
18816 + unsigned long res_limit, res_usage, memsw_limit, memsw_usage;
18817 + signed long swap_limit, swap_usage;
18820 + if (VXD_CBIT(cvirt, 6))
18821 + dump_sysinfo(val);
18824 + mcg = mem_cgroup_from_task(current);
18825 + if (VXD_CBIT(cvirt, 7))
18826 + dump_mem_cgroup(mcg);
18827 + rcu_read_unlock();
18831 + res_limit = mem_cgroup_mem_limit_pages(mcg);
18833 + /* memory unlimited */
18834 + if (res_limit == PAGE_COUNTER_MAX)
18837 + res_usage = mem_cgroup_mem_usage_pages(mcg);
18838 + memsw_limit = mem_cgroup_memsw_limit_pages(mcg);
18839 + memsw_usage = mem_cgroup_memsw_usage_pages(mcg);
18840 + shift = val->mem_unit == 1 ? PAGE_SHIFT : 0;
18842 + swap_limit = memsw_limit - res_limit;
18843 + /* we have a swap limit? */
18844 + if (memsw_limit != PAGE_COUNTER_MAX)
18845 + val->totalswap = swap_limit << shift;
18847 + /* calculate swap part */
18848 + swap_usage = (memsw_usage > res_usage) ?
18849 + memsw_usage - res_usage : 0;
18851 + /* total shown minus usage gives free swap */
18852 + val->freeswap = (swap_usage < swap_limit) ?
18853 + val->totalswap - (swap_usage << shift) : 0;
18855 +#else /* !CONFIG_MEMCG_SWAP */
18856 + val->totalswap = 0;
18857 + val->freeswap = 0;
18858 +#endif /* !CONFIG_MEMCG_SWAP */
18862 +long vx_vsi_cached(struct sysinfo *val)
18865 +#ifdef CONFIG_MEMCG_BROKEN
18866 + struct mem_cgroup *mcg;
18868 + if (VXD_CBIT(cvirt, 8))
18869 + dump_sysinfo(val);
18872 + mcg = mem_cgroup_from_task(current);
18873 + if (VXD_CBIT(cvirt, 9))
18874 + dump_mem_cgroup(mcg);
18875 + rcu_read_unlock();
18879 + // cache = mem_cgroup_stat_read_cache(mcg);
18884 +#endif /* CONFIG_MEMCG */
18886 diff -NurpP --minimal linux-4.9.217/kernel/vserver/limit_init.h linux-4.9.217-vs2.3.9.12/kernel/vserver/limit_init.h
18887 --- linux-4.9.217/kernel/vserver/limit_init.h 1970-01-01 00:00:00.000000000 +0000
18888 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/limit_init.h 2018-10-20 04:58:15.000000000 +0000
18892 +static inline void vx_info_init_limit(struct _vx_limit *limit)
18896 + for (lim = 0; lim < NUM_LIMITS; lim++) {
18897 + __rlim_soft(limit, lim) = RLIM_INFINITY;
18898 + __rlim_hard(limit, lim) = RLIM_INFINITY;
18899 + __rlim_set(limit, lim, 0);
18900 + atomic_set(&__rlim_lhit(limit, lim), 0);
18901 + __rlim_rmin(limit, lim) = 0;
18902 + __rlim_rmax(limit, lim) = 0;
18906 +static inline void vx_info_exit_limit(struct _vx_limit *limit)
18911 + for (lim = 0; lim < NUM_LIMITS; lim++) {
18912 + if ((1 << lim) & VLIM_NOCHECK)
18914 + value = __rlim_get(limit, lim);
18915 + vxwprintk_xid(value,
18916 + "!!! limit: %p[%s,%d] = %ld on exit.",
18917 + limit, vlimit_name[lim], lim, (long)value);
18921 diff -NurpP --minimal linux-4.9.217/kernel/vserver/limit_proc.h linux-4.9.217-vs2.3.9.12/kernel/vserver/limit_proc.h
18922 --- linux-4.9.217/kernel/vserver/limit_proc.h 1970-01-01 00:00:00.000000000 +0000
18923 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/limit_proc.h 2018-10-20 04:58:15.000000000 +0000
18925 +#ifndef _VX_LIMIT_PROC_H
18926 +#define _VX_LIMIT_PROC_H
18928 +#include <linux/vserver/limit_int.h>
18931 +#define VX_LIMIT_FMT ":\t%8ld\t%8ld/%8ld\t%8lld/%8lld\t%6d\n"
18932 +#define VX_LIMIT_TOP \
18933 + "Limit\t current\t min/max\t\t soft/hard\t\thits\n"
18935 +#define VX_LIMIT_ARG(r) \
18936 + (unsigned long)__rlim_get(limit, r), \
18937 + (unsigned long)__rlim_rmin(limit, r), \
18938 + (unsigned long)__rlim_rmax(limit, r), \
18939 + VX_VLIM(__rlim_soft(limit, r)), \
18940 + VX_VLIM(__rlim_hard(limit, r)), \
18941 + atomic_read(&__rlim_lhit(limit, r))
18943 +static inline int vx_info_proc_limit(struct _vx_limit *limit, char *buffer)
18945 + vx_limit_fixup(limit, -1);
18946 + return sprintf(buffer, VX_LIMIT_TOP
18947 + "PROC" VX_LIMIT_FMT
18948 + "VM" VX_LIMIT_FMT
18949 + "VML" VX_LIMIT_FMT
18950 + "RSS" VX_LIMIT_FMT
18951 + "ANON" VX_LIMIT_FMT
18952 + "RMAP" VX_LIMIT_FMT
18953 + "FILES" VX_LIMIT_FMT
18954 + "OFD" VX_LIMIT_FMT
18955 + "LOCKS" VX_LIMIT_FMT
18956 + "SOCK" VX_LIMIT_FMT
18957 + "MSGQ" VX_LIMIT_FMT
18958 + "SHM" VX_LIMIT_FMT
18959 + "SEMA" VX_LIMIT_FMT
18960 + "SEMS" VX_LIMIT_FMT
18961 + "DENT" VX_LIMIT_FMT,
18962 + VX_LIMIT_ARG(RLIMIT_NPROC),
18963 + VX_LIMIT_ARG(RLIMIT_AS),
18964 + VX_LIMIT_ARG(RLIMIT_MEMLOCK),
18965 + VX_LIMIT_ARG(RLIMIT_RSS),
18966 + VX_LIMIT_ARG(VLIMIT_ANON),
18967 + VX_LIMIT_ARG(VLIMIT_MAPPED),
18968 + VX_LIMIT_ARG(RLIMIT_NOFILE),
18969 + VX_LIMIT_ARG(VLIMIT_OPENFD),
18970 + VX_LIMIT_ARG(RLIMIT_LOCKS),
18971 + VX_LIMIT_ARG(VLIMIT_NSOCK),
18972 + VX_LIMIT_ARG(RLIMIT_MSGQUEUE),
18973 + VX_LIMIT_ARG(VLIMIT_SHMEM),
18974 + VX_LIMIT_ARG(VLIMIT_SEMARY),
18975 + VX_LIMIT_ARG(VLIMIT_NSEMS),
18976 + VX_LIMIT_ARG(VLIMIT_DENTRY));
18979 +#endif /* _VX_LIMIT_PROC_H */
18982 diff -NurpP --minimal linux-4.9.217/kernel/vserver/Makefile linux-4.9.217-vs2.3.9.12/kernel/vserver/Makefile
18983 --- linux-4.9.217/kernel/vserver/Makefile 1970-01-01 00:00:00.000000000 +0000
18984 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/Makefile 2018-10-20 04:58:15.000000000 +0000
18987 +# Makefile for the Linux vserver routines.
18991 +obj-y += vserver.o
18993 +vserver-y := switch.o context.o space.o sched.o network.o inode.o \
18994 + limit.o cvirt.o cacct.o signal.o helper.o init.o \
18997 +vserver-$(CONFIG_INET) += inet.o
18998 +vserver-$(CONFIG_PROC_FS) += proc.o
18999 +vserver-$(CONFIG_VSERVER_DEBUG) += sysctl.o debug.o
19000 +vserver-$(CONFIG_VSERVER_HISTORY) += history.o
19001 +vserver-$(CONFIG_VSERVER_MONITOR) += monitor.o
19002 +vserver-$(CONFIG_VSERVER_DEVICE) += device.o
19004 diff -NurpP --minimal linux-4.9.217/kernel/vserver/network.c linux-4.9.217-vs2.3.9.12/kernel/vserver/network.c
19005 --- linux-4.9.217/kernel/vserver/network.c 1970-01-01 00:00:00.000000000 +0000
19006 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/network.c 2018-10-20 04:58:15.000000000 +0000
19009 + * linux/kernel/vserver/network.c
19011 + * Virtual Server: Network Support
19013 + * Copyright (C) 2003-2007 Herbert Pötzl
19015 + * V0.01 broken out from vcontext V0.05
19016 + * V0.02 cleaned up implementation
19017 + * V0.03 added equiv nx commands
19018 + * V0.04 switch to RCU based hash
19019 + * V0.05 and back to locking again
19020 + * V0.06 changed vcmds to nxi arg
19021 + * V0.07 have __create claim() the nxi
19025 +#include <linux/err.h>
19026 +#include <linux/slab.h>
19027 +#include <linux/rcupdate.h>
19028 +#include <net/ipv6.h>
19030 +#include <linux/vs_network.h>
19031 +#include <linux/vs_pid.h>
19032 +#include <linux/vserver/network_cmd.h>
19035 +atomic_t nx_global_ctotal = ATOMIC_INIT(0);
19036 +atomic_t nx_global_cactive = ATOMIC_INIT(0);
19038 +static struct kmem_cache *nx_addr_v4_cachep = NULL;
19039 +static struct kmem_cache *nx_addr_v6_cachep = NULL;
19042 +static int __init init_network(void)
19044 + nx_addr_v4_cachep = kmem_cache_create("nx_v4_addr_cache",
19045 + sizeof(struct nx_addr_v4), 0,
19046 + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
19047 + nx_addr_v6_cachep = kmem_cache_create("nx_v6_addr_cache",
19048 + sizeof(struct nx_addr_v6), 0,
19049 + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
19054 +/* __alloc_nx_addr_v4(): zeroed entry from the v4 slab cache, ERR_PTR(-ENOMEM) on failure */
19056 +static inline struct nx_addr_v4 *__alloc_nx_addr_v4(void)
19058 + struct nx_addr_v4 *nxa = kmem_cache_zalloc(
19059 + nx_addr_v4_cachep, GFP_KERNEL);
19061 + if (!nxa) /* slab allocation fails with NULL, not ERR_PTR; callers use PTR_ERR() */
19062 + nxa = ERR_PTR(-ENOMEM);
19066 +/* __dealloc_nx_addr_v4() */
19068 +static inline void __dealloc_nx_addr_v4(struct nx_addr_v4 *nxa)
19070 + kmem_cache_free(nx_addr_v4_cachep, nxa);
19073 +/* __dealloc_nx_addr_v4_all() */
19075 +static inline void __dealloc_nx_addr_v4_all(struct nx_addr_v4 *nxa)
19078 + struct nx_addr_v4 *next = nxa->next;
19080 + __dealloc_nx_addr_v4(nxa);
19086 +#ifdef CONFIG_IPV6
19088 +/* __alloc_nx_addr_v6(): zeroed entry from the v6 slab cache, ERR_PTR(-ENOMEM) on failure */
19090 +static inline struct nx_addr_v6 *__alloc_nx_addr_v6(void)
19092 + struct nx_addr_v6 *nxa = kmem_cache_zalloc(
19093 + nx_addr_v6_cachep, GFP_KERNEL);
19095 + if (!nxa) /* slab allocation fails with NULL, not ERR_PTR; callers use PTR_ERR() */
19096 + nxa = ERR_PTR(-ENOMEM);
19100 +/* __dealloc_nx_addr_v6() */
19102 +static inline void __dealloc_nx_addr_v6(struct nx_addr_v6 *nxa)
19104 + kmem_cache_free(nx_addr_v6_cachep, nxa);
19107 +/* __dealloc_nx_addr_v6_all() */
19109 +static inline void __dealloc_nx_addr_v6_all(struct nx_addr_v6 *nxa)
19112 + struct nx_addr_v6 *next = nxa->next;
19114 + __dealloc_nx_addr_v6(nxa);
19119 +#endif /* CONFIG_IPV6 */
19121 +/* __alloc_nx_info()
19123 + * allocate an initialized nx_info struct
19124 + * doesn't make it visible (hash) */
19126 +static struct nx_info *__alloc_nx_info(vnid_t nid)
19128 + struct nx_info *new = NULL;
19130 + vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid);
19132 + /* would this benefit from a slab cache? */
19133 + new = kmalloc(sizeof(struct nx_info), GFP_KERNEL);
19137 + memset(new, 0, sizeof(struct nx_info));
19138 + new->nx_id = nid;
19139 + INIT_HLIST_NODE(&new->nx_hlist);
19140 + atomic_set(&new->nx_usecnt, 0);
19141 + atomic_set(&new->nx_tasks, 0);
19142 + spin_lock_init(&new->addr_lock);
19143 + new->nx_state = 0;
19145 + new->nx_flags = NXF_INIT_SET;
19147 + /* rest of init goes here */
19149 + new->v4_lback.s_addr = htonl(INADDR_LOOPBACK);
19150 + new->v4_bcast.s_addr = htonl(INADDR_BROADCAST);
19152 + vxdprintk(VXD_CBIT(nid, 0),
19153 + "alloc_nx_info(%d) = %p", nid, new);
19154 + atomic_inc(&nx_global_ctotal);
19158 +/* __dealloc_nx_info()
19160 + * final disposal of nx_info */
19162 +static void __dealloc_nx_info(struct nx_info *nxi)
19164 + vxdprintk(VXD_CBIT(nid, 0),
19165 + "dealloc_nx_info(%p)", nxi);
19167 + nxi->nx_hlist.next = LIST_POISON1;
19170 + BUG_ON(atomic_read(&nxi->nx_usecnt));
19171 + BUG_ON(atomic_read(&nxi->nx_tasks));
19173 + __dealloc_nx_addr_v4_all(nxi->v4.next);
19174 +#ifdef CONFIG_IPV6
19175 + __dealloc_nx_addr_v6_all(nxi->v6.next);
19178 + nxi->nx_state |= NXS_RELEASED;
19180 + atomic_dec(&nx_global_ctotal);
19183 +static void __shutdown_nx_info(struct nx_info *nxi)
19185 + nxi->nx_state |= NXS_SHUTDOWN;
19186 + vs_net_change(nxi, VSC_NETDOWN);
19189 +/* exported stuff */
19191 +void free_nx_info(struct nx_info *nxi)
19193 + /* context shutdown is mandatory */
19194 + BUG_ON(nxi->nx_state != NXS_SHUTDOWN);
19196 + /* context must not be hashed */
19197 + BUG_ON(nxi->nx_state & NXS_HASHED);
19199 + BUG_ON(atomic_read(&nxi->nx_usecnt));
19200 + BUG_ON(atomic_read(&nxi->nx_tasks));
19202 + __dealloc_nx_info(nxi);
19206 +void __nx_set_lback(struct nx_info *nxi)
19208 + int nid = nxi->nx_id;
19209 + __be32 lback = htonl(INADDR_LOOPBACK ^ ((nid & 0xFFFF) << 8));
19211 + nxi->v4_lback.s_addr = lback;
19214 +extern int __nx_inet_add_lback(__be32 addr);
19215 +extern int __nx_inet_del_lback(__be32 addr);
19218 +/* hash table for nx_info hash */
19220 +#define NX_HASH_SIZE 13
19222 +struct hlist_head nx_info_hash[NX_HASH_SIZE];
19224 +static DEFINE_SPINLOCK(nx_info_hash_lock);
19227 +static inline unsigned int __hashval(vnid_t nid)
19229 + return (nid % NX_HASH_SIZE);
19234 +/* __hash_nx_info()
19236 + * add the nxi to the global hash table
19237 + * requires the hash_lock to be held */
19239 +static inline void __hash_nx_info(struct nx_info *nxi)
19241 + struct hlist_head *head;
19243 + vxd_assert_lock(&nx_info_hash_lock);
19244 + vxdprintk(VXD_CBIT(nid, 4),
19245 + "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id);
19247 + /* context must not be hashed */
19248 + BUG_ON(nx_info_state(nxi, NXS_HASHED));
19250 + nxi->nx_state |= NXS_HASHED;
19251 + head = &nx_info_hash[__hashval(nxi->nx_id)];
19252 + hlist_add_head(&nxi->nx_hlist, head);
19253 + atomic_inc(&nx_global_cactive);
19256 +/* __unhash_nx_info()
19258 + * remove the nxi from the global hash table
19259 + * requires the hash_lock to be held */
19261 +static inline void __unhash_nx_info(struct nx_info *nxi)
19263 + vxd_assert_lock(&nx_info_hash_lock);
19264 + vxdprintk(VXD_CBIT(nid, 4),
19265 + "__unhash_nx_info: %p[#%d.%d.%d]", nxi, nxi->nx_id,
19266 + atomic_read(&nxi->nx_usecnt), atomic_read(&nxi->nx_tasks));
19268 + /* context must be hashed */
19269 + BUG_ON(!nx_info_state(nxi, NXS_HASHED));
19270 + /* but without tasks */
19271 + BUG_ON(atomic_read(&nxi->nx_tasks));
19273 + nxi->nx_state &= ~NXS_HASHED;
19274 + hlist_del(&nxi->nx_hlist);
19275 + atomic_dec(&nx_global_cactive);
19279 +/* __lookup_nx_info()
19281 + * requires the hash_lock to be held
19282 + * doesn't increment the nx_refcnt */
19284 +static inline struct nx_info *__lookup_nx_info(vnid_t nid)
19286 + struct hlist_head *head = &nx_info_hash[__hashval(nid)];
19287 + struct hlist_node *pos;
19288 + struct nx_info *nxi;
19290 + vxd_assert_lock(&nx_info_hash_lock);
19291 + hlist_for_each(pos, head) {
19292 + nxi = hlist_entry(pos, struct nx_info, nx_hlist);
19294 + if (nxi->nx_id == nid)
19299 + vxdprintk(VXD_CBIT(nid, 0),
19300 + "__lookup_nx_info(#%u): %p[#%u]",
19301 + nid, nxi, nxi ? nxi->nx_id : 0);
19306 +/* __create_nx_info()
19308 + * create the requested context
19309 + * get(), claim() and hash it */
19311 +static struct nx_info *__create_nx_info(int id)
19313 + struct nx_info *new, *nxi = NULL;
19315 + vxdprintk(VXD_CBIT(nid, 1), "create_nx_info(%d)*", id);
19317 + if (!(new = __alloc_nx_info(id)))
19318 + return ERR_PTR(-ENOMEM);
19320 + /* required to make dynamic xids unique */
19321 + spin_lock(&nx_info_hash_lock);
19323 + /* static context requested */
19324 + if ((nxi = __lookup_nx_info(id))) {
19325 + vxdprintk(VXD_CBIT(nid, 0),
19326 + "create_nx_info(%d) = %p (already there)", id, nxi);
19327 + if (nx_info_flags(nxi, NXF_STATE_SETUP, 0))
19328 + nxi = ERR_PTR(-EBUSY);
19330 + nxi = ERR_PTR(-EEXIST);
19333 + /* new context */
19334 + vxdprintk(VXD_CBIT(nid, 0),
19335 + "create_nx_info(%d) = %p (new)", id, new);
19336 + claim_nx_info(new, NULL);
19337 + __nx_set_lback(new);
19338 + __hash_nx_info(get_nx_info(new));
19339 + nxi = new, new = NULL;
19342 + spin_unlock(&nx_info_hash_lock);
19344 + __dealloc_nx_info(new);
19350 +/* exported stuff */
19353 +void unhash_nx_info(struct nx_info *nxi)
19355 + __shutdown_nx_info(nxi);
19356 + spin_lock(&nx_info_hash_lock);
19357 + __unhash_nx_info(nxi);
19358 + spin_unlock(&nx_info_hash_lock);
19361 +/* lookup_nx_info()
19363 + * search for a nx_info and get() it
19364 + * negative id means current */
19366 +struct nx_info *lookup_nx_info(int id)
19368 + struct nx_info *nxi = NULL;
19371 + nxi = get_nx_info(current_nx_info());
19372 + } else if (id > 1) {
19373 + spin_lock(&nx_info_hash_lock);
19374 + nxi = get_nx_info(__lookup_nx_info(id));
19375 + spin_unlock(&nx_info_hash_lock);
19380 +/* nid_is_hashed()
19382 + * verify that nid is still hashed */
19384 +int nid_is_hashed(vnid_t nid)
19388 + spin_lock(&nx_info_hash_lock);
19389 + hashed = (__lookup_nx_info(nid) != NULL);
19390 + spin_unlock(&nx_info_hash_lock);
19395 +#ifdef CONFIG_PROC_FS
19399 + * get a subset of hashed nids for proc
19400 + * assumes size is at least one */
19402 +int get_nid_list(int index, unsigned int *nids, int size)
19404 + int hindex, nr_nids = 0;
19406 + /* only show current and children */
19407 + if (!nx_check(0, VS_ADMIN | VS_WATCH)) {
19410 + nids[nr_nids] = nx_current_nid();
19414 + for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) {
19415 + struct hlist_head *head = &nx_info_hash[hindex];
19416 + struct hlist_node *pos;
19418 + spin_lock(&nx_info_hash_lock);
19419 + hlist_for_each(pos, head) {
19420 + struct nx_info *nxi;
19425 + nxi = hlist_entry(pos, struct nx_info, nx_hlist);
19426 + nids[nr_nids] = nxi->nx_id;
19427 + if (++nr_nids >= size) {
19428 + spin_unlock(&nx_info_hash_lock);
19432 + /* keep the lock time short */
19433 + spin_unlock(&nx_info_hash_lock);
19442 + * migrate task to new network
19443 + * gets nxi, puts old_nxi on change
19446 +int nx_migrate_task(struct task_struct *p, struct nx_info *nxi)
19448 + struct nx_info *old_nxi;
19454 + vxdprintk(VXD_CBIT(nid, 5),
19455 + "nx_migrate_task(%p,%p[#%d.%d.%d])",
19456 + p, nxi, nxi->nx_id,
19457 + atomic_read(&nxi->nx_usecnt),
19458 + atomic_read(&nxi->nx_tasks));
19460 + if (nx_info_flags(nxi, NXF_INFO_PRIVATE, 0) &&
19461 + !nx_info_flags(nxi, NXF_STATE_SETUP, 0))
19464 + if (nx_info_state(nxi, NXS_SHUTDOWN))
19467 + /* maybe disallow this completely? */
19468 + old_nxi = task_get_nx_info(p);
19469 + if (old_nxi == nxi)
19474 + clr_nx_info(&p->nx_info);
19475 + claim_nx_info(nxi, p);
19476 + set_nx_info(&p->nx_info, nxi);
19477 + p->nid = nxi->nx_id;
19480 + vxdprintk(VXD_CBIT(nid, 5),
19481 + "moved task %p into nxi:%p[#%d]",
19482 + p, nxi, nxi->nx_id);
19485 + release_nx_info(old_nxi, p);
19488 + put_nx_info(old_nxi);
19493 +void nx_set_persistent(struct nx_info *nxi)
19495 + vxdprintk(VXD_CBIT(nid, 6),
19496 + "nx_set_persistent(%p[#%d])", nxi, nxi->nx_id);
19498 + get_nx_info(nxi);
19499 + claim_nx_info(nxi, NULL);
19502 +void nx_clear_persistent(struct nx_info *nxi)
19504 + vxdprintk(VXD_CBIT(nid, 6),
19505 + "nx_clear_persistent(%p[#%d])", nxi, nxi->nx_id);
19507 + release_nx_info(nxi, NULL);
19508 + put_nx_info(nxi);
19511 +void nx_update_persistent(struct nx_info *nxi)
19513 + if (nx_info_flags(nxi, NXF_PERSISTENT, 0))
19514 + nx_set_persistent(nxi);
19516 + nx_clear_persistent(nxi);
19519 +/* vserver syscall commands below here */
19521 +/* task nid and nx_info functions */
19523 +#include <asm/uaccess.h>
19526 +int vc_task_nid(uint32_t id)
19531 + struct task_struct *tsk;
19534 + tsk = find_task_by_real_pid(id);
19535 + nid = (tsk) ? tsk->nid : -ESRCH;
19536 + rcu_read_unlock();
19538 + nid = nx_current_nid();
19543 +int vc_nx_info(struct nx_info *nxi, void __user *data)
19545 + struct vcmd_nx_info_v0 vc_data;
19547 + vc_data.nid = nxi->nx_id;
19549 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
19555 +/* network functions */
19557 +int vc_net_create(uint32_t nid, void __user *data)
19559 + struct vcmd_net_create vc_data = { .flagword = NXF_INIT_SET };
19560 + struct nx_info *new_nxi;
19563 + if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
19566 + if ((nid > MAX_S_CONTEXT) || (nid < 2))
19569 + new_nxi = __create_nx_info(nid);
19570 + if (IS_ERR(new_nxi))
19571 + return PTR_ERR(new_nxi);
19573 + /* initial flags */
19574 + new_nxi->nx_flags = vc_data.flagword;
19577 + if (vs_net_change(new_nxi, VSC_NETUP))
19580 + ret = nx_migrate_task(current, new_nxi);
19584 + /* return context id on success */
19585 + ret = new_nxi->nx_id;
19587 + /* get a reference for persistent contexts */
19588 + if ((vc_data.flagword & NXF_PERSISTENT))
19589 + nx_set_persistent(new_nxi);
19591 + release_nx_info(new_nxi, NULL);
19592 + put_nx_info(new_nxi);
19597 +int vc_net_migrate(struct nx_info *nxi, void __user *data)
19599 + return nx_migrate_task(current, nxi);
19604 +struct nx_addr_v4 *__find_v4_addr(struct nx_info *nxi,
19605 + __be32 ip, __be32 ip2, __be32 mask, uint16_t type, uint16_t flags,
19606 + struct nx_addr_v4 **prev)
19608 + struct nx_addr_v4 *nxa = &nxi->v4;
19610 + for (; nxa; nxa = nxa->next) {
19611 + if ((nxa->ip[0].s_addr == ip) &&
19612 + (nxa->ip[1].s_addr == ip2) &&
19613 + (nxa->mask.s_addr == mask) &&
19614 + (nxa->type == type) &&
19615 + (nxa->flags == flags))
19618 + /* save previous entry */
19625 +int do_add_v4_addr(struct nx_info *nxi, __be32 ip, __be32 ip2, __be32 mask,
19626 + uint16_t type, uint16_t flags)
19628 + struct nx_addr_v4 *nxa = NULL;
19629 + struct nx_addr_v4 *new = __alloc_nx_addr_v4();
19630 + unsigned long irqflags;
19631 + int ret = -EEXIST;
19634 + return PTR_ERR(new);
19636 + spin_lock_irqsave(&nxi->addr_lock, irqflags);
19637 + if (__find_v4_addr(nxi, ip, ip2, mask, type, flags, &nxa))
19640 + if (NX_IPV4(nxi)) {
19645 + /* remove single ip for ip list */
19646 + nxi->nx_flags &= ~NXF_SINGLE_IP;
19649 + nxa->ip[0].s_addr = ip;
19650 + nxa->ip[1].s_addr = ip2;
19651 + nxa->mask.s_addr = mask;
19652 + nxa->type = type;
19653 + nxa->flags = flags;
19656 + spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
19658 + __dealloc_nx_addr_v4(new);
19662 +int do_remove_v4_addr(struct nx_info *nxi, __be32 ip, __be32 ip2, __be32 mask,
19663 + uint16_t type, uint16_t flags)
19665 + struct nx_addr_v4 *nxa = NULL;
19666 + struct nx_addr_v4 *old = NULL;
19667 + unsigned long irqflags;
19670 + spin_lock_irqsave(&nxi->addr_lock, irqflags);
19672 + case NXA_TYPE_ADDR:
19673 + old = __find_v4_addr(nxi, ip, ip2, mask, type, flags, &nxa);
19676 + nxa->next = old->next;
19677 + old->next = NULL;
19683 + old->next = NULL;
19685 + memset(old, 0, sizeof(*old));
19693 + case NXA_TYPE_ANY:
19696 + memset(nxa, 0, sizeof(*nxa));
19702 + spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
19703 + __dealloc_nx_addr_v4_all(old);
19708 +int vc_net_add(struct nx_info *nxi, void __user *data)
19710 + struct vcmd_net_addr_v0 vc_data;
19711 + int index, ret = 0;
19713 + if (!data || copy_from_user(&vc_data, data, sizeof(vc_data))) /* NULL data would leave vc_data uninitialized */
19716 + switch (vc_data.type) {
19717 + case NXA_TYPE_IPV4:
19718 + if ((vc_data.count < 1) || (vc_data.count > 4))
19722 + while (index < vc_data.count) {
19723 + ret = do_add_v4_addr(nxi, vc_data.ip[index].s_addr, 0,
19724 + vc_data.mask[index].s_addr, NXA_TYPE_ADDR, 0);
19732 + case NXA_TYPE_IPV4|NXA_MOD_BCAST:
19733 + nxi->v4_bcast = vc_data.ip[0];
19737 + case NXA_TYPE_IPV4|NXA_MOD_LBACK:
19738 + nxi->v4_lback = vc_data.ip[0];
19749 +int vc_net_remove(struct nx_info *nxi, void __user *data)
19751 + struct vcmd_net_addr_v0 vc_data;
19753 + if (!data || copy_from_user(&vc_data, data, sizeof(vc_data))) /* NULL data would leave vc_data uninitialized */
19756 + switch (vc_data.type) {
19757 + case NXA_TYPE_ANY:
19758 + return do_remove_v4_addr(nxi, 0, 0, 0, vc_data.type, 0);
19766 +int vc_net_add_ipv4_v1(struct nx_info *nxi, void __user *data)
19768 + struct vcmd_net_addr_ipv4_v1 vc_data;
19770 + if (!data || copy_from_user(&vc_data, data, sizeof(vc_data))) /* NULL data would leave vc_data uninitialized */
19773 + switch (vc_data.type) {
19774 + case NXA_TYPE_ADDR:
19775 + case NXA_TYPE_MASK:
19776 + return do_add_v4_addr(nxi, vc_data.ip.s_addr, 0,
19777 + vc_data.mask.s_addr, vc_data.type, vc_data.flags);
19779 + case NXA_TYPE_ADDR | NXA_MOD_BCAST:
19780 + nxi->v4_bcast = vc_data.ip;
19783 + case NXA_TYPE_ADDR | NXA_MOD_LBACK:
19784 + nxi->v4_lback = vc_data.ip;
19793 +int vc_net_add_ipv4(struct nx_info *nxi, void __user *data)
19795 + struct vcmd_net_addr_ipv4_v2 vc_data;
19797 + if (!data || copy_from_user(&vc_data, data, sizeof(vc_data))) /* NULL data would leave vc_data uninitialized */
19800 + switch (vc_data.type) {
19801 + case NXA_TYPE_ADDR:
19802 + case NXA_TYPE_MASK:
19803 + case NXA_TYPE_RANGE:
19804 + return do_add_v4_addr(nxi, vc_data.ip.s_addr, vc_data.ip2.s_addr,
19805 + vc_data.mask.s_addr, vc_data.type, vc_data.flags);
19807 + case NXA_TYPE_ADDR | NXA_MOD_BCAST:
19808 + nxi->v4_bcast = vc_data.ip;
19811 + case NXA_TYPE_ADDR | NXA_MOD_LBACK:
19812 + nxi->v4_lback = vc_data.ip;
19821 +int vc_net_rem_ipv4_v1(struct nx_info *nxi, void __user *data)
19823 + struct vcmd_net_addr_ipv4_v1 vc_data;
19825 + if (!data || copy_from_user(&vc_data, data, sizeof(vc_data))) /* NULL data would leave vc_data uninitialized */
19828 + return do_remove_v4_addr(nxi, vc_data.ip.s_addr, 0,
19829 + vc_data.mask.s_addr, vc_data.type, vc_data.flags);
19832 +int vc_net_rem_ipv4(struct nx_info *nxi, void __user *data)
19834 + struct vcmd_net_addr_ipv4_v2 vc_data;
19836 + if (!data || copy_from_user(&vc_data, data, sizeof(vc_data))) /* NULL data would leave vc_data uninitialized */
19839 + return do_remove_v4_addr(nxi, vc_data.ip.s_addr, vc_data.ip2.s_addr,
19840 + vc_data.mask.s_addr, vc_data.type, vc_data.flags);
19843 +#ifdef CONFIG_IPV6
19846 +struct nx_addr_v6 *__find_v6_addr(struct nx_info *nxi,
19847 + struct in6_addr *ip, struct in6_addr *mask,
19848 + uint32_t prefix, uint16_t type, uint16_t flags,
19849 + struct nx_addr_v6 **prev)
19851 + struct nx_addr_v6 *nxa = &nxi->v6;
19853 + for (; nxa; nxa = nxa->next) {
19854 + if (ipv6_addr_equal(&nxa->ip, ip) &&
19855 + ipv6_addr_equal(&nxa->mask, mask) &&
19856 + (nxa->prefix == prefix) &&
19857 + (nxa->type == type) &&
19858 + (nxa->flags == flags))
19861 + /* save previous entry */
19869 +int do_add_v6_addr(struct nx_info *nxi,
19870 + struct in6_addr *ip, struct in6_addr *mask,
19871 + uint32_t prefix, uint16_t type, uint16_t flags)
19873 + struct nx_addr_v6 *nxa = NULL;
19874 + struct nx_addr_v6 *new = __alloc_nx_addr_v6();
19875 + unsigned long irqflags;
19876 + int ret = -EEXIST;
19879 + return PTR_ERR(new);
19881 + spin_lock_irqsave(&nxi->addr_lock, irqflags);
19882 + if (__find_v6_addr(nxi, ip, mask, prefix, type, flags, &nxa))
19885 + if (NX_IPV6(nxi)) {
19892 + nxa->mask = *mask;
19893 + nxa->prefix = prefix;
19894 + nxa->type = type;
19895 + nxa->flags = flags;
19898 + spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
19900 + __dealloc_nx_addr_v6(new);
19904 +int do_remove_v6_addr(struct nx_info *nxi,
19905 + struct in6_addr *ip, struct in6_addr *mask,
19906 + uint32_t prefix, uint16_t type, uint16_t flags)
19908 + struct nx_addr_v6 *nxa = NULL;
19909 + struct nx_addr_v6 *old = NULL;
19910 + unsigned long irqflags;
19913 + spin_lock_irqsave(&nxi->addr_lock, irqflags);
19915 + case NXA_TYPE_ADDR:
19916 + old = __find_v6_addr(nxi, ip, mask, prefix, type, flags, &nxa);
19919 + nxa->next = old->next;
19920 + old->next = NULL;
19926 + old->next = NULL;
19928 + memset(old, 0, sizeof(*old));
19936 + case NXA_TYPE_ANY:
19939 + memset(nxa, 0, sizeof(*nxa));
19945 + spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
19946 + __dealloc_nx_addr_v6_all(old);
19950 +int vc_net_add_ipv6(struct nx_info *nxi, void __user *data)
19952 + struct vcmd_net_addr_ipv6_v1 vc_data;
19954 + if (!data || copy_from_user(&vc_data, data, sizeof(vc_data))) /* NULL data would leave vc_data uninitialized */
19957 + switch (vc_data.type) {
19958 + case NXA_TYPE_ADDR:
19959 + memset(&vc_data.mask, ~0, sizeof(vc_data.mask));
19960 + /* fallthrough */
19961 + case NXA_TYPE_MASK:
19962 + return do_add_v6_addr(nxi, &vc_data.ip, &vc_data.mask,
19963 + vc_data.prefix, vc_data.type, vc_data.flags);
19970 +int vc_net_remove_ipv6(struct nx_info *nxi, void __user *data)
19972 + struct vcmd_net_addr_ipv6_v1 vc_data;
19974 + if (!data || copy_from_user(&vc_data, data, sizeof(vc_data))) /* NULL data would leave vc_data uninitialized */
19977 + switch (vc_data.type) {
19978 + case NXA_TYPE_ADDR:
19979 + memset(&vc_data.mask, ~0, sizeof(vc_data.mask));
19980 + /* fallthrough */
19981 + case NXA_TYPE_MASK:
19982 + return do_remove_v6_addr(nxi, &vc_data.ip, &vc_data.mask,
19983 + vc_data.prefix, vc_data.type, vc_data.flags);
19984 + case NXA_TYPE_ANY:
19985 + return do_remove_v6_addr(nxi, NULL, NULL, 0, vc_data.type, 0);
19992 +#endif /* CONFIG_IPV6 */
19995 +int vc_get_nflags(struct nx_info *nxi, void __user *data)
19997 + struct vcmd_net_flags_v0 vc_data;
19999 + vc_data.flagword = nxi->nx_flags;
20001 + /* special STATE flag handling */
20002 + vc_data.mask = vs_mask_flags(~0ULL, nxi->nx_flags, NXF_ONE_TIME);
20004 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
20009 +int vc_set_nflags(struct nx_info *nxi, void __user *data)
20011 + struct vcmd_net_flags_v0 vc_data;
20012 + uint64_t mask, trigger;
20014 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
20017 + /* special STATE flag handling */
20018 + mask = vs_mask_mask(vc_data.mask, nxi->nx_flags, NXF_ONE_TIME);
20019 + trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword);
20021 + nxi->nx_flags = vs_mask_flags(nxi->nx_flags,
20022 + vc_data.flagword, mask);
20023 + if (trigger & NXF_PERSISTENT)
20024 + nx_update_persistent(nxi);
20029 +int vc_get_ncaps(struct nx_info *nxi, void __user *data)
20031 + struct vcmd_net_caps_v0 vc_data;
20033 + vc_data.ncaps = nxi->nx_ncaps;
20034 + vc_data.cmask = ~0ULL;
20036 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
20041 +int vc_set_ncaps(struct nx_info *nxi, void __user *data)
20043 + struct vcmd_net_caps_v0 vc_data;
20045 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
20048 + nxi->nx_ncaps = vs_mask_flags(nxi->nx_ncaps,
20049 + vc_data.ncaps, vc_data.cmask);
20054 +#include <linux/module.h>
20056 +module_init(init_network);
20058 +EXPORT_SYMBOL_GPL(free_nx_info);
20059 +EXPORT_SYMBOL_GPL(unhash_nx_info);
20061 diff -NurpP --minimal linux-4.9.217/kernel/vserver/proc.c linux-4.9.217-vs2.3.9.12/kernel/vserver/proc.c
20062 --- linux-4.9.217/kernel/vserver/proc.c 1970-01-01 00:00:00.000000000 +0000
20063 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/proc.c 2018-10-20 04:58:15.000000000 +0000
20066 + * linux/kernel/vserver/proc.c
20068 + * Virtual Context Support
20070 + * Copyright (C) 2003-2011 Herbert Pötzl
20072 + * V0.01 basic structure
20073 + * V0.02 adaptation vs1.3.0
20074 + * V0.03 proc permissions
20075 + * V0.04 locking/generic
20076 + * V0.05 next generation procfs
20077 + * V0.06 inode validation
20078 + * V0.07 generic rewrite vid
20079 + * V0.08 remove inode type
20080 + * V0.09 added u/wmask info
20084 +#include <linux/proc_fs.h>
20085 +#include <linux/fs_struct.h>
20086 +#include <linux/mount.h>
20087 +#include <linux/namei.h>
20088 +#include <asm/unistd.h>
20090 +#include <linux/vs_context.h>
20091 +#include <linux/vs_network.h>
20092 +#include <linux/vs_cvirt.h>
20094 +#include <linux/in.h>
20095 +#include <linux/inetdevice.h>
20096 +#include <linux/vs_inet.h>
20097 +#include <linux/vs_inet6.h>
20099 +#include <linux/vserver/global.h>
20101 +#include "cvirt_proc.h"
20102 +#include "cacct_proc.h"
20103 +#include "limit_proc.h"
20104 +#include "sched_proc.h"
20105 +#include "vci_config.h"
20107 +#include <../../fs/proc/internal.h>
20110 +static inline char *print_cap_t(char *buffer, kernel_cap_t *c)
20114 + CAP_FOR_EACH_U32(__capi) {
20115 + buffer += sprintf(buffer, "%08x",
20116 + c->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]);
20122 +static struct proc_dir_entry *proc_virtual;
20124 +static struct proc_dir_entry *proc_virtnet;
20127 +/* first the actual feeds */
20130 +static int proc_vci(char *buffer)
20132 + return sprintf(buffer,
20133 + "VCIVersion:\t%04x:%04x\n"
20134 + "VCISyscall:\t%d\n"
20135 + "VCIKernel:\t%08x\n",
20136 + VCI_VERSION >> 16,
20137 + VCI_VERSION & 0xFFFF,
20139 + vci_kernel_config());
20142 +static int proc_virtual_info(char *buffer)
20144 + return proc_vci(buffer);
20147 +static int proc_virtual_status(char *buffer)
20149 + return sprintf(buffer,
20151 + "#CActive:\t%d\n"
20152 + "#NSProxy:\t%d\t%d %d %d %d %d %d\n"
20153 + "#InitTask:\t%d\t%d %d\n",
20154 + atomic_read(&vx_global_ctotal),
20155 + atomic_read(&vx_global_cactive),
20156 + atomic_read(&vs_global_nsproxy),
20157 + atomic_read(&vs_global_fs),
20158 + atomic_read(&vs_global_mnt_ns),
20159 + atomic_read(&vs_global_uts_ns),
20160 + atomic_read(&vs_global_ipc_ns),
20161 + atomic_read(&vs_global_user_ns),
20162 + atomic_read(&vs_global_pid_ns),
20163 + atomic_read(&init_task.usage),
20164 + atomic_read(&init_task.nsproxy->count),
20165 + init_task.fs->users);
20169 +int proc_vxi_info(struct vx_info *vxi, char *buffer)
20173 + length = sprintf(buffer,
20181 + vxi->vx_badness_bias);
20185 +int proc_vxi_status(struct vx_info *vxi, char *buffer)
20187 + char *orig = buffer;
20189 + buffer += sprintf(buffer,
20192 + "Flags:\t%016llx\n",
20193 + atomic_read(&vxi->vx_usecnt),
20194 + atomic_read(&vxi->vx_tasks),
20195 + (unsigned long long)vxi->vx_flags);
20197 + buffer += sprintf(buffer, "BCaps:\t");
20198 + buffer = print_cap_t(buffer, &vxi->vx_bcaps);
20199 + buffer += sprintf(buffer, "\n");
20201 + buffer += sprintf(buffer,
20202 + "CCaps:\t%016llx\n"
20203 + "Umask:\t%16llx\n"
20204 + "Wmask:\t%16llx\n"
20205 + "Spaces:\t%08lx %08lx\n",
20206 + (unsigned long long)vxi->vx_ccaps,
20207 + (unsigned long long)vxi->vx_umask,
20208 + (unsigned long long)vxi->vx_wmask,
20209 + vxi->space[0].vx_nsmask, vxi->space[1].vx_nsmask);
20210 + return buffer - orig;
20213 +int proc_vxi_limit(struct vx_info *vxi, char *buffer)
20215 + return vx_info_proc_limit(&vxi->limit, buffer);
20218 +int proc_vxi_sched(struct vx_info *vxi, char *buffer)
20222 + length = vx_info_proc_sched(&vxi->sched, buffer);
20223 + for_each_online_cpu(cpu) {
20224 + length += vx_info_proc_sched_pc(
20225 + &vx_per_cpu(vxi, sched_pc, cpu),
20226 + buffer + length, cpu);
20231 +int proc_vxi_nsproxy0(struct vx_info *vxi, char *buffer)
20233 + return vx_info_proc_nsproxy(vxi->space[0].vx_nsproxy, buffer);
20236 +int proc_vxi_nsproxy1(struct vx_info *vxi, char *buffer)
20238 + return vx_info_proc_nsproxy(vxi->space[1].vx_nsproxy, buffer);
20241 +int proc_vxi_cvirt(struct vx_info *vxi, char *buffer)
20245 + vx_update_load(vxi);
20246 + length = vx_info_proc_cvirt(&vxi->cvirt, buffer);
20247 + for_each_online_cpu(cpu) {
20248 + length += vx_info_proc_cvirt_pc(
20249 + &vx_per_cpu(vxi, cvirt_pc, cpu),
20250 + buffer + length, cpu);
20255 +int proc_vxi_cacct(struct vx_info *vxi, char *buffer)
20257 + return vx_info_proc_cacct(&vxi->cacct, buffer);
20261 +static int proc_virtnet_info(char *buffer)
20263 + return proc_vci(buffer);
20266 +static int proc_virtnet_status(char *buffer)
20268 + return sprintf(buffer,
20270 + "#CActive:\t%d\n",
20271 + atomic_read(&nx_global_ctotal),
20272 + atomic_read(&nx_global_cactive));
20275 +int proc_nxi_info(struct nx_info *nxi, char *buffer)
20277 + struct nx_addr_v4 *v4a;
20278 +#ifdef CONFIG_IPV6
20279 + struct nx_addr_v6 *v6a;
20283 + length = sprintf(buffer,
20286 + "Bcast:\t" NIPQUAD_FMT "\n"
20287 + "Lback:\t" NIPQUAD_FMT "\n",
20290 + NIPQUAD(nxi->v4_bcast.s_addr),
20291 + NIPQUAD(nxi->v4_lback.s_addr));
20293 + if (!NX_IPV4(nxi))
20295 + for (i = 0, v4a = &nxi->v4; v4a; i++, v4a = v4a->next)
20296 + length += sprintf(buffer + length, "%d:\t" NXAV4_FMT "\n",
20299 +#ifdef CONFIG_IPV6
20300 + if (!NX_IPV6(nxi))
20302 + for (i = 0, v6a = &nxi->v6; v6a; i++, v6a = v6a->next)
20303 + length += sprintf(buffer + length, "%d:\t" NXAV6_FMT "\n",
20310 +int proc_nxi_status(struct nx_info *nxi, char *buffer)
20314 + length = sprintf(buffer,
20317 + "Flags:\t%016llx\n"
20318 + "NCaps:\t%016llx\n",
20319 + atomic_read(&nxi->nx_usecnt),
20320 + atomic_read(&nxi->nx_tasks),
20321 + (unsigned long long)nxi->nx_flags,
20322 + (unsigned long long)nxi->nx_ncaps);
20328 +/* here the inode helpers */
20334 + struct inode_operations *iop;
20335 + struct file_operations *fop;
20336 + union proc_op op;
20339 +static struct inode *vs_proc_make_inode(struct super_block *sb, struct vs_entry *p)
20341 + struct inode *inode = new_inode(sb);
20346 + inode->i_mode = p->mode;
20348 + inode->i_op = p->iop;
20350 + inode->i_fop = p->fop;
20352 + set_nlink(inode, (p->mode & S_IFDIR) ? 2 : 1);
20353 + inode->i_flags |= S_IMMUTABLE;
20355 + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
20357 + i_uid_write(inode, 0);
20358 + i_gid_write(inode, 0);
20359 + i_tag_write(inode, 0);
20364 +static struct dentry *vs_proc_instantiate(struct inode *dir,
20365 + struct dentry *dentry, int id, void *ptr)
20367 + struct vs_entry *p = ptr;
20368 + struct inode *inode = vs_proc_make_inode(dir->i_sb, p);
20369 + struct dentry *error = ERR_PTR(-EINVAL);
20374 + PROC_I(inode)->op = p->op;
20375 + PROC_I(inode)->fd = id;
20376 + d_add(dentry, inode);
20384 +typedef struct dentry *vx_instantiate_t(struct inode *, struct dentry *, int, void *);
20388 + * Fill a directory entry.
20390 + * If possible create the dcache entry and derive our inode number and
20391 + * file type from dcache entry.
20393 + * Since all of the proc inode numbers are dynamically generated, the inode
20394 + * numbers do not exist until the inode is cached. This means creating
20395 + * the dcache entry in iterate is necessary to keep the inode numbers
20396 + * reported by iterate in sync with the inode numbers reported
20399 +static int vx_proc_fill_cache(struct file *filp, struct dir_context *ctx,
20400 + char *name, int len, vx_instantiate_t instantiate, int id, void *ptr)
20402 + struct dentry *child, *dir = filp->f_path.dentry;
20403 + struct inode *inode;
20404 + struct qstr qname;
20406 + unsigned type = DT_UNKNOWN;
20408 + qname.name = name;
20410 + qname.hash = full_name_hash(NULL, name, len);
20412 + child = d_lookup(dir, &qname);
20414 + struct dentry *new;
20415 + new = d_alloc(dir, &qname);
20417 + child = instantiate(dir->d_inode, new, id, ptr);
20424 + if (!child || IS_ERR(child) || !child->d_inode)
20425 + goto end_instantiate;
20426 + inode = child->d_inode;
20428 + ino = inode->i_ino;
20429 + type = inode->i_mode >> 12;
20435 + return !dir_emit(ctx, name, len, ino, type);
20440 +/* get and revalidate vx_info/xid */
20443 +struct vx_info *get_proc_vx_info(struct inode *inode)
20445 + return lookup_vx_info(PROC_I(inode)->fd);
20448 +static int proc_xid_revalidate(struct dentry *dentry, unsigned int flags)
20450 + struct inode *inode = dentry->d_inode;
20451 + vxid_t xid = PROC_I(inode)->fd;
20453 + if (flags & LOOKUP_RCU) /* FIXME: can be dropped? */
20456 + if (!xid || xid_is_hashed(xid))
20463 +/* get and revalidate nx_info/nid */
20465 +static int proc_nid_revalidate(struct dentry *dentry, unsigned int flags)
20467 + struct inode *inode = dentry->d_inode;
20468 + vnid_t nid = PROC_I(inode)->fd;
20470 + if (flags & LOOKUP_RCU) /* FIXME: can be dropped? */
20473 + if (!nid || nid_is_hashed(nid))
20481 +#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024)
20483 +static ssize_t proc_vs_info_read(struct file *file, char __user *buf,
20484 + size_t count, loff_t *ppos)
20486 + struct inode *inode = file->f_path.dentry->d_inode;
20487 + unsigned long page;
20488 + ssize_t length = 0;
20490 + if (count > PROC_BLOCK_SIZE)
20491 + count = PROC_BLOCK_SIZE;
20493 + /* fade that out as soon as stable */
20494 + WARN_ON(PROC_I(inode)->fd);
20496 + if (!(page = __get_free_page(GFP_KERNEL)))
20499 + BUG_ON(!PROC_I(inode)->op.proc_vs_read);
20500 + length = PROC_I(inode)->op.proc_vs_read((char *)page);
20503 + length = simple_read_from_buffer(buf, count, ppos,
20504 + (char *)page, length);
20510 +static ssize_t proc_vx_info_read(struct file *file, char __user *buf,
20511 + size_t count, loff_t *ppos)
20513 + struct inode *inode = file->f_path.dentry->d_inode;
20514 + struct vx_info *vxi = NULL;
20515 + vxid_t xid = PROC_I(inode)->fd;
20516 + unsigned long page;
20517 + ssize_t length = 0;
20519 + if (count > PROC_BLOCK_SIZE)
20520 + count = PROC_BLOCK_SIZE;
20522 + /* fade that out as soon as stable */
20524 + vxi = lookup_vx_info(xid);
20528 + length = -ENOMEM;
20529 + if (!(page = __get_free_page(GFP_KERNEL)))
20532 + BUG_ON(!PROC_I(inode)->op.proc_vxi_read);
20533 + length = PROC_I(inode)->op.proc_vxi_read(vxi, (char *)page);
20536 + length = simple_read_from_buffer(buf, count, ppos,
20537 + (char *)page, length);
20541 + put_vx_info(vxi);
20546 +static ssize_t proc_nx_info_read(struct file *file, char __user *buf,
20547 + size_t count, loff_t *ppos)
20549 + struct inode *inode = file->f_path.dentry->d_inode;
20550 + struct nx_info *nxi = NULL;
20551 + vnid_t nid = PROC_I(inode)->fd;
20552 + unsigned long page;
20553 + ssize_t length = 0;
20555 + if (count > PROC_BLOCK_SIZE)
20556 + count = PROC_BLOCK_SIZE;
20558 + /* fade that out as soon as stable */
20560 + nxi = lookup_nx_info(nid);
20564 + length = -ENOMEM;
20565 + if (!(page = __get_free_page(GFP_KERNEL)))
20568 + BUG_ON(!PROC_I(inode)->op.proc_nxi_read);
20569 + length = PROC_I(inode)->op.proc_nxi_read(nxi, (char *)page);
20572 + length = simple_read_from_buffer(buf, count, ppos,
20573 + (char *)page, length);
20577 + put_nx_info(nxi);
20584 +/* here comes the lower level */
20587 +#define NOD(NAME, MODE, IOP, FOP, OP) { \
20588 + .len = sizeof(NAME) - 1, \
20589 + .name = (NAME), \
20597 +#define DIR(NAME, MODE, OTYPE) \
20598 + NOD(NAME, (S_IFDIR | (MODE)), \
20599 + &proc_ ## OTYPE ## _inode_operations, \
20600 + &proc_ ## OTYPE ## _file_operations, { } )
20602 +#define INF(NAME, MODE, OTYPE) \
20603 + NOD(NAME, (S_IFREG | (MODE)), NULL, \
20604 + &proc_vs_info_file_operations, \
20605 + { .proc_vs_read = &proc_##OTYPE } )
20607 +#define VINF(NAME, MODE, OTYPE) \
20608 + NOD(NAME, (S_IFREG | (MODE)), NULL, \
20609 + &proc_vx_info_file_operations, \
20610 + { .proc_vxi_read = &proc_##OTYPE } )
20612 +#define NINF(NAME, MODE, OTYPE) \
20613 + NOD(NAME, (S_IFREG | (MODE)), NULL, \
20614 + &proc_nx_info_file_operations, \
20615 + { .proc_nxi_read = &proc_##OTYPE } )
20618 +static struct file_operations proc_vs_info_file_operations = {
20619 + .read = proc_vs_info_read,
20622 +static struct file_operations proc_vx_info_file_operations = {
20623 + .read = proc_vx_info_read,
20626 +static struct dentry_operations proc_xid_dentry_operations = {
20627 + .d_revalidate = proc_xid_revalidate,
20630 +static struct vs_entry vx_base_stuff[] = {
20631 + VINF("info", S_IRUGO, vxi_info),
20632 + VINF("status", S_IRUGO, vxi_status),
20633 + VINF("limit", S_IRUGO, vxi_limit),
20634 + VINF("sched", S_IRUGO, vxi_sched),
20635 + VINF("nsproxy", S_IRUGO, vxi_nsproxy0),
20636 + VINF("nsproxy1",S_IRUGO, vxi_nsproxy1),
20637 + VINF("cvirt", S_IRUGO, vxi_cvirt),
20638 + VINF("cacct", S_IRUGO, vxi_cacct),
20645 +static struct dentry *proc_xid_instantiate(struct inode *dir,
20646 + struct dentry *dentry, int id, void *ptr)
20648 + dentry->d_op = &proc_xid_dentry_operations;
20649 + return vs_proc_instantiate(dir, dentry, id, ptr);
20652 +static struct dentry *proc_xid_lookup(struct inode *dir,
20653 + struct dentry *dentry, unsigned int flags)
20655 + struct vs_entry *p = vx_base_stuff;
20656 + struct dentry *error = ERR_PTR(-ENOENT);
20658 + for (; p->name; p++) {
20659 + if (p->len != dentry->d_name.len)
20661 + if (!memcmp(dentry->d_name.name, p->name, p->len))
20667 + error = proc_xid_instantiate(dir, dentry, PROC_I(dir)->fd, p);
20672 +static int proc_xid_iterate(struct file *filp, struct dir_context *ctx)
20674 + struct dentry *dentry = filp->f_path.dentry;
20675 + struct inode *inode = dentry->d_inode;
20676 + struct vs_entry *p = vx_base_stuff;
20677 + int size = sizeof(vx_base_stuff) / sizeof(struct vs_entry);
20680 + if (!dir_emit_dots(filp, ctx))
20683 + index = ctx->pos - 2;
20684 + if (index < size) {
20685 + for (p += index; p->name; p++) {
20686 + if (vx_proc_fill_cache(filp, ctx, p->name, p->len,
20687 + vs_proc_instantiate, PROC_I(inode)->fd, p))
20697 +static struct file_operations proc_nx_info_file_operations = {
20698 + .read = proc_nx_info_read,
20701 +static struct dentry_operations proc_nid_dentry_operations = {
20702 + .d_revalidate = proc_nid_revalidate,
20705 +static struct vs_entry nx_base_stuff[] = {
20706 + NINF("info", S_IRUGO, nxi_info),
20707 + NINF("status", S_IRUGO, nxi_status),
20712 +static struct dentry *proc_nid_instantiate(struct inode *dir,
20713 + struct dentry *dentry, int id, void *ptr)
20715 + dentry->d_op = &proc_nid_dentry_operations;
20716 + return vs_proc_instantiate(dir, dentry, id, ptr);
20719 +static struct dentry *proc_nid_lookup(struct inode *dir,
20720 + struct dentry *dentry, unsigned int flags)
20722 + struct vs_entry *p = nx_base_stuff;
20723 + struct dentry *error = ERR_PTR(-ENOENT);
20725 + for (; p->name; p++) {
20726 + if (p->len != dentry->d_name.len)
20728 + if (!memcmp(dentry->d_name.name, p->name, p->len))
20734 + error = proc_nid_instantiate(dir, dentry, PROC_I(dir)->fd, p);
20739 +static int proc_nid_iterate(struct file *filp, struct dir_context *ctx)
20741 + struct dentry *dentry = filp->f_path.dentry;
20742 + struct inode *inode = dentry->d_inode;
20743 + struct vs_entry *p = nx_base_stuff;
20744 + int size = sizeof(nx_base_stuff) / sizeof(struct vs_entry);
20747 + if (!dir_emit_dots(filp, ctx))
20750 + index = ctx->pos - 2;
20751 + if (index < size) {
20752 + for (p += index; p->name; p++) {
20753 + if (vx_proc_fill_cache(filp, ctx, p->name, p->len,
20754 + vs_proc_instantiate, PROC_I(inode)->fd, p))
20763 +#define MAX_MULBY10 ((~0U - 9) / 10)
20765 +static inline int atovid(const char *str, int len)
20770 + while (len-- > 0) {
20775 + if (vid >= MAX_MULBY10)
20785 +/* now the upper level (virtual) */
20788 +static struct file_operations proc_xid_file_operations = {
20789 + .read = generic_read_dir,
20790 + .iterate = proc_xid_iterate,
20793 +static struct inode_operations proc_xid_inode_operations = {
20794 + .lookup = proc_xid_lookup,
20797 +static struct vs_entry vx_virtual_stuff[] = {
20798 + INF("info", S_IRUGO, virtual_info),
20799 + INF("status", S_IRUGO, virtual_status),
20800 + DIR(NULL, S_IRUGO | S_IXUGO, xid),
20804 +static struct dentry *proc_virtual_lookup(struct inode *dir,
20805 + struct dentry *dentry, unsigned int flags)
20807 + struct vs_entry *p = vx_virtual_stuff;
20808 + struct dentry *error = ERR_PTR(-ENOENT);
20811 + for (; p->name; p++) {
20812 + if (p->len != dentry->d_name.len)
20814 + if (!memcmp(dentry->d_name.name, p->name, p->len))
20818 + goto instantiate;
20820 + id = atovid(dentry->d_name.name, dentry->d_name.len);
20821 + if ((id < 0) || !xid_is_hashed(id))
20825 + error = proc_xid_instantiate(dir, dentry, id, p);
20830 +static struct file_operations proc_nid_file_operations = {
20831 + .read = generic_read_dir,
20832 + .iterate = proc_nid_iterate,
20835 +static struct inode_operations proc_nid_inode_operations = {
20836 + .lookup = proc_nid_lookup,
20839 +static struct vs_entry nx_virtnet_stuff[] = {
20840 + INF("info", S_IRUGO, virtnet_info),
20841 + INF("status", S_IRUGO, virtnet_status),
20842 + DIR(NULL, S_IRUGO | S_IXUGO, nid),
20846 +static struct dentry *proc_virtnet_lookup(struct inode *dir,
20847 + struct dentry *dentry, unsigned int flags)
20849 + struct vs_entry *p = nx_virtnet_stuff;
20850 + struct dentry *error = ERR_PTR(-ENOENT);
20853 + for (; p->name; p++) {
20854 + if (p->len != dentry->d_name.len)
20856 + if (!memcmp(dentry->d_name.name, p->name, p->len))
20860 + goto instantiate;
20862 + id = atovid(dentry->d_name.name, dentry->d_name.len);
20863 + if ((id < 0) || !nid_is_hashed(id))
20867 + error = proc_nid_instantiate(dir, dentry, id, p);
20873 +#define PROC_MAXVIDS 32
20875 +int proc_virtual_iterate(struct file *filp, struct dir_context *ctx)
20877 + struct vs_entry *p = vx_virtual_stuff;
20878 + int size = sizeof(vx_virtual_stuff) / sizeof(struct vs_entry);
20880 + unsigned int xid_array[PROC_MAXVIDS];
20881 + char buf[PROC_NUMBUF];
20882 + unsigned int nr_xids, i;
20884 + if (!dir_emit_dots(filp, ctx))
20887 + index = ctx->pos - 2;
20888 + if (index < size) {
20889 + for (p += index; p->name; p++) {
20890 + if (vx_proc_fill_cache(filp, ctx, p->name, p->len,
20891 + vs_proc_instantiate, 0, p))
20897 + index = ctx->pos - size;
20898 + p = &vx_virtual_stuff[size - 1];
20899 + nr_xids = get_xid_list(index, xid_array, PROC_MAXVIDS);
20900 + for (i = 0; i < nr_xids; i++) {
20901 + int n, xid = xid_array[i];
20902 + unsigned int j = PROC_NUMBUF;
20906 + buf[--j] = '0' + (n % 10);
20909 + if (vx_proc_fill_cache(filp, ctx,
20910 + buf + j, PROC_NUMBUF - j,
20911 + vs_proc_instantiate, xid, p))
20918 +static int proc_virtual_getattr(struct vfsmount *mnt,
20919 + struct dentry *dentry, struct kstat *stat)
20921 + struct inode *inode = dentry->d_inode;
20923 + generic_fillattr(inode, stat);
20924 + stat->nlink = 2 + atomic_read(&vx_global_cactive);
20928 +static struct file_operations proc_virtual_dir_operations = {
20929 + .read = generic_read_dir,
20930 + .iterate = proc_virtual_iterate,
20933 +static struct inode_operations proc_virtual_dir_inode_operations = {
20934 + .getattr = proc_virtual_getattr,
20935 + .lookup = proc_virtual_lookup,
20940 +int proc_virtnet_iterate(struct file *filp, struct dir_context *ctx)
20942 + struct vs_entry *p = nx_virtnet_stuff;
20943 + int size = sizeof(nx_virtnet_stuff) / sizeof(struct vs_entry);
20945 + unsigned int nid_array[PROC_MAXVIDS];
20946 + char buf[PROC_NUMBUF];
20947 + unsigned int nr_nids, i;
20949 + if (!dir_emit_dots(filp, ctx))
20952 + index = ctx->pos - 2;
20953 + if (index < size) {
20954 + for (p += index; p->name; p++) {
20955 + if (vx_proc_fill_cache(filp, ctx, p->name, p->len,
20956 + vs_proc_instantiate, 0, p))
20962 + index = ctx->pos - size;
20963 + p = &nx_virtnet_stuff[size - 1];
20964 + nr_nids = get_nid_list(index, nid_array, PROC_MAXVIDS);
20965 + for (i = 0; i < nr_nids; i++) {
20966 + int n, nid = nid_array[i];
20967 + unsigned int j = PROC_NUMBUF;
20971 + buf[--j] = '0' + (n % 10);
20974 + if (vx_proc_fill_cache(filp, ctx,
20975 + buf + j, PROC_NUMBUF - j,
20976 + vs_proc_instantiate, nid, p))
20983 +static int proc_virtnet_getattr(struct vfsmount *mnt,
20984 + struct dentry *dentry, struct kstat *stat)
20986 + struct inode *inode = dentry->d_inode;
20988 + generic_fillattr(inode, stat);
20989 + stat->nlink = 2 + atomic_read(&nx_global_cactive);
20993 +static struct file_operations proc_virtnet_dir_operations = {
20994 + .read = generic_read_dir,
20995 + .iterate = proc_virtnet_iterate,
20998 +static struct inode_operations proc_virtnet_dir_inode_operations = {
20999 + .getattr = proc_virtnet_getattr,
21000 + .lookup = proc_virtnet_lookup,
21005 +void proc_vx_init(void)
21007 + struct proc_dir_entry *ent;
21009 + ent = proc_mkdir("virtual", 0);
21011 + ent->proc_fops = &proc_virtual_dir_operations;
21012 + ent->proc_iops = &proc_virtual_dir_inode_operations;
21014 + proc_virtual = ent;
21016 + ent = proc_mkdir("virtnet", 0);
21018 + ent->proc_fops = &proc_virtnet_dir_operations;
21019 + ent->proc_iops = &proc_virtnet_dir_inode_operations;
21021 + proc_virtnet = ent;
21027 +/* per pid info */
21029 +void render_cap_t(struct seq_file *, const char *,
21030 + struct vx_info *, kernel_cap_t *);
21033 +int proc_pid_vx_info(
21034 + struct seq_file *m,
21035 + struct pid_namespace *ns,
21037 + struct task_struct *p)
21039 + struct vx_info *vxi;
21041 + seq_printf(m, "XID:\t%d\n", vx_task_xid(p));
21043 + vxi = task_get_vx_info(p);
21047 + render_cap_t(m, "BCaps:\t", vxi, &vxi->vx_bcaps);
21048 + seq_printf(m, "CCaps:\t%016llx\n",
21049 + (unsigned long long)vxi->vx_ccaps);
21050 + seq_printf(m, "CFlags:\t%016llx\n",
21051 + (unsigned long long)vxi->vx_flags);
21052 + seq_printf(m, "CIPid:\t%d\n", vxi->vx_initpid);
21054 + put_vx_info(vxi);
21059 +int proc_pid_nx_info(
21060 + struct seq_file *m,
21061 + struct pid_namespace *ns,
21063 + struct task_struct *p)
21065 + struct nx_info *nxi;
21066 + struct nx_addr_v4 *v4a;
21067 +#ifdef CONFIG_IPV6
21068 + struct nx_addr_v6 *v6a;
21072 + seq_printf(m, "NID:\t%d\n", nx_task_nid(p));
21074 + nxi = task_get_nx_info(p);
21078 + seq_printf(m, "NCaps:\t%016llx\n",
21079 + (unsigned long long)nxi->nx_ncaps);
21080 + seq_printf(m, "NFlags:\t%016llx\n",
21081 + (unsigned long long)nxi->nx_flags);
21083 + seq_printf(m, "V4Root[bcast]:\t" NIPQUAD_FMT "\n",
21084 + NIPQUAD(nxi->v4_bcast.s_addr));
21085 + seq_printf(m, "V4Root[lback]:\t" NIPQUAD_FMT "\n",
21086 + NIPQUAD(nxi->v4_lback.s_addr));
21087 + if (!NX_IPV4(nxi))
21089 + for (i = 0, v4a = &nxi->v4; v4a; i++, v4a = v4a->next)
21090 + seq_printf(m, "V4Root[%d]:\t" NXAV4_FMT "\n",
21093 +#ifdef CONFIG_IPV6
21094 + if (!NX_IPV6(nxi))
21096 + for (i = 0, v6a = &nxi->v6; v6a; i++, v6a = v6a->next)
21097 + seq_printf(m, "V6Root[%d]:\t" NXAV6_FMT "\n",
21101 + put_nx_info(nxi);
21105 diff -NurpP --minimal linux-4.9.217/kernel/vserver/sched.c linux-4.9.217-vs2.3.9.12/kernel/vserver/sched.c
21106 --- linux-4.9.217/kernel/vserver/sched.c 1970-01-01 00:00:00.000000000 +0000
21107 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/sched.c 2018-10-20 04:58:15.000000000 +0000
21110 + * linux/kernel/vserver/sched.c
21112 + * Virtual Server: Scheduler Support
21114 + * Copyright (C) 2004-2010 Herbert Pötzl
21116 + * V0.01 adapted Sam Vilain's version to 2.6.3
21117 + * V0.02 removed legacy interface
21118 + * V0.03 changed vcmds to vxi arg
21119 + * V0.04 removed older and legacy interfaces
21120 + * V0.05 removed scheduler code/commands
21124 +#include <linux/vs_context.h>
21125 +#include <linux/vs_sched.h>
21126 +#include <linux/cpumask.h>
21127 +#include <linux/vserver/sched_cmd.h>
21129 +#include <asm/uaccess.h>
21132 +void vx_update_sched_param(struct _vx_sched *sched,
21133 + struct _vx_sched_pc *sched_pc)
21135 + sched_pc->prio_bias = sched->prio_bias;
21138 +static int do_set_prio_bias(struct vx_info *vxi, struct vcmd_prio_bias *data)
21142 + if (data->prio_bias > MAX_PRIO_BIAS)
21143 + data->prio_bias = MAX_PRIO_BIAS;
21144 + if (data->prio_bias < MIN_PRIO_BIAS)
21145 + data->prio_bias = MIN_PRIO_BIAS;
21147 + if (data->cpu_id != ~0) {
21148 + vxi->sched.update = *get_cpu_mask(data->cpu_id);
21149 + cpumask_and(&vxi->sched.update, &vxi->sched.update,
21150 + cpu_online_mask);
21152 + cpumask_copy(&vxi->sched.update, cpu_online_mask);
21154 + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)vxi->sched.update)
21155 + vx_update_sched_param(&vxi->sched,
21156 + &vx_per_cpu(vxi, sched_pc, cpu));
21160 +int vc_set_prio_bias(struct vx_info *vxi, void __user *data)
21162 + struct vcmd_prio_bias vc_data;
21164 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
21167 + return do_set_prio_bias(vxi, &vc_data);
21170 +int vc_get_prio_bias(struct vx_info *vxi, void __user *data)
21172 + struct vcmd_prio_bias vc_data;
21173 + struct _vx_sched_pc *pcd;
21176 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
21179 + cpu = vc_data.cpu_id;
21181 + if (!cpu_possible(cpu))
21184 + pcd = &vx_per_cpu(vxi, sched_pc, cpu);
21185 + vc_data.prio_bias = pcd->prio_bias;
21187 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
21192 diff -NurpP --minimal linux-4.9.217/kernel/vserver/sched_init.h linux-4.9.217-vs2.3.9.12/kernel/vserver/sched_init.h
21193 --- linux-4.9.217/kernel/vserver/sched_init.h 1970-01-01 00:00:00.000000000 +0000
21194 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/sched_init.h 2018-10-20 04:58:15.000000000 +0000
21197 +static inline void vx_info_init_sched(struct _vx_sched *sched)
21199 + /* scheduling; hard code starting values as constants */
21200 + sched->prio_bias = 0;
21204 +void vx_info_init_sched_pc(struct _vx_sched_pc *sched_pc, int cpu)
21206 + sched_pc->prio_bias = 0;
21208 + sched_pc->user_ticks = 0;
21209 + sched_pc->sys_ticks = 0;
21210 + sched_pc->hold_ticks = 0;
21213 +static inline void vx_info_exit_sched(struct _vx_sched *sched)
21219 +void vx_info_exit_sched_pc(struct _vx_sched_pc *sched_pc, int cpu)
21223 diff -NurpP --minimal linux-4.9.217/kernel/vserver/sched_proc.h linux-4.9.217-vs2.3.9.12/kernel/vserver/sched_proc.h
21224 --- linux-4.9.217/kernel/vserver/sched_proc.h 1970-01-01 00:00:00.000000000 +0000
21225 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/sched_proc.h 2018-10-20 04:58:15.000000000 +0000
21227 +#ifndef _VX_SCHED_PROC_H
21228 +#define _VX_SCHED_PROC_H
21232 +int vx_info_proc_sched(struct _vx_sched *sched, char *buffer)
21236 + length += sprintf(buffer,
21237 + "PrioBias:\t%8d\n",
21238 + sched->prio_bias);
21243 +int vx_info_proc_sched_pc(struct _vx_sched_pc *sched_pc,
21244 + char *buffer, int cpu)
21248 + length += sprintf(buffer + length,
21249 + "cpu %d: %lld %lld %lld", cpu,
21250 + (unsigned long long)sched_pc->user_ticks,
21251 + (unsigned long long)sched_pc->sys_ticks,
21252 + (unsigned long long)sched_pc->hold_ticks);
21253 + length += sprintf(buffer + length,
21254 + " %d\n", sched_pc->prio_bias);
21258 +#endif /* _VX_SCHED_PROC_H */
21259 diff -NurpP --minimal linux-4.9.217/kernel/vserver/signal.c linux-4.9.217-vs2.3.9.12/kernel/vserver/signal.c
21260 --- linux-4.9.217/kernel/vserver/signal.c 1970-01-01 00:00:00.000000000 +0000
21261 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/signal.c 2018-10-20 04:58:15.000000000 +0000
21264 + * linux/kernel/vserver/signal.c
21266 + * Virtual Server: Signal Support
21268 + * Copyright (C) 2003-2007 Herbert Pötzl
21270 + * V0.01 broken out from vcontext V0.05
21271 + * V0.02 changed vcmds to vxi arg
21272 + * V0.03 adjusted siginfo for kill
21276 +#include <asm/uaccess.h>
21278 +#include <linux/vs_context.h>
21279 +#include <linux/vs_pid.h>
21280 +#include <linux/vserver/signal_cmd.h>
21283 +int vx_info_kill(struct vx_info *vxi, int pid, int sig)
21285 + int retval, count = 0;
21286 + struct task_struct *p;
21287 + struct siginfo *sip = SEND_SIG_PRIV;
21290 + vxdprintk(VXD_CBIT(misc, 4),
21291 + "vx_info_kill(%p[#%d],%d,%d)*",
21292 + vxi, vxi->vx_id, pid, sig);
21293 + read_lock(&tasklist_lock);
21297 + for_each_process(p) {
21300 + if (vx_task_xid(p) != vxi->vx_id || p->pid <= 1 ||
21301 + (pid && vxi->vx_initpid == p->pid))
21304 + err = group_send_sig_info(sig, sip, p);
21306 + if (err != -EPERM)
21312 + if (vxi->vx_initpid) {
21313 + pid = vxi->vx_initpid;
21314 + /* for now, only SIGINT to private init ... */
21315 + if (!vx_info_flags(vxi, VXF_STATE_ADMIN, 0) &&
21316 + /* ... as long as there are tasks left */
21317 + (atomic_read(&vxi->vx_tasks) > 1))
21320 + /* fallthrough */
21323 + p = find_task_by_real_pid(pid);
21324 + rcu_read_unlock();
21326 + if (vx_task_xid(p) == vxi->vx_id)
21327 + retval = group_send_sig_info(sig, sip, p);
21331 + read_unlock(&tasklist_lock);
21332 + vxdprintk(VXD_CBIT(misc, 4),
21333 + "vx_info_kill(%p[#%d],%d,%d,%ld) = %d",
21334 + vxi, vxi->vx_id, pid, sig, (long)sip, retval);
21338 +int vc_ctx_kill(struct vx_info *vxi, void __user *data)
21340 + struct vcmd_ctx_kill_v0 vc_data;
21342 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
21345 + /* special check to allow guest shutdown */
21346 + if (!vx_info_flags(vxi, VXF_STATE_ADMIN, 0) &&
21347 + /* forbid killall pid=0 when init is present */
21348 + (((vc_data.pid < 1) && vxi->vx_initpid) ||
21349 + (vc_data.pid > 1)))
21352 + return vx_info_kill(vxi, vc_data.pid, vc_data.sig);
21356 +static int __wait_exit(struct vx_info *vxi)
21358 + DECLARE_WAITQUEUE(wait, current);
21361 + add_wait_queue(&vxi->vx_wait, &wait);
21362 + set_current_state(TASK_INTERRUPTIBLE);
21365 + if (vx_info_state(vxi,
21366 + VXS_SHUTDOWN | VXS_HASHED | VXS_HELPER) == VXS_SHUTDOWN)
21368 + if (signal_pending(current)) {
21369 + ret = -ERESTARTSYS;
21376 + set_current_state(TASK_RUNNING);
21377 + remove_wait_queue(&vxi->vx_wait, &wait);
21383 +int vc_wait_exit(struct vx_info *vxi, void __user *data)
21385 + struct vcmd_wait_exit_v0 vc_data;
21388 + ret = __wait_exit(vxi);
21389 + vc_data.reboot_cmd = vxi->reboot_cmd;
21390 + vc_data.exit_code = vxi->exit_code;
21392 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
21397 diff -NurpP --minimal linux-4.9.217/kernel/vserver/space.c linux-4.9.217-vs2.3.9.12/kernel/vserver/space.c
21398 --- linux-4.9.217/kernel/vserver/space.c 1970-01-01 00:00:00.000000000 +0000
21399 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/space.c 2018-10-20 04:58:15.000000000 +0000
21402 + * linux/kernel/vserver/space.c
21404 + * Virtual Server: Context Space Support
21406 + * Copyright (C) 2003-2010 Herbert Pötzl
21408 + * V0.01 broken out from context.c 0.07
21409 + * V0.02 added task locking for namespace
21410 + * V0.03 broken out vx_enter_namespace
21411 + * V0.04 added *space support and commands
21412 + * V0.05 added credential support
21416 +#include <linux/utsname.h>
21417 +#include <linux/nsproxy.h>
21418 +#include <linux/err.h>
21419 +#include <linux/fs_struct.h>
21420 +#include <linux/cred.h>
21421 +#include <asm/uaccess.h>
21423 +#include <linux/vs_context.h>
21424 +#include <linux/vserver/space.h>
21425 +#include <linux/vserver/space_cmd.h>
21427 +atomic_t vs_global_nsproxy = ATOMIC_INIT(0);
21428 +atomic_t vs_global_fs = ATOMIC_INIT(0);
21429 +atomic_t vs_global_mnt_ns = ATOMIC_INIT(0);
21430 +atomic_t vs_global_uts_ns = ATOMIC_INIT(0);
21431 +atomic_t vs_global_ipc_ns = ATOMIC_INIT(0);
21432 +atomic_t vs_global_user_ns = ATOMIC_INIT(0);
21433 +atomic_t vs_global_pid_ns = ATOMIC_INIT(0);
21436 +/* namespace functions */
21438 +#include <linux/mnt_namespace.h>
21439 +#include <linux/user_namespace.h>
21440 +#include <linux/pid_namespace.h>
21441 +#include <linux/ipc_namespace.h>
21442 +#include <net/net_namespace.h>
21443 +#include "../fs/mount.h"
21446 +static const struct vcmd_space_mask_v1 space_mask_v0 = {
21447 + .mask = CLONE_FS |
21449 +#ifdef CONFIG_UTS_NS
21452 +#ifdef CONFIG_IPC_NS
21455 +#ifdef CONFIG_USER_NS
21461 +static const struct vcmd_space_mask_v1 space_mask = {
21462 + .mask = CLONE_FS |
21464 +#ifdef CONFIG_UTS_NS
21467 +#ifdef CONFIG_IPC_NS
21470 +#ifdef CONFIG_USER_NS
21473 +#ifdef CONFIG_PID_NS
21476 +#ifdef CONFIG_NET_NS
21482 +static const struct vcmd_space_mask_v1 default_space_mask = {
21483 + .mask = CLONE_FS |
21485 +#ifdef CONFIG_UTS_NS
21488 +#ifdef CONFIG_IPC_NS
21491 +#ifdef CONFIG_USER_NS
21492 +// CLONE_NEWUSER |
21494 +#ifdef CONFIG_PID_NS
21501 + * build a new nsproxy mix
21502 + * assumes that both proxies are 'const'
21503 + * does not touch nsproxy refcounts
21504 + * will hold a reference on the result.
21507 +struct nsproxy *vs_mix_nsproxy(struct nsproxy *old_nsproxy,
21508 + struct nsproxy *new_nsproxy, unsigned long mask)
21510 + struct mnt_namespace *old_ns;
21511 + struct uts_namespace *old_uts;
21512 + struct ipc_namespace *old_ipc;
21513 +#ifdef CONFIG_PID_NS
21514 + struct pid_namespace *old_pid;
21516 +#ifdef CONFIG_NET_NS
21517 + struct net *old_net;
21519 + struct nsproxy *nsproxy;
21521 + nsproxy = copy_nsproxy(old_nsproxy);
21525 + if (mask & CLONE_NEWNS) {
21526 + old_ns = nsproxy->mnt_ns;
21527 + nsproxy->mnt_ns = new_nsproxy->mnt_ns;
21528 + if (nsproxy->mnt_ns)
21529 + get_mnt_ns(nsproxy->mnt_ns);
21533 + if (mask & CLONE_NEWUTS) {
21534 + old_uts = nsproxy->uts_ns;
21535 + nsproxy->uts_ns = new_nsproxy->uts_ns;
21536 + if (nsproxy->uts_ns)
21537 + get_uts_ns(nsproxy->uts_ns);
21541 + if (mask & CLONE_NEWIPC) {
21542 + old_ipc = nsproxy->ipc_ns;
21543 + nsproxy->ipc_ns = new_nsproxy->ipc_ns;
21544 + if (nsproxy->ipc_ns)
21545 + get_ipc_ns(nsproxy->ipc_ns);
21549 +#ifdef CONFIG_PID_NS
21550 + if (mask & CLONE_NEWPID) {
21551 + old_pid = nsproxy->pid_ns_for_children;
21552 + nsproxy->pid_ns_for_children = new_nsproxy->pid_ns_for_children;
21553 + if (nsproxy->pid_ns_for_children)
21554 + get_pid_ns(nsproxy->pid_ns_for_children);
21558 +#ifdef CONFIG_NET_NS
21559 + if (mask & CLONE_NEWNET) {
21560 + old_net = nsproxy->net_ns;
21561 + nsproxy->net_ns = new_nsproxy->net_ns;
21562 + if (nsproxy->net_ns)
21563 + get_net(nsproxy->net_ns);
21568 + put_mnt_ns(old_ns);
21570 + put_uts_ns(old_uts);
21572 + put_ipc_ns(old_ipc);
21573 +#ifdef CONFIG_PID_NS
21575 + put_pid_ns(old_pid);
21577 +#ifdef CONFIG_NET_NS
21579 + put_net(old_net);
21587 + * merge two nsproxy structs into a new one.
21588 + * will hold a reference on the result.
21592 +struct nsproxy *__vs_merge_nsproxy(struct nsproxy *old,
21593 + struct nsproxy *proxy, unsigned long mask)
21595 + struct nsproxy null_proxy = { .mnt_ns = NULL };
21601 + /* vs_mix_nsproxy returns with reference */
21602 + return vs_mix_nsproxy(old ? old : &null_proxy,
21605 + get_nsproxy(proxy);
21610 +int vx_enter_space(struct vx_info *vxi, unsigned long mask, unsigned index)
21612 + struct nsproxy *proxy, *proxy_cur, *proxy_new;
21613 + struct fs_struct *fs_cur, *fs = NULL;
21614 + struct _vx_space *space;
21615 + int ret, kill = 0;
21617 + vxdprintk(VXD_CBIT(space, 8), "vx_enter_space(%p[#%u],0x%08lx,%d)",
21618 + vxi, vxi->vx_id, mask, index);
21620 + if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0))
21623 + if (index >= VX_SPACES)
21626 + space = &vxi->space[index];
21629 + mask = space->vx_nsmask;
21631 + if ((mask & space->vx_nsmask) != mask)
21634 + if (mask & CLONE_FS) {
21635 + fs = copy_fs_struct(space->vx_fs);
21639 + proxy = space->vx_nsproxy;
21641 + vxdprintk(VXD_CBIT(space, 9),
21642 + "vx_enter_space(%p[#%u],0x%08lx,%d) -> (%p,%p)",
21643 + vxi, vxi->vx_id, mask, index, proxy, fs);
21645 + task_lock(current);
21646 + fs_cur = current->fs;
21648 + if (mask & CLONE_FS) {
21649 + spin_lock(&fs_cur->lock);
21650 + current->fs = fs;
21651 + kill = !--fs_cur->users;
21652 + spin_unlock(&fs_cur->lock);
21655 + proxy_cur = current->nsproxy;
21656 + get_nsproxy(proxy_cur);
21657 + task_unlock(current);
21660 + free_fs_struct(fs_cur);
21662 + proxy_new = __vs_merge_nsproxy(proxy_cur, proxy, mask);
21663 + if (IS_ERR(proxy_new)) {
21664 + ret = PTR_ERR(proxy_new);
21668 + proxy_new = xchg(¤t->nsproxy, proxy_new);
21670 + if (mask & CLONE_NEWUSER) {
21671 + struct cred *cred;
21673 + vxdprintk(VXD_CBIT(space, 10),
21674 + "vx_enter_space(%p[#%u],%p) cred (%p,%p)",
21675 + vxi, vxi->vx_id, space->vx_cred,
21676 + current->real_cred, current->cred);
21678 + if (space->vx_cred) {
21679 + cred = __prepare_creds(space->vx_cred);
21681 + commit_creds(cred);
21688 + put_nsproxy(proxy_new);
21691 + put_nsproxy(proxy_cur);
21696 +int vx_set_space(struct vx_info *vxi, unsigned long mask, unsigned index)
21698 + struct nsproxy *proxy_vxi, *proxy_cur, *proxy_new;
21699 + struct fs_struct *fs_vxi, *fs = NULL;
21700 + struct _vx_space *space;
21701 + int ret, kill = 0;
21703 + vxdprintk(VXD_CBIT(space, 8), "vx_set_space(%p[#%u],0x%08lx,%d)",
21704 + vxi, vxi->vx_id, mask, index);
21706 + if ((mask & space_mask.mask) != mask)
21709 + if (index >= VX_SPACES)
21712 + space = &vxi->space[index];
21714 + proxy_vxi = space->vx_nsproxy;
21715 + fs_vxi = space->vx_fs;
21717 + if (mask & CLONE_FS) {
21718 + fs = copy_fs_struct(current->fs);
21723 + task_lock(current);
21725 + if (mask & CLONE_FS) {
21726 + spin_lock(&fs_vxi->lock);
21727 + space->vx_fs = fs;
21728 + kill = !--fs_vxi->users;
21729 + spin_unlock(&fs_vxi->lock);
21732 + proxy_cur = current->nsproxy;
21733 + get_nsproxy(proxy_cur);
21734 + task_unlock(current);
21737 + free_fs_struct(fs_vxi);
21739 + proxy_new = __vs_merge_nsproxy(proxy_vxi, proxy_cur, mask);
21740 + if (IS_ERR(proxy_new)) {
21741 + ret = PTR_ERR(proxy_new);
21745 + proxy_new = xchg(&space->vx_nsproxy, proxy_new);
21746 + space->vx_nsmask |= mask;
21748 + if (mask & CLONE_NEWUSER) {
21749 + struct cred *cred;
21751 + vxdprintk(VXD_CBIT(space, 10),
21752 + "vx_set_space(%p[#%u],%p) cred (%p,%p)",
21753 + vxi, vxi->vx_id, space->vx_cred,
21754 + current->real_cred, current->cred);
21756 + cred = prepare_creds();
21757 + cred = (struct cred *)xchg(&space->vx_cred, cred);
21759 + abort_creds(cred);
21765 + put_nsproxy(proxy_new);
21768 + put_nsproxy(proxy_cur);
21773 +int vc_enter_space_v1(struct vx_info *vxi, void __user *data)
21775 + struct vcmd_space_mask_v1 vc_data = { .mask = 0 };
21777 + if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
21780 + return vx_enter_space(vxi, vc_data.mask, 0);
21783 +int vc_enter_space(struct vx_info *vxi, void __user *data)
21785 + struct vcmd_space_mask_v2 vc_data = { .mask = 0 };
21787 + if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
21790 + if (vc_data.index >= VX_SPACES)
21793 + return vx_enter_space(vxi, vc_data.mask, vc_data.index);
21796 +int vc_set_space_v1(struct vx_info *vxi, void __user *data)
21798 + struct vcmd_space_mask_v1 vc_data = { .mask = 0 };
21800 + if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
21803 + return vx_set_space(vxi, vc_data.mask, 0);
21806 +int vc_set_space(struct vx_info *vxi, void __user *data)
21808 + struct vcmd_space_mask_v2 vc_data = { .mask = 0 };
21810 + if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
21813 + if (vc_data.index >= VX_SPACES)
21816 + return vx_set_space(vxi, vc_data.mask, vc_data.index);
21819 +int vc_get_space_mask(void __user *data, int type)
21821 + const struct vcmd_space_mask_v1 *mask;
21824 + mask = &space_mask_v0;
21825 + else if (type == 1)
21826 + mask = &space_mask;
21828 + mask = &default_space_mask;
21830 + vxdprintk(VXD_CBIT(space, 10),
21831 + "vc_get_space_mask(%d) = %08llx", type, mask->mask);
21833 + if (copy_to_user(data, mask, sizeof(*mask)))
21838 diff -NurpP --minimal linux-4.9.217/kernel/vserver/switch.c linux-4.9.217-vs2.3.9.12/kernel/vserver/switch.c
21839 --- linux-4.9.217/kernel/vserver/switch.c 1970-01-01 00:00:00.000000000 +0000
21840 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/switch.c 2018-10-20 04:58:15.000000000 +0000
21843 + * linux/kernel/vserver/switch.c
21845 + * Virtual Server: Syscall Switch
21847 + * Copyright (C) 2003-2011 Herbert Pötzl
21849 + * V0.01 syscall switch
21850 + * V0.02 added signal to context
21851 + * V0.03 added rlimit functions
21852 + * V0.04 added iattr, task/xid functions
21853 + * V0.05 added debug/history stuff
21854 + * V0.06 added compat32 layer
21855 + * V0.07 vcmd args and perms
21856 + * V0.08 added status commands
21857 + * V0.09 added tag commands
21858 + * V0.10 added oom bias
21859 + * V0.11 added device commands
21860 + * V0.12 added warn mask
21864 +#include <linux/vs_context.h>
21865 +#include <linux/vs_network.h>
21866 +#include <linux/vserver/switch.h>
21868 +#include "vci_config.h"
21872 +int vc_get_version(uint32_t id)
21874 + return VCI_VERSION;
21878 +int vc_get_vci(uint32_t id)
21880 + return vci_kernel_config();
21883 +#include <linux/vserver/context_cmd.h>
21884 +#include <linux/vserver/cvirt_cmd.h>
21885 +#include <linux/vserver/cacct_cmd.h>
21886 +#include <linux/vserver/limit_cmd.h>
21887 +#include <linux/vserver/network_cmd.h>
21888 +#include <linux/vserver/sched_cmd.h>
21889 +#include <linux/vserver/debug_cmd.h>
21890 +#include <linux/vserver/inode_cmd.h>
21891 +#include <linux/vserver/dlimit_cmd.h>
21892 +#include <linux/vserver/signal_cmd.h>
21893 +#include <linux/vserver/space_cmd.h>
21894 +#include <linux/vserver/tag_cmd.h>
21895 +#include <linux/vserver/device_cmd.h>
21897 +#include <linux/vserver/inode.h>
21898 +#include <linux/vserver/dlimit.h>
21901 +#ifdef CONFIG_COMPAT
21902 +#define __COMPAT(name, id, data, compat) \
21903 + (compat) ? name ## _x32(id, data) : name(id, data)
21904 +#define __COMPAT_NO_ID(name, data, compat) \
21905 + (compat) ? name ## _x32(data) : name(data)
21907 +#define __COMPAT(name, id, data, compat) \
21909 +#define __COMPAT_NO_ID(name, data, compat) \
21915 +long do_vcmd(uint32_t cmd, uint32_t id,
21916 + struct vx_info *vxi, struct nx_info *nxi,
21917 + void __user *data, int compat)
21921 + case VCMD_get_version:
21922 + return vc_get_version(id);
21923 + case VCMD_get_vci:
21924 + return vc_get_vci(id);
21926 + case VCMD_task_xid:
21927 + return vc_task_xid(id);
21928 + case VCMD_vx_info:
21929 + return vc_vx_info(vxi, data);
21931 + case VCMD_task_nid:
21932 + return vc_task_nid(id);
21933 + case VCMD_nx_info:
21934 + return vc_nx_info(nxi, data);
21936 + case VCMD_task_tag:
21937 + return vc_task_tag(id);
21939 + case VCMD_set_space_v1:
21940 + return vc_set_space_v1(vxi, data);
21941 + /* this is version 2 */
21942 + case VCMD_set_space:
21943 + return vc_set_space(vxi, data);
21945 + case VCMD_get_space_mask_v0:
21946 + return vc_get_space_mask(data, 0);
21947 + /* this is version 1 */
21948 + case VCMD_get_space_mask:
21949 + return vc_get_space_mask(data, 1);
21951 + case VCMD_get_space_default:
21952 + return vc_get_space_mask(data, -1);
21954 + case VCMD_set_umask:
21955 + return vc_set_umask(vxi, data);
21957 + case VCMD_get_umask:
21958 + return vc_get_umask(vxi, data);
21960 + case VCMD_set_wmask:
21961 + return vc_set_wmask(vxi, data);
21963 + case VCMD_get_wmask:
21964 + return vc_get_wmask(vxi, data);
21965 +#ifdef CONFIG_IA32_EMULATION
21966 + case VCMD_get_rlimit:
21967 + return __COMPAT(vc_get_rlimit, vxi, data, compat);
21968 + case VCMD_set_rlimit:
21969 + return __COMPAT(vc_set_rlimit, vxi, data, compat);
21971 + case VCMD_get_rlimit:
21972 + return vc_get_rlimit(vxi, data);
21973 + case VCMD_set_rlimit:
21974 + return vc_set_rlimit(vxi, data);
21976 + case VCMD_get_rlimit_mask:
21977 + return vc_get_rlimit_mask(id, data);
21978 + case VCMD_reset_hits:
21979 + return vc_reset_hits(vxi, data);
21980 + case VCMD_reset_minmax:
21981 + return vc_reset_minmax(vxi, data);
21983 + case VCMD_get_vhi_name:
21984 + return vc_get_vhi_name(vxi, data);
21985 + case VCMD_set_vhi_name:
21986 + return vc_set_vhi_name(vxi, data);
21988 + case VCMD_ctx_stat:
21989 + return vc_ctx_stat(vxi, data);
21990 + case VCMD_virt_stat:
21991 + return vc_virt_stat(vxi, data);
21992 + case VCMD_sock_stat:
21993 + return vc_sock_stat(vxi, data);
21994 + case VCMD_rlimit_stat:
21995 + return vc_rlimit_stat(vxi, data);
21997 + case VCMD_set_cflags:
21998 + return vc_set_cflags(vxi, data);
21999 + case VCMD_get_cflags:
22000 + return vc_get_cflags(vxi, data);
22002 + /* this is version 1 */
22003 + case VCMD_set_ccaps:
22004 + return vc_set_ccaps(vxi, data);
22005 + /* this is version 1 */
22006 + case VCMD_get_ccaps:
22007 + return vc_get_ccaps(vxi, data);
22008 + case VCMD_set_bcaps:
22009 + return vc_set_bcaps(vxi, data);
22010 + case VCMD_get_bcaps:
22011 + return vc_get_bcaps(vxi, data);
22013 + case VCMD_set_badness:
22014 + return vc_set_badness(vxi, data);
22015 + case VCMD_get_badness:
22016 + return vc_get_badness(vxi, data);
22018 + case VCMD_set_nflags:
22019 + return vc_set_nflags(nxi, data);
22020 + case VCMD_get_nflags:
22021 + return vc_get_nflags(nxi, data);
22023 + case VCMD_set_ncaps:
22024 + return vc_set_ncaps(nxi, data);
22025 + case VCMD_get_ncaps:
22026 + return vc_get_ncaps(nxi, data);
22028 + case VCMD_set_prio_bias:
22029 + return vc_set_prio_bias(vxi, data);
22030 + case VCMD_get_prio_bias:
22031 + return vc_get_prio_bias(vxi, data);
22032 + case VCMD_add_dlimit:
22033 + return __COMPAT(vc_add_dlimit, id, data, compat);
22034 + case VCMD_rem_dlimit:
22035 + return __COMPAT(vc_rem_dlimit, id, data, compat);
22036 + case VCMD_set_dlimit:
22037 + return __COMPAT(vc_set_dlimit, id, data, compat);
22038 + case VCMD_get_dlimit:
22039 + return __COMPAT(vc_get_dlimit, id, data, compat);
22041 + case VCMD_ctx_kill:
22042 + return vc_ctx_kill(vxi, data);
22044 + case VCMD_wait_exit:
22045 + return vc_wait_exit(vxi, data);
22047 + case VCMD_get_iattr:
22048 + return __COMPAT_NO_ID(vc_get_iattr, data, compat);
22049 + case VCMD_set_iattr:
22050 + return __COMPAT_NO_ID(vc_set_iattr, data, compat);
22052 + case VCMD_fget_iattr:
22053 + return vc_fget_iattr(id, data);
22054 + case VCMD_fset_iattr:
22055 + return vc_fset_iattr(id, data);
22057 + case VCMD_enter_space_v0:
22058 + return vc_enter_space_v1(vxi, NULL);
22059 + case VCMD_enter_space_v1:
22060 + return vc_enter_space_v1(vxi, data);
22061 + /* this is version 2 */
22062 + case VCMD_enter_space:
22063 + return vc_enter_space(vxi, data);
22065 + case VCMD_ctx_create_v0:
22066 + return vc_ctx_create(id, NULL);
22067 + case VCMD_ctx_create:
22068 + return vc_ctx_create(id, data);
22069 + case VCMD_ctx_migrate_v0:
22070 + return vc_ctx_migrate(vxi, NULL);
22071 + case VCMD_ctx_migrate:
22072 + return vc_ctx_migrate(vxi, data);
22074 + case VCMD_net_create_v0:
22075 + return vc_net_create(id, NULL);
22076 + case VCMD_net_create:
22077 + return vc_net_create(id, data);
22078 + case VCMD_net_migrate:
22079 + return vc_net_migrate(nxi, data);
22081 + case VCMD_tag_migrate:
22082 + return vc_tag_migrate(id);
22084 + case VCMD_net_add:
22085 + return vc_net_add(nxi, data);
22086 + case VCMD_net_remove:
22087 + return vc_net_remove(nxi, data);
22089 + case VCMD_net_add_ipv4_v1:
22090 + return vc_net_add_ipv4_v1(nxi, data);
22091 + /* this is version 2 */
22092 + case VCMD_net_add_ipv4:
22093 + return vc_net_add_ipv4(nxi, data);
22095 + case VCMD_net_rem_ipv4_v1:
22096 + return vc_net_rem_ipv4_v1(nxi, data);
22097 + /* this is version 2 */
22098 + case VCMD_net_rem_ipv4:
22099 + return vc_net_rem_ipv4(nxi, data);
22100 +#ifdef CONFIG_IPV6
22101 + case VCMD_net_add_ipv6:
22102 + return vc_net_add_ipv6(nxi, data);
22103 + case VCMD_net_remove_ipv6:
22104 + return vc_net_remove_ipv6(nxi, data);
22106 +/* case VCMD_add_match_ipv4:
22107 + return vc_add_match_ipv4(nxi, data);
22108 + case VCMD_get_match_ipv4:
22109 + return vc_get_match_ipv4(nxi, data);
22110 +#ifdef CONFIG_IPV6
22111 + case VCMD_add_match_ipv6:
22112 + return vc_add_match_ipv6(nxi, data);
22113 + case VCMD_get_match_ipv6:
22114 + return vc_get_match_ipv6(nxi, data);
22117 +#ifdef CONFIG_VSERVER_DEVICE
22118 + case VCMD_set_mapping:
22119 + return __COMPAT(vc_set_mapping, vxi, data, compat);
22120 + case VCMD_unset_mapping:
22121 + return __COMPAT(vc_unset_mapping, vxi, data, compat);
22123 +#ifdef CONFIG_VSERVER_HISTORY
22124 + case VCMD_dump_history:
22125 + return vc_dump_history(id);
22126 + case VCMD_read_history:
22127 + return __COMPAT(vc_read_history, id, data, compat);
22130 + vxwprintk_task(1, "unimplemented VCMD_%02d_%d[%d]",
22131 + VC_CATEGORY(cmd), VC_COMMAND(cmd), VC_VERSION(cmd));
22137 +#define __VCMD(vcmd, _perm, _args, _flags) \
22138 + case VCMD_ ## vcmd: perm = _perm; \
22139 + args = _args; flags = _flags; break
22142 +#define VCA_NONE 0x00
22143 +#define VCA_VXI 0x01
22144 +#define VCA_NXI 0x02
22146 +#define VCF_NONE 0x00
22147 +#define VCF_INFO 0x01
22148 +#define VCF_ADMIN 0x02
22149 +#define VCF_ARES 0x06 /* includes admin */
22150 +#define VCF_SETUP 0x08
22152 +#define VCF_ZIDOK 0x10 /* zero id okay */
22156 +long do_vserver(uint32_t cmd, uint32_t id, void __user *data, int compat)
22159 + int permit = -1, state = 0;
22160 + int perm = -1, args = 0, flags = 0;
22161 + struct vx_info *vxi = NULL;
22162 + struct nx_info *nxi = NULL;
22165 + /* unprivileged commands */
22166 + __VCMD(get_version, 0, VCA_NONE, 0);
22167 + __VCMD(get_vci, 0, VCA_NONE, 0);
22168 + __VCMD(get_rlimit_mask, 0, VCA_NONE, 0);
22169 + __VCMD(get_space_mask_v0,0, VCA_NONE, 0);
22170 + __VCMD(get_space_mask, 0, VCA_NONE, 0);
22171 + __VCMD(get_space_default,0, VCA_NONE, 0);
22173 + /* info commands */
22174 + __VCMD(task_xid, 2, VCA_NONE, 0);
22175 + __VCMD(reset_hits, 2, VCA_VXI, 0);
22176 + __VCMD(reset_minmax, 2, VCA_VXI, 0);
22177 + __VCMD(vx_info, 3, VCA_VXI, VCF_INFO);
22178 + __VCMD(get_bcaps, 3, VCA_VXI, VCF_INFO);
22179 + __VCMD(get_ccaps, 3, VCA_VXI, VCF_INFO);
22180 + __VCMD(get_cflags, 3, VCA_VXI, VCF_INFO);
22181 + __VCMD(get_umask, 3, VCA_VXI, VCF_INFO);
22182 + __VCMD(get_wmask, 3, VCA_VXI, VCF_INFO);
22183 + __VCMD(get_badness, 3, VCA_VXI, VCF_INFO);
22184 + __VCMD(get_vhi_name, 3, VCA_VXI, VCF_INFO);
22185 + __VCMD(get_rlimit, 3, VCA_VXI, VCF_INFO);
22187 + __VCMD(ctx_stat, 3, VCA_VXI, VCF_INFO);
22188 + __VCMD(virt_stat, 3, VCA_VXI, VCF_INFO);
22189 + __VCMD(sock_stat, 3, VCA_VXI, VCF_INFO);
22190 + __VCMD(rlimit_stat, 3, VCA_VXI, VCF_INFO);
22192 + __VCMD(task_nid, 2, VCA_NONE, 0);
22193 + __VCMD(nx_info, 3, VCA_NXI, VCF_INFO);
22194 + __VCMD(get_ncaps, 3, VCA_NXI, VCF_INFO);
22195 + __VCMD(get_nflags, 3, VCA_NXI, VCF_INFO);
22197 + __VCMD(task_tag, 2, VCA_NONE, 0);
22199 + __VCMD(get_iattr, 2, VCA_NONE, 0);
22200 + __VCMD(fget_iattr, 2, VCA_NONE, 0);
22201 + __VCMD(get_dlimit, 3, VCA_NONE, VCF_INFO);
22202 + __VCMD(get_prio_bias, 3, VCA_VXI, VCF_INFO);
22204 + /* lower admin commands */
22205 + __VCMD(wait_exit, 4, VCA_VXI, VCF_INFO);
22206 + __VCMD(ctx_create_v0, 5, VCA_NONE, 0);
22207 + __VCMD(ctx_create, 5, VCA_NONE, 0);
22208 + __VCMD(ctx_migrate_v0, 5, VCA_VXI, VCF_ADMIN);
22209 + __VCMD(ctx_migrate, 5, VCA_VXI, VCF_ADMIN);
22210 + __VCMD(enter_space_v0, 5, VCA_VXI, VCF_ADMIN);
22211 + __VCMD(enter_space_v1, 5, VCA_VXI, VCF_ADMIN);
22212 + __VCMD(enter_space, 5, VCA_VXI, VCF_ADMIN);
22214 + __VCMD(net_create_v0, 5, VCA_NONE, 0);
22215 + __VCMD(net_create, 5, VCA_NONE, 0);
22216 + __VCMD(net_migrate, 5, VCA_NXI, VCF_ADMIN);
22218 + __VCMD(tag_migrate, 5, VCA_NONE, VCF_ADMIN);
22220 + /* higher admin commands */
22221 + __VCMD(ctx_kill, 6, VCA_VXI, VCF_ARES);
22222 + __VCMD(set_space_v1, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22223 + __VCMD(set_space, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22225 + __VCMD(set_ccaps, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22226 + __VCMD(set_bcaps, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22227 + __VCMD(set_cflags, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22228 + __VCMD(set_umask, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22229 + __VCMD(set_wmask, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22230 + __VCMD(set_badness, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22232 + __VCMD(set_vhi_name, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22233 + __VCMD(set_rlimit, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22234 + __VCMD(set_prio_bias, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22236 + __VCMD(set_ncaps, 7, VCA_NXI, VCF_ARES | VCF_SETUP);
22237 + __VCMD(set_nflags, 7, VCA_NXI, VCF_ARES | VCF_SETUP);
22238 + __VCMD(net_add, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22239 + __VCMD(net_remove, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22240 + __VCMD(net_add_ipv4_v1, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22241 + __VCMD(net_rem_ipv4_v1, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22242 + __VCMD(net_add_ipv4, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22243 + __VCMD(net_rem_ipv4, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22244 +#ifdef CONFIG_IPV6
22245 + __VCMD(net_add_ipv6, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22246 + __VCMD(net_remove_ipv6, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22248 + __VCMD(set_iattr, 7, VCA_NONE, 0);
22249 + __VCMD(fset_iattr, 7, VCA_NONE, 0);
22250 + __VCMD(set_dlimit, 7, VCA_NONE, VCF_ARES);
22251 + __VCMD(add_dlimit, 8, VCA_NONE, VCF_ARES);
22252 + __VCMD(rem_dlimit, 8, VCA_NONE, VCF_ARES);
22254 +#ifdef CONFIG_VSERVER_DEVICE
22255 + __VCMD(set_mapping, 8, VCA_VXI, VCF_ARES|VCF_ZIDOK);
22256 + __VCMD(unset_mapping, 8, VCA_VXI, VCF_ARES|VCF_ZIDOK);
22258 + /* debug level admin commands */
22259 +#ifdef CONFIG_VSERVER_HISTORY
22260 + __VCMD(dump_history, 9, VCA_NONE, 0);
22261 + __VCMD(read_history, 9, VCA_NONE, 0);
22268 + vxdprintk(VXD_CBIT(switch, 0),
22269 + "vc: VCMD_%02d_%d[%d], %d,%p [%d,%d,%x,%x]",
22270 + VC_CATEGORY(cmd), VC_COMMAND(cmd),
22271 + VC_VERSION(cmd), id, data, compat,
22272 + perm, args, flags);
22279 + if (!capable(CAP_CONTEXT))
22283 + /* moved here from the individual commands */
22285 + if ((perm > 1) && !capable(CAP_SYS_ADMIN))
22289 + /* vcmd involves resource management */
22291 + if ((flags & VCF_ARES) && !capable(CAP_SYS_RESOURCE))
22295 + /* various legacy exceptions */
22297 + /* will go away when spectator is a cap */
22298 + case VCMD_ctx_migrate_v0:
22299 + case VCMD_ctx_migrate:
22301 + current->xid = 1;
22307 + /* will go away when spectator is a cap */
22308 + case VCMD_net_migrate:
22310 + current->nid = 1;
22317 + /* vcmds are fine by default */
22320 + /* admin type vcmds require admin ... */
22321 + if (flags & VCF_ADMIN)
22322 + permit = vx_check(0, VS_ADMIN) ? 1 : 0;
22324 + /* ... but setup type vcmds override that */
22325 + if (!permit && (flags & VCF_SETUP))
22326 + permit = vx_flags(VXF_STATE_SETUP, 0) ? 2 : 0;
22334 + if (!id && (flags & VCF_ZIDOK))
22338 + if (args & VCA_VXI) {
22339 + vxi = lookup_vx_info(id);
22343 + if ((flags & VCF_ADMIN) &&
22344 + /* special case kill for shutdown */
22345 + (cmd != VCMD_ctx_kill) &&
22346 + /* can context be administrated? */
22347 + !vx_info_flags(vxi, VXF_STATE_ADMIN, 0)) {
22353 + if (args & VCA_NXI) {
22354 + nxi = lookup_nx_info(id);
22358 + if ((flags & VCF_ADMIN) &&
22359 + /* can context be administrated? */
22360 + !nx_info_flags(nxi, NXF_STATE_ADMIN, 0)) {
22367 + ret = do_vcmd(cmd, id, vxi, nxi, data, compat);
22370 + if ((args & VCA_NXI) && nxi)
22371 + put_nx_info(nxi);
22373 + if ((args & VCA_VXI) && vxi)
22374 + put_vx_info(vxi);
22376 + vxdprintk(VXD_CBIT(switch, 1),
22377 + "vc: VCMD_%02d_%d[%d] = %08lx(%ld) [%d,%d]",
22378 + VC_CATEGORY(cmd), VC_COMMAND(cmd),
22379 + VC_VERSION(cmd), ret, ret, state, permit);
22384 +sys_vserver(uint32_t cmd, uint32_t id, void __user *data)
22386 + return do_vserver(cmd, id, data, 0);
22389 +#ifdef CONFIG_COMPAT
22392 +sys32_vserver(uint32_t cmd, uint32_t id, void __user *data)
22394 + return do_vserver(cmd, id, data, 1);
22397 +#endif /* CONFIG_COMPAT */
22398 diff -NurpP --minimal linux-4.9.217/kernel/vserver/sysctl.c linux-4.9.217-vs2.3.9.12/kernel/vserver/sysctl.c
22399 --- linux-4.9.217/kernel/vserver/sysctl.c 1970-01-01 00:00:00.000000000 +0000
22400 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/sysctl.c 2018-10-20 04:58:15.000000000 +0000
22403 + * kernel/vserver/sysctl.c
22405 + * Virtual Context Support
22407 + * Copyright (C) 2004-2007 Herbert Pötzl
22409 + * V0.01 basic structure
22413 +#include <linux/module.h>
22414 +#include <linux/ctype.h>
22415 +#include <linux/sysctl.h>
22416 +#include <linux/parser.h>
22417 +#include <linux/utsname.h>
22419 +#include <asm/uaccess.h>
22420 +#include <asm/sections.h>
22423 + CTL_DEBUG_ERROR = 0,
22424 + CTL_DEBUG_SWITCH = 1,
22440 +unsigned int vs_debug_switch = 0;
22441 +unsigned int vs_debug_xid = 0;
22442 +unsigned int vs_debug_nid = 0;
22443 +unsigned int vs_debug_tag = 0;
22444 +unsigned int vs_debug_net = 0;
22445 +unsigned int vs_debug_limit = 0;
22446 +unsigned int vs_debug_cres = 0;
22447 +unsigned int vs_debug_dlim = 0;
22448 +unsigned int vs_debug_quota = 0;
22449 +unsigned int vs_debug_cvirt = 0;
22450 +unsigned int vs_debug_space = 0;
22451 +unsigned int vs_debug_perm = 0;
22452 +unsigned int vs_debug_misc = 0;
22455 +static struct ctl_table_header *vserver_table_header;
22456 +static struct ctl_table vserver_root_table[];
22459 +void vserver_register_sysctl(void)
22461 + if (!vserver_table_header) {
22462 + vserver_table_header = register_sysctl_table(vserver_root_table);
22467 +void vserver_unregister_sysctl(void)
22469 + if (vserver_table_header) {
22470 + unregister_sysctl_table(vserver_table_header);
22471 + vserver_table_header = NULL;
22475 +static int proc_dodebug(struct ctl_table *table, int write,
22476 + void __user *buffer, size_t *lenp, loff_t *ppos)
22478 + char tmpbuf[20], *p, c;
22479 + unsigned int value;
22480 + size_t left, len;
22482 + if ((*ppos && !write) || !*lenp) {
22490 + if (!access_ok(VERIFY_READ, buffer, left))
22492 + p = (char *)buffer;
22493 + while (left && __get_user(c, p) >= 0 && isspace(c))
22498 + if (left > sizeof(tmpbuf) - 1)
22500 + if (copy_from_user(tmpbuf, p, left))
22502 + tmpbuf[left] = '\0';
22504 + for (p = tmpbuf, value = 0; '0' <= *p && *p <= '9'; p++, left--)
22505 + value = 10 * value + (*p - '0');
22506 + if (*p && !isspace(*p))
22508 + while (left && isspace(*p))
22510 + *(unsigned int *)table->data = value;
22512 + if (!access_ok(VERIFY_WRITE, buffer, left))
22514 + len = sprintf(tmpbuf, "%d", *(unsigned int *)table->data);
22517 + if (__copy_to_user(buffer, tmpbuf, len))
22519 + if ((left -= len) > 0) {
22520 + if (put_user('\n', (char *)buffer + len))
22534 +#define CTL_ENTRY(ctl, name) \
22536 + .procname = #name, \
22537 + .data = &vs_ ## name, \
22538 + .maxlen = sizeof(int), \
22540 + .proc_handler = &proc_dodebug, \
22541 + .extra1 = &zero, \
22542 + .extra2 = &zero, \
22545 +static struct ctl_table vserver_debug_table[] = {
22546 + CTL_ENTRY(CTL_DEBUG_SWITCH, debug_switch),
22547 + CTL_ENTRY(CTL_DEBUG_XID, debug_xid),
22548 + CTL_ENTRY(CTL_DEBUG_NID, debug_nid),
22549 + CTL_ENTRY(CTL_DEBUG_TAG, debug_tag),
22550 + CTL_ENTRY(CTL_DEBUG_NET, debug_net),
22551 + CTL_ENTRY(CTL_DEBUG_LIMIT, debug_limit),
22552 + CTL_ENTRY(CTL_DEBUG_CRES, debug_cres),
22553 + CTL_ENTRY(CTL_DEBUG_DLIM, debug_dlim),
22554 + CTL_ENTRY(CTL_DEBUG_QUOTA, debug_quota),
22555 + CTL_ENTRY(CTL_DEBUG_CVIRT, debug_cvirt),
22556 + CTL_ENTRY(CTL_DEBUG_SPACE, debug_space),
22557 + CTL_ENTRY(CTL_DEBUG_PERM, debug_perm),
22558 + CTL_ENTRY(CTL_DEBUG_MISC, debug_misc),
22562 +static struct ctl_table vserver_root_table[] = {
22564 + .procname = "vserver",
22566 + .child = vserver_debug_table
22572 +static match_table_t tokens = {
22573 + { CTL_DEBUG_SWITCH, "switch=%x" },
22574 + { CTL_DEBUG_XID, "xid=%x" },
22575 + { CTL_DEBUG_NID, "nid=%x" },
22576 + { CTL_DEBUG_TAG, "tag=%x" },
22577 + { CTL_DEBUG_NET, "net=%x" },
22578 + { CTL_DEBUG_LIMIT, "limit=%x" },
22579 + { CTL_DEBUG_CRES, "cres=%x" },
22580 + { CTL_DEBUG_DLIM, "dlim=%x" },
22581 + { CTL_DEBUG_QUOTA, "quota=%x" },
22582 + { CTL_DEBUG_CVIRT, "cvirt=%x" },
22583 + { CTL_DEBUG_SPACE, "space=%x" },
22584 + { CTL_DEBUG_PERM, "perm=%x" },
22585 + { CTL_DEBUG_MISC, "misc=%x" },
22586 + { CTL_DEBUG_ERROR, NULL }
22589 +#define HANDLE_CASE(id, name, val) \
22590 + case CTL_DEBUG_ ## id: \
22591 + vs_debug_ ## name = val; \
22592 + printk("vs_debug_" #name "=0x%x\n", val); \
22596 +static int __init vs_debug_setup(char *str)
22601 + printk("vs_debug_setup(%s)\n", str);
22602 + while ((p = strsep(&str, ",")) != NULL) {
22603 + substring_t args[MAX_OPT_ARGS];
22604 + unsigned int value;
22609 + token = match_token(p, tokens, args);
22610 + value = (token > 0) ? simple_strtoul(args[0].from, NULL, 0) : 0;
22613 + HANDLE_CASE(SWITCH, switch, value);
22614 + HANDLE_CASE(XID, xid, value);
22615 + HANDLE_CASE(NID, nid, value);
22616 + HANDLE_CASE(TAG, tag, value);
22617 + HANDLE_CASE(NET, net, value);
22618 + HANDLE_CASE(LIMIT, limit, value);
22619 + HANDLE_CASE(CRES, cres, value);
22620 + HANDLE_CASE(DLIM, dlim, value);
22621 + HANDLE_CASE(QUOTA, quota, value);
22622 + HANDLE_CASE(CVIRT, cvirt, value);
22623 + HANDLE_CASE(SPACE, space, value);
22624 + HANDLE_CASE(PERM, perm, value);
22625 + HANDLE_CASE(MISC, misc, value);
22634 +__setup("vsdebug=", vs_debug_setup);
22638 +EXPORT_SYMBOL_GPL(vs_debug_switch);
22639 +EXPORT_SYMBOL_GPL(vs_debug_xid);
22640 +EXPORT_SYMBOL_GPL(vs_debug_nid);
22641 +EXPORT_SYMBOL_GPL(vs_debug_net);
22642 +EXPORT_SYMBOL_GPL(vs_debug_limit);
22643 +EXPORT_SYMBOL_GPL(vs_debug_cres);
22644 +EXPORT_SYMBOL_GPL(vs_debug_dlim);
22645 +EXPORT_SYMBOL_GPL(vs_debug_quota);
22646 +EXPORT_SYMBOL_GPL(vs_debug_cvirt);
22647 +EXPORT_SYMBOL_GPL(vs_debug_space);
22648 +EXPORT_SYMBOL_GPL(vs_debug_perm);
22649 +EXPORT_SYMBOL_GPL(vs_debug_misc);
22651 diff -NurpP --minimal linux-4.9.217/kernel/vserver/tag.c linux-4.9.217-vs2.3.9.12/kernel/vserver/tag.c
22652 --- linux-4.9.217/kernel/vserver/tag.c 1970-01-01 00:00:00.000000000 +0000
22653 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/tag.c 2018-10-20 04:58:15.000000000 +0000
22656 + * linux/kernel/vserver/tag.c
22658 + * Virtual Server: Shallow Tag Space
22660 + * Copyright (C) 2007 Herbert Pötzl
22662 + * V0.01 basic implementation
22666 +#include <linux/sched.h>
22667 +#include <linux/vserver/debug.h>
22668 +#include <linux/vs_pid.h>
22669 +#include <linux/vs_tag.h>
22671 +#include <linux/vserver/tag_cmd.h>
22674 +int dx_migrate_task(struct task_struct *p, vtag_t tag)
22679 + vxdprintk(VXD_CBIT(tag, 5),
22680 + "dx_migrate_task(%p[#%d],#%d)", p, p->tag, tag);
22686 + vxdprintk(VXD_CBIT(tag, 5),
22687 + "moved task %p into [#%d]", p, tag);
22691 +/* vserver syscall commands below here */
22693 +/* task tag functions */
22696 +int vc_task_tag(uint32_t id)
22701 + struct task_struct *tsk;
22703 + tsk = find_task_by_real_pid(id);
22704 + tag = (tsk) ? tsk->tag : -ESRCH;
22705 + rcu_read_unlock();
22707 + tag = dx_current_tag();
22712 +int vc_tag_migrate(uint32_t tag)
22714 + return dx_migrate_task(current, tag & 0xFFFF);
22718 diff -NurpP --minimal linux-4.9.217/kernel/vserver/vci_config.h linux-4.9.217-vs2.3.9.12/kernel/vserver/vci_config.h
22719 --- linux-4.9.217/kernel/vserver/vci_config.h 1970-01-01 00:00:00.000000000 +0000
22720 +++ linux-4.9.217-vs2.3.9.12/kernel/vserver/vci_config.h 2018-10-20 04:58:15.000000000 +0000
22723 +/* interface version */
22725 +#define VCI_VERSION 0x00020308
22729 + VCI_KCBIT_NO_DYNAMIC = 0,
22731 + VCI_KCBIT_PROC_SECURE = 4,
22732 + /* VCI_KCBIT_HARDCPU = 5, */
22733 + /* VCI_KCBIT_IDLELIMIT = 6, */
22734 + /* VCI_KCBIT_IDLETIME = 7, */
22736 + VCI_KCBIT_COWBL = 8,
22737 + VCI_KCBIT_FULLCOWBL = 9,
22738 + VCI_KCBIT_SPACES = 10,
22739 + VCI_KCBIT_NETV2 = 11,
22740 + VCI_KCBIT_MEMCG = 12,
22741 + VCI_KCBIT_MEMCG_SWAP = 13,
22743 + VCI_KCBIT_DEBUG = 16,
22744 + VCI_KCBIT_HISTORY = 20,
22745 + VCI_KCBIT_TAGGED = 24,
22746 + VCI_KCBIT_PPTAG = 28,
22748 + VCI_KCBIT_MORE = 31,
22752 +static inline uint32_t vci_kernel_config(void)
22755 + (1 << VCI_KCBIT_NO_DYNAMIC) |
22757 + /* configured features */
22758 +#ifdef CONFIG_VSERVER_PROC_SECURE
22759 + (1 << VCI_KCBIT_PROC_SECURE) |
22761 +#ifdef CONFIG_VSERVER_COWBL
22762 + (1 << VCI_KCBIT_COWBL) |
22763 + (1 << VCI_KCBIT_FULLCOWBL) |
22765 + (1 << VCI_KCBIT_SPACES) |
22766 + (1 << VCI_KCBIT_NETV2) |
22767 +#ifdef CONFIG_MEMCG
22768 + (1 << VCI_KCBIT_MEMCG) |
22770 +#ifdef CONFIG_MEMCG_SWAP
22771 + (1 << VCI_KCBIT_MEMCG_SWAP) |
22774 + /* debug options */
22775 +#ifdef CONFIG_VSERVER_DEBUG
22776 + (1 << VCI_KCBIT_DEBUG) |
22778 +#ifdef CONFIG_VSERVER_HISTORY
22779 + (1 << VCI_KCBIT_HISTORY) |
22782 + /* inode context tagging */
22783 +#if defined(CONFIG_TAGGING_NONE)
22784 + (0 << VCI_KCBIT_TAGGED) |
22785 +#elif defined(CONFIG_TAGGING_UID16)
22786 + (1 << VCI_KCBIT_TAGGED) |
22787 +#elif defined(CONFIG_TAGGING_GID16)
22788 + (2 << VCI_KCBIT_TAGGED) |
22789 +#elif defined(CONFIG_TAGGING_ID24)
22790 + (3 << VCI_KCBIT_TAGGED) |
22791 +#elif defined(CONFIG_TAGGING_INTERN)
22792 + (4 << VCI_KCBIT_TAGGED) |
22793 +#elif defined(CONFIG_TAGGING_RUNTIME)
22794 + (5 << VCI_KCBIT_TAGGED) |
22796 + (7 << VCI_KCBIT_TAGGED) |
22798 + (1 << VCI_KCBIT_PPTAG) |
22802 diff -NurpP --minimal linux-4.9.217/mm/memcontrol.c linux-4.9.217-vs2.3.9.12/mm/memcontrol.c
22803 --- linux-4.9.217/mm/memcontrol.c 2020-03-27 00:51:44.810116540 +0000
22804 +++ linux-4.9.217-vs2.3.9.12/mm/memcontrol.c 2020-04-01 09:40:32.745397028 +0000
22805 @@ -2854,6 +2854,41 @@ static u64 mem_cgroup_read_u64(struct cg
22809 +unsigned long mem_cgroup_mem_usage_pages(struct mem_cgroup *memcg)
22811 + return mem_cgroup_usage(memcg, false);
22814 +unsigned long mem_cgroup_mem_limit_pages(struct mem_cgroup *memcg)
22816 + return memcg->memory.limit;
22819 +unsigned long mem_cgroup_memsw_usage_pages(struct mem_cgroup *memcg)
22821 + return mem_cgroup_usage(memcg, true);
22824 +unsigned long mem_cgroup_memsw_limit_pages(struct mem_cgroup *memcg)
22826 + return memcg->memsw.limit;
22829 +void dump_mem_cgroup(struct mem_cgroup *memcg)
22831 + printk(KERN_INFO "memcg: %p/%d:\n"
22832 + "\tmemory:\t%lu/%lu %lu/%lu\n"
22833 + "\tmemsw:\t%lu/%lu %lu/%lu\n"
22834 + "\tkmem:\t%lu/%lu %lu/%lu\n",
22835 + memcg, memcg->id.id,
22836 + page_counter_read(&memcg->memory), memcg->memory.limit,
22837 + memcg->memory.watermark, memcg->memory.failcnt,
22838 + page_counter_read(&memcg->memsw), memcg->memsw.limit,
22839 + memcg->memsw.watermark, memcg->memsw.failcnt,
22840 + page_counter_read(&memcg->kmem), memcg->kmem.limit,
22841 + memcg->kmem.watermark, memcg->kmem.failcnt);
22844 #ifndef CONFIG_SLOB
22845 static int memcg_online_kmem(struct mem_cgroup *memcg)
22847 diff -NurpP --minimal linux-4.9.217/mm/oom_kill.c linux-4.9.217-vs2.3.9.12/mm/oom_kill.c
22848 --- linux-4.9.217/mm/oom_kill.c 2020-03-27 00:51:44.980113860 +0000
22849 +++ linux-4.9.217-vs2.3.9.12/mm/oom_kill.c 2019-02-22 08:37:56.143042249 +0000
22851 #include <linux/kthread.h>
22852 #include <linux/init.h>
22853 #include <linux/mmu_notifier.h>
22854 +#include <linux/reboot.h>
22855 +#include <linux/vs_context.h>
22857 #include <asm/tlb.h>
22858 #include "internal.h"
22859 @@ -142,11 +144,18 @@ static inline bool is_memcg_oom(struct o
22860 static bool oom_unkillable_task(struct task_struct *p,
22861 struct mem_cgroup *memcg, const nodemask_t *nodemask)
22863 - if (is_global_init(p))
22864 + unsigned xid = vx_current_xid();
22866 + /* skip the init task, global and per guest */
22867 + if (task_is_init(p))
22869 if (p->flags & PF_KTHREAD)
22872 + /* skip other guest and host processes if oom in guest */
22873 + if (xid && vx_task_xid(p) != xid)
22876 /* When mem_cgroup_out_of_memory() and p is not member of the group */
22877 if (memcg && !task_in_mem_cgroup(p, memcg))
22879 @@ -851,8 +860,8 @@ static void oom_kill_process(struct oom_
22880 if (__ratelimit(&oom_rs))
22881 dump_header(oc, p);
22883 - pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
22884 - message, task_pid_nr(p), p->comm, points);
22885 + pr_err("%s: Kill process %d:#%u (%s) score %d or sacrifice child\n",
22886 + message, task_pid_nr(p), p->xid, p->comm, points);
22889 * If any of p's children has a different mm and is eligible for kill,
22890 @@ -910,8 +919,8 @@ static void oom_kill_process(struct oom_
22892 do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true);
22893 mark_oom_victim(victim);
22894 - pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
22895 - task_pid_nr(victim), victim->comm, K(victim->mm->total_vm),
22896 + pr_err("Killed process %d:%u (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
22897 + task_pid_nr(victim), victim->xid, victim->comm, K(victim->mm->total_vm),
22898 K(get_mm_counter(victim->mm, MM_ANONPAGES)),
22899 K(get_mm_counter(victim->mm, MM_FILEPAGES)),
22900 K(get_mm_counter(victim->mm, MM_SHMEMPAGES)));
22901 @@ -958,6 +967,8 @@ static void oom_kill_process(struct oom_
22905 +long vs_oom_action(unsigned int);
22908 * Determines whether the kernel must panic because of the panic_on_oom sysctl.
22910 @@ -1063,7 +1074,12 @@ bool out_of_memory(struct oom_control *o
22911 /* Found nothing?!?! Either we hang forever, or we panic. */
22912 if (!oc->chosen && !is_sysrq_oom(oc) && !is_memcg_oom(oc)) {
22913 dump_header(oc, NULL);
22914 - panic("Out of memory and no killable processes...\n");
22916 + /* avoid panic for guest OOM */
22917 + if (vx_current_xid())
22918 + vs_oom_action(LINUX_REBOOT_CMD_OOM);
22920 + panic("Out of memory and no killable processes...\n");
22922 if (oc->chosen && oc->chosen != (void *)-1UL) {
22923 oom_kill_process(oc, !is_memcg_oom(oc) ? "Out of memory" :
22924 diff -NurpP --minimal linux-4.9.217/mm/page_alloc.c linux-4.9.217-vs2.3.9.12/mm/page_alloc.c
22925 --- linux-4.9.217/mm/page_alloc.c 2020-03-27 00:51:44.980113860 +0000
22926 +++ linux-4.9.217-vs2.3.9.12/mm/page_alloc.c 2020-04-01 09:45:52.210102682 +0000
22928 #include <linux/kthread.h>
22929 #include <linux/memcontrol.h>
22930 #include <linux/khugepaged.h>
22931 +#include <linux/vs_base.h>
22932 +#include <linux/vs_limit.h>
22934 #include <asm/sections.h>
22935 #include <asm/tlbflush.h>
22936 @@ -4164,14 +4166,17 @@ long si_mem_available(void)
22938 pagecache = pages[LRU_ACTIVE_FILE] + pages[LRU_INACTIVE_FILE];
22939 pagecache -= min(pagecache / 2, wmark_low);
22940 - available += pagecache;
22941 + if (!vx_flags(VXF_VIRT_MEM, 0))
22942 + available += pagecache;
22945 * Part of the reclaimable slab consists of items that are in use,
22946 * and cannot be freed. Cap this estimate at the low watermark.
22948 - available += global_page_state(NR_SLAB_RECLAIMABLE) -
22949 - min(global_page_state(NR_SLAB_RECLAIMABLE) / 2, wmark_low);
22950 + if (!vx_flags(VXF_VIRT_MEM, 0))
22951 + available += global_page_state(NR_SLAB_RECLAIMABLE) -
22952 + min(global_page_state(NR_SLAB_RECLAIMABLE) / 2,
22957 @@ -4188,6 +4193,9 @@ void si_meminfo(struct sysinfo *val)
22958 val->totalhigh = totalhigh_pages;
22959 val->freehigh = nr_free_highpages();
22960 val->mem_unit = PAGE_SIZE;
22962 + if (vx_flags(VXF_VIRT_MEM, 0))
22963 + vx_vsi_meminfo(val);
22966 EXPORT_SYMBOL(si_meminfo);
22967 @@ -4222,6 +4230,9 @@ void si_meminfo_node(struct sysinfo *val
22968 val->freehigh = free_highpages;
22970 val->mem_unit = PAGE_SIZE;
22972 + if (vx_flags(VXF_VIRT_MEM, 0))
22973 + vx_vsi_meminfo(val);
22977 diff -NurpP --minimal linux-4.9.217/mm/pgtable-generic.c linux-4.9.217-vs2.3.9.12/mm/pgtable-generic.c
22978 --- linux-4.9.217/mm/pgtable-generic.c 2016-12-11 19:17:54.000000000 +0000
22979 +++ linux-4.9.217-vs2.3.9.12/mm/pgtable-generic.c 2018-10-20 04:58:15.000000000 +0000
22981 * Copyright (C) 2010 Linus Torvalds
22984 +#include <linux/mm.h>
22986 #include <linux/pagemap.h>
22987 #include <asm/tlb.h>
22988 #include <asm-generic/pgtable.h>
22989 diff -NurpP --minimal linux-4.9.217/mm/shmem.c linux-4.9.217-vs2.3.9.12/mm/shmem.c
22990 --- linux-4.9.217/mm/shmem.c 2020-03-27 00:51:45.000113543 +0000
22991 +++ linux-4.9.217-vs2.3.9.12/mm/shmem.c 2019-12-25 15:37:52.838415659 +0000
22992 @@ -2803,7 +2803,7 @@ static int shmem_statfs(struct dentry *d
22994 struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
22996 - buf->f_type = TMPFS_MAGIC;
22997 + buf->f_type = TMPFS_SUPER_MAGIC;
22998 buf->f_bsize = PAGE_SIZE;
22999 buf->f_namelen = NAME_MAX;
23000 if (sbinfo->max_blocks) {
23001 @@ -3628,7 +3628,7 @@ int shmem_fill_super(struct super_block
23002 sb->s_maxbytes = MAX_LFS_FILESIZE;
23003 sb->s_blocksize = PAGE_SIZE;
23004 sb->s_blocksize_bits = PAGE_SHIFT;
23005 - sb->s_magic = TMPFS_MAGIC;
23006 + sb->s_magic = TMPFS_SUPER_MAGIC;
23007 sb->s_op = &shmem_ops;
23008 sb->s_time_gran = 1;
23009 #ifdef CONFIG_TMPFS_XATTR
23010 diff -NurpP --minimal linux-4.9.217/mm/slab.c linux-4.9.217-vs2.3.9.12/mm/slab.c
23011 --- linux-4.9.217/mm/slab.c 2020-03-27 00:51:45.010113388 +0000
23012 +++ linux-4.9.217-vs2.3.9.12/mm/slab.c 2019-10-05 14:58:46.200299086 +0000
23013 @@ -307,6 +307,8 @@ static void kmem_cache_node_init(struct
23014 #define STATS_INC_FREEMISS(x) do { } while (0)
23017 +#include "slab_vs.h"
23022 @@ -3344,6 +3346,7 @@ slab_alloc_node(struct kmem_cache *cache
23023 /* ___cache_alloc_node can fall back to other nodes */
23024 ptr = ____cache_alloc_node(cachep, flags, nodeid);
23026 + vx_slab_alloc(cachep, flags);
23027 local_irq_restore(save_flags);
23028 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
23030 @@ -3525,6 +3528,7 @@ void ___cache_free(struct kmem_cache *ca
23032 kmemleak_free_recursive(objp, cachep->flags);
23033 objp = cache_free_debugcheck(cachep, objp, caller);
23034 + vx_slab_free(cachep);
23036 kmemcheck_slab_free(cachep, objp, cachep->object_size);
23038 diff -NurpP --minimal linux-4.9.217/mm/slab_vs.h linux-4.9.217-vs2.3.9.12/mm/slab_vs.h
23039 --- linux-4.9.217/mm/slab_vs.h 1970-01-01 00:00:00.000000000 +0000
23040 +++ linux-4.9.217-vs2.3.9.12/mm/slab_vs.h 2018-10-20 04:58:15.000000000 +0000
23043 +#include <linux/vserver/context.h>
23045 +#include <linux/vs_context.h>
23048 +void vx_slab_alloc(struct kmem_cache *cachep, gfp_t flags)
23050 + int what = gfp_zone(cachep->allocflags);
23051 + struct vx_info *vxi = current_vx_info();
23056 + atomic_add(cachep->size, &vxi->cacct.slab[what]);
23060 +void vx_slab_free(struct kmem_cache *cachep)
23062 + int what = gfp_zone(cachep->allocflags);
23063 + struct vx_info *vxi = current_vx_info();
23068 + atomic_sub(cachep->size, &vxi->cacct.slab[what]);
23071 diff -NurpP --minimal linux-4.9.217/mm/swapfile.c linux-4.9.217-vs2.3.9.12/mm/swapfile.c
23072 --- linux-4.9.217/mm/swapfile.c 2020-03-27 00:51:45.030113073 +0000
23073 +++ linux-4.9.217-vs2.3.9.12/mm/swapfile.c 2018-10-20 05:55:43.000000000 +0000
23075 #include <asm/tlbflush.h>
23076 #include <linux/swapops.h>
23077 #include <linux/swap_cgroup.h>
23078 +#include <linux/vs_base.h>
23080 static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
23082 @@ -2083,6 +2084,16 @@ static int swap_show(struct seq_file *sw
23084 if (si == SEQ_START_TOKEN) {
23085 seq_puts(swap,"Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
23086 + if (vx_flags(VXF_VIRT_MEM, 0)) {
23087 + struct sysinfo si = { 0 };
23089 + vx_vsi_swapinfo(&si);
23090 + if (si.totalswap < (1 << 10))
23092 + seq_printf(swap, "%s\t\t\t\t\t%s\t%lu\t%lu\t%d\n",
23093 + "hdv0", "partition", si.totalswap >> 10,
23094 + (si.totalswap - si.freeswap) >> 10, -1);
23099 @@ -2630,6 +2641,8 @@ void si_swapinfo(struct sysinfo *val)
23100 val->freeswap = atomic_long_read(&nr_swap_pages) + nr_to_be_unused;
23101 val->totalswap = total_swap_pages + nr_to_be_unused;
23102 spin_unlock(&swap_lock);
23103 + if (vx_flags(VXF_VIRT_MEM, 0))
23104 + vx_vsi_swapinfo(val);
23108 diff -NurpP --minimal linux-4.9.217/net/bridge/br_multicast.c linux-4.9.217-vs2.3.9.12/net/bridge/br_multicast.c
23109 --- linux-4.9.217/net/bridge/br_multicast.c 2020-03-27 00:51:45.750101724 +0000
23110 +++ linux-4.9.217-vs2.3.9.12/net/bridge/br_multicast.c 2019-10-05 14:58:46.250298288 +0000
23111 @@ -465,7 +465,7 @@ static struct sk_buff *br_ip6_multicast_
23112 ip6h->hop_limit = 1;
23113 ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
23114 if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
23116 + &ip6h->saddr, NULL)) {
23118 br->has_ipv6_addr = 0;
23120 diff -NurpP --minimal linux-4.9.217/net/core/dev.c linux-4.9.217-vs2.3.9.12/net/core/dev.c
23121 --- linux-4.9.217/net/core/dev.c 2020-03-27 00:51:45.850100149 +0000
23122 +++ linux-4.9.217-vs2.3.9.12/net/core/dev.c 2019-12-25 15:37:52.958413722 +0000
23123 @@ -126,6 +126,7 @@
23124 #include <linux/in.h>
23125 #include <linux/jhash.h>
23126 #include <linux/random.h>
23127 +#include <linux/vs_inet.h>
23128 #include <trace/events/napi.h>
23129 #include <trace/events/net.h>
23130 #include <trace/events/skb.h>
23131 @@ -730,7 +731,8 @@ struct net_device *__dev_get_by_name(str
23132 struct hlist_head *head = dev_name_hash(net, name);
23134 hlist_for_each_entry(dev, head, name_hlist)
23135 - if (!strncmp(dev->name, name, IFNAMSIZ))
23136 + if (!strncmp(dev->name, name, IFNAMSIZ) &&
23137 + nx_dev_visible(current_nx_info(), dev))
23141 @@ -755,7 +757,8 @@ struct net_device *dev_get_by_name_rcu(s
23142 struct hlist_head *head = dev_name_hash(net, name);
23144 hlist_for_each_entry_rcu(dev, head, name_hlist)
23145 - if (!strncmp(dev->name, name, IFNAMSIZ))
23146 + if (!strncmp(dev->name, name, IFNAMSIZ) &&
23147 + nx_dev_visible(current_nx_info(), dev))
23151 @@ -805,7 +808,8 @@ struct net_device *__dev_get_by_index(st
23152 struct hlist_head *head = dev_index_hash(net, ifindex);
23154 hlist_for_each_entry(dev, head, index_hlist)
23155 - if (dev->ifindex == ifindex)
23156 + if ((dev->ifindex == ifindex) &&
23157 + nx_dev_visible(current_nx_info(), dev))
23161 @@ -823,7 +827,7 @@ EXPORT_SYMBOL(__dev_get_by_index);
23162 * about locking. The caller must hold RCU lock.
23165 -struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
23166 +struct net_device *dev_get_by_index_real_rcu(struct net *net, int ifindex)
23168 struct net_device *dev;
23169 struct hlist_head *head = dev_index_hash(net, ifindex);
23170 @@ -834,6 +838,16 @@ struct net_device *dev_get_by_index_rcu(
23174 +EXPORT_SYMBOL(dev_get_by_index_real_rcu);
23176 +struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
23178 + struct net_device *dev = dev_get_by_index_real_rcu(net, ifindex);
23180 + if (nx_dev_visible(current_nx_info(), dev))
23184 EXPORT_SYMBOL(dev_get_by_index_rcu);
23187 @@ -916,7 +930,8 @@ struct net_device *dev_getbyhwaddr_rcu(s
23189 for_each_netdev_rcu(net, dev)
23190 if (dev->type == type &&
23191 - !memcmp(dev->dev_addr, ha, dev->addr_len))
23192 + !memcmp(dev->dev_addr, ha, dev->addr_len) &&
23193 + nx_dev_visible(current_nx_info(), dev))
23197 @@ -928,9 +943,11 @@ struct net_device *__dev_getfirstbyhwtyp
23198 struct net_device *dev;
23201 - for_each_netdev(net, dev)
23202 - if (dev->type == type)
23203 + for_each_netdev(net, dev) {
23204 + if ((dev->type == type) &&
23205 + nx_dev_visible(current_nx_info(), dev))
23211 @@ -942,7 +959,8 @@ struct net_device *dev_getfirstbyhwtype(
23214 for_each_netdev_rcu(net, dev)
23215 - if (dev->type == type) {
23216 + if ((dev->type == type) &&
23217 + nx_dev_visible(current_nx_info(), dev)) {
23221 @@ -972,7 +990,8 @@ struct net_device *__dev_get_by_flags(st
23224 for_each_netdev(net, dev) {
23225 - if (((dev->flags ^ if_flags) & mask) == 0) {
23226 + if ((((dev->flags ^ if_flags) & mask) == 0) &&
23227 + nx_dev_visible(current_nx_info(), dev)) {
23231 @@ -1050,6 +1069,8 @@ static int __dev_alloc_name(struct net *
23233 if (i < 0 || i >= max_netdevices)
23235 + if (!nx_dev_visible(current_nx_info(), d))
23238 /* avoid cases where sscanf is not exact inverse of printf */
23239 snprintf(buf, IFNAMSIZ, name, i);
23240 diff -NurpP --minimal linux-4.9.217/net/core/net-procfs.c linux-4.9.217-vs2.3.9.12/net/core/net-procfs.c
23241 --- linux-4.9.217/net/core/net-procfs.c 2016-12-11 19:17:54.000000000 +0000
23242 +++ linux-4.9.217-vs2.3.9.12/net/core/net-procfs.c 2018-10-20 04:58:15.000000000 +0000
23244 #include <linux/netdevice.h>
23245 #include <linux/proc_fs.h>
23246 #include <linux/seq_file.h>
23247 +#include <linux/vs_inet.h>
23248 #include <net/wext.h>
23250 #define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
23251 @@ -77,8 +78,13 @@ static void dev_seq_stop(struct seq_file
23252 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
23254 struct rtnl_link_stats64 temp;
23255 - const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
23256 + const struct rtnl_link_stats64 *stats;
23258 + /* device visible inside network context? */
23259 + if (!nx_dev_visible(current_nx_info(), dev))
23262 + stats = dev_get_stats(dev, &temp);
23263 seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
23264 "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
23265 dev->name, stats->rx_bytes, stats->rx_packets,
23266 diff -NurpP --minimal linux-4.9.217/net/core/rtnetlink.c linux-4.9.217-vs2.3.9.12/net/core/rtnetlink.c
23267 --- linux-4.9.217/net/core/rtnetlink.c 2020-03-27 00:51:45.900099361 +0000
23268 +++ linux-4.9.217-vs2.3.9.12/net/core/rtnetlink.c 2019-12-25 15:37:52.958413722 +0000
23269 @@ -1615,6 +1615,8 @@ static int rtnl_dump_ifinfo(struct sk_bu
23273 + if (!nx_dev_visible(skb->sk->sk_nx_info, dev))
23275 err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
23276 NETLINK_CB(cb->skb).portid,
23277 cb->nlh->nlmsg_seq, 0,
23278 @@ -2841,6 +2843,9 @@ void rtmsg_ifinfo(int type, struct net_d
23280 struct sk_buff *skb;
23282 + if (!nx_dev_visible(current_nx_info(), dev))
23285 if (dev->reg_state != NETREG_REGISTERED)
23288 diff -NurpP --minimal linux-4.9.217/net/core/sock.c linux-4.9.217-vs2.3.9.12/net/core/sock.c
23289 --- linux-4.9.217/net/core/sock.c 2020-03-27 00:51:45.900099361 +0000
23290 +++ linux-4.9.217-vs2.3.9.12/net/core/sock.c 2020-02-26 14:31:15.988223949 +0000
23291 @@ -135,6 +135,10 @@
23293 #include <linux/filter.h>
23294 #include <net/sock_reuseport.h>
23295 +#include <linux/vs_socket.h>
23296 +#include <linux/vs_limit.h>
23297 +#include <linux/vs_context.h>
23298 +#include <linux/vs_network.h>
23300 #include <trace/events/sock.h>
23302 @@ -1339,6 +1343,9 @@ static struct sock *sk_prot_alloc(struct
23303 if (!try_module_get(prot->owner))
23305 sk_tx_queue_clear(sk);
23307 + sock_vx_init(sk);
23308 + sock_nx_init(sk);
23312 @@ -1443,6 +1450,11 @@ static void __sk_destruct(struct rcu_hea
23313 put_pid(sk->sk_peer_pid);
23314 if (likely(sk->sk_net_refcnt))
23315 put_net(sock_net(sk));
23317 + clr_vx_info(&sk->sk_vx_info);
23319 + clr_nx_info(&sk->sk_nx_info);
23321 sk_prot_free(sk->sk_prot_creator, sk);
23324 @@ -1504,6 +1516,8 @@ struct sock *sk_clone_lock(const struct
23326 if (likely(newsk->sk_net_refcnt))
23327 get_net(sock_net(newsk));
23328 + sock_vx_init(newsk);
23329 + sock_nx_init(newsk);
23330 sk_node_init(&newsk->sk_node);
23331 sock_lock_init(newsk);
23332 bh_lock_sock(newsk);
23333 @@ -1574,6 +1588,12 @@ struct sock *sk_clone_lock(const struct
23335 atomic_set(&newsk->sk_refcnt, 2);
23337 + set_vx_info(&newsk->sk_vx_info, sk->sk_vx_info);
23338 + newsk->sk_xid = sk->sk_xid;
23339 + vx_sock_inc(newsk);
23340 + set_nx_info(&newsk->sk_nx_info, sk->sk_nx_info);
23341 + newsk->sk_nid = sk->sk_nid;
23344 * Increment the counter in the same struct proto as the master
23345 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
23346 @@ -2477,6 +2497,12 @@ void sock_init_data(struct socket *sock,
23347 seqlock_init(&sk->sk_stamp_seq);
23350 + set_vx_info(&sk->sk_vx_info, current_vx_info());
23351 + sk->sk_xid = vx_current_xid();
23353 + set_nx_info(&sk->sk_nx_info, current_nx_info());
23354 + sk->sk_nid = nx_current_nid();
23356 #ifdef CONFIG_NET_RX_BUSY_POLL
23357 sk->sk_napi_id = 0;
23358 sk->sk_ll_usec = sysctl_net_busy_read;
23359 diff -NurpP --minimal linux-4.9.217/net/ipv4/af_inet.c linux-4.9.217-vs2.3.9.12/net/ipv4/af_inet.c
23360 --- linux-4.9.217/net/ipv4/af_inet.c 2020-03-27 00:51:46.330092583 +0000
23361 +++ linux-4.9.217-vs2.3.9.12/net/ipv4/af_inet.c 2018-10-20 05:55:44.000000000 +0000
23362 @@ -303,10 +303,15 @@ lookup_protocol:
23366 + if ((protocol == IPPROTO_ICMP) &&
23367 + nx_capable(CAP_NET_RAW, NXC_RAW_ICMP))
23370 if (sock->type == SOCK_RAW && !kern &&
23371 !ns_capable(net->user_ns, CAP_NET_RAW))
23372 goto out_rcu_unlock;
23375 sock->ops = answer->ops;
23376 answer_prot = answer->prot;
23377 answer_flags = answer->flags;
23378 @@ -424,6 +429,7 @@ int inet_bind(struct socket *sock, struc
23379 struct sock *sk = sock->sk;
23380 struct inet_sock *inet = inet_sk(sk);
23381 struct net *net = sock_net(sk);
23382 + struct nx_v4_sock_addr nsa;
23383 unsigned short snum;
23385 u32 tb_id = RT_TABLE_LOCAL;
23386 @@ -449,7 +455,11 @@ int inet_bind(struct socket *sock, struc
23389 tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id;
23390 - chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);
23391 + err = v4_map_sock_addr(inet, addr, &nsa);
23395 + chk_addr_ret = inet_addr_type_table(net, nsa.saddr, tb_id);
23397 /* Not specified by any standard per-se, however it breaks too
23398 * many applications when removed. It is unfortunate since
23399 @@ -461,7 +471,7 @@ int inet_bind(struct socket *sock, struc
23400 err = -EADDRNOTAVAIL;
23401 if (!net->ipv4.sysctl_ip_nonlocal_bind &&
23402 !(inet->freebind || inet->transparent) &&
23403 - addr->sin_addr.s_addr != htonl(INADDR_ANY) &&
23404 + nsa.saddr != htonl(INADDR_ANY) &&
23405 chk_addr_ret != RTN_LOCAL &&
23406 chk_addr_ret != RTN_MULTICAST &&
23407 chk_addr_ret != RTN_BROADCAST)
23408 @@ -487,7 +497,7 @@ int inet_bind(struct socket *sock, struc
23409 if (sk->sk_state != TCP_CLOSE || inet->inet_num)
23410 goto out_release_sock;
23412 - inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;
23413 + v4_set_sock_addr(inet, &nsa);
23414 if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
23415 inet->inet_saddr = 0; /* Use device */
23417 @@ -706,11 +716,13 @@ int inet_getname(struct socket *sock, st
23420 sin->sin_port = inet->inet_dport;
23421 - sin->sin_addr.s_addr = inet->inet_daddr;
23422 + sin->sin_addr.s_addr =
23423 + nx_map_sock_lback(sk->sk_nx_info, inet->inet_daddr);
23425 __be32 addr = inet->inet_rcv_saddr;
23427 addr = inet->inet_saddr;
23428 + addr = nx_map_sock_lback(sk->sk_nx_info, addr);
23429 sin->sin_port = inet->inet_sport;
23430 sin->sin_addr.s_addr = addr;
23432 @@ -894,6 +906,7 @@ static int inet_compat_ioctl(struct sock
23436 +#include <linux/vs_limit.h>
23438 const struct proto_ops inet_stream_ops = {
23440 diff -NurpP --minimal linux-4.9.217/net/ipv4/arp.c linux-4.9.217-vs2.3.9.12/net/ipv4/arp.c
23441 --- linux-4.9.217/net/ipv4/arp.c 2020-03-27 00:51:46.330092583 +0000
23442 +++ linux-4.9.217-vs2.3.9.12/net/ipv4/arp.c 2018-10-20 04:58:15.000000000 +0000
23443 @@ -1320,6 +1320,7 @@ static void arp_format_neigh_entry(struc
23444 struct net_device *dev = n->dev;
23445 int hatype = dev->type;
23447 + /* FIXME: check for network context */
23448 read_lock(&n->lock);
23449 /* Convert hardware address to XX:XX:XX:XX ... form. */
23450 #if IS_ENABLED(CONFIG_AX25)
23451 @@ -1351,6 +1352,7 @@ static void arp_format_pneigh_entry(stru
23452 int hatype = dev ? dev->type : 0;
23455 + /* FIXME: check for network context */
23456 sprintf(tbuf, "%pI4", n->key);
23457 seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n",
23458 tbuf, hatype, ATF_PUBL | ATF_PERM, "00:00:00:00:00:00",
23459 diff -NurpP --minimal linux-4.9.217/net/ipv4/devinet.c linux-4.9.217-vs2.3.9.12/net/ipv4/devinet.c
23460 --- linux-4.9.217/net/ipv4/devinet.c 2020-03-27 00:51:46.330092583 +0000
23461 +++ linux-4.9.217-vs2.3.9.12/net/ipv4/devinet.c 2019-12-25 15:37:52.998413076 +0000
23462 @@ -546,6 +546,7 @@ struct in_device *inetdev_by_index(struc
23464 EXPORT_SYMBOL(inetdev_by_index);
23467 /* Called only from RTNL semaphored context. No locks. */
23469 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
23470 @@ -1000,6 +1001,8 @@ int devinet_ioctl(struct net *net, unsig
23472 in_dev = __in_dev_get_rtnl(dev);
23474 + struct nx_info *nxi = current_nx_info();
23476 if (tryaddrmatch) {
23477 /* Matthias Andree */
23478 /* compare label and address (4.4BSD style) */
23479 @@ -1008,6 +1011,8 @@ int devinet_ioctl(struct net *net, unsig
23480 This is checked above. */
23481 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
23482 ifap = &ifa->ifa_next) {
23483 + if (!nx_v4_ifa_visible(nxi, ifa))
23485 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
23486 sin_orig.sin_addr.s_addr ==
23488 @@ -1020,9 +1025,12 @@ int devinet_ioctl(struct net *net, unsig
23489 comparing just the label */
23491 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
23492 - ifap = &ifa->ifa_next)
23493 + ifap = &ifa->ifa_next) {
23494 + if (!nx_v4_ifa_visible(nxi, ifa))
23496 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
23502 @@ -1176,6 +1184,8 @@ static int inet_gifconf(struct net_devic
23505 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
23506 + if (!nx_v4_ifa_visible(current_nx_info(), ifa))
23509 done += sizeof(ifr);
23511 @@ -1598,6 +1608,7 @@ static int inet_dump_ifaddr(struct sk_bu
23512 struct net_device *dev;
23513 struct in_device *in_dev;
23514 struct in_ifaddr *ifa;
23515 + struct sock *sk = skb->sk;
23516 struct hlist_head *head;
23519 @@ -1621,6 +1632,8 @@ static int inet_dump_ifaddr(struct sk_bu
23521 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
23522 ifa = ifa->ifa_next, ip_idx++) {
23523 + if (sk && !nx_v4_ifa_visible(sk->sk_nx_info, ifa))
23525 if (ip_idx < s_ip_idx)
23527 if (inet_fill_ifaddr(skb, ifa,
23528 diff -NurpP --minimal linux-4.9.217/net/ipv4/fib_trie.c linux-4.9.217-vs2.3.9.12/net/ipv4/fib_trie.c
23529 --- linux-4.9.217/net/ipv4/fib_trie.c 2020-03-27 00:51:46.330092583 +0000
23530 +++ linux-4.9.217-vs2.3.9.12/net/ipv4/fib_trie.c 2019-02-22 08:37:56.413037657 +0000
23531 @@ -2627,6 +2627,7 @@ static int fib_route_seq_show(struct seq
23533 seq_setwidth(seq, 127);
23535 + /* FIXME: check for network context? */
23538 "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
23539 diff -NurpP --minimal linux-4.9.217/net/ipv4/inet_connection_sock.c linux-4.9.217-vs2.3.9.12/net/ipv4/inet_connection_sock.c
23540 --- linux-4.9.217/net/ipv4/inet_connection_sock.c 2020-03-27 00:51:46.340092422 +0000
23541 +++ linux-4.9.217-vs2.3.9.12/net/ipv4/inet_connection_sock.c 2019-10-05 14:58:46.260298130 +0000
23543 #include <linux/module.h>
23544 #include <linux/jhash.h>
23546 +#include <net/addrconf.h>
23547 #include <net/inet_connection_sock.h>
23548 #include <net/inet_hashtables.h>
23549 #include <net/inet_timewait_sock.h>
23550 @@ -44,6 +45,7 @@ void inet_get_local_port_range(struct ne
23552 EXPORT_SYMBOL(inet_get_local_port_range);
23555 int inet_csk_bind_conflict(const struct sock *sk,
23556 const struct inet_bind_bucket *tb, bool relax)
23558 @@ -72,15 +74,13 @@ int inet_csk_bind_conflict(const struct
23559 (sk2->sk_state != TCP_TIME_WAIT &&
23560 !uid_eq(uid, sock_i_uid(sk2))))) {
23562 - if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
23563 - sk2->sk_rcv_saddr == sk->sk_rcv_saddr)
23564 + if (ipv4_rcv_saddr_equal(sk, sk2, true))
23567 if (!relax && reuse && sk2->sk_reuse &&
23568 sk2->sk_state != TCP_LISTEN) {
23570 - if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
23571 - sk2->sk_rcv_saddr == sk->sk_rcv_saddr)
23572 + if (ipv4_rcv_saddr_equal(sk, sk2, true))
23576 diff -NurpP --minimal linux-4.9.217/net/ipv4/inet_diag.c linux-4.9.217-vs2.3.9.12/net/ipv4/inet_diag.c
23577 --- linux-4.9.217/net/ipv4/inet_diag.c 2020-03-27 00:51:46.350092266 +0000
23578 +++ linux-4.9.217-vs2.3.9.12/net/ipv4/inet_diag.c 2020-04-01 09:40:33.315387585 +0000
23581 #include <linux/inet.h>
23582 #include <linux/stddef.h>
23583 +#include <linux/vs_network.h>
23584 +#include <linux/vs_inet.h>
23586 #include <linux/inet_diag.h>
23587 #include <linux/sock_diag.h>
23588 @@ -87,8 +89,8 @@ void inet_diag_msg_common_fill(struct in
23589 memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
23590 memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
23592 - r->id.idiag_src[0] = sk->sk_rcv_saddr;
23593 - r->id.idiag_dst[0] = sk->sk_daddr;
23594 + r->id.idiag_src[0] = nx_map_sock_lback(sk->sk_nx_info, sk->sk_rcv_saddr);
23595 + r->id.idiag_dst[0] = nx_map_sock_lback(sk->sk_nx_info, sk->sk_daddr);
23598 EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill);
23599 @@ -880,6 +882,9 @@ void inet_diag_dump_icsk(struct inet_has
23600 if (!net_eq(sock_net(sk), net))
23603 + if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23609 @@ -942,6 +947,8 @@ skip_listen_ht:
23611 if (!net_eq(sock_net(sk), net))
23613 + if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23617 state = (sk->sk_state == TCP_TIME_WAIT) ?
23618 diff -NurpP --minimal linux-4.9.217/net/ipv4/inet_hashtables.c linux-4.9.217-vs2.3.9.12/net/ipv4/inet_hashtables.c
23619 --- linux-4.9.217/net/ipv4/inet_hashtables.c 2020-03-27 00:51:46.350092266 +0000
23620 +++ linux-4.9.217-vs2.3.9.12/net/ipv4/inet_hashtables.c 2020-04-01 09:40:33.335387251 +0000
23622 #include <net/inet_connection_sock.h>
23623 #include <net/inet_hashtables.h>
23624 #include <net/secure_seq.h>
23625 +#include <net/route.h>
23626 #include <net/ip.h>
23627 #include <net/tcp.h>
23628 #include <net/sock_reuseport.h>
23629 @@ -186,6 +187,11 @@ static inline int compute_score(struct s
23630 if (rcv_saddr != daddr)
23634 + /* block non nx_info ips */
23635 + if (!v4_addr_in_nx_info(sk->sk_nx_info,
23636 + daddr, NXA_MASK_BIND))
23639 if (sk->sk_bound_dev_if || exact_dif) {
23640 if (sk->sk_bound_dev_if != dif)
23641 @@ -301,6 +307,7 @@ begin:
23647 * if the nulls value we got at the end of this lookup is
23648 * not the expected one, we must restart lookup.
23649 diff -NurpP --minimal linux-4.9.217/net/ipv4/netfilter.c linux-4.9.217-vs2.3.9.12/net/ipv4/netfilter.c
23650 --- linux-4.9.217/net/ipv4/netfilter.c 2020-03-27 00:51:46.400091478 +0000
23651 +++ linux-4.9.217-vs2.3.9.12/net/ipv4/netfilter.c 2018-10-20 04:58:15.000000000 +0000
23653 #include <linux/skbuff.h>
23654 #include <linux/gfp.h>
23655 #include <linux/export.h>
23656 -#include <net/route.h>
23657 +// #include <net/route.h>
23658 #include <net/xfrm.h>
23659 #include <net/ip.h>
23660 #include <net/netfilter/nf_queue.h>
23661 diff -NurpP --minimal linux-4.9.217/net/ipv4/raw.c linux-4.9.217-vs2.3.9.12/net/ipv4/raw.c
23662 --- linux-4.9.217/net/ipv4/raw.c 2020-03-27 00:51:46.490090059 +0000
23663 +++ linux-4.9.217-vs2.3.9.12/net/ipv4/raw.c 2019-10-05 14:58:46.260298130 +0000
23664 @@ -128,7 +128,7 @@ static struct sock *__raw_v4_lookup(stru
23666 if (net_eq(sock_net(sk), net) && inet->inet_num == num &&
23667 !(inet->inet_daddr && inet->inet_daddr != raddr) &&
23668 - !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
23669 + v4_sock_addr_match(sk->sk_nx_info, inet, laddr) &&
23670 !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
23671 goto found; /* gotcha */
23673 @@ -418,6 +418,12 @@ static int raw_send_hdrinc(struct sock *
23674 skb_transport_header(skb))->type);
23678 + if (!nx_check(0, VS_ADMIN) && !capable(CAP_NET_RAW) &&
23679 + sk->sk_nx_info &&
23680 + !v4_addr_in_nx_info(sk->sk_nx_info, iph->saddr, NXA_MASK_BIND))
23683 err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
23684 net, sk, skb, NULL, rt->dst.dev,
23686 @@ -623,6 +629,16 @@ static int raw_sendmsg(struct sock *sk,
23690 + if (sk->sk_nx_info) {
23691 + rt = ip_v4_find_src(sock_net(sk), sk->sk_nx_info, &fl4);
23692 + if (IS_ERR(rt)) {
23693 + err = PTR_ERR(rt);
23700 security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
23701 rt = ip_route_output_flow(net, &fl4, sk);
23703 @@ -701,17 +717,19 @@ static int raw_bind(struct sock *sk, str
23705 struct inet_sock *inet = inet_sk(sk);
23706 struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
23707 + struct nx_v4_sock_addr nsa = { 0 };
23711 if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
23713 - chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
23714 + v4_map_sock_addr(inet, addr, &nsa);
23715 + chk_addr_ret = inet_addr_type(sock_net(sk), nsa.saddr);
23716 ret = -EADDRNOTAVAIL;
23717 - if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
23718 + if (nsa.saddr && chk_addr_ret != RTN_LOCAL &&
23719 chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
23721 - inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;
23722 + v4_set_sock_addr(inet, &nsa);
23723 if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
23724 inet->inet_saddr = 0; /* Use device */
23726 @@ -760,7 +778,8 @@ static int raw_recvmsg(struct sock *sk,
23727 /* Copy the address. */
23729 sin->sin_family = AF_INET;
23730 - sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
23731 + sin->sin_addr.s_addr =
23732 + nx_map_sock_lback(sk->sk_nx_info, ip_hdr(skb)->saddr);
23734 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
23735 *addr_len = sizeof(*sin);
23736 @@ -956,7 +975,8 @@ static struct sock *raw_get_first(struct
23737 for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE;
23739 sk_for_each(sk, &state->h->ht[state->bucket])
23740 - if (sock_net(sk) == seq_file_net(seq))
23741 + if ((sock_net(sk) == seq_file_net(seq)) &&
23742 + nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23746 @@ -972,7 +992,8 @@ static struct sock *raw_get_next(struct
23750 - } while (sk && sock_net(sk) != seq_file_net(seq));
23751 + } while (sk && ((sock_net(sk) != seq_file_net(seq)) ||
23752 + !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)));
23754 if (!sk && ++state->bucket < RAW_HTABLE_SIZE) {
23755 sk = sk_head(&state->h->ht[state->bucket]);
23756 diff -NurpP --minimal linux-4.9.217/net/ipv4/route.c linux-4.9.217-vs2.3.9.12/net/ipv4/route.c
23757 --- linux-4.9.217/net/ipv4/route.c 2020-03-27 00:51:46.490090059 +0000
23758 +++ linux-4.9.217-vs2.3.9.12/net/ipv4/route.c 2019-12-25 15:37:52.998413076 +0000
23759 @@ -2289,7 +2289,7 @@ struct rtable *__ip_route_output_key_has
23762 if (fl4->flowi4_oif) {
23763 - dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
23764 + dev_out = dev_get_by_index_real_rcu(net, fl4->flowi4_oif);
23765 rth = ERR_PTR(-ENODEV);
23768 diff -NurpP --minimal linux-4.9.217/net/ipv4/tcp.c linux-4.9.217-vs2.3.9.12/net/ipv4/tcp.c
23769 --- linux-4.9.217/net/ipv4/tcp.c 2020-03-27 00:51:46.490090059 +0000
23770 +++ linux-4.9.217-vs2.3.9.12/net/ipv4/tcp.c 2020-04-01 09:40:33.355386921 +0000
23771 @@ -269,6 +269,7 @@
23772 #include <linux/err.h>
23773 #include <linux/time.h>
23774 #include <linux/slab.h>
23775 +#include <linux/in.h>
23777 #include <net/icmp.h>
23778 #include <net/inet_common.h>
23779 diff -NurpP --minimal linux-4.9.217/net/ipv4/tcp_ipv4.c linux-4.9.217-vs2.3.9.12/net/ipv4/tcp_ipv4.c
23780 --- linux-4.9.217/net/ipv4/tcp_ipv4.c 2020-03-27 00:51:46.520089586 +0000
23781 +++ linux-4.9.217-vs2.3.9.12/net/ipv4/tcp_ipv4.c 2020-04-01 09:42:52.973073408 +0000
23782 @@ -1935,8 +1935,12 @@ get_head:
23783 sk = sk_nulls_next(sk);
23785 sk_nulls_for_each_from(sk, node) {
23786 + vxdprintk(VXD_CBIT(net, 6), "sk: %p [#%d] (from %d)",
23787 + sk, sk->sk_nid, nx_current_nid());
23788 if (!net_eq(sock_net(sk), net))
23790 + if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23792 if (sk->sk_family == st->family)
23795 @@ -1990,6 +1994,11 @@ static void *established_get_first(struc
23797 spin_lock_bh(lock);
23798 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
23799 + vxdprintk(VXD_CBIT(net, 6),
23800 + "sk,egf: %p [#%d] (from %d)",
23801 + sk, sk->sk_nid, nx_current_nid());
23802 + if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23804 if (sk->sk_family != st->family ||
23805 !net_eq(sock_net(sk), net)) {
23807 @@ -2016,6 +2025,11 @@ static void *established_get_next(struct
23808 sk = sk_nulls_next(sk);
23810 sk_nulls_for_each_from(sk, node) {
23811 + vxdprintk(VXD_CBIT(net, 6),
23812 + "sk,egn: %p [#%d] (from %d)",
23813 + sk, sk->sk_nid, nx_current_nid());
23814 + if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23816 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
23819 @@ -2207,9 +2221,9 @@ static void get_openreq4(const struct re
23820 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
23821 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
23823 - ireq->ir_loc_addr,
23824 + nx_map_sock_lback(current_nx_info(), ireq->ir_loc_addr),
23826 - ireq->ir_rmt_addr,
23827 + nx_map_sock_lback(current_nx_info(), ireq->ir_rmt_addr),
23828 ntohs(ireq->ir_rmt_port),
23830 0, 0, /* could print option size, but that is af dependent. */
23831 @@ -2232,8 +2246,8 @@ static void get_tcp4_sock(struct sock *s
23832 const struct inet_connection_sock *icsk = inet_csk(sk);
23833 const struct inet_sock *inet = inet_sk(sk);
23834 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
23835 - __be32 dest = inet->inet_daddr;
23836 - __be32 src = inet->inet_rcv_saddr;
23837 + __be32 dest = nx_map_sock_lback(current_nx_info(), inet->inet_daddr);
23838 + __be32 src = nx_map_sock_lback(current_nx_info(), inet->inet_rcv_saddr);
23839 __u16 destp = ntohs(inet->inet_dport);
23840 __u16 srcp = ntohs(inet->inet_sport);
23842 @@ -2292,8 +2306,8 @@ static void get_timewait4_sock(const str
23846 - dest = tw->tw_daddr;
23847 - src = tw->tw_rcv_saddr;
23848 + dest = nx_map_sock_lback(current_nx_info(), tw->tw_daddr);
23849 + src = nx_map_sock_lback(current_nx_info(), tw->tw_rcv_saddr);
23850 destp = ntohs(tw->tw_dport);
23851 srcp = ntohs(tw->tw_sport);
23853 diff -NurpP --minimal linux-4.9.217/net/ipv4/tcp_minisocks.c linux-4.9.217-vs2.3.9.12/net/ipv4/tcp_minisocks.c
23854 --- linux-4.9.217/net/ipv4/tcp_minisocks.c 2020-03-27 00:51:46.520089586 +0000
23855 +++ linux-4.9.217-vs2.3.9.12/net/ipv4/tcp_minisocks.c 2018-10-20 05:55:44.000000000 +0000
23857 #include <linux/slab.h>
23858 #include <linux/sysctl.h>
23859 #include <linux/workqueue.h>
23860 +#include <linux/vs_limit.h>
23861 +#include <linux/vs_socket.h>
23862 +#include <linux/vs_context.h>
23863 #include <net/tcp.h>
23864 #include <net/inet_common.h>
23865 #include <net/xfrm.h>
23866 @@ -286,6 +289,11 @@ void tcp_time_wait(struct sock *sk, int
23867 tcptw->tw_ts_offset = tp->tsoffset;
23868 tcptw->tw_last_oow_ack_time = 0;
23870 + tw->tw_xid = sk->sk_xid;
23871 + tw->tw_vx_info = NULL;
23872 + tw->tw_nid = sk->sk_nid;
23873 + tw->tw_nx_info = NULL;
23875 #if IS_ENABLED(CONFIG_IPV6)
23876 if (tw->tw_family == PF_INET6) {
23877 struct ipv6_pinfo *np = inet6_sk(sk);
23878 diff -NurpP --minimal linux-4.9.217/net/ipv4/udp.c linux-4.9.217-vs2.3.9.12/net/ipv4/udp.c
23879 --- linux-4.9.217/net/ipv4/udp.c 2020-03-27 00:51:46.530089432 +0000
23880 +++ linux-4.9.217-vs2.3.9.12/net/ipv4/udp.c 2019-10-05 14:58:46.260298130 +0000
23881 @@ -361,12 +361,26 @@ int ipv4_rcv_saddr_equal(const struct so
23882 bool match_wildcard)
23884 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
23885 + __be32 sk1_rcv_saddr = inet1->inet_rcv_saddr,
23886 + sk2_rcv_saddr = inet2->inet_rcv_saddr;
23888 - if (!ipv6_only_sock(sk2)) {
23889 - if (inet1->inet_rcv_saddr == inet2->inet_rcv_saddr)
23891 - if (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr)
23892 - return match_wildcard;
23893 + if (ipv6_only_sock(sk2))
23896 + if (sk1_rcv_saddr && sk2_rcv_saddr && sk1_rcv_saddr == sk2_rcv_saddr)
23899 + if (match_wildcard) {
23900 + if (!sk2_rcv_saddr && !sk1_rcv_saddr)
23901 + return nx_v4_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info);
23903 + if (!sk2_rcv_saddr && sk1_rcv_saddr)
23904 + return v4_addr_in_nx_info(sk2->sk_nx_info,
23905 + sk1_rcv_saddr, NXA_MASK_BIND);
23907 + if (!sk1_rcv_saddr && sk2_rcv_saddr)
23908 + return v4_addr_in_nx_info(sk1->sk_nx_info,
23909 + sk2_rcv_saddr, NXA_MASK_BIND);
23913 @@ -408,6 +422,11 @@ static int compute_score(struct sock *sk
23914 if (inet->inet_rcv_saddr != daddr)
23918 + /* block non nx_info ips */
23919 + if (!v4_addr_in_nx_info(sk->sk_nx_info,
23920 + daddr, NXA_MASK_BIND))
23924 if (inet->inet_daddr) {
23925 @@ -483,6 +502,7 @@ static struct sock *udp4_lib_lookup2(str
23930 /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
23931 * harder than this. -DaveM
23933 @@ -607,7 +627,7 @@ static inline bool __udp_is_mcast_sock(s
23934 udp_sk(sk)->udp_port_hash != hnum ||
23935 (inet->inet_daddr && inet->inet_daddr != rmt_addr) ||
23936 (inet->inet_dport != rmt_port && inet->inet_dport) ||
23937 - (inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) ||
23938 + !v4_sock_addr_match(sk->sk_nx_info, inet, loc_addr) ||
23939 ipv6_only_sock(sk) ||
23940 (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
23942 @@ -1024,6 +1044,16 @@ int udp_sendmsg(struct sock *sk, struct
23944 faddr, saddr, dport, inet->inet_sport);
23946 + if (sk->sk_nx_info) {
23947 + rt = ip_v4_find_src(net, sk->sk_nx_info, fl4);
23948 + if (IS_ERR(rt)) {
23949 + err = PTR_ERR(rt);
23956 security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
23957 rt = ip_route_output_flow(net, fl4, sk);
23959 @@ -1321,7 +1351,8 @@ try_again:
23961 sin->sin_family = AF_INET;
23962 sin->sin_port = udp_hdr(skb)->source;
23963 - sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
23964 + sin->sin_addr.s_addr = nx_map_sock_lback(
23965 + skb->sk->sk_nx_info, ip_hdr(skb)->saddr);
23966 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
23967 *addr_len = sizeof(*sin);
23969 @@ -2291,6 +2322,8 @@ static struct sock *udp_get_first(struct
23970 sk_for_each(sk, &hslot->head) {
23971 if (!net_eq(sock_net(sk), net))
23973 + if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23975 if (sk->sk_family == state->family)
23978 @@ -2308,7 +2341,9 @@ static struct sock *udp_get_next(struct
23982 - } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
23983 + } while (sk && (!net_eq(sock_net(sk), net) ||
23984 + sk->sk_family != state->family ||
23985 + !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)));
23988 if (state->bucket <= state->udp_table->mask)
23989 @@ -2404,8 +2439,8 @@ static void udp4_format_sock(struct sock
23992 struct inet_sock *inet = inet_sk(sp);
23993 - __be32 dest = inet->inet_daddr;
23994 - __be32 src = inet->inet_rcv_saddr;
23995 + __be32 dest = nx_map_sock_lback(current_nx_info(), inet->inet_daddr);
23996 + __be32 src = nx_map_sock_lback(current_nx_info(), inet->inet_rcv_saddr);
23997 __u16 destp = ntohs(inet->inet_dport);
23998 __u16 srcp = ntohs(inet->inet_sport);
24000 diff -NurpP --minimal linux-4.9.217/net/ipv4/udp_diag.c linux-4.9.217-vs2.3.9.12/net/ipv4/udp_diag.c
24001 --- linux-4.9.217/net/ipv4/udp_diag.c 2016-12-11 19:17:54.000000000 +0000
24002 +++ linux-4.9.217-vs2.3.9.12/net/ipv4/udp_diag.c 2018-10-20 06:31:18.000000000 +0000
24003 @@ -120,6 +120,8 @@ static void udp_dump(struct udp_table *t
24005 if (!net_eq(sock_net(sk), net))
24007 + if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
24011 if (!(r->idiag_states & (1 << sk->sk_state)))
24012 diff -NurpP --minimal linux-4.9.217/net/ipv6/addrconf.c linux-4.9.217-vs2.3.9.12/net/ipv6/addrconf.c
24013 --- linux-4.9.217/net/ipv6/addrconf.c 2020-03-27 00:51:46.560088955 +0000
24014 +++ linux-4.9.217-vs2.3.9.12/net/ipv6/addrconf.c 2020-04-01 09:40:33.385386423 +0000
24016 #include <linux/proc_fs.h>
24017 #include <linux/seq_file.h>
24018 #include <linux/export.h>
24019 +#include <linux/vs_network.h>
24021 /* Set to 3 to get tracing... */
24022 #define ACONF_DEBUG 2
24023 @@ -1498,7 +1499,8 @@ static int __ipv6_dev_get_saddr(struct n
24024 struct ipv6_saddr_dst *dst,
24025 struct inet6_dev *idev,
24026 struct ipv6_saddr_score *scores,
24029 + struct nx_info *nxi)
24031 struct ipv6_saddr_score *score = &scores[1 - hiscore_idx], *hiscore = &scores[hiscore_idx];
24033 @@ -1528,6 +1530,8 @@ static int __ipv6_dev_get_saddr(struct n
24037 + if (!v6_addr_in_nx_info(nxi, &score->ifa->addr, -1))
24041 bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX);
24042 @@ -1578,26 +1582,27 @@ static int ipv6_get_saddr_master(struct
24043 const struct net_device *master,
24044 struct ipv6_saddr_dst *dst,
24045 struct ipv6_saddr_score *scores,
24048 + struct nx_info *nxi)
24050 struct inet6_dev *idev;
24052 idev = __in6_dev_get(dst_dev);
24054 - hiscore_idx = __ipv6_dev_get_saddr(net, dst, idev,
24055 - scores, hiscore_idx);
24056 + hiscore_idx = __ipv6_dev_get_saddr(net, dst,
24057 + idev, scores, hiscore_idx, nxi);
24059 idev = __in6_dev_get(master);
24061 - hiscore_idx = __ipv6_dev_get_saddr(net, dst, idev,
24062 - scores, hiscore_idx);
24063 + hiscore_idx = __ipv6_dev_get_saddr(net, dst,
24064 + idev, scores, hiscore_idx, nxi);
24066 return hiscore_idx;
24069 int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
24070 const struct in6_addr *daddr, unsigned int prefs,
24071 - struct in6_addr *saddr)
24072 + struct in6_addr *saddr, struct nx_info *nxi)
24074 struct ipv6_saddr_score scores[2], *hiscore;
24075 struct ipv6_saddr_dst dst;
24076 @@ -1646,7 +1651,8 @@ int ipv6_dev_get_saddr(struct net *net,
24078 if (use_oif_addr) {
24080 - hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx);
24081 + hiscore_idx = __ipv6_dev_get_saddr(net,
24082 + &dst, idev, scores, hiscore_idx, nxi);
24084 const struct net_device *master;
24085 int master_idx = 0;
24086 @@ -1660,8 +1666,8 @@ int ipv6_dev_get_saddr(struct net *net,
24087 master_idx = master->ifindex;
24089 hiscore_idx = ipv6_get_saddr_master(net, dst_dev,
24091 - scores, hiscore_idx);
24092 + master, &dst, scores,
24093 + hiscore_idx, nxi);
24095 if (scores[hiscore_idx].ifa)
24097 @@ -1676,7 +1682,8 @@ int ipv6_dev_get_saddr(struct net *net,
24098 idev = __in6_dev_get(dev);
24101 - hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx);
24102 + hiscore_idx = __ipv6_dev_get_saddr(net,
24103 + &dst, idev, scores, hiscore_idx, nxi);
24107 @@ -4134,7 +4141,10 @@ static void if6_seq_stop(struct seq_file
24108 static int if6_seq_show(struct seq_file *seq, void *v)
24110 struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v;
24111 - seq_printf(seq, "%pi6 %02x %02x %02x %02x %8s\n",
24113 + if (nx_check(0, VS_ADMIN|VS_WATCH) ||
24114 + v6_addr_in_nx_info(current_nx_info(), &ifp->addr, -1))
24115 + seq_printf(seq, "%pi6 %02x %02x %02x %02x %8s\n",
24117 ifp->idev->dev->ifindex,
24119 @@ -4718,6 +4728,11 @@ static int in6_dump_addrs(struct inet6_d
24120 struct ifacaddr6 *ifaca;
24122 int ip_idx = *p_ip_idx;
24123 + struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL;
24125 + /* disable ipv6 on non v6 guests */
24126 + if (nxi && !nx_info_has_v6(nxi))
24129 read_lock_bh(&idev->lock);
24131 @@ -4728,6 +4743,8 @@ static int in6_dump_addrs(struct inet6_d
24132 list_for_each_entry(ifa, &idev->addr_list, if_list) {
24133 if (ip_idx < s_ip_idx)
24135 + if (!v6_addr_in_nx_info(nxi, &ifa->addr, -1))
24137 err = inet6_fill_ifaddr(skb, ifa,
24138 NETLINK_CB(cb->skb).portid,
24139 cb->nlh->nlmsg_seq,
24140 @@ -4747,6 +4764,8 @@ next:
24141 ifmca = ifmca->next, ip_idx++) {
24142 if (ip_idx < s_ip_idx)
24144 + if (!v6_addr_in_nx_info(nxi, &ifmca->mca_addr, -1))
24146 err = inet6_fill_ifmcaddr(skb, ifmca,
24147 NETLINK_CB(cb->skb).portid,
24148 cb->nlh->nlmsg_seq,
24149 @@ -4762,6 +4781,8 @@ next:
24150 ifaca = ifaca->aca_next, ip_idx++) {
24151 if (ip_idx < s_ip_idx)
24153 + if (!v6_addr_in_nx_info(nxi, &ifaca->aca_addr, -1))
24155 err = inet6_fill_ifacaddr(skb, ifaca,
24156 NETLINK_CB(cb->skb).portid,
24157 cb->nlh->nlmsg_seq,
24158 @@ -4790,6 +4811,10 @@ static int inet6_dump_addr(struct sk_buf
24159 struct inet6_dev *idev;
24160 struct hlist_head *head;
24162 + /* FIXME: maybe disable ipv6 on non v6 guests?
24163 + if (skb->sk && skb->sk->sk_vx_info)
24164 + return skb->len; */
24167 s_idx = idx = cb->args[1];
24168 s_ip_idx = ip_idx = cb->args[2];
24169 @@ -5308,6 +5333,7 @@ static int inet6_dump_ifinfo(struct sk_b
24170 struct net_device *dev;
24171 struct inet6_dev *idev;
24172 struct hlist_head *head;
24173 + struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL;
24176 s_idx = cb->args[1];
24177 @@ -5319,6 +5345,8 @@ static int inet6_dump_ifinfo(struct sk_b
24178 hlist_for_each_entry_rcu(dev, head, index_hlist) {
24181 + if (!v6_dev_in_nx_info(dev, nxi))
24183 idev = __in6_dev_get(dev);
24186 diff -NurpP --minimal linux-4.9.217/net/ipv6/af_inet6.c linux-4.9.217-vs2.3.9.12/net/ipv6/af_inet6.c
24187 --- linux-4.9.217/net/ipv6/af_inet6.c 2020-03-27 00:51:46.560088955 +0000
24188 +++ linux-4.9.217-vs2.3.9.12/net/ipv6/af_inet6.c 2019-02-22 08:37:56.413037657 +0000
24190 #include <linux/netdevice.h>
24191 #include <linux/icmpv6.h>
24192 #include <linux/netfilter_ipv6.h>
24193 +#include <linux/vs_inet.h>
24195 #include <net/ip.h>
24196 #include <net/ipv6.h>
24197 @@ -167,10 +168,13 @@ lookup_protocol:
24201 + if ((protocol == IPPROTO_ICMPV6) &&
24202 + nx_capable(CAP_NET_RAW, NXC_RAW_ICMP))
24204 if (sock->type == SOCK_RAW && !kern &&
24205 !ns_capable(net->user_ns, CAP_NET_RAW))
24206 goto out_rcu_unlock;
24209 sock->ops = answer->ops;
24210 answer_prot = answer->prot;
24211 answer_flags = answer->flags;
24212 @@ -272,6 +276,7 @@ int inet6_bind(struct socket *sock, stru
24213 struct inet_sock *inet = inet_sk(sk);
24214 struct ipv6_pinfo *np = inet6_sk(sk);
24215 struct net *net = sock_net(sk);
24216 + struct nx_v6_sock_addr nsa;
24218 unsigned short snum;
24219 bool saved_ipv6only;
24220 @@ -288,6 +293,10 @@ int inet6_bind(struct socket *sock, stru
24221 if (addr->sin6_family != AF_INET6)
24222 return -EAFNOSUPPORT;
24224 + err = v6_map_sock_addr(inet, addr, &nsa);
24228 addr_type = ipv6_addr_type(&addr->sin6_addr);
24229 if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM)
24231 @@ -340,6 +349,10 @@ int inet6_bind(struct socket *sock, stru
24232 err = -EADDRNOTAVAIL;
24235 + if (!v4_addr_in_nx_info(sk->sk_nx_info, v4addr, NXA_MASK_BIND)) {
24236 + err = -EADDRNOTAVAIL;
24240 if (addr_type != IPV6_ADDR_ANY) {
24241 struct net_device *dev = NULL;
24242 @@ -369,6 +382,11 @@ int inet6_bind(struct socket *sock, stru
24246 + if (!v6_addr_in_nx_info(sk->sk_nx_info, &addr->sin6_addr, -1)) {
24247 + err = -EADDRNOTAVAIL;
24251 /* ipv4 addr of the socket is invalid. Only the
24252 * unspecified and mapped address have a v4 equivalent.
24254 @@ -386,6 +404,9 @@ int inet6_bind(struct socket *sock, stru
24258 + /* NOTE(review): presumably stores the nsa filled in by v6_map_sock_addr() above; confirm */
24259 + v6_set_sock_addr(inet, &nsa);
24261 inet->inet_rcv_saddr = v4addr;
24262 inet->inet_saddr = v4addr;
24264 @@ -492,9 +513,11 @@ int inet6_getname(struct socket *sock, s
24266 sin->sin6_port = inet->inet_dport;
24267 sin->sin6_addr = sk->sk_v6_daddr;
24268 + /* FIXME: remap lback? */
24270 sin->sin6_flowinfo = np->flow_label;
24272 + /* FIXME: remap lback? */
24273 if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
24274 sin->sin6_addr = np->saddr;
24276 diff -NurpP --minimal linux-4.9.217/net/ipv6/datagram.c linux-4.9.217-vs2.3.9.12/net/ipv6/datagram.c
24277 --- linux-4.9.217/net/ipv6/datagram.c 2020-03-27 00:51:46.560088955 +0000
24278 +++ linux-4.9.217-vs2.3.9.12/net/ipv6/datagram.c 2019-02-22 08:37:56.423037487 +0000
24279 @@ -779,7 +779,7 @@ int ip6_datagram_send_ctl(struct net *ne
24282 if (fl6->flowi6_oif) {
24283 - dev = dev_get_by_index_rcu(net, fl6->flowi6_oif);
24284 + dev = dev_get_by_index_real_rcu(net, fl6->flowi6_oif);
24288 diff -NurpP --minimal linux-4.9.217/net/ipv6/fib6_rules.c linux-4.9.217-vs2.3.9.12/net/ipv6/fib6_rules.c
24289 --- linux-4.9.217/net/ipv6/fib6_rules.c 2020-03-27 00:51:46.570088800 +0000
24290 +++ linux-4.9.217-vs2.3.9.12/net/ipv6/fib6_rules.c 2018-10-20 04:58:15.000000000 +0000
24291 @@ -102,7 +102,7 @@ static int fib6_rule_action(struct fib_r
24292 ip6_dst_idev(&rt->dst)->dev,
24294 rt6_flags2srcprefs(flags),
24298 if (!ipv6_prefix_equal(&saddr, &r->src.addr,
24300 diff -NurpP --minimal linux-4.9.217/net/ipv6/inet6_hashtables.c linux-4.9.217-vs2.3.9.12/net/ipv6/inet6_hashtables.c
24301 --- linux-4.9.217/net/ipv6/inet6_hashtables.c 2020-03-27 00:51:46.600088328 +0000
24302 +++ linux-4.9.217-vs2.3.9.12/net/ipv6/inet6_hashtables.c 2020-04-01 09:40:33.385386423 +0000
24305 #include <linux/module.h>
24306 #include <linux/random.h>
24307 +#include <linux/vs_inet6.h>
24309 #include <net/addrconf.h>
24310 #include <net/inet_connection_sock.h>
24311 @@ -108,6 +109,9 @@ static inline int compute_score(struct s
24312 if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
24316 + if (!v6_addr_in_nx_info(sk->sk_nx_info, daddr, -1))
24319 if (sk->sk_bound_dev_if || exact_dif) {
24320 if (sk->sk_bound_dev_if != dif)
24321 @@ -283,39 +287,71 @@ EXPORT_SYMBOL_GPL(inet6_hash);
24322 * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
24323 * and 0.0.0.0 equals to 0.0.0.0 only
24325 -int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
24326 +int ipv6_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2,
24327 bool match_wildcard)
24329 + const struct in6_addr *sk1_rcv_saddr6 = inet6_rcv_saddr(sk1);
24330 const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
24331 + __be32 sk1_rcv_saddr = sk1->sk_rcv_saddr;
24332 + __be32 sk2_rcv_saddr = sk2->sk_rcv_saddr;
24333 + int sk1_ipv6only = inet_v6_ipv6only(sk1);
24334 int sk2_ipv6only = inet_v6_ipv6only(sk2);
24335 - int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
24336 + int addr_type1 = ipv6_addr_type(sk1_rcv_saddr6);
24337 int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
24340 + /* if one is mapped and the other is ipv6only exit early */
24341 + if (addr_type1 == IPV6_ADDR_MAPPED && sk2_ipv6only)
24344 + if (addr_type2 == IPV6_ADDR_MAPPED && sk1_ipv6only)
24347 /* if both are mapped, treat as IPv4 */
24348 - if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
24349 - if (!sk2_ipv6only) {
24350 - if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
24352 - if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
24353 - return match_wildcard;
24355 + if (addr_type1 == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
24356 + if (sk1_rcv_saddr == sk2_rcv_saddr)
24358 + if ((!sk1_rcv_saddr || !sk2_rcv_saddr) && match_wildcard)
24363 - if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
24365 + /* if both are wildcards, check for overlap */
24366 + if (addr_type1 == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
24367 + return nx_v6_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info);
24369 - if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
24370 - !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
24371 + /* if both are valid ipv6 addresses, mapped handled above */
24372 + if (addr_type1 != IPV6_ADDR_ANY && addr_type2 != IPV6_ADDR_ANY &&
24373 + sk2_rcv_saddr6 && ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6))
24376 - if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
24377 - !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
24379 + if (addr_type1 == IPV6_ADDR_ANY && match_wildcard) {
24380 + /* ipv6only case handled above */
24381 + if (addr_type2 == IPV6_ADDR_MAPPED)
24382 + return v4_addr_in_nx_info(sk1->sk_nx_info, sk2_rcv_saddr, -1);
24384 + return v6_addr_in_nx_info(sk1->sk_nx_info, sk2_rcv_saddr6, -1);
24387 - if (sk2_rcv_saddr6 &&
24388 - ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
24390 + if (addr_type2 == IPV6_ADDR_ANY && match_wildcard) {
24391 + /* ipv6only case handled above */
24392 + if (addr_type1 == IPV6_ADDR_MAPPED)
24393 + return v4_addr_in_nx_info(sk2->sk_nx_info, sk1_rcv_saddr, -1);
24395 + return v6_addr_in_nx_info(sk2->sk_nx_info, sk1_rcv_saddr6, -1);
24401 + if (!sk1_rcv_saddr && !sk2_rcv_saddr)
24402 + return nx_v4_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info);
24404 + if (!sk2_rcv_saddr)
24405 + return v4_addr_in_nx_info(sk1->sk_nx_info, sk2_rcv_saddr, -1);
24407 + if (!sk1_rcv_saddr)
24408 + return v4_addr_in_nx_info(sk2->sk_nx_info, sk1_rcv_saddr, -1);
24412 diff -NurpP --minimal linux-4.9.217/net/ipv6/ip6_fib.c linux-4.9.217-vs2.3.9.12/net/ipv6/ip6_fib.c
24413 --- linux-4.9.217/net/ipv6/ip6_fib.c 2020-03-27 00:51:46.610088167 +0000
24414 +++ linux-4.9.217-vs2.3.9.12/net/ipv6/ip6_fib.c 2020-04-01 09:40:33.385386423 +0000
24415 @@ -1977,6 +1977,7 @@ static int ipv6_route_seq_show(struct se
24416 struct rt6_info *rt = v;
24417 struct ipv6_route_iter *iter = seq->private;
24419 + /* FIXME: check for network context? */
24420 seq_printf(seq, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
24422 #ifdef CONFIG_IPV6_SUBTREES
24423 diff -NurpP --minimal linux-4.9.217/net/ipv6/ip6_output.c linux-4.9.217-vs2.3.9.12/net/ipv6/ip6_output.c
24424 --- linux-4.9.217/net/ipv6/ip6_output.c 2020-03-27 00:51:46.610088167 +0000
24425 +++ linux-4.9.217-vs2.3.9.12/net/ipv6/ip6_output.c 2019-10-05 14:58:46.270297968 +0000
24426 @@ -962,7 +962,8 @@ static int ip6_dst_lookup_tail(struct ne
24427 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
24428 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
24429 sk ? inet6_sk(sk)->srcprefs : 0,
24432 + sk ? sk->sk_nx_info : NULL);
24434 goto out_err_release;
24436 diff -NurpP --minimal linux-4.9.217/net/ipv6/ip6_tunnel.c linux-4.9.217-vs2.3.9.12/net/ipv6/ip6_tunnel.c
24437 --- linux-4.9.217/net/ipv6/ip6_tunnel.c 2020-03-27 00:51:46.610088167 +0000
24438 +++ linux-4.9.217-vs2.3.9.12/net/ipv6/ip6_tunnel.c 2020-04-01 09:40:33.385386423 +0000
24439 @@ -1116,7 +1116,7 @@ route_lookup:
24441 if (t->parms.collect_md &&
24442 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
24443 - &fl6->daddr, 0, &fl6->saddr))
24444 + &fl6->daddr, 0, &fl6->saddr, NULL))
24445 goto tx_err_link_failure;
24448 diff -NurpP --minimal linux-4.9.217/net/ipv6/ndisc.c linux-4.9.217-vs2.3.9.12/net/ipv6/ndisc.c
24449 --- linux-4.9.217/net/ipv6/ndisc.c 2020-03-27 00:51:46.620088013 +0000
24450 +++ linux-4.9.217-vs2.3.9.12/net/ipv6/ndisc.c 2019-02-22 08:37:56.423037487 +0000
24451 @@ -512,7 +512,7 @@ void ndisc_send_na(struct net_device *de
24453 if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
24454 inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
24458 src_addr = &tmpaddr;
24460 diff -NurpP --minimal linux-4.9.217/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c linux-4.9.217-vs2.3.9.12/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
24461 --- linux-4.9.217/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c 2016-12-11 19:17:54.000000000 +0000
24462 +++ linux-4.9.217-vs2.3.9.12/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c 2018-10-20 04:58:15.000000000 +0000
24463 @@ -39,7 +39,7 @@ nf_nat_masquerade_ipv6(struct sk_buff *s
24464 ctinfo == IP_CT_RELATED_REPLY));
24466 if (ipv6_dev_get_saddr(nf_ct_net(ct), out,
24467 - &ipv6_hdr(skb)->daddr, 0, &src) < 0)
24468 + &ipv6_hdr(skb)->daddr, 0, &src, NULL) < 0)
24471 nfct_nat(ct)->masq_index = out->ifindex;
24472 diff -NurpP --minimal linux-4.9.217/net/ipv6/raw.c linux-4.9.217-vs2.3.9.12/net/ipv6/raw.c
24473 --- linux-4.9.217/net/ipv6/raw.c 2020-03-27 00:51:46.720086437 +0000
24474 +++ linux-4.9.217-vs2.3.9.12/net/ipv6/raw.c 2019-10-05 14:58:46.270297968 +0000
24475 @@ -293,6 +293,13 @@ static int rawv6_bind(struct sock *sk, s
24479 + if (!v6_addr_in_nx_info(sk->sk_nx_info, &addr->sin6_addr, -1)) {
24480 + err = -EADDRNOTAVAIL;
24486 /* ipv4 addr of the socket is invalid. Only the
24487 * unspecified and mapped address have a v4 equivalent.
24489 diff -NurpP --minimal linux-4.9.217/net/ipv6/route.c linux-4.9.217-vs2.3.9.12/net/ipv6/route.c
24490 --- linux-4.9.217/net/ipv6/route.c 2020-03-27 00:51:46.720086437 +0000
24491 +++ linux-4.9.217-vs2.3.9.12/net/ipv6/route.c 2020-04-01 09:40:33.385386423 +0000
24492 @@ -3295,7 +3295,8 @@ static int rt6_fill_node(struct net *net
24493 goto nla_put_failure;
24495 struct in6_addr saddr_buf;
24496 - if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
24497 + if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf,
24498 + (skb->sk ? skb->sk->sk_nx_info : NULL)) == 0 &&
24499 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
24500 goto nla_put_failure;
24502 diff -NurpP --minimal linux-4.9.217/net/ipv6/tcp_ipv6.c linux-4.9.217-vs2.3.9.12/net/ipv6/tcp_ipv6.c
24503 --- linux-4.9.217/net/ipv6/tcp_ipv6.c 2020-03-27 00:51:46.720086437 +0000
24504 +++ linux-4.9.217-vs2.3.9.12/net/ipv6/tcp_ipv6.c 2019-10-05 14:58:46.270297968 +0000
24505 @@ -149,11 +149,18 @@ static int tcp_v6_connect(struct sock *s
24508 if (ipv6_addr_any(&usin->sin6_addr)) {
24509 - if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
24510 - ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
24511 - &usin->sin6_addr);
24513 - usin->sin6_addr = in6addr_loopback;
24514 + struct nx_info *nxi = sk->sk_nx_info;
24516 + if (nxi && nx_info_has_v6(nxi))
24517 + /* FIXME: remap lback? */
24518 + usin->sin6_addr = nxi->v6.ip;
24520 + if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
24521 + ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
24522 + &usin->sin6_addr);
24524 + usin->sin6_addr = in6addr_loopback;
24528 addr_type = ipv6_addr_type(&usin->sin6_addr);
24529 diff -NurpP --minimal linux-4.9.217/net/ipv6/udp.c linux-4.9.217-vs2.3.9.12/net/ipv6/udp.c
24530 --- linux-4.9.217/net/ipv6/udp.c 2020-03-27 00:51:46.720086437 +0000
24531 +++ linux-4.9.217-vs2.3.9.12/net/ipv6/udp.c 2019-10-05 14:58:46.270297968 +0000
24532 @@ -135,6 +135,10 @@ static int compute_score(struct sock *sk
24533 if (inet->inet_dport != sport)
24537 + /* reject destination addresses outside the socket's network context */
24538 + if (!v6_addr_in_nx_info(sk->sk_nx_info, daddr, -1))
24542 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
24543 diff -NurpP --minimal linux-4.9.217/net/ipv6/xfrm6_policy.c linux-4.9.217-vs2.3.9.12/net/ipv6/xfrm6_policy.c
24544 --- linux-4.9.217/net/ipv6/xfrm6_policy.c 2020-03-27 00:51:46.730086276 +0000
24545 +++ linux-4.9.217-vs2.3.9.12/net/ipv6/xfrm6_policy.c 2018-10-20 04:58:15.000000000 +0000
24546 @@ -64,7 +64,8 @@ static int xfrm6_get_saddr(struct net *n
24547 return -EHOSTUNREACH;
24549 dev = ip6_dst_idev(dst)->dev;
24550 - ipv6_dev_get_saddr(dev_net(dev), dev, &daddr->in6, 0, &saddr->in6);
24551 + ipv6_dev_get_saddr(dev_net(dev), dev, &daddr->in6,
24552 + 0, &saddr->in6, NULL);
24556 diff -NurpP --minimal linux-4.9.217/net/netfilter/ipvs/ip_vs_xmit.c linux-4.9.217-vs2.3.9.12/net/netfilter/ipvs/ip_vs_xmit.c
24557 --- linux-4.9.217/net/netfilter/ipvs/ip_vs_xmit.c 2016-12-11 19:17:54.000000000 +0000
24558 +++ linux-4.9.217-vs2.3.9.12/net/netfilter/ipvs/ip_vs_xmit.c 2018-10-20 04:58:15.000000000 +0000
24559 @@ -381,7 +381,7 @@ __ip_vs_route_output_v6(struct net *net,
24561 if (ipv6_addr_any(&fl6.saddr) &&
24562 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
24563 - &fl6.daddr, 0, &fl6.saddr) < 0)
24564 + &fl6.daddr, 0, &fl6.saddr, NULL) < 0)
24567 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
24568 diff -NurpP --minimal linux-4.9.217/net/netlink/af_netlink.c linux-4.9.217-vs2.3.9.12/net/netlink/af_netlink.c
24569 --- linux-4.9.217/net/netlink/af_netlink.c 2020-03-27 00:51:47.580072880 +0000
24570 +++ linux-4.9.217-vs2.3.9.12/net/netlink/af_netlink.c 2020-04-01 09:40:33.515384269 +0000
24572 #include <linux/hash.h>
24573 #include <linux/genetlink.h>
24574 #include <linux/nospec.h>
24575 +#include <linux/vs_context.h>
24576 +#include <linux/vs_network.h>
24578 #include <net/net_namespace.h>
24579 #include <net/sock.h>
24580 @@ -2478,7 +2480,8 @@ static void *__netlink_seq_next(struct s
24582 return ERR_PTR(err);
24584 - } while (sock_net(&nlk->sk) != seq_file_net(seq));
24585 + } while ((sock_net(&nlk->sk) != seq_file_net(seq)) ||
24586 + !nx_check(nlk->sk.sk_nid, VS_WATCH_P | VS_IDENT));
24590 diff -NurpP --minimal linux-4.9.217/net/packet/diag.c linux-4.9.217-vs2.3.9.12/net/packet/diag.c
24591 --- linux-4.9.217/net/packet/diag.c 2016-12-11 19:17:54.000000000 +0000
24592 +++ linux-4.9.217-vs2.3.9.12/net/packet/diag.c 2018-10-20 06:31:18.000000000 +0000
24594 #include <linux/netdevice.h>
24595 #include <linux/packet_diag.h>
24596 #include <linux/percpu.h>
24597 +#include <linux/vs_network.h>
24598 #include <net/net_namespace.h>
24599 #include <net/sock.h>
24601 @@ -201,6 +202,8 @@ static int packet_diag_dump(struct sk_bu
24602 sk_for_each(sk, &net->packet.sklist) {
24603 if (!net_eq(sock_net(sk), net))
24605 + if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
24610 diff -NurpP --minimal linux-4.9.217/net/socket.c linux-4.9.217-vs2.3.9.12/net/socket.c
24611 --- linux-4.9.217/net/socket.c 2020-03-27 00:51:49.750038678 +0000
24612 +++ linux-4.9.217-vs2.3.9.12/net/socket.c 2020-04-01 09:40:33.905377809 +0000
24613 @@ -100,10 +100,12 @@
24615 #include <net/sock.h>
24616 #include <linux/netfilter.h>
24617 +#include <linux/vs_socket.h>
24618 +#include <linux/vs_inet.h>
24619 +#include <linux/vs_inet6.h>
24621 #include <linux/if_tun.h>
24622 #include <linux/ipv6_route.h>
24623 -#include <linux/route.h>
24624 #include <linux/sockios.h>
24625 #include <linux/atalk.h>
24626 #include <net/busy_poll.h>
24627 @@ -619,8 +621,24 @@ EXPORT_SYMBOL(__sock_tx_timestamp);
24629 static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
24631 - int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
24632 - BUG_ON(ret == -EIOCBQUEUED);
24633 + size_t size = msg_data_left(msg);
24634 + int ret = sock->ops->sendmsg(sock, msg, size);
24638 + vx_sock_fail(sock->sk, size);
24640 + vx_sock_send(sock->sk, size);
24643 + vxdprintk(VXD_CBIT(net, 7),
24644 + "sock_sendmsg_nosec: %p[%p,%p,%p;%d/%d]:%zu/%zu",
24646 + (sock->sk)?sock->sk->sk_nx_info:0,
24647 + (sock->sk)?sock->sk->sk_vx_info:0,
24648 + (sock->sk)?sock->sk->sk_xid:0,
24649 + (sock->sk)?sock->sk->sk_nid:0,
24650 + size, msg_data_left(msg));
24654 @@ -1110,6 +1128,13 @@ int __sock_create(struct net *net, int f
24655 if (type < 0 || type >= SOCK_MAX)
24658 + if (!nx_check(0, VS_ADMIN)) {
24659 + if (family == PF_INET && !current_nx_info_has_v4())
24660 + return -EAFNOSUPPORT;
24661 + if (family == PF_INET6 && !current_nx_info_has_v6())
24662 + return -EAFNOSUPPORT;
24667 This uglymoron is moved from INET layer to here to avoid
24668 @@ -1240,6 +1265,7 @@ SYSCALL_DEFINE3(socket, int, family, int
24672 + set_bit(SOCK_USER_SOCKET, &sock->flags);
24673 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
24676 @@ -1281,10 +1307,12 @@ SYSCALL_DEFINE4(socketpair, int, family,
24677 err = sock_create(family, type, protocol, &sock1);
24680 + set_bit(SOCK_USER_SOCKET, &sock1->flags);
24682 err = sock_create(family, type, protocol, &sock2);
24684 goto out_release_1;
24685 + set_bit(SOCK_USER_SOCKET, &sock2->flags);
24687 err = sock1->ops->socketpair(sock1, sock2);
24689 diff -NurpP --minimal linux-4.9.217/net/sunrpc/auth.c linux-4.9.217-vs2.3.9.12/net/sunrpc/auth.c
24690 --- linux-4.9.217/net/sunrpc/auth.c 2016-12-11 19:17:54.000000000 +0000
24691 +++ linux-4.9.217-vs2.3.9.12/net/sunrpc/auth.c 2018-10-20 04:58:15.000000000 +0000
24693 #include <linux/sunrpc/clnt.h>
24694 #include <linux/sunrpc/gss_api.h>
24695 #include <linux/spinlock.h>
24696 +#include <linux/vs_tag.h>
24698 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
24699 # define RPCDBG_FACILITY RPCDBG_AUTH
24700 @@ -630,6 +631,7 @@ rpcauth_lookupcred(struct rpc_auth *auth
24701 memset(&acred, 0, sizeof(acred));
24702 acred.uid = cred->fsuid;
24703 acred.gid = cred->fsgid;
24704 + acred.tag = make_ktag(&init_user_ns, dx_current_tag());
24705 acred.group_info = cred->group_info;
24706 ret = auth->au_ops->lookup_cred(auth, &acred, flags);
24708 @@ -669,6 +671,7 @@ rpcauth_bind_root_cred(struct rpc_task *
24709 struct auth_cred acred = {
24710 .uid = GLOBAL_ROOT_UID,
24711 .gid = GLOBAL_ROOT_GID,
24712 + .tag = KTAGT_INIT(dx_current_tag()),
24715 dprintk("RPC: %5u looking up %s cred\n",
24716 diff -NurpP --minimal linux-4.9.217/net/sunrpc/auth_unix.c linux-4.9.217-vs2.3.9.12/net/sunrpc/auth_unix.c
24717 --- linux-4.9.217/net/sunrpc/auth_unix.c 2016-12-11 19:17:54.000000000 +0000
24718 +++ linux-4.9.217-vs2.3.9.12/net/sunrpc/auth_unix.c 2018-10-20 04:58:15.000000000 +0000
24719 @@ -13,11 +13,13 @@
24720 #include <linux/sunrpc/clnt.h>
24721 #include <linux/sunrpc/auth.h>
24722 #include <linux/user_namespace.h>
24723 +#include <linux/vs_tag.h>
24725 #define NFS_NGROUPS 16
24728 struct rpc_cred uc_base;
24731 kgid_t uc_gids[NFS_NGROUPS];
24733 @@ -86,6 +88,7 @@ unx_create_cred(struct rpc_auth *auth, s
24734 groups = NFS_NGROUPS;
24736 cred->uc_gid = acred->gid;
24737 + cred->uc_tag = acred->tag;
24738 for (i = 0; i < groups; i++)
24739 cred->uc_gids[i] = acred->group_info->gid[i];
24740 if (i < NFS_NGROUPS)
24741 @@ -127,7 +130,9 @@ unx_match(struct auth_cred *acred, struc
24745 - if (!uid_eq(cred->uc_uid, acred->uid) || !gid_eq(cred->uc_gid, acred->gid))
24746 + if (!uid_eq(cred->uc_uid, acred->uid) ||
24747 + !gid_eq(cred->uc_gid, acred->gid) ||
24748 + !tag_eq(cred->uc_tag, acred->tag))
24751 if (acred->group_info != NULL)
24752 @@ -152,7 +157,7 @@ unx_marshal(struct rpc_task *task, __be3
24753 struct rpc_clnt *clnt = task->tk_client;
24754 struct unx_cred *cred = container_of(task->tk_rqstp->rq_cred, struct unx_cred, uc_base);
24755 __be32 *base, *hold;
24759 *p++ = htonl(RPC_AUTH_UNIX);
24761 @@ -163,8 +168,11 @@ unx_marshal(struct rpc_task *task, __be3
24763 p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen);
24765 - *p++ = htonl((u32) from_kuid(&init_user_ns, cred->uc_uid));
24766 - *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gid));
24767 + tag = task->tk_client->cl_tag;
24768 + *p++ = htonl((u32) from_kuid(&init_user_ns,
24769 + TAGINO_KUID(tag, cred->uc_uid, cred->uc_tag)));
24770 + *p++ = htonl((u32) from_kgid(&init_user_ns,
24771 + TAGINO_KGID(tag, cred->uc_gid, cred->uc_tag)));
24773 for (i = 0; i < 16 && gid_valid(cred->uc_gids[i]); i++)
24774 *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gids[i]));
24775 diff -NurpP --minimal linux-4.9.217/net/sunrpc/clnt.c linux-4.9.217-vs2.3.9.12/net/sunrpc/clnt.c
24776 --- linux-4.9.217/net/sunrpc/clnt.c 2020-03-27 00:51:49.980035052 +0000
24777 +++ linux-4.9.217-vs2.3.9.12/net/sunrpc/clnt.c 2019-10-05 14:58:47.230282628 +0000
24779 #include <linux/in.h>
24780 #include <linux/in6.h>
24781 #include <linux/un.h>
24782 +#include <linux/vs_cvirt.h>
24784 #include <linux/sunrpc/clnt.h>
24785 #include <linux/sunrpc/addr.h>
24786 @@ -496,6 +497,9 @@ static struct rpc_clnt *rpc_create_xprt(
24787 if (!(args->flags & RPC_CLNT_CREATE_QUIET))
24788 clnt->cl_chatty = 1;
24790 + /* TODO: handle RPC_CLNT_CREATE_TAGGED
24791 + if (args->flags & RPC_CLNT_CREATE_TAGGED)
24792 + clnt->cl_tag = 1; */
24796 diff -NurpP --minimal linux-4.9.217/net/unix/af_unix.c linux-4.9.217-vs2.3.9.12/net/unix/af_unix.c
24797 --- linux-4.9.217/net/unix/af_unix.c 2020-03-27 00:51:51.400012675 +0000
24798 +++ linux-4.9.217-vs2.3.9.12/net/unix/af_unix.c 2019-12-25 15:37:53.138410818 +0000
24799 @@ -117,6 +117,8 @@
24800 #include <net/checksum.h>
24801 #include <linux/security.h>
24802 #include <linux/freezer.h>
24803 +#include <linux/vs_context.h>
24804 +#include <linux/vs_limit.h>
24806 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
24807 EXPORT_SYMBOL_GPL(unix_socket_table);
24808 @@ -284,6 +286,8 @@ static struct sock *__unix_find_socket_b
24809 if (!net_eq(sock_net(s), net))
24812 + if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT))
24814 if (u->addr->len == len &&
24815 !memcmp(u->addr->name, sunname, len))
24817 @@ -2744,6 +2748,8 @@ static struct sock *unix_from_bucket(str
24818 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
24819 if (sock_net(sk) != seq_file_net(seq))
24821 + if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
24823 if (++count == offset)
24826 @@ -2761,6 +2767,8 @@ static struct sock *unix_next_socket(str
24830 + if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
24832 if (sock_net(sk) == seq_file_net(seq))
24835 diff -NurpP --minimal linux-4.9.217/net/unix/diag.c linux-4.9.217-vs2.3.9.12/net/unix/diag.c
24836 --- linux-4.9.217/net/unix/diag.c 2020-03-27 00:51:51.400012675 +0000
24837 +++ linux-4.9.217-vs2.3.9.12/net/unix/diag.c 2019-10-05 14:58:47.230282628 +0000
24839 #include <linux/unix_diag.h>
24840 #include <linux/skbuff.h>
24841 #include <linux/module.h>
24842 +#include <linux/vs_network.h>
24843 #include <net/netlink.h>
24844 #include <net/af_unix.h>
24845 #include <net/tcp_states.h>
24846 @@ -200,6 +201,8 @@ static int unix_diag_dump(struct sk_buff
24847 sk_for_each(sk, &unix_socket_table[slot]) {
24848 if (!net_eq(sock_net(sk), net))
24850 + if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
24854 if (!(req->udiag_states & (1 << sk->sk_state)))
24855 diff -NurpP --minimal linux-4.9.217/scripts/checksyscalls.sh linux-4.9.217-vs2.3.9.12/scripts/checksyscalls.sh
24856 --- linux-4.9.217/scripts/checksyscalls.sh 2016-12-11 19:17:54.000000000 +0000
24857 +++ linux-4.9.217-vs2.3.9.12/scripts/checksyscalls.sh 2018-10-20 04:58:15.000000000 +0000
24858 @@ -196,7 +196,6 @@ cat << EOF
24859 #define __IGNORE_afs_syscall
24860 #define __IGNORE_getpmsg
24861 #define __IGNORE_putpmsg
24862 -#define __IGNORE_vserver
24866 diff -NurpP --minimal linux-4.9.217/security/commoncap.c linux-4.9.217-vs2.3.9.12/security/commoncap.c
24867 --- linux-4.9.217/security/commoncap.c 2016-12-11 19:17:54.000000000 +0000
24868 +++ linux-4.9.217-vs2.3.9.12/security/commoncap.c 2018-10-20 04:58:15.000000000 +0000
24869 @@ -71,6 +71,7 @@ static void warn_setuid_and_fcaps_mixed(
24870 int cap_capable(const struct cred *cred, struct user_namespace *targ_ns,
24871 int cap, int audit)
24873 + struct vx_info *vxi = current_vx_info(); /* FIXME: get vxi from cred? */
24874 struct user_namespace *ns = targ_ns;
24876 /* See if cred has the capability in the target user namespace
24877 @@ -79,8 +80,12 @@ int cap_capable(const struct cred *cred,
24880 /* Do we have the necessary capabilities? */
24881 - if (ns == cred->user_ns)
24882 - return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
24883 + if (ns == cred->user_ns) {
24884 + if (vx_info_flags(vxi, VXF_STATE_SETUP, 0) &&
24885 + cap_raised(cred->cap_effective, cap))
24887 + return vx_cap_raised(vxi, cred->cap_effective, cap) ? 0 : -EPERM;
24890 /* Have we tried all of the parent namespaces? */
24891 if (ns == &init_user_ns)
24892 @@ -667,7 +672,7 @@ int cap_inode_setxattr(struct dentry *de
24894 if (!strncmp(name, XATTR_SECURITY_PREFIX,
24895 sizeof(XATTR_SECURITY_PREFIX) - 1) &&
24896 - !capable(CAP_SYS_ADMIN))
24897 + !vx_capable(CAP_SYS_ADMIN, VXC_FS_SECURITY))
24901 @@ -693,7 +698,7 @@ int cap_inode_removexattr(struct dentry
24903 if (!strncmp(name, XATTR_SECURITY_PREFIX,
24904 sizeof(XATTR_SECURITY_PREFIX) - 1) &&
24905 - !capable(CAP_SYS_ADMIN))
24906 + !vx_capable(CAP_SYS_ADMIN, VXC_FS_SECURITY))
24910 diff -NurpP --minimal linux-4.9.217/security/selinux/hooks.c linux-4.9.217-vs2.3.9.12/security/selinux/hooks.c
24911 --- linux-4.9.217/security/selinux/hooks.c 2020-03-27 00:51:56.679929820 +0000
24912 +++ linux-4.9.217-vs2.3.9.12/security/selinux/hooks.c 2019-10-05 14:58:47.310281348 +0000
24914 #include <linux/dccp.h>
24915 #include <linux/quota.h>
24916 #include <linux/un.h> /* for Unix socket types */
24917 -#include <net/af_unix.h> /* for Unix socket types */
24918 #include <linux/parser.h>
24919 #include <linux/nfs_mount.h>
24920 #include <net/ipv6.h>