diff -NurpP --minimal linux-2.6.2-rc1/arch/alpha/kernel/ptrace.c linux-2.6.2-rc1-vs0.05.1/arch/alpha/kernel/ptrace.c --- linux-2.6.2-rc1/arch/alpha/kernel/ptrace.c Fri Jan 9 08:00:02 2004 +++ linux-2.6.2-rc1-vs0.05.1/arch/alpha/kernel/ptrace.c Sat Jan 24 06:45:48 2004 @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -285,7 +286,7 @@ do_sys_ptrace(long request, long pid, lo if (child) get_task_struct(child); read_unlock(&tasklist_lock); - if (!child) + if (!child || !vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) goto out_notsk; if (request == PTRACE_ATTACH) { diff -NurpP --minimal linux-2.6.2-rc1/arch/alpha/kernel/systbls.S linux-2.6.2-rc1-vs0.05.1/arch/alpha/kernel/systbls.S --- linux-2.6.2-rc1/arch/alpha/kernel/systbls.S Fri Jan 9 07:59:45 2004 +++ linux-2.6.2-rc1-vs0.05.1/arch/alpha/kernel/systbls.S Sat Jan 24 06:45:48 2004 @@ -291,7 +291,7 @@ sys_call_table: .quad alpha_ni_syscall /* 270 */ .quad alpha_ni_syscall .quad alpha_ni_syscall - .quad alpha_ni_syscall + .quad sys_vserver /* 273 sys_vserver */ .quad alpha_ni_syscall .quad alpha_ni_syscall /* 275 */ .quad alpha_ni_syscall diff -NurpP --minimal linux-2.6.2-rc1/arch/i386/kernel/entry.S linux-2.6.2-rc1-vs0.05.1/arch/i386/kernel/entry.S --- linux-2.6.2-rc1/arch/i386/kernel/entry.S Fri Jan 9 07:59:19 2004 +++ linux-2.6.2-rc1-vs0.05.1/arch/i386/kernel/entry.S Sat Jan 24 06:45:48 2004 @@ -881,6 +881,6 @@ ENTRY(sys_call_table) .long sys_tgkill /* 270 */ .long sys_utimes .long sys_fadvise64_64 - .long sys_ni_syscall /* sys_vserver */ + .long sys_vserver syscall_table_size=(.-sys_call_table) diff -NurpP --minimal linux-2.6.2-rc1/arch/i386/kernel/ptrace.c linux-2.6.2-rc1-vs0.05.1/arch/i386/kernel/ptrace.c --- linux-2.6.2-rc1/arch/i386/kernel/ptrace.c Fri Jan 9 07:59:19 2004 +++ linux-2.6.2-rc1-vs0.05.1/arch/i386/kernel/ptrace.c Sat Jan 24 06:45:48 2004 @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -255,7 +256,7 @@ asmlinkage int sys_ptrace(long request, if 
(child) get_task_struct(child); read_unlock(&tasklist_lock); - if (!child) + if (!child || !vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) goto out; ret = -EPERM; diff -NurpP --minimal linux-2.6.2-rc1/arch/ia64/kernel/ptrace.c linux-2.6.2-rc1-vs0.05.1/arch/ia64/kernel/ptrace.c --- linux-2.6.2-rc1/arch/ia64/kernel/ptrace.c Fri Jan 9 08:00:12 2004 +++ linux-2.6.2-rc1-vs0.05.1/arch/ia64/kernel/ptrace.c Sat Jan 24 06:45:48 2004 @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -1282,7 +1283,7 @@ sys_ptrace (long request, pid_t pid, uns } } read_unlock(&tasklist_lock); - if (!child) + if (!child || !vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) goto out; ret = -EPERM; if (pid == 1) /* no messing around with init! */ diff -NurpP --minimal linux-2.6.2-rc1/arch/m68k/kernel/ptrace.c linux-2.6.2-rc1-vs0.05.1/arch/m68k/kernel/ptrace.c --- linux-2.6.2-rc1/arch/m68k/kernel/ptrace.c Fri Jan 9 07:59:19 2004 +++ linux-2.6.2-rc1-vs0.05.1/arch/m68k/kernel/ptrace.c Sat Jan 24 06:45:48 2004 @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -124,7 +125,7 @@ asmlinkage int sys_ptrace(long request, if (child) get_task_struct(child); read_unlock(&tasklist_lock); - if (!child) + if (!child || !vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) goto out; ret = -EPERM; diff -NurpP --minimal linux-2.6.2-rc1/arch/mips/kernel/ptrace.c linux-2.6.2-rc1-vs0.05.1/arch/mips/kernel/ptrace.c --- linux-2.6.2-rc1/arch/mips/kernel/ptrace.c Fri Jan 9 08:00:13 2004 +++ linux-2.6.2-rc1-vs0.05.1/arch/mips/kernel/ptrace.c Sat Jan 24 06:45:48 2004 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -74,7 +75,7 @@ asmlinkage int sys_ptrace(long request, if (child) get_task_struct(child); read_unlock(&tasklist_lock); - if (!child) + if (!child || !vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) goto out; ret = -EPERM; diff -NurpP --minimal linux-2.6.2-rc1/arch/parisc/kernel/ptrace.c 
linux-2.6.2-rc1-vs0.05.1/arch/parisc/kernel/ptrace.c --- linux-2.6.2-rc1/arch/parisc/kernel/ptrace.c Fri Jan 9 07:59:09 2004 +++ linux-2.6.2-rc1-vs0.05.1/arch/parisc/kernel/ptrace.c Sat Jan 24 06:45:48 2004 @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -109,7 +110,7 @@ long sys_ptrace(long request, pid_t pid, if (child) get_task_struct(child); read_unlock(&tasklist_lock); - if (!child) + if (!child || !vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) goto out; ret = -EPERM; if (pid == 1) /* no messing around with init! */ diff -NurpP --minimal linux-2.6.2-rc1/arch/ppc/kernel/misc.S linux-2.6.2-rc1-vs0.05.1/arch/ppc/kernel/misc.S --- linux-2.6.2-rc1/arch/ppc/kernel/misc.S Sat Jan 24 03:18:04 2004 +++ linux-2.6.2-rc1-vs0.05.1/arch/ppc/kernel/misc.S Sat Jan 24 06:45:48 2004 @@ -1386,3 +1386,22 @@ _GLOBAL(sys_call_table) .long sys_fstatfs64 .long ppc_fadvise64_64 .long sys_ni_syscall /* 255 - rtas (used on ppc64) */ + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall /* 260 */ + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall /* 265 */ + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall /* 270 */ + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_vserver /* 273 sys_vserver */ + diff -NurpP --minimal linux-2.6.2-rc1/arch/ppc/kernel/ptrace.c linux-2.6.2-rc1-vs0.05.1/arch/ppc/kernel/ptrace.c --- linux-2.6.2-rc1/arch/ppc/kernel/ptrace.c Fri Jan 9 07:59:19 2004 +++ linux-2.6.2-rc1-vs0.05.1/arch/ppc/kernel/ptrace.c Sat Jan 24 06:45:48 2004 @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -195,7 +196,7 @@ int sys_ptrace(long request, long pid, l if (child) get_task_struct(child); read_unlock(&tasklist_lock); - if (!child) + if (!child || !vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) goto out; ret = -EPERM; diff -NurpP 
--minimal linux-2.6.2-rc1/arch/ppc64/kernel/misc.S linux-2.6.2-rc1-vs0.05.1/arch/ppc64/kernel/misc.S --- linux-2.6.2-rc1/arch/ppc64/kernel/misc.S Sat Jan 24 03:18:04 2004 +++ linux-2.6.2-rc1-vs0.05.1/arch/ppc64/kernel/misc.S Sat Jan 24 06:45:48 2004 @@ -819,6 +819,24 @@ _GLOBAL(sys_call_table32) .llong .compat_fstatfs64 .llong .ppc32_fadvise64_64 /* 32bit only fadvise64_64 */ .llong .ppc_rtas /* 255 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 260 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 265 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 270 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_vserver /* 273 sys_vserver */ .balign 8 _GLOBAL(sys_call_table) @@ -1078,3 +1096,22 @@ _GLOBAL(sys_call_table) .llong .sys_fstatfs64 .llong .sys_ni_syscall /* 32bit only fadvise64_64 */ .llong .ppc_rtas /* 255 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 260 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 265 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 270 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_vserver /* 273 sys_vserver */ + diff -NurpP --minimal linux-2.6.2-rc1/arch/ppc64/kernel/ptrace.c linux-2.6.2-rc1-vs0.05.1/arch/ppc64/kernel/ptrace.c --- linux-2.6.2-rc1/arch/ppc64/kernel/ptrace.c Fri Jan 9 07:59:56 2004 +++ linux-2.6.2-rc1-vs0.05.1/arch/ppc64/kernel/ptrace.c Sat Jan 24 06:45:48 2004 @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -73,7 +74,7 @@ int sys_ptrace(long request, long pid, l if 
(child) get_task_struct(child); read_unlock(&tasklist_lock); - if (!child) + if (!child || !vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) goto out; ret = -EPERM; diff -NurpP --minimal linux-2.6.2-rc1/arch/s390/kernel/ptrace.c linux-2.6.2-rc1-vs0.05.1/arch/s390/kernel/ptrace.c --- linux-2.6.2-rc1/arch/s390/kernel/ptrace.c Sat Jan 24 03:18:05 2004 +++ linux-2.6.2-rc1-vs0.05.1/arch/s390/kernel/ptrace.c Sat Jan 24 06:45:48 2004 @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -647,7 +648,7 @@ sys_ptrace(long request, long pid, long if (child) get_task_struct(child); read_unlock(&tasklist_lock); - if (!child) + if (!child || !vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) goto out; ret = do_ptrace(child, request, addr, data); diff -NurpP --minimal linux-2.6.2-rc1/arch/s390/kernel/syscalls.S linux-2.6.2-rc1-vs0.05.1/arch/s390/kernel/syscalls.S --- linux-2.6.2-rc1/arch/s390/kernel/syscalls.S Sat Jan 24 03:18:05 2004 +++ linux-2.6.2-rc1-vs0.05.1/arch/s390/kernel/syscalls.S Sat Jan 24 06:45:48 2004 @@ -271,5 +271,5 @@ SYSCALL(sys_clock_settime,sys_clock_sett SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper) /* 260 */ SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper) SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,sys32_clock_nanosleep_wrapper) -NI_SYSCALL /* reserved for vserver */ +SYSCALL(sys_vserver,sys_vserver,sys_vserver) SYSCALL(s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper) diff -NurpP --minimal linux-2.6.2-rc1/arch/sparc/kernel/systbls.S linux-2.6.2-rc1-vs0.05.1/arch/sparc/kernel/systbls.S --- linux-2.6.2-rc1/arch/sparc/kernel/systbls.S Fri Jan 9 07:59:34 2004 +++ linux-2.6.2-rc1-vs0.05.1/arch/sparc/kernel/systbls.S Sat Jan 24 06:45:48 2004 @@ -72,7 +72,7 @@ sys_call_table: /*250*/ .long sparc_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl /*255*/ .long sys_nis_syscall, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep /*260*/ .long 
sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun -/*265*/ .long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy +/*265*/ .long sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy /*270*/ .long sys_io_submit, sys_io_cancel, sys_io_getevents, sys_nis_syscall #ifdef CONFIG_SUNOS_EMUL diff -NurpP --minimal linux-2.6.2-rc1/arch/sparc64/kernel/ptrace.c linux-2.6.2-rc1-vs0.05.1/arch/sparc64/kernel/ptrace.c --- linux-2.6.2-rc1/arch/sparc64/kernel/ptrace.c Fri Jan 9 08:00:05 2004 +++ linux-2.6.2-rc1-vs0.05.1/arch/sparc64/kernel/ptrace.c Sat Jan 24 06:45:48 2004 @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -164,7 +165,7 @@ asmlinkage void do_ptrace(struct pt_regs get_task_struct(child); read_unlock(&tasklist_lock); - if (!child) { + if (!child || !vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) { pt_error_return(regs, ESRCH); goto out; } diff -NurpP --minimal linux-2.6.2-rc1/arch/sparc64/kernel/systbls.S linux-2.6.2-rc1-vs0.05.1/arch/sparc64/kernel/systbls.S --- linux-2.6.2-rc1/arch/sparc64/kernel/systbls.S Fri Jan 9 07:59:26 2004 +++ linux-2.6.2-rc1-vs0.05.1/arch/sparc64/kernel/systbls.S Sat Jan 24 06:45:48 2004 @@ -73,7 +73,7 @@ sys_call_table32: .word sys_ni_syscall, compat_clock_settime, compat_clock_gettime, compat_clock_getres, compat_clock_nanosleep /*260*/ .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, compat_timer_settime, compat_timer_gettime, sys_timer_getoverrun .word sys_timer_delete, sys32_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy -/*270*/ .word compat_sys_io_submit, sys_io_cancel, compat_sys_io_getevents, sys_ni_syscall +/*270*/ .word compat_sys_io_submit, sys_io_cancel, compat_sys_io_getevents, sys_vserver /* Now the 64-bit native Linux syscall table. 
*/ @@ -135,7 +135,7 @@ sys_call_table: .word sys_ni_syscall, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep /*260*/ .word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun .word sys_timer_delete, sys_timer_create, sys_ni_syscall, sys_io_setup, sys_io_destroy -/*270*/ .word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_ni_syscall +/*270*/ .word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_vserver #if defined(CONFIG_SUNOS_EMUL) || defined(CONFIG_SOLARIS_EMUL) || \ defined(CONFIG_SOLARIS_EMUL_MODULE) diff -NurpP --minimal linux-2.6.2-rc1/arch/x86_64/ia32/ia32entry.S linux-2.6.2-rc1-vs0.05.1/arch/x86_64/ia32/ia32entry.S --- linux-2.6.2-rc1/arch/x86_64/ia32/ia32entry.S Fri Jan 9 07:59:27 2004 +++ linux-2.6.2-rc1-vs0.05.1/arch/x86_64/ia32/ia32entry.S Sat Jan 24 06:45:48 2004 @@ -448,34 +448,35 @@ ia32_sys_call_table: .quad compat_sys_sched_getaffinity .quad sys32_set_thread_area .quad sys32_get_thread_area - .quad sys32_io_setup + .quad sys32_io_setup /* 245 */ .quad sys_io_destroy .quad sys32_io_getevents .quad sys32_io_submit .quad sys_io_cancel - .quad sys_fadvise64 + .quad sys_fadvise64 /* 250 */ .quad sys_ni_syscall /* free_huge_pages */ .quad sys_exit_group /* exit_group */ .quad sys_lookup_dcookie .quad sys_epoll_create - .quad sys_epoll_ctl + .quad sys_epoll_ctl /* 255 */ .quad sys_epoll_wait .quad sys_remap_file_pages .quad sys_set_tid_address .quad sys32_timer_create - .quad compat_timer_settime + .quad compat_timer_settime /* 260 */ .quad compat_timer_gettime .quad sys_timer_getoverrun .quad sys_timer_delete .quad compat_clock_settime - .quad compat_clock_gettime + .quad compat_clock_gettime /* 265 */ .quad compat_clock_getres .quad compat_clock_nanosleep .quad compat_statfs64 /* statfs64 */ .quad compat_fstatfs64 /* fstatfs64 */ - .quad sys_tgkill + .quad sys_tgkill /* 270 */ .quad compat_sys_utimes .quad sys32_fadvise64_64 + .quad sys_vserver /* 273 sys_vserver 
*/ /* don't forget to change IA32_NR_syscalls */ ia32_syscall_end: .rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8 diff -NurpP --minimal linux-2.6.2-rc1/arch/x86_64/kernel/ptrace.c linux-2.6.2-rc1-vs0.05.1/arch/x86_64/kernel/ptrace.c --- linux-2.6.2-rc1/arch/x86_64/kernel/ptrace.c Fri Jan 9 07:59:19 2004 +++ linux-2.6.2-rc1-vs0.05.1/arch/x86_64/kernel/ptrace.c Sat Jan 24 06:45:48 2004 @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -205,7 +206,7 @@ asmlinkage long sys_ptrace(long request, if (child) get_task_struct(child); read_unlock(&tasklist_lock); - if (!child) + if (!child || !vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) goto out; ret = -EPERM; diff -NurpP --minimal linux-2.6.2-rc1/fs/ext2/ialloc.c linux-2.6.2-rc1-vs0.05.1/fs/ext2/ialloc.c --- linux-2.6.2-rc1/fs/ext2/ialloc.c Sat Jan 24 03:18:15 2004 +++ linux-2.6.2-rc1-vs0.05.1/fs/ext2/ialloc.c Sat Jan 24 06:14:24 2004 @@ -581,7 +581,7 @@ got: memset(ei->i_data, 0, sizeof(ei->i_data)); ei->i_flags = EXT2_I(dir)->i_flags & ~EXT2_BTREE_FL; if (S_ISLNK(mode)) - ei->i_flags &= ~(EXT2_IMMUTABLE_FL|EXT2_APPEND_FL); + ei->i_flags &= ~(EXT2_IMMUTABLE_FL|EXT2_IUNLINK_FL|EXT2_APPEND_FL); /* dirsync is only applied to directories */ if (!S_ISDIR(mode)) ei->i_flags &= ~EXT2_DIRSYNC_FL; diff -NurpP --minimal linux-2.6.2-rc1/fs/ext2/inode.c linux-2.6.2-rc1-vs0.05.1/fs/ext2/inode.c --- linux-2.6.2-rc1/fs/ext2/inode.c Sat Jan 24 03:18:15 2004 +++ linux-2.6.2-rc1-vs0.05.1/fs/ext2/inode.c Sat Jan 24 06:28:27 2004 @@ -64,6 +64,8 @@ void ext2_put_inode(struct inode *inode) ext2_discard_prealloc(inode); } +static void ext2_truncate_nocheck (struct inode * inode); + /* * Called at the last iput() if i_nlink is zero. 
*/ @@ -77,7 +79,7 @@ void ext2_delete_inode (struct inode * i inode->i_size = 0; if (inode->i_blocks) - ext2_truncate (inode); + ext2_truncate_nocheck(inode); ext2_free_inode (inode); return; @@ -876,7 +878,7 @@ static void ext2_free_branches(struct in ext2_free_data(inode, p, q); } -void ext2_truncate (struct inode * inode) +static void ext2_truncate_nocheck(struct inode * inode) { u32 *i_data = EXT2_I(inode)->i_data; int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); @@ -893,8 +895,6 @@ void ext2_truncate (struct inode * inode return; if (ext2_inode_is_fast_symlink(inode)) return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; ext2_discard_prealloc(inode); @@ -1016,17 +1016,26 @@ Egdp: return ERR_PTR(-EIO); } +void ext2_truncate (struct inode * inode) +{ + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return; + ext2_truncate_nocheck(inode); +} + void ext2_set_inode_flags(struct inode *inode) { unsigned int flags = EXT2_I(inode)->i_flags; - inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); + inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_IUNLINK|S_NOATIME|S_DIRSYNC); if (flags & EXT2_SYNC_FL) inode->i_flags |= S_SYNC; if (flags & EXT2_APPEND_FL) inode->i_flags |= S_APPEND; if (flags & EXT2_IMMUTABLE_FL) inode->i_flags |= S_IMMUTABLE; + if (flags & EXT2_IUNLINK_FL) + inode->i_flags |= S_IUNLINK; if (flags & EXT2_NOATIME_FL) inode->i_flags |= S_NOATIME; if (flags & EXT2_DIRSYNC_FL) diff -NurpP --minimal linux-2.6.2-rc1/fs/ext3/ialloc.c linux-2.6.2-rc1-vs0.05.1/fs/ext3/ialloc.c --- linux-2.6.2-rc1/fs/ext3/ialloc.c Sat Jan 24 03:18:15 2004 +++ linux-2.6.2-rc1-vs0.05.1/fs/ext3/ialloc.c Sat Jan 24 06:14:24 2004 @@ -569,7 +569,7 @@ got: ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL; if (S_ISLNK(mode)) - ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); + ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_IUNLINK_FL|EXT3_APPEND_FL); /* dirsync only applies to directories */ if (!S_ISDIR(mode)) ei->i_flags &= ~EXT3_DIRSYNC_FL; diff -NurpP 
--minimal linux-2.6.2-rc1/fs/ext3/inode.c linux-2.6.2-rc1-vs0.05.1/fs/ext3/inode.c --- linux-2.6.2-rc1/fs/ext3/inode.c Sat Jan 24 03:18:15 2004 +++ linux-2.6.2-rc1-vs0.05.1/fs/ext3/inode.c Sat Jan 24 22:44:56 2004 @@ -189,6 +189,8 @@ void ext3_put_inode(struct inode *inode) ext3_discard_prealloc(inode); } +static void ext3_truncate_nocheck (struct inode *inode); + /* * Called at the last iput() if i_nlink is zero. */ @@ -214,7 +216,7 @@ void ext3_delete_inode (struct inode * i handle->h_sync = 1; inode->i_size = 0; if (inode->i_blocks) - ext3_truncate(inode); + ext3_truncate_nocheck(inode); /* * Kill off the orphan record which ext3_truncate created. * AKPM: I think this can be inside the above `if'. @@ -2114,7 +2116,7 @@ static void ext3_free_branches(handle_t * ext3_truncate() run will find them and release them. */ -void ext3_truncate(struct inode * inode) +void ext3_truncate_nocheck(struct inode * inode) { handle_t *handle; struct ext3_inode_info *ei = EXT3_I(inode); @@ -2135,8 +2137,6 @@ void ext3_truncate(struct inode * inode) return; if (ext3_inode_is_fast_symlink(inode)) return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; ext3_discard_prealloc(inode); @@ -2443,17 +2443,26 @@ has_buffer: return 0; } +void ext3_truncate(struct inode * inode) +{ + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return; + ext3_truncate_nocheck(inode); +} + void ext3_set_inode_flags(struct inode *inode) { unsigned int flags = EXT3_I(inode)->i_flags; - inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); + inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_IUNLINK|S_NOATIME|S_DIRSYNC); if (flags & EXT3_SYNC_FL) inode->i_flags |= S_SYNC; if (flags & EXT3_APPEND_FL) inode->i_flags |= S_APPEND; if (flags & EXT3_IMMUTABLE_FL) inode->i_flags |= S_IMMUTABLE; + if (flags & EXT3_IUNLINK_FL) + inode->i_flags |= S_IUNLINK; if (flags & EXT3_NOATIME_FL) inode->i_flags |= S_NOATIME; if (flags & EXT3_DIRSYNC_FL) diff -NurpP --minimal linux-2.6.2-rc1/fs/inode.c 
linux-2.6.2-rc1-vs0.05.1/fs/inode.c --- linux-2.6.2-rc1/fs/inode.c Fri Jan 9 08:00:12 2004 +++ linux-2.6.2-rc1-vs0.05.1/fs/inode.c Sat Jan 24 06:23:57 2004 @@ -131,6 +131,7 @@ static struct inode *alloc_inode(struct inode->i_bdev = NULL; inode->i_cdev = NULL; inode->i_rdev = 0; + // inode->i_xid = 0; /* maybe not too wise ... */ inode->i_security = NULL; if (security_inode_alloc(inode)) { if (inode->i_sb->s_op->destroy_inode) diff -NurpP --minimal linux-2.6.2-rc1/fs/ioctl.c linux-2.6.2-rc1-vs0.05.1/fs/ioctl.c --- linux-2.6.2-rc1/fs/ioctl.c Sat Jan 24 03:18:15 2004 +++ linux-2.6.2-rc1-vs0.05.1/fs/ioctl.c Sat Jan 24 07:49:20 2004 @@ -9,10 +9,15 @@ #include #include #include +#include +#include #include #include +extern int vx_proc_ioctl(struct inode *, struct file *, + unsigned int, unsigned long); + static int file_ioctl(struct file *filp,unsigned int cmd,unsigned long arg) { int error; @@ -118,6 +123,12 @@ asmlinkage long sys_ioctl(unsigned int f } else error = -ENOTTY; + break; + case FIOC_GETXFLG: + case FIOC_SETXFLG: + error = -ENOTTY; + if (filp->f_dentry->d_inode->i_sb->s_magic == PROC_SUPER_MAGIC) + error = vx_proc_ioctl(filp->f_dentry->d_inode, filp, cmd, arg); break; default: error = -ENOTTY; diff -NurpP --minimal linux-2.6.2-rc1/fs/namei.c linux-2.6.2-rc1-vs0.05.1/fs/namei.c --- linux-2.6.2-rc1/fs/namei.c Fri Jan 9 07:59:26 2004 +++ linux-2.6.2-rc1-vs0.05.1/fs/namei.c Sat Jan 24 06:14:24 2004 @@ -1021,7 +1021,7 @@ static inline int may_delete(struct inod if (IS_APPEND(dir)) return -EPERM; if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| - IS_IMMUTABLE(victim->d_inode)) + IS_IXUNLINK(victim->d_inode)) return -EPERM; if (isdir) { if (!S_ISDIR(victim->d_inode->i_mode)) @@ -1816,7 +1816,7 @@ int vfs_link(struct dentry *old_dentry, /* * A link to an append-only or immutable file cannot be created. 
*/ - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + if (IS_APPEND(inode) || IS_IXUNLINK(inode)) return -EPERM; if (!dir->i_op || !dir->i_op->link) return -EPERM; diff -NurpP --minimal linux-2.6.2-rc1/fs/proc/Makefile linux-2.6.2-rc1-vs0.05.1/fs/proc/Makefile --- linux-2.6.2-rc1/fs/proc/Makefile Fri Jan 9 07:59:07 2004 +++ linux-2.6.2-rc1-vs0.05.1/fs/proc/Makefile Sat Jan 24 05:45:59 2004 @@ -8,7 +8,7 @@ proc-y := task_nommu.o proc-$(CONFIG_MMU) := task_mmu.o proc-y += inode.o root.o base.o generic.o array.o \ - kmsg.o proc_tty.o proc_misc.o + kmsg.o proc_tty.o proc_misc.o virtual.o proc-$(CONFIG_PROC_KCORE) += kcore.o proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o diff -NurpP --minimal linux-2.6.2-rc1/fs/proc/array.c linux-2.6.2-rc1-vs0.05.1/fs/proc/array.c --- linux-2.6.2-rc1/fs/proc/array.c Fri Jan 9 07:59:44 2004 +++ linux-2.6.2-rc1-vs0.05.1/fs/proc/array.c Sat Jan 24 07:01:35 2004 @@ -73,6 +73,7 @@ #include #include #include +#include #include #include @@ -150,8 +151,13 @@ static inline const char * get_task_stat static inline char * task_state(struct task_struct *p, char *buffer) { int g; - + pid_t ppid; read_lock(&tasklist_lock); + ppid = p->real_parent->pid; + if (ppid != 0 + && current->vx_info + && current->vx_info->vx_initpid == ppid) + ppid = 1; buffer += sprintf(buffer, "State:\t%s\n" "SleepAVG:\t%lu%%\n" @@ -164,7 +170,7 @@ static inline char * task_state(struct t get_task_state(p), (p->sleep_avg/1024)*100/(1000000000/1024), p->tgid, - p->pid, p->pid ? p->real_parent->pid : 0, + p->pid, p->pid ? ppid : 0, p->pid && p->ptrace ? 
p->parent->pid : 0, p->uid, p->euid, p->suid, p->fsuid, p->gid, p->egid, p->sgid, p->fsgid); @@ -263,16 +269,20 @@ static inline char *task_cap(struct task { return buffer + sprintf(buffer, "CapInh:\t%016x\n" "CapPrm:\t%016x\n" - "CapEff:\t%016x\n", + "CapEff:\t%016x\n" + "CapBset:\t%016x\n", cap_t(p->cap_inheritable), cap_t(p->cap_permitted), - cap_t(p->cap_effective)); + cap_t(p->cap_effective), + cap_t(p->cap_bset)); } extern char *task_mem(struct mm_struct *, char *); int proc_pid_status(struct task_struct *task, char * buffer) { char * orig = buffer; + struct vx_info *vxi; + struct ip_info *ipi; struct mm_struct *mm = get_task_mm(task); buffer = task_name(task, buffer); @@ -284,6 +294,39 @@ int proc_pid_status(struct task_struct * } buffer = task_sig(task, buffer); buffer = task_cap(task, buffer); + + buffer += sprintf (buffer,"s_context: %d\n", vx_task_xid(task)); + vxi = task_get_vx_info(task); + if (vxi) { + buffer += sprintf (buffer,"ctxflags: %d\n" + ,vxi->vx_flags); + buffer += sprintf (buffer,"initpid: %d\n" + ,vxi->vx_initpid); + } else { + buffer += sprintf (buffer,"ctxflags: none\n"); + buffer += sprintf (buffer,"initpid: none\n"); + } + put_vx_info(vxi); + ipi = task_get_ip_info(task); + if (ipi) { + int i; + + buffer += sprintf (buffer,"ipv4root:"); + for (i=0; inbipv4; i++){ + buffer += sprintf (buffer," %08x/%08x" + ,ipi->ipv4[i] + ,ipi->mask[i]); + } + *buffer++ = '\n'; + buffer += sprintf (buffer,"ipv4root_bcast: %08x\n" + ,ipi->v4_bcast); + buffer += sprintf (buffer,"ipv4root_refcnt: %d\n" + ,atomic_read(&ipi->ip_refcount)); + } else { + buffer += sprintf (buffer,"ipv4root: 0\n"); + buffer += sprintf (buffer,"ipv4root_bcast: 0\n"); + } + put_ip_info(ipi); #if defined(CONFIG_ARCH_S390) buffer = task_show_regs(task, buffer); #endif diff -NurpP --minimal linux-2.6.2-rc1/fs/proc/base.c linux-2.6.2-rc1-vs0.05.1/fs/proc/base.c --- linux-2.6.2-rc1/fs/proc/base.c Sat Jan 24 03:18:15 2004 +++ linux-2.6.2-rc1-vs0.05.1/fs/proc/base.c Sat Jan 24 06:29:01 
2004 @@ -32,6 +32,7 @@ #include #include #include +#include /* * For hysterical raisins we keep the same inumbers as in the old procfs. @@ -67,6 +68,7 @@ enum pid_directory_inos { PROC_TGID_ATTR_EXEC, PROC_TGID_ATTR_FSCREATE, #endif + PROC_TGID_VINFO, PROC_TGID_FD_DIR, PROC_TID_INO, PROC_TID_STATUS, @@ -90,6 +92,7 @@ enum pid_directory_inos { PROC_TID_ATTR_EXEC, PROC_TID_ATTR_FSCREATE, #endif + PROC_TID_VINFO, PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ }; @@ -123,6 +126,7 @@ static struct pid_entry tgid_base_stuff[ #ifdef CONFIG_KALLSYMS E(PROC_TGID_WCHAN, "wchan", S_IFREG|S_IRUGO), #endif + E(PROC_TGID_VINFO, "vinfo", S_IFREG|S_IRUGO), {0,0,NULL,0} }; static struct pid_entry tid_base_stuff[] = { @@ -145,6 +149,7 @@ static struct pid_entry tid_base_stuff[] #ifdef CONFIG_KALLSYMS E(PROC_TID_WCHAN, "wchan", S_IFREG|S_IRUGO), #endif + E(PROC_TID_VINFO, "vinfo", S_IFREG|S_IRUGO), {0,0,NULL,0} }; @@ -181,6 +186,7 @@ int proc_pid_stat(struct task_struct*,ch int proc_pid_status(struct task_struct*,char*); int proc_pid_statm(struct task_struct*,char*); int proc_pid_cpu(struct task_struct*,char*); +// int proc_pid_vinfo(struct task_struct*,char*); static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) { @@ -963,6 +969,7 @@ static struct inode *proc_pid_make_inode inode->i_uid = task->euid; inode->i_gid = task->egid; } + // inode->i_xid = vx_task_xid(task); security_task_to_inode(task, inode); out: @@ -1392,6 +1399,11 @@ static struct dentry *proc_pident_lookup ei->op.proc_read = proc_pid_wchan; break; #endif + case PROC_TID_VINFO: + case PROC_TGID_VINFO: + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_pid_vinfo; + break; default: printk("procfs: impossible type (%d)",p->type); iput(inode); @@ -1584,6 +1596,10 @@ struct dentry *proc_pid_lookup(struct in if (!task) goto out; + if (tgid != 1 && !vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT)) { + put_task_struct(task); + goto out; + } inode = 
proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); @@ -1691,6 +1707,10 @@ static int get_tgid_list(int index, unsi for ( ; p != &init_task; p = next_task(p)) { int tgid = p->pid; if (!pid_alive(p)) + continue; + if (tgid != 1 && !vx_check(vx_task_xid(p), VX_WATCH|VX_IDENT)) + continue; + if (current->vx_info && current->vx_info->vx_initpid == tgid) continue; if (--index >= 0) continue; diff -NurpP --minimal linux-2.6.2-rc1/fs/proc/generic.c linux-2.6.2-rc1-vs0.05.1/fs/proc/generic.c --- linux-2.6.2-rc1/fs/proc/generic.c Fri Jan 9 08:00:12 2004 +++ linux-2.6.2-rc1-vs0.05.1/fs/proc/generic.c Sat Jan 24 05:45:59 2004 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -349,6 +350,8 @@ struct dentry *proc_lookup(struct inode for (de = de->subdir; de ; de = de->next) { if (de->namelen != dentry->d_name.len) continue; + if (!vx_weak_check(0, de->vx_flags)) + continue; if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { int ino = de->low_ino; error = -EINVAL; @@ -424,9 +427,12 @@ int proc_readdir(struct file * filp, } do { + if (!vx_weak_check(0, de->vx_flags)) + goto skip; if (filldir(dirent, de->name, de->namelen, filp->f_pos, de->low_ino, de->mode >> 12) < 0) goto out; + skip: filp->f_pos++; de = de->next; } while (de); @@ -538,6 +544,7 @@ static struct proc_dir_entry *proc_creat ent->namelen = len; ent->mode = mode; ent->nlink = nlink; + ent->vx_flags = VX_ADMIN; out: return ent; } @@ -558,7 +565,8 @@ struct proc_dir_entry *proc_symlink(cons kfree(ent->data); kfree(ent); ent = NULL; - } + } else + ent->vx_flags = 0; } else { kfree(ent); ent = NULL; diff -NurpP --minimal linux-2.6.2-rc1/fs/proc/inode.c linux-2.6.2-rc1-vs0.05.1/fs/proc/inode.c --- linux-2.6.2-rc1/fs/proc/inode.c Fri Jan 9 08:00:02 2004 +++ linux-2.6.2-rc1-vs0.05.1/fs/proc/inode.c Sat Jan 24 05:45:59 2004 @@ -207,6 +207,8 @@ printk("proc_iget: using deleted entry % inode->i_uid = de->uid; inode->i_gid = de->gid; } + if (de->vx_flags) + PROC_I(inode)->vx_flags = 
de->vx_flags; if (de->size) inode->i_size = de->size; if (de->nlink) diff -NurpP --minimal linux-2.6.2-rc1/fs/proc/root.c linux-2.6.2-rc1-vs0.05.1/fs/proc/root.c --- linux-2.6.2-rc1/fs/proc/root.c Fri Jan 9 07:59:55 2004 +++ linux-2.6.2-rc1-vs0.05.1/fs/proc/root.c Sat Jan 24 05:45:59 2004 @@ -23,6 +23,9 @@ struct proc_dir_entry *proc_net, *proc_b #ifdef CONFIG_SYSCTL struct proc_dir_entry *proc_sys_root; #endif +struct proc_dir_entry *proc_virtual; + +extern void proc_vx_init(void); static struct super_block *proc_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) @@ -78,6 +81,7 @@ void __init proc_root_init(void) proc_rtas_init(); #endif proc_bus = proc_mkdir("bus", 0); + proc_vx_init(); } static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) diff -NurpP --minimal linux-2.6.2-rc1/fs/proc/virtual.c linux-2.6.2-rc1-vs0.05.1/fs/proc/virtual.c --- linux-2.6.2-rc1/fs/proc/virtual.c Thu Jan 1 01:00:00 1970 +++ linux-2.6.2-rc1-vs0.05.1/fs/proc/virtual.c Sat Jan 24 06:42:17 2004 @@ -0,0 +1,548 @@ +/* + * linux/fs/proc/virtual.c + * + * Virtual Context Support + * + * Copyright (C) 2003-2004 Herbert Pötzl + * + * V0.01 basic structure + * V0.02 adaptation vs1.3.0 + * V0.03 proc permissions + * V0.04 locking/generic + * V0.05 next generation procfs + * V0.06 inode validation + * + */ + +#include + +#include +#include +#include + +#include + + +static struct proc_dir_entry *proc_virtual; + +enum xid_directory_inos { + PROC_XID_INO = 32, + PROC_XID_INFO, + PROC_XID_STATUS, + PROC_XID_LIMIT, +}; + + + +/* first the actual feeds */ + + +static int proc_virtual_info(struct vx_info *vxi, char *buffer) +{ + return sprintf(buffer, + "VCIVersion:\t%04x:%04x\n" + ,VCI_VERSION >> 16 + ,VCI_VERSION & 0xFFFF); +} + + +int proc_xid_info (struct vx_info *vxi, char *buffer) +{ + return sprintf(buffer, + "ID:\t%d\n" + "Info:\t%p\n" + "Init:\t%d\n" + ,vxi->vx_id + ,vxi + ,vxi->vx_initpid); +} + +int 
proc_xid_status (struct vx_info *vxi, char *buffer) +{ + return sprintf(buffer, + "RefC:\t%d\n" + "Flags:\t%08x\n" + "Ticks:\t%d\n" + ,atomic_read(&vxi->vx_refcount) + ,vxi->vx_flags + ,atomic_read(&vxi->limit.ticks)); +} + +int proc_xid_limit (struct vx_info *vxi, char *buffer) +{ + return sprintf(buffer, + "PROC:\t%8d/%ld\n" + "VM:\t%8d/%ld\n" + "VML:\t%8d/%ld\n" + "RSS:\t%8d/%ld\n" + ,atomic_read(&vxi->limit.res[RLIMIT_NPROC]) + ,vxi->limit.rlim[RLIMIT_NPROC] + ,atomic_read(&vxi->limit.res[RLIMIT_AS]) + ,vxi->limit.rlim[RLIMIT_AS] + ,atomic_read(&vxi->limit.res[RLIMIT_MEMLOCK]) + ,vxi->limit.rlim[RLIMIT_MEMLOCK] + ,atomic_read(&vxi->limit.res[RLIMIT_RSS]) + ,vxi->limit.rlim[RLIMIT_RSS]); +} + + + + +/* here the inode helpers */ + + + +#define fake_ino(xid,ino) (((xid)<<16)|(ino)) + +#define MAX_MULBY10 ((~0U-9)/10) + + +static struct inode *proc_xid_make_inode(struct super_block * sb, + struct vx_info *vxi, int ino) +{ + struct inode *inode = new_inode(sb); + xid_t xid = (vxi)?vxi->vx_id:1; + + if (!inode) + goto out; + + inode->i_mtime = inode->i_atime = + inode->i_ctime = CURRENT_TIME; + inode->i_ino = fake_ino(xid, ino); + + inode->u.generic_ip = vxi; /* reference from above */ + inode->i_uid = 0; + inode->i_gid = 0; + // inode->i_xid = xid; +out: + return inode; +} + +void proc_xid_delete_inode(struct inode *inode) +{ + struct vx_info *vxi = (struct vx_info *)inode->u.generic_ip; + + if (vxi) + put_vx_info(vxi); +} + +static int proc_xid_revalidate(struct dentry * dentry, struct nameidata *nd) +{ + struct vx_info *vxi = (struct vx_info *)dentry->d_inode->u.generic_ip; + + if (atomic_read(&vxi->limit.res[RLIMIT_NPROC])) + return 1; + return 0; +} + + + +static int proc_xid_delete_dentry(struct dentry * dentry) +{ + return 1; +} + + + +#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) + +static ssize_t proc_xid_info_read(struct file * file, char * buf, + size_t count, loff_t *ppos) +{ + struct inode * inode = file->f_dentry->d_inode; + unsigned long page; + ssize_t 
length; + ssize_t end; + struct vx_info *vxi = + (struct vx_info *)inode->u.generic_ip; + + if (count > PROC_BLOCK_SIZE) + count = PROC_BLOCK_SIZE; + if (!(page = __get_free_page(GFP_KERNEL))) + return -ENOMEM; + + length = PROC_I(inode)->op.proc_xid_read(vxi, (char*)page); + + if (length < 0) { + free_page(page); + return length; + } + /* Static 4kB (or whatever) block capacity */ + if (*ppos >= length) { + free_page(page); + return 0; + } + if (count + *ppos > length) + count = length - *ppos; + end = copy_to_user(buf, (char *) page + *ppos, count) ? -EFAULT : (ssize_t)count; /* a faulting user buffer is an error, not a successful read */ + if (end > 0) + *ppos += end; /* advance the offset only when the data really reached user space */ + free_page(page); + return end; +} + + + + + +/* here comes the lower level (xid) */ + +static struct file_operations proc_xid_info_file_operations = { + read: proc_xid_info_read, +}; + + +struct xid_entry { + int type; + int len; + char *name; + mode_t mode; +}; + +#define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)} + +static struct xid_entry base_stuff[] = { + E(PROC_XID_INFO, "info", S_IFREG|S_IRUGO), + E(PROC_XID_STATUS, "status", S_IFREG|S_IRUGO), + E(PROC_XID_LIMIT, "limit", S_IFREG|S_IRUGO), + {0,0,NULL,0} +}; + +static struct dentry *proc_xid_lookup(struct inode *dir, + struct dentry *dentry, struct nameidata *nd) +{ + struct inode *inode; + struct vx_info *vxi; + struct xid_entry *p; + int error; + + error = -ENOENT; + inode = NULL; + + for (p = base_stuff; p->name; p++) { + if (p->len != dentry->d_name.len) + continue; + if (!memcmp(dentry->d_name.name, p->name, p->len)) + break; + } + if (!p->name) + goto out; + vxi = get_vx_info((struct vx_info *)dir->u.generic_ip); + if (!vxi) + goto out; + + error = -EINVAL; + inode = proc_xid_make_inode(dir->i_sb, vxi, p->type); + if (!inode) + goto out_release; + + switch(p->type) { + case PROC_XID_INFO: + PROC_I(inode)->op.proc_xid_read = proc_xid_info; + break; + case PROC_XID_STATUS: + PROC_I(inode)->op.proc_xid_read = proc_xid_status; + break; + case PROC_XID_LIMIT: + PROC_I(inode)->op.proc_xid_read = proc_xid_limit; +
break; + default: + printk("procfs: impossible type (%d)",p->type); + iput(inode); + return ERR_PTR(-EINVAL); + } + inode->i_mode = p->mode; +// inode->i_op = &proc_xid_info_inode_operations; + inode->i_fop = &proc_xid_info_file_operations; + inode->i_nlink = 1; + inode->i_flags|=S_IMMUTABLE; + +// dentry->d_op = &proc_xid_dentry_operations; + d_add(dentry, inode); + return NULL; + +out_release: + put_vx_info(vxi); +out: + return ERR_PTR(error); +} + + +static int proc_xid_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + int i, xid; + struct inode *inode = filp->f_dentry->d_inode; + struct vx_info *vxi = (struct vx_info *)inode->u.generic_ip; + struct xid_entry *p; + + xid = vxi->vx_id; + i = filp->f_pos; + switch (i) { + case 0: + if (filldir(dirent, ".", 1, i, + inode->i_ino, DT_DIR) < 0) + return 0; + i++; + filp->f_pos++; + /* fall through */ + case 1: + if (filldir(dirent, "..", 2, i, + PROC_ROOT_INO, DT_DIR) < 0) + return 0; + i++; + filp->f_pos++; + /* fall through */ + default: + i -= 2; + if (i>=sizeof(base_stuff)/sizeof(base_stuff[0])) + return 1; + p = base_stuff + i; + while (p->name) { + if (filldir(dirent, p->name, p->len, + filp->f_pos, fake_ino(xid, p->type), + p->mode >> 12) < 0) + return 0; + filp->f_pos++; + p++; + } + } + return 1; +} + + + + +/* now the upper level (virtual) */ + +static struct file_operations proc_xid_file_operations = { + read: generic_read_dir, + readdir: proc_xid_readdir, +}; + +static struct inode_operations proc_xid_inode_operations = { + lookup: proc_xid_lookup, +}; + +static struct dentry_operations proc_xid_dentry_operations = +{ + d_revalidate: proc_xid_revalidate, + d_delete: proc_xid_delete_dentry, +}; + + + +struct dentry *proc_virtual_lookup(struct inode *dir, + struct dentry * dentry, struct nameidata *nd) +{ + int xid, c; + struct vx_info *vxi; + const char *name; + struct inode *inode; + int len; + + xid = 0; + name = dentry->d_name.name; + len = dentry->d_name.len; + if (len == 7 && 
!memcmp(name, "current", 7)) { + inode = new_inode(dir->i_sb); + if (!inode) + return ERR_PTR(-ENOMEM); + inode->i_mtime = inode->i_atime = + inode->i_ctime = CURRENT_TIME; + inode->i_ino = fake_ino(1, PROC_XID_INO); + inode->u.generic_ip = NULL; + inode->i_mode = S_IFLNK|S_IRWXUGO; + inode->i_uid = inode->i_gid = 0; + inode->i_size = 64; +// inode->i_op = &proc_current_inode_operations; + d_add(dentry, inode); + return NULL; + } + if (len == 4 && !memcmp(name, "info", 4)) { + inode = proc_xid_make_inode(dir->i_sb, NULL, PROC_XID_INFO); + if (!inode) + return ERR_PTR(-ENOMEM); + inode->i_fop = &proc_xid_info_file_operations; + PROC_I(inode)->op.proc_xid_read = proc_virtual_info; + inode->i_mode = S_IFREG|S_IRUGO; +// inode->i_size = 64; +// inode->i_op = &proc_current_inode_operations; + d_add(dentry, inode); + return NULL; + } + + while (len-- > 0) { + c = *name - '0'; + name++; + if (c < 0 || c > 9) /* c is a signed int: bytes below '0' must be rejected too, or they act as negative digits */ + goto out; + if (xid >= MAX_MULBY10) + goto out; + xid *= 10; + xid += c; + if (!xid) + goto out; + } + + vxi = find_vx_info(xid); + if (!vxi) + goto out; + + inode = NULL; + if (vx_check(xid, VX_ADMIN|VX_WATCH|VX_IDENT)) + inode = proc_xid_make_inode(dir->i_sb, + vxi, PROC_XID_INO); + if (!inode) + goto out_release; + + inode->i_mode = S_IFDIR|S_IRUGO; + inode->i_op = &proc_xid_inode_operations; + inode->i_fop = &proc_xid_file_operations; + inode->i_nlink = 2; + inode->i_flags|=S_IMMUTABLE; + + dentry->d_op = &proc_xid_dentry_operations; + d_add(dentry, inode); + return NULL; + +out_release: + put_vx_info(vxi); +out: + return ERR_PTR(-ENOENT); +} + + + +#define PROC_NUMBUF 10 +#define PROC_MAXXIDS 32 + + +static int get_xid_list(int index, unsigned int *xids) +{ + struct vx_info *p; + int nr_xids = 0; + + index--; + spin_lock(&vxlist_lock); + list_for_each_entry(p, &vx_infos, vx_list) { + int xid = p->vx_id; + + if (--index >= 0) + continue; + xids[nr_xids] = xid; + if (++nr_xids >= PROC_MAXXIDS) + break; + } + spin_unlock(&vxlist_lock); + return nr_xids; +} + +int
proc_virtual_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + unsigned int xid_array[PROC_MAXXIDS]; + char buf[PROC_NUMBUF]; + unsigned int nr = filp->f_pos-3; + unsigned int nr_xids, i; + ino_t ino; + + switch (filp->f_pos) { + case 0: + ino = fake_ino(0, PROC_XID_INO); + if (filldir(dirent, ".", 1, + filp->f_pos, ino, DT_DIR) < 0) + return 0; + filp->f_pos++; + /* fall through */ + case 1: + ino = filp->f_dentry->d_parent->d_inode->i_ino; + if (filldir(dirent, "..", 2, + filp->f_pos, ino, DT_DIR) < 0) + return 0; + filp->f_pos++; + /* fall through */ + case 2: + ino = fake_ino(1, PROC_XID_INFO); /* same inode number proc_virtual_lookup() assigns to "info" */ + if (filldir(dirent, "info", 4, + filp->f_pos, ino, DT_REG) < 0) /* "info" is a regular file (S_IFREG), not a symlink */ + return 0; + filp->f_pos++; + /* fall through */ + case 3: + if (current->xid > 1) { + ino = fake_ino(1, PROC_XID_INO); + if (filldir(dirent, "current", 7, + filp->f_pos, ino, DT_LNK) < 0) + return 0; + } + filp->f_pos++; + default: break; /* a label must be followed by a statement */ + } + + nr_xids = get_xid_list(nr, xid_array); + + for (i = 0; i < nr_xids; i++) { + int xid = xid_array[i]; + ino_t ino = fake_ino(xid, PROC_XID_INO); + unsigned long j = PROC_NUMBUF; + + do buf[--j] = '0' + (xid % 10); while (xid/=10); + + if (filldir(dirent, buf+j, PROC_NUMBUF-j, + filp->f_pos, ino, DT_DIR) < 0) + break; + filp->f_pos++; + } + return 0; +} + + +static struct file_operations proc_virtual_dir_operations = { + read: generic_read_dir, + readdir: proc_virtual_readdir, +}; + +static struct inode_operations proc_virtual_dir_inode_operations = { + lookup: proc_virtual_lookup, +}; + + + + + + + +void proc_vx_init(void) +{ + struct proc_dir_entry *ent; + + ent = proc_mkdir("virtual", 0); + if (ent) { + ent->proc_fops = &proc_virtual_dir_operations; + ent->proc_iops = &proc_virtual_dir_inode_operations; + } + proc_virtual = ent; +} + + + + +/* per pid info */ + + +char *task_vinfo(struct task_struct *p, char *buffer) +{ + return buffer + sprintf(buffer, + "XID:\t%d\n" + ,p->xid); +} + +int proc_pid_vinfo(struct task_struct *p, char *buffer) +{ +
char * orig = buffer; + + buffer = task_vinfo(p, buffer); + return buffer - orig; +} + diff -NurpP --minimal linux-2.6.2-rc1/fs/proc/virtual_old.c linux-2.6.2-rc1-vs0.05.1/fs/proc/virtual_old.c --- linux-2.6.2-rc1/fs/proc/virtual_old.c Thu Jan 1 01:00:00 1970 +++ linux-2.6.2-rc1-vs0.05.1/fs/proc/virtual_old.c Sat Jan 24 05:45:59 2004 @@ -0,0 +1,179 @@ +/* + * linux/fs/proc/virtual.c + * + * Virtual Context ProcFS Support + * + * Copyright (C) 2003 Herbert Pötzl + * + * V0.01 basic directory array + * V0.02 per context info & stat + * V0.03 proc permissions + * + */ + +#include + +#include +#include +#include + +#include +#include +#include + + +extern struct proc_dir_entry *proc_virtual; +static struct proc_dir_entry *proc_virtual_info; + + +char *task_vinfo(struct task_struct *p, char *buffer) +{ + return buffer + sprintf(buffer, + "VxID:\t%d\n" + ,p->vx_id); +} + +int proc_pid_vinfo(struct task_struct *p, char *buffer) +{ + char * orig = buffer; + + buffer = task_vinfo(p, buffer); + return buffer - orig; +} + + +static int __generic_info_read_func(char *page, char **start, + off_t off, int count, int *eof, void *data, + char *(*info_func)(void *, char *)) +{ + int len; + char *buffer = page; + + buffer = info_func(data, buffer); + + len = buffer-page; + if (len <= off+count) *eof = 1; + + *start = page + off; + len -= off; + if (len>count) len = count; + if (len<0) len = 0; + return len; +} + +char *vx_proc_info (void *data, char *buffer) +{ + struct vx_info *vxi = data; + buffer += sprintf(buffer, + "VxID:\t%d\n" + "Info:\t%p\n" + "Init:\t%d\n" + ,vxi->vx_id + ,vxi + ,vxi->vx_initpid); + return buffer; +} + +int vx_info_read_func (char *page, char **start, + off_t off, int count, int *eof, void *data) +{ + return __generic_info_read_func(page, start, + off, count, eof, data, vx_proc_info); +} + +char *vx_proc_status (void *data, char *buffer) +{ + struct vx_info *vxi = data; + buffer += sprintf(buffer, + "RefC:\t%d\n" + "Flags:\t%08x\n" + "Ticks:\t%d\n" + 
,atomic_read(&vxi->vx_refcount) + ,vxi->vx_flags + ,atomic_read(&vxi->limit.ticks)); + return buffer; +} + +int vx_status_read_func (char *page, char **start, + off_t off, int count, int *eof, void *data) +{ + return __generic_info_read_func(page, start, + off, count, eof, data, vx_proc_status); +} + + +static int vx_proc_permission(struct inode *inode, + int mask, struct nameidata *nd) +{ + vxdprintk("vx_proc_permission(%p) = #%d,%04x\n", + inode, inode->i_xid, PROC_I(inode)->vx_flags); + if (vx_check(inode->i_xid, PROC_I(inode)->vx_flags)) + return 0; + vxdprintk("vx_proc_permission(%p) #%d != #%d\n", + inode, inode->i_xid, vx_current_id()); + return -ENOENT; +} + +static struct inode_operations vx_proc_inode_operations = { + .lookup = proc_lookup, + .permission = vx_proc_permission, +}; + + +int vx_proc_create(struct vx_info *vxi) +{ + struct proc_dir_entry *entry, *sub; + char name[8]; + + snprintf(name, sizeof(name)-1, "%d", vxi->vx_id); + entry = create_proc_entry(name, + S_IFDIR|S_IXUGO, proc_virtual); + entry->vx_flags = VX_ADMIN|VX_WATCH|VX_IDENT; + entry->xid = vxi->vx_id; + entry->proc_iops = &vx_proc_inode_operations; + vxi->vx_procent = entry; + sub = create_proc_read_entry("info", + S_IFREG|S_IRUGO|S_IWUSR, + entry, vx_info_read_func, vxi); + sub = create_proc_read_entry("status", + S_IFREG|S_IRUGO|S_IWUSR, + entry, vx_status_read_func, vxi); + return 0; +} + +int vx_proc_destroy(struct vx_info *vxi) +{ + struct proc_dir_entry *entry = vxi->vx_procent; + if (!entry) + return 0; + remove_proc_entry(entry->name, proc_virtual); + vxi->vx_procent = NULL; + return 0; +} + +char *vs_proc_info(void *data, char *buffer) +{ + buffer += sprintf(buffer, + "VCIVersion:\t%04x:%04x\n" + ,VCI_VERSION >> 16 + ,VCI_VERSION & 0xFFFF); + return buffer; +} + +int vs_info_read_func(char *page, char **start, + off_t off, int count, int *eof, void *data) +{ + return __generic_info_read_func(page, start, + off, count, eof, data, vs_proc_info); +} + + +static int __init 
virtual_proc_init(void) +{ + proc_virtual_info = create_proc_read_entry("info", + S_IFREG|S_IRUGO|S_IWUSR, + proc_virtual, vs_info_read_func, NULL); + return 0; +} + +__initcall(virtual_proc_init); diff -NurpP --minimal linux-2.6.2-rc1/fs/reiserfs/ioctl.c linux-2.6.2-rc1-vs0.05.1/fs/reiserfs/ioctl.c --- linux-2.6.2-rc1/fs/reiserfs/ioctl.c Fri Jan 9 07:59:26 2004 +++ linux-2.6.2-rc1-vs0.05.1/fs/reiserfs/ioctl.c Sat Jan 24 06:14:24 2004 @@ -47,7 +47,8 @@ int reiserfs_ioctl (struct inode * inode if (get_user(flags, (int *) arg)) return -EFAULT; - if ( ( ( flags ^ REISERFS_I(inode) -> i_attrs) & ( REISERFS_IMMUTABLE_FL | REISERFS_APPEND_FL)) && + if ( ( ( flags ^ REISERFS_I(inode) -> i_attrs) & + ( REISERFS_IMMUTABLE_FL | REISERFS_IUNLINK_FL | REISERFS_APPEND_FL)) && !capable( CAP_LINUX_IMMUTABLE ) ) return -EPERM; diff -NurpP --minimal linux-2.6.2-rc1/include/asm-alpha/unistd.h linux-2.6.2-rc1-vs0.05.1/include/asm-alpha/unistd.h --- linux-2.6.2-rc1/include/asm-alpha/unistd.h Fri Jan 9 07:59:26 2004 +++ linux-2.6.2-rc1-vs0.05.1/include/asm-alpha/unistd.h Sat Jan 24 06:45:48 2004 @@ -233,6 +233,7 @@ #define __NR_osf_memcntl 260 /* not implemented */ #define __NR_osf_fdatasync 261 /* not implemented */ +#define __NR_vserver 273 /* * Linux-specific system calls begin at 300 diff -NurpP --minimal linux-2.6.2-rc1/include/asm-m68k/unistd.h linux-2.6.2-rc1-vs0.05.1/include/asm-m68k/unistd.h --- linux-2.6.2-rc1/include/asm-m68k/unistd.h Fri Jan 9 07:59:33 2004 +++ linux-2.6.2-rc1-vs0.05.1/include/asm-m68k/unistd.h Sat Jan 24 06:45:48 2004 @@ -239,7 +239,9 @@ #define __NR_fremovexattr 234 #define __NR_futex 235 -#define NR_syscalls 236 +#define __NR_vserver 273 + +#define NR_syscalls 274 /* user-visible error numbers are in the range -1 - -124: see */ diff -NurpP --minimal linux-2.6.2-rc1/include/asm-m68knommu/unistd.h linux-2.6.2-rc1-vs0.05.1/include/asm-m68knommu/unistd.h --- linux-2.6.2-rc1/include/asm-m68knommu/unistd.h Fri Jan 9 07:59:41 2004 +++ 
linux-2.6.2-rc1-vs0.05.1/include/asm-m68knommu/unistd.h Sat Jan 24 06:45:48 2004 @@ -221,7 +221,9 @@ #define __NR_setfsuid32 215 #define __NR_setfsgid32 216 -#define NR_syscalls 256 +#define __NR_vserver 273 + +#define NR_syscalls 274 /* user-visible error numbers are in the range -1 - -122: see */ diff -NurpP --minimal linux-2.6.2-rc1/include/asm-mips/unistd.h linux-2.6.2-rc1-vs0.05.1/include/asm-mips/unistd.h --- linux-2.6.2-rc1/include/asm-mips/unistd.h Fri Jan 9 07:59:05 2004 +++ linux-2.6.2-rc1-vs0.05.1/include/asm-mips/unistd.h Sat Jan 24 06:45:48 2004 @@ -289,10 +289,12 @@ #define __NR_tgkill (__NR_Linux + 266) #define __NR_utimes (__NR_Linux + 267) +#define __NR_vserver (__NR_Linux + 273) + /* * Offset of the last Linux o32 flavoured syscall */ -#define __NR_Linux_syscalls 267 +#define __NR_Linux_syscalls 273 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ diff -NurpP --minimal linux-2.6.2-rc1/include/asm-parisc/unistd.h linux-2.6.2-rc1-vs0.05.1/include/asm-parisc/unistd.h --- linux-2.6.2-rc1/include/asm-parisc/unistd.h Fri Jan 9 07:59:03 2004 +++ linux-2.6.2-rc1-vs0.05.1/include/asm-parisc/unistd.h Sat Jan 24 06:45:48 2004 @@ -722,8 +722,9 @@ #define __NR_remap_file_pages (__NR_Linux + 227) #define __NR_semtimedop (__NR_Linux + 228) +#define __NR_vserver (__NR_Linux + 273) -#define __NR_Linux_syscalls 228 +#define __NR_Linux_syscalls 273 #define HPUX_GATEWAY_ADDR 0xC0000004 #define LINUX_GATEWAY_ADDR 0x100 diff -NurpP --minimal linux-2.6.2-rc1/include/asm-ppc/unistd.h linux-2.6.2-rc1-vs0.05.1/include/asm-ppc/unistd.h --- linux-2.6.2-rc1/include/asm-ppc/unistd.h Sat Jan 24 03:18:18 2004 +++ linux-2.6.2-rc1-vs0.05.1/include/asm-ppc/unistd.h Sat Jan 24 06:45:48 2004 @@ -261,7 +261,9 @@ #define __NR_fadvise64_64 254 #define __NR_rtas 255 -#define __NR_syscalls 256 +#define __NR_vserver 273 + +#define __NR_syscalls 274 #define __NR(n) #n diff -NurpP --minimal linux-2.6.2-rc1/include/asm-ppc64/unistd.h linux-2.6.2-rc1-vs0.05.1/include/asm-ppc64/unistd.h --- 
linux-2.6.2-rc1/include/asm-ppc64/unistd.h Sat Jan 24 03:18:18 2004 +++ linux-2.6.2-rc1-vs0.05.1/include/asm-ppc64/unistd.h Sat Jan 24 06:45:48 2004 @@ -267,7 +267,9 @@ #define __NR_fadvise64_64 254 #define __NR_rtas 255 -#define __NR_syscalls 256 +#define __NR_vserver 273 + +#define __NR_syscalls 274 #ifdef __KERNEL__ #define NR_syscalls __NR_syscalls #endif diff -NurpP --minimal linux-2.6.2-rc1/include/asm-s390/unistd.h linux-2.6.2-rc1-vs0.05.1/include/asm-s390/unistd.h --- linux-2.6.2-rc1/include/asm-s390/unistd.h Sat Jan 24 03:18:18 2004 +++ linux-2.6.2-rc1-vs0.05.1/include/asm-s390/unistd.h Sat Jan 24 06:45:48 2004 @@ -256,9 +256,7 @@ #define __NR_clock_gettime (__NR_timer_create+6) #define __NR_clock_getres (__NR_timer_create+7) #define __NR_clock_nanosleep (__NR_timer_create+8) -/* - * Number 263 is reserved for vserver - */ +#define __NR_vserver 263 #define __NR_fadvise64_64 264 #define NR_syscalls 265 diff -NurpP --minimal linux-2.6.2-rc1/include/asm-sparc/unistd.h linux-2.6.2-rc1-vs0.05.1/include/asm-sparc/unistd.h --- linux-2.6.2-rc1/include/asm-sparc/unistd.h Fri Jan 9 07:59:08 2004 +++ linux-2.6.2-rc1-vs0.05.1/include/asm-sparc/unistd.h Sat Jan 24 06:45:48 2004 @@ -283,7 +283,7 @@ #define __NR_timer_getoverrun 264 #define __NR_timer_delete 265 #define __NR_timer_create 266 -/* #define __NR_vserver 267 Reserved for VSERVER */ +#define __NR_vserver 267 #define __NR_io_setup 268 #define __NR_io_destroy 268 #define __NR_io_submit 269 diff -NurpP --minimal linux-2.6.2-rc1/include/asm-sparc64/unistd.h linux-2.6.2-rc1-vs0.05.1/include/asm-sparc64/unistd.h --- linux-2.6.2-rc1/include/asm-sparc64/unistd.h Fri Jan 9 07:59:10 2004 +++ linux-2.6.2-rc1-vs0.05.1/include/asm-sparc64/unistd.h Sat Jan 24 06:45:48 2004 @@ -285,7 +285,7 @@ #define __NR_timer_getoverrun 264 #define __NR_timer_delete 265 #define __NR_timer_create 266 -/* #define __NR_vserver 267 Reserved for VSERVER */ +#define __NR_vserver 267 #define __NR_io_setup 268 #define __NR_io_destroy 268 #define 
__NR_io_submit 269 diff -NurpP --minimal linux-2.6.2-rc1/include/asm-x86_64/ia32_unistd.h linux-2.6.2-rc1-vs0.05.1/include/asm-x86_64/ia32_unistd.h --- linux-2.6.2-rc1/include/asm-x86_64/ia32_unistd.h Fri Jan 9 07:59:45 2004 +++ linux-2.6.2-rc1-vs0.05.1/include/asm-x86_64/ia32_unistd.h Sat Jan 24 06:45:48 2004 @@ -278,6 +278,7 @@ #define __NR_ia32_tgkill 270 #define __NR_ia32_utimes 271 #define __NR_ia32_fadvise64_64 272 +#define __NR_ia32_vserver 273 #define IA32_NR_syscalls 275 /* must be > than biggest syscall! */ diff -NurpP --minimal linux-2.6.2-rc1/include/linux/capability.h linux-2.6.2-rc1-vs0.05.1/include/linux/capability.h --- linux-2.6.2-rc1/include/linux/capability.h Fri Jan 9 07:59:19 2004 +++ linux-2.6.2-rc1-vs0.05.1/include/linux/capability.h Sat Jan 24 06:21:35 2004 @@ -235,6 +235,7 @@ typedef __u32 kernel_cap_t; /* Allow enabling/disabling tagged queuing on SCSI controllers and sending arbitrary SCSI commands */ /* Allow setting encryption key on loopback filesystem */ +/* Allow the selection of a security context */ #define CAP_SYS_ADMIN 21 @@ -283,6 +284,15 @@ typedef __u32 kernel_cap_t; /* Allow taking of leases on files */ #define CAP_LEASE 28 + +/* Allow quotactl */ + +#define CAP_QUOTACTL 29 + +/* Allow context manipulations */ +/* Allow changing context info on files */ + +#define CAP_CONTEXT 30 #ifdef __KERNEL__ /* diff -NurpP --minimal linux-2.6.2-rc1/include/linux/ext2_fs.h linux-2.6.2-rc1-vs0.05.1/include/linux/ext2_fs.h --- linux-2.6.2-rc1/include/linux/ext2_fs.h Fri Jan 9 07:59:09 2004 +++ linux-2.6.2-rc1-vs0.05.1/include/linux/ext2_fs.h Sat Jan 24 06:14:24 2004 @@ -192,10 +192,12 @@ struct ext2_group_desc #define EXT2_NOTAIL_FL 0x00008000 /* file tail should not be merged */ #define EXT2_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ #define EXT2_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ +#define EXT2_BARRIER_FL 0x04000000 /* chroot barrier */ +#define EXT2_IUNLINK_FL 0x08000000 /* Immutable unlink */ 
#define EXT2_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ -#define EXT2_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ -#define EXT2_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ +#define EXT2_FL_USER_VISIBLE 0x0c03DFFF /* User visible flags */ +#define EXT2_FL_USER_MODIFIABLE 0x0c0380FF /* User modifiable flags */ /* * ioctl commands diff -NurpP --minimal linux-2.6.2-rc1/include/linux/ext3_fs.h linux-2.6.2-rc1-vs0.05.1/include/linux/ext3_fs.h --- linux-2.6.2-rc1/include/linux/ext3_fs.h Fri Jan 9 07:59:44 2004 +++ linux-2.6.2-rc1-vs0.05.1/include/linux/ext3_fs.h Sat Jan 24 06:14:24 2004 @@ -185,10 +185,12 @@ struct ext3_group_desc #define EXT3_NOTAIL_FL 0x00008000 /* file tail should not be merged */ #define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ #define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ +#define EXT3_BARRIER_FL 0x04000000 /* chroot barrier */ +#define EXT3_IUNLINK_FL 0x08000000 /* Immutable unlink */ #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ -#define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ -#define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ +#define EXT3_FL_USER_VISIBLE 0x0c03DFFF /* User visible flags */ +#define EXT3_FL_USER_MODIFIABLE 0x0c0380FF /* User modifiable flags */ /* * Inode dynamic state flags diff -NurpP --minimal linux-2.6.2-rc1/include/linux/fs.h linux-2.6.2-rc1-vs0.05.1/include/linux/fs.h --- linux-2.6.2-rc1/include/linux/fs.h Sat Jan 24 03:18:19 2004 +++ linux-2.6.2-rc1-vs0.05.1/include/linux/fs.h Sat Jan 24 06:18:09 2004 @@ -2,9 +2,9 @@ #define _LINUX_FS_H /* - * This file has definitions for some important file table - * structures etc. - */ +* This file has definitions for some important file table +* structures etc. 
+*/ #include #include @@ -30,14 +30,14 @@ struct vm_area_struct; struct vfsmount; /* - * It's silly to have NR_OPEN bigger than NR_FILE, but you can change - * the file limit at runtime and only root can increase the per-process - * nr_file rlimit, so it's safe to set up a ridiculously high absolute - * upper limit on files-per-process. - * - * Some programs (notably those using select()) may have to be - * recompiled to take full advantage of the new limits.. - */ +* It's silly to have NR_OPEN bigger than NR_FILE, but you can change +* the file limit at runtime and only root can increase the per-process +* nr_file rlimit, so it's safe to set up a ridiculously high absolute +* upper limit on files-per-process. +* +* Some programs (notably those using select()) may have to be +* recompiled to take full advantage of the new limits.. +*/ /* Fixed constants first: */ #undef NR_OPEN @@ -49,16 +49,16 @@ struct vfsmount; /* And dynamically-tunable limits and defaults: */ struct files_stat_struct { - int nr_files; /* read only */ - int nr_free_files; /* read only */ - int max_files; /* tunable */ +int nr_files; /* read only */ +int nr_free_files; /* read only */ +int max_files; /* tunable */ }; extern struct files_stat_struct files_stat; struct inodes_stat_t { - int nr_inodes; - int nr_unused; - int dummy[5]; +int nr_inodes; +int nr_unused; +int dummy[5]; }; extern struct inodes_stat_t inodes_stat; @@ -91,11 +91,11 @@ extern int leases_enable, dir_notify_ena #define FS_REQUIRES_DEV 1 #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." 
for staleness */ #define FS_ODD_RENAME 32768 /* Temporary stuff; will go away as soon - * as nfs_rename() will be cleaned up - */ + * as nfs_rename() will be cleaned up + */ /* - * These are the fs-independent mount-flags: up to 32 flags are supported - */ +* These are the fs-independent mount-flags: up to 32 flags are supported +*/ #define MS_RDONLY 1 /* Mount read-only */ #define MS_NOSUID 2 /* Ignore suid and sgid bits */ #define MS_NODEV 4 /* Disallow access to device special files */ @@ -116,14 +116,14 @@ extern int leases_enable, dir_notify_ena #define MS_NOUSER (1<<31) /* - * Superblock flags that can be altered by MS_REMOUNT - */ +* Superblock flags that can be altered by MS_REMOUNT +*/ #define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_NOATIME|\ - MS_NODIRATIME) + MS_NODIRATIME) /* - * Old magic mount flag and mask - */ +* Old magic mount flag and mask +*/ #define MS_MGC_VAL 0xC0ED0000 #define MS_MGC_MSK 0xffff0000 @@ -137,6 +137,8 @@ extern int leases_enable, dir_notify_ena #define S_DEAD 32 /* removed, but still open directory */ #define S_NOQUOTA 64 /* Inode is not counted to quota */ #define S_DIRSYNC 128 /* Directory modifications are synchronous */ +#define S_BARRIER 256 /* chroot barrier */ +#define S_IUNLINK 512 /* Immutable unlink */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -164,11 +166,14 @@ extern int leases_enable, dir_notify_ena #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) +#define IS_IUNLINK(inode) ((inode)->i_flags & S_IUNLINK) +#define IS_IXUNLINK(inode) ((IS_IUNLINK(inode) ? 
S_IMMUTABLE : 0) ^ IS_IMMUTABLE(inode)) #define IS_NOATIME(inode) (__IS_FLG(inode, MS_NOATIME) || ((inode)->i_flags & S_NOATIME)) #define IS_NODIRATIME(inode) __IS_FLG(inode, MS_NODIRATIME) #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL) #define IS_ONE_SECOND(inode) __IS_FLG(inode, MS_ONE_SECOND) +#define IS_BARRIER(inode) ((inode)->i_flags & S_BARRIER) #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) /* the read-only stuff doesn't really belong here, but any other place is diff -NurpP --minimal linux-2.6.2-rc1/include/linux/init_task.h linux-2.6.2-rc1-vs0.05.1/include/linux/init_task.h --- linux-2.6.2-rc1/include/linux/init_task.h Fri Jan 9 07:59:08 2004 +++ linux-2.6.2-rc1-vs0.05.1/include/linux/init_task.h Sat Jan 24 07:24:16 2004 @@ -108,6 +108,10 @@ .proc_lock = SPIN_LOCK_UNLOCKED, \ .switch_lock = SPIN_LOCK_UNLOCKED, \ .journal_info = NULL, \ + .cap_bset = CAP_INIT_EFF_SET, \ + .xid = 0, \ + .vx_info = NULL, \ + .ip_info = NULL, \ } diff -NurpP --minimal linux-2.6.2-rc1/include/linux/ip.h linux-2.6.2-rc1-vs0.05.1/include/linux/ip.h --- linux-2.6.2-rc1/include/linux/ip.h Fri Jan 9 07:59:19 2004 +++ linux-2.6.2-rc1-vs0.05.1/include/linux/ip.h Sat Jan 24 05:46:08 2004 @@ -111,9 +111,11 @@ struct inet_opt { /* Socket demultiplex comparisons on incoming packets. 
*/ __u32 daddr; /* Foreign IPv4 addr */ __u32 rcv_saddr; /* Bound local IPv4 addr */ + __u32 rcv_saddr2; /* Second bound ipv4 addr, for ipv4root */ __u16 dport; /* Destination port */ __u16 num; /* Local port */ __u32 saddr; /* Sending source */ +// __u32 saddr2; /* Second bound ipv4 addr, for ipv4root */ int uc_ttl; /* Unicast TTL */ int tos; /* TOS */ unsigned cmsg_flags; diff -NurpP --minimal linux-2.6.2-rc1/include/linux/proc_fs.h linux-2.6.2-rc1-vs0.05.1/include/linux/proc_fs.h --- linux-2.6.2-rc1/include/linux/proc_fs.h Sat Jan 24 03:18:19 2004 +++ linux-2.6.2-rc1-vs0.05.1/include/linux/proc_fs.h Sat Jan 24 05:45:59 2004 @@ -60,6 +60,7 @@ struct proc_dir_entry { nlink_t nlink; uid_t uid; gid_t gid; + int vx_flags; unsigned long size; struct inode_operations * proc_iops; struct file_operations * proc_fops; @@ -237,12 +238,16 @@ extern void kclist_add(struct kcore_list extern struct kcore_list *kclist_del(void *); #endif +struct vx_info; + struct proc_inode { struct task_struct *task; int type; + int vx_flags; union { int (*proc_get_link)(struct inode *, struct dentry **, struct vfsmount **); int (*proc_read)(struct task_struct *task, char *page); + int (*proc_xid_read)(struct vx_info *vxi, char *page); } op; struct proc_dir_entry *pde; struct inode vfs_inode; diff -NurpP --minimal linux-2.6.2-rc1/include/linux/reiserfs_fs.h linux-2.6.2-rc1-vs0.05.1/include/linux/reiserfs_fs.h --- linux-2.6.2-rc1/include/linux/reiserfs_fs.h Fri Jan 9 08:00:02 2004 +++ linux-2.6.2-rc1-vs0.05.1/include/linux/reiserfs_fs.h Sat Jan 24 22:45:38 2004 @@ -879,6 +879,8 @@ struct stat_data_v1 /* we want common flags to have the same values as in ext2, so chattr(1) will work without problems */ #define REISERFS_IMMUTABLE_FL EXT2_IMMUTABLE_FL +#define REISERFS_IUNLINK_FL EXT2_IUNLINK_FL +#define REISERFS_BARRIER_FL EXT2_BARRIER_FL #define REISERFS_APPEND_FL EXT2_APPEND_FL #define REISERFS_SYNC_FL EXT2_SYNC_FL #define REISERFS_NOATIME_FL EXT2_NOATIME_FL @@ -890,6 +892,7 @@ struct 
stat_data_v1 /* persistent flags that file inherits from the parent directory */ #define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL | \ + REISERFS_IUNLINK_FL | \ REISERFS_SYNC_FL | \ REISERFS_NOATIME_FL | \ REISERFS_NODUMP_FL | \ diff -NurpP --minimal linux-2.6.2-rc1/include/linux/sched.h linux-2.6.2-rc1-vs0.05.1/include/linux/sched.h --- linux-2.6.2-rc1/include/linux/sched.h Sat Jan 24 03:18:19 2004 +++ linux-2.6.2-rc1-vs0.05.1/include/linux/sched.h Sat Jan 24 05:57:44 2004 @@ -102,6 +102,7 @@ extern unsigned long nr_iowait(void); #include #include +#include #define TASK_RUNNING 0 #define TASK_INTERRUPTIBLE 1 @@ -296,9 +297,10 @@ struct user_struct { /* Hash table maintenance information */ struct list_head uidhash_list; uid_t uid; + int vx_id; }; -extern struct user_struct *find_user(uid_t); +extern struct user_struct *find_user(xid_t, uid_t); extern struct user_struct root_user; #define INIT_USER (&root_user) @@ -440,6 +442,12 @@ struct task_struct { void *security; +/* vserver data */ + kernel_cap_t cap_bset; + xid_t xid; + struct vx_info *vx_info; + struct ip_info *ip_info; + /* Thread group tracking */ u32 parent_exec_id; u32 self_exec_id; @@ -561,7 +569,7 @@ extern void set_special_pids(pid_t sessi extern void __set_special_pids(pid_t session, pid_t pgrp); /* per-UID process charging. 
*/ -extern struct user_struct * alloc_uid(uid_t); +extern struct user_struct * alloc_uid(xid_t, uid_t); extern void free_uid(struct user_struct *); extern void switch_uid(struct user_struct *); diff -NurpP --minimal linux-2.6.2-rc1/include/linux/types.h linux-2.6.2-rc1-vs0.05.1/include/linux/types.h --- linux-2.6.2-rc1/include/linux/types.h Fri Jan 9 07:59:57 2004 +++ linux-2.6.2-rc1-vs0.05.1/include/linux/types.h Sat Jan 24 05:45:51 2004 @@ -37,6 +37,7 @@ typedef __kernel_uid32_t uid_t; typedef __kernel_gid32_t gid_t; typedef __kernel_uid16_t uid16_t; typedef __kernel_gid16_t gid16_t; +typedef unsigned int xid_t; #ifdef CONFIG_UID16 /* This is defined by include/asm-{arch}/posix_types.h */ diff -NurpP --minimal linux-2.6.2-rc1/include/linux/vinline.h linux-2.6.2-rc1-vs0.05.1/include/linux/vinline.h --- linux-2.6.2-rc1/include/linux/vinline.h Thu Jan 1 01:00:00 1970 +++ linux-2.6.2-rc1-vs0.05.1/include/linux/vinline.h Sat Jan 24 05:14:16 2004 @@ -0,0 +1,289 @@ +#ifndef _VX_INLINE_H +#define _VX_INLINE_H + + +// #define VX_DEBUG + +#include +#include + +#include +#include + +#if defined(VX_DEBUG) +#define vxdprintk(x...) printk("vxd: " x) +#else +#define vxdprintk(x...) 
+#endif + + + +void free_vx_info(struct vx_info *); + +extern int proc_pid_vinfo(struct task_struct *, char *); + + +#define get_vx_info(i) __get_vx_info(i,__FILE__,__LINE__) + +static __inline__ struct vx_info *__get_vx_info(struct vx_info *vxi, const char *_file, int _line) +{ + /* for now we allow vxi to be null */ + if (!vxi) + return NULL; + vxdprintk("get_vx_info(%p[#%d.%d])\t%s:%d\n", vxi, + vxi->vx_id, atomic_read(&vxi->vx_refcount), + _file, _line); + atomic_inc(&vxi->vx_refcount); + return vxi; +} + +#define put_vx_info(i) __put_vx_info(i,__FILE__,__LINE__) + +static __inline__ void __put_vx_info(struct vx_info *vxi, const char *_file, int _line) +{ + /* for now we allow vxi to be null */ + if (!vxi) + return; + vxdprintk("put_vx_info(%p[#%d.%d])\t%s:%d\n", vxi, + vxi->vx_id, atomic_read(&vxi->vx_refcount), + _file, _line); + if (atomic_dec_and_lock(&vxi->vx_refcount, &vxlist_lock)) { + list_del(&vxi->vx_list); + spin_unlock(&vxlist_lock); + free_vx_info(vxi); + } +} + +#define task_get_vx_info(i) __task_get_vx_info(i,__FILE__,__LINE__) + +static __inline__ struct vx_info *__task_get_vx_info(struct task_struct *p, + const char *_file, int _line) +{ + struct vx_info *vxi; + + task_lock(p); + vxi = __get_vx_info(p->vx_info, _file, _line); + task_unlock(p); + return vxi; +} + + +#define vx_verify_info(p,i) \ + __vx_verify_info((p)->vx_info,i,__FILE__,__LINE__) + +static __inline__ void __vx_verify_info( + struct vx_info *vxa, struct vx_info *vxb, + const char *_file, int _line) +{ + if (vxa == vxb) + return; + printk(KERN_ERR "vx bad assumption (%p==%p) at %s:%d\n", + vxa, vxb, _file, _line); +} + + +#define vx_task_xid(t) ((t)->xid) + +#define vx_current_xid() vx_task_xid(current) + +#define vx_check(c,m) __vx_check(vx_current_xid(),c,m) + +#define vx_weak_check(c,m) ((m) ? 
vx_check(c,m) : 1) + +/* + * check current context for ADMIN/WATCH and + * optionally against supplied argument + */ +static __inline__ int __vx_check(xid_t cid, xid_t id, unsigned int mode) +{ + if (mode & VX_ARG_MASK) { + if ((mode & VX_IDENT) && + (id == cid)) + return 1; + } + if (mode & VX_ATR_MASK) { + if ((mode & VX_DYNAMIC) && + (id >= MIN_D_CONTEXT) && + (id <= MAX_S_CONTEXT)) + return 1; + if ((mode & VX_STATIC) && + (id > 1) && (id < MIN_D_CONTEXT)) + return 1; + } + return (((mode & VX_ADMIN) && (cid == 0)) || + ((mode & VX_WATCH) && (cid == 1))); +} + + + +void free_ip_info(struct ip_info *); + +#define get_ip_info(i) __get_ip_info(i,__FILE__,__LINE__) + +static __inline__ struct ip_info *__get_ip_info(struct ip_info *ipi, const char *_file, int _line) +{ + /* for now we allow ipi to be null */ + if (!ipi) + return NULL; + vxdprintk("get_ip_info(%p[%d])\t%s:%d\n", ipi, + atomic_read(&ipi->ip_refcount), _file, _line); + atomic_inc(&ipi->ip_refcount); + return ipi; +} + +#define put_ip_info(i) __put_ip_info(i,__FILE__,__LINE__) + +static __inline__ void __put_ip_info(struct ip_info *ipi, const char *_file, int _line) +{ + /* for now we allow ipi to be null */ + if (!ipi) + return; + vxdprintk("put_ip_info(%p[%d])\t%s:%d\n", ipi, + atomic_read(&ipi->ip_refcount), _file, _line); + if (atomic_dec_and_lock(&ipi->ip_refcount, &iplist_lock)) { + list_del(&ipi->ip_list); + spin_unlock(&iplist_lock); + free_ip_info(ipi); + } +} + +#define task_get_ip_info(i) __task_get_ip_info(i,__FILE__,__LINE__) + +static __inline__ struct ip_info *__task_get_ip_info(struct task_struct *p, + const char *_file, int _line) +{ + struct ip_info *ipi; + + task_lock(p); + ipi = __get_ip_info(p->ip_info, _file, _line); + task_unlock(p); + return ipi; +} + +#define ip_verify_info(p,i) \ + __ip_verify_info((p)->ip_info,i,__FILE__,__LINE__) + +static __inline__ void __ip_verify_info( + struct ip_info *ipa, struct ip_info *ipb, + const char *_file, int _line) +{ + if (ipa == ipb) + 
return; + printk(KERN_ERR "ip bad assumption (%p==%p) at %s:%d\n", + ipa, ipb, _file, _line); +} + + + +#define VX_DEBUG_ACC_RSS 0 +#define VX_DEBUG_ACC_VM 0 +#define VX_DEBUG_ACC_VML 0 + + +#define vx_acc_page(m, d, v, r) \ + __vx_acc_page(&((m)->v), (m)->mm_vx_info, r, d, __FILE__, __LINE__) + +static inline void __vx_acc_page(unsigned long *v, struct vx_info *vxi, + int res, int dir, char *file, int line) +{ + if (v) { + if (dir > 0) + ++(*v); + else + --(*v); + } + if (vxi) { + if (dir > 0) + atomic_inc(&vxi->limit.res[res]); + else + atomic_dec(&vxi->limit.res[res]); + } +} + + +#define vx_acc_pages(m, p, v, r) \ + __vx_acc_pages(&((m)->v), (m)->mm_vx_info, r, p, __FILE__, __LINE__) + +static inline void __vx_acc_pages(unsigned long *v, struct vx_info *vxi, + int res, int pages, char *file, int line) +{ + if ((pages > 1 || pages < -1) && + ((res == RLIMIT_RSS && VX_DEBUG_ACC_RSS) || + (res == RLIMIT_AS && VX_DEBUG_ACC_VM) || + (res == RLIMIT_MEMLOCK && VX_DEBUG_ACC_VML))) + vxdprintk("vx_acc_pages [%5d,%2d]: %5d += %5d in %s:%d\n", + (vxi?vxi->vx_id:-1), res, + (vxi?atomic_read(&vxi->limit.res[res]):0), + pages, file, line); + if (pages == 0) + return; + if (v) + *v += pages; + if (vxi) + atomic_add(pages, &vxi->limit.res[res]); +} + + + +#define vx_acc_vmpage(m,d) vx_acc_page(m, d, total_vm, RLIMIT_AS) +#define vx_acc_vmlpage(m,d) vx_acc_page(m, d, locked_vm, RLIMIT_MEMLOCK) +#define vx_acc_rsspage(m,d) vx_acc_page(m, d, rss, RLIMIT_RSS) + +#define vx_acc_vmpages(m,p) vx_acc_pages(m, p, total_vm, RLIMIT_AS) +#define vx_acc_vmlpages(m,p) vx_acc_pages(m, p, locked_vm, RLIMIT_MEMLOCK) +#define vx_acc_rsspages(m,p) vx_acc_pages(m, p, rss, RLIMIT_RSS) + +#define vx_pages_add(s,r,p) __vx_acc_pages(0, s, r, p, __FILE__, __LINE__) +#define vx_pages_sub(s,r,p) vx_pages_add(s, r, -(p)) + +#define vx_vmpages_inc(m) vx_acc_vmpage(m, 1) +#define vx_vmpages_dec(m) vx_acc_vmpage(m,-1) +#define vx_vmpages_add(m,p) vx_acc_vmpages(m, p) +#define vx_vmpages_sub(m,p) 
vx_acc_vmpages(m,-(p)) + +#define vx_vmlocked_inc(m) vx_acc_vmlpage(m, 1) +#define vx_vmlocked_dec(m) vx_acc_vmlpage(m,-1) +#define vx_vmlocked_add(m,p) vx_acc_vmlpages(m, p) +#define vx_vmlocked_sub(m,p) vx_acc_vmlpages(m,-(p)) + +#define vx_rsspages_inc(m) vx_acc_rsspage(m, 1) +#define vx_rsspages_dec(m) vx_acc_rsspage(m,-1) +#define vx_rsspages_add(m,p) vx_acc_rsspages(m, p) +#define vx_rsspages_sub(m,p) vx_acc_rsspages(m,-(p)) + + + +#define vx_pages_avail(m, p, r) \ + __vx_pages_avail((m)->mm_vx_info, (r), (p), __FILE__, __LINE__) + +static inline int __vx_pages_avail(struct vx_info *vxi, + int res, int pages, char *file, int line) +{ + if ((res == RLIMIT_RSS && VX_DEBUG_ACC_RSS) || + (res == RLIMIT_AS && VX_DEBUG_ACC_VM) || + (res == RLIMIT_MEMLOCK && VX_DEBUG_ACC_VML)) + printk("vx_pages_avail[%5d,%2d]: %5ld > %5d + %5d in %s:%d\n", + (vxi?vxi->vx_id:-1), res, + (vxi?vxi->limit.rlim[res]:1), + (vxi?atomic_read(&vxi->limit.res[res]):0), + pages, file, line); + if (!vxi) + return 1; + if (vxi->limit.rlim[res] == RLIM_INFINITY) + return 1; + if (vxi->limit.rlim[res] < atomic_read(&vxi->limit.res[res]) + pages) + return 0; + return 1; +} + +#define vx_vmpages_avail(m,p) vx_pages_avail(m, p, RLIMIT_AS) +#define vx_vmlocked_avail(m,p) vx_pages_avail(m, p, RLIMIT_MEMLOCK) +#define vx_rsspages_avail(m,p) vx_pages_avail(m, p, RLIMIT_RSS) + +/* procfs ioctls */ + +#define FIOC_GETXFLG _IOR('x', 5, long) +#define FIOC_SETXFLG _IOW('x', 6, long) + + +#endif diff -NurpP --minimal linux-2.6.2-rc1/include/linux/vserver/context.h linux-2.6.2-rc1-vs0.05.1/include/linux/vserver/context.h --- linux-2.6.2-rc1/include/linux/vserver/context.h Thu Jan 1 01:00:00 1970 +++ linux-2.6.2-rc1-vs0.05.1/include/linux/vserver/context.h Sat Jan 24 06:06:06 2004 @@ -0,0 +1,134 @@ +#ifndef _VX_CONTEXT_H +#define _VX_CONTEXT_H + + +#include + + +#define MAX_S_CONTEXT 65535 /* Arbitrary limit */ +#define MIN_D_CONTEXT 49152 /* dynamic contexts start here */ + +#define VX_DYNAMIC_ID (-1UL) /* id 
for dynamic context */ + + +#include + +struct _vx_virt { + int nr_threads; + int nr_running; + int max_threads; + unsigned long total_forks; + + unsigned int bias_cswtch; + long bias_jiffies; + long bias_idle; + + struct new_utsname utsname; +}; + + +#include +#include +#include + +#include +#include + +struct vx_info { + struct list_head vx_list; /* linked list of contexts */ + xid_t vx_id; /* context id */ + atomic_t vx_refcount; /* refcount */ + struct vx_info *vx_parent; /* parent context */ + + struct proc_dir_entry *vx_procent; /* proc entry */ + unsigned int vx_flags; /* VX_INFO_xxx */ + pid_t vx_initpid; /* PID of fake init process */ + + struct _vx_virt virt; /* virtual/bias stuff */ + struct _vx_limit limit; /* vserver limits */ + struct _vx_sched sched; /* vserver scheduler */ + + char vx_name[65]; /* vserver name */ +}; + + +extern spinlock_t vxlist_lock; +extern struct list_head vx_infos; + + +#define VX_ADMIN 0x0001 +#define VX_WATCH 0x0002 +#define VX_DUMMY 0x0008 + +#define VX_IDENT 0x0010 +#define VX_EQUIV 0x0020 +#define VX_PARENT 0x0040 +#define VX_CHILD 0x0080 + +#define VX_ARG_MASK 0x00F0 + +#define VX_DYNAMIC 0x0100 +#define VX_STATIC 0x0200 + +#define VX_ATR_MASK 0x0F00 + + +void free_vx_info(struct vx_info *); + +extern struct vx_info *find_vx_info(int); +extern struct vx_info *find_or_create_vx_info(int); + + +#include + +/* vinfo commands */ + +#define VCMD_task_xid VC_CMD(VINFO, 1, 0) +#define VCMD_task_nid VC_CMD(VINFO, 2, 0) + +extern int vc_task_xid(uint32_t, void *); + + +#define VCMD_vx_info VC_CMD(VINFO, 5, 0) +#define VCMD_nx_info VC_CMD(VINFO, 6, 0) + +struct vcmd_vx_info_v0 { + uint32_t xid; + uint32_t initpid; + /* more to come */ +}; + +extern int vc_vx_info(uint32_t, void *); + + +/* virtual host info names */ + +#define VCMD_vx_set_vhi_name VC_CMD(VHOST, 1, 0) +#define VCMD_vx_get_vhi_name VC_CMD(VHOST, 2, 0) + +extern int vc_set_vhi_name(uint32_t, void *); +extern int vc_get_vhi_name(uint32_t, void *); + +struct 
vcmd_vx_vhi_name_v0 { + uint32_t field; + char name[65]; +}; + + +enum vx_vhi_name_field { + VHIN_CONTEXT=0, + VHIN_SYSNAME, + VHIN_NODENAME, + VHIN_RELEASE, + VHIN_VERSION, + VHIN_MACHINE, + VHIN_DOMAINNAME, +}; + + +// EXPORT_SYMBOL_GPL(vxlist_lock); +// EXPORT_SYMBOL_GPL(vx_infos); + +// EXPORT_SYMBOL_GPL(find_vx_info); + +#endif diff -NurpP --minimal linux-2.6.2-rc1/include/linux/vserver/inode.h linux-2.6.2-rc1-vs0.05.1/include/linux/vserver/inode.h --- linux-2.6.2-rc1/include/linux/vserver/inode.h Thu Jan 1 01:00:00 1970 +++ linux-2.6.2-rc1-vs0.05.1/include/linux/vserver/inode.h Sat Jan 24 05:45:51 2004 @@ -0,0 +1,41 @@ +#ifndef _VX_INODE_H +#define _VX_INODE_H + + +#include + +/* inode vserver commands */ + +#define VCMD_get_iattr VC_CMD(INODE, 1, 0) +#define VCMD_set_iattr VC_CMD(INODE, 2, 0) + +struct vcmd_ctx_iattr_v0 { + /* device handle in id */ + uint64_t ino; + uint32_t xid; + uint32_t flags; + uint32_t mask; +}; + +#define IATTR_XID 0x01000000 + +#define IATTR_ADMIN 0x00000001 +#define IATTR_WATCH 0x00000002 +#define IATTR_HIDE 0x00000004 +#define IATTR_FLAGS 0x00000007 + +#define IATTR_BARRIER 0x00010000 +#define IATTR_IUNLINK 0x00020000 + + +extern int vc_get_iattr(uint32_t, void *); +extern int vc_set_iattr(uint32_t, void *); + + +/* inode ioctls */ + +#define FIOC_GETXFLG _IOR('x', 5, long) +#define FIOC_SETXFLG _IOW('x', 6, long) + + +#endif diff -NurpP --minimal linux-2.6.2-rc1/include/linux/vserver/legacy.h linux-2.6.2-rc1-vs0.05.1/include/linux/vserver/legacy.h --- linux-2.6.2-rc1/include/linux/vserver/legacy.h Thu Jan 1 01:00:00 1970 +++ linux-2.6.2-rc1-vs0.05.1/include/linux/vserver/legacy.h Sat Jan 24 05:14:16 2004 @@ -0,0 +1,57 @@ +#ifndef _VX_LEGACY_H +#define _VX_LEGACY_H + + +#include +#include + +/* compatibiliy vserver commands */ + +#define VCMD_new_s_context VC_CMD(COMPAT, 1, 1) +#define VCMD_set_ipv4root VC_CMD(COMPAT, 2, 3) + +/* compatibiliy vserver arguments */ + +struct vcmd_new_s_context_v1 { + uint32_t remove_cap; + uint32_t 
flags; +}; + +struct vcmd_set_ipv4root_v3 { + /* number of pairs in id */ + uint32_t broadcast; + struct { + uint32_t ip; + uint32_t mask; + } ip_mask_pair[NB_IPV4ROOT]; +}; + + +#define VX_INFO_LOCK 1 /* Can't request a new vx_id */ +#define VX_INFO_SCHED 2 /* All process in the vx_id */ + /* Contribute to the schedular */ +#define VX_INFO_NPROC 4 /* Limit number of processes in a context */ +#define VX_INFO_PRIVATE 8 /* Noone can join this security context */ +#define VX_INFO_INIT 16 /* This process wants to become the */ + /* logical process 1 of the security */ + /* context */ +#define VX_INFO_HIDEINFO 32 /* Hide some information in /proc */ +#define VX_INFO_ULIMIT 64 /* Use ulimit of the current process */ + /* to become the global limits */ + /* of the context */ + +#define MAX_S_CONTEXT 65535 /* Arbitrary limit */ +#define MIN_D_CONTEXT 49152 /* dynamic contexts start here */ + +#define VX_DYNAMIC_ID (-1UL) /* id for dynamic context */ + +#define NB_S_CONTEXT 16 + +#define NB_IPV4ROOT 16 + + +extern int vc_new_s_context(uint32_t, void *); +extern int vc_set_ipv4root(uint32_t, void *); + + +#endif diff -NurpP --minimal linux-2.6.2-rc1/include/linux/vserver/limit.h linux-2.6.2-rc1-vs0.05.1/include/linux/vserver/limit.h --- linux-2.6.2-rc1/include/linux/vserver/limit.h Thu Jan 1 01:00:00 1970 +++ linux-2.6.2-rc1-vs0.05.1/include/linux/vserver/limit.h Sat Jan 24 05:54:14 2004 @@ -0,0 +1,49 @@ +#ifndef _VX_LIMIT_H +#define _VX_LIMIT_H + + +#include + +/* rlimit vserver commands */ + +#define VCMD_get_rlimit VC_CMD(RLIMIT, 1, 0) +#define VCMD_set_rlimit VC_CMD(RLIMIT, 2, 0) +#define VCMD_get_rlimit_mask VC_CMD(RLIMIT, 3, 0) + +struct vcmd_ctx_rlimit_v0 { + uint32_t id; + uint64_t minimum; + uint64_t softlimit; + uint64_t maximum; +}; + +struct vcmd_ctx_rlimit_mask_v0 { + uint32_t minimum; + uint32_t softlimit; + uint32_t maximum; +}; + +#define CRLIM_UNSET (0ULL) +#define CRLIM_INFINITY (~0ULL) +#define CRLIM_KEEP (~1ULL) + + +extern int vc_get_rlimit(uint32_t, 
void *); +extern int vc_set_rlimit(uint32_t, void *); +extern int vc_get_rlimit_mask(uint32_t, void *); + + +#include +#include + +/* context sub struct */ + +struct _vx_limit { + atomic_t ticks; + + unsigned long rlim[RLIM_NLIMITS]; /* Per context limit */ + atomic_t res[RLIM_NLIMITS]; /* Current value */ +}; + + +#endif diff -NurpP --minimal linux-2.6.2-rc1/include/linux/vserver/network.h linux-2.6.2-rc1-vs0.05.1/include/linux/vserver/network.h --- linux-2.6.2-rc1/include/linux/vserver/network.h Thu Jan 1 01:00:00 1970 +++ linux-2.6.2-rc1-vs0.05.1/include/linux/vserver/network.h Sat Jan 24 05:46:08 2004 @@ -0,0 +1,43 @@ +#ifndef _VX_NETWORK_H +#define _VX_NETWORK_H + + +#define NB_IPV4ROOT 16 + +#include +#include +#include +#include +#include + + +struct ip_info { + struct list_head ip_list; /* linked list of ipinfos */ + atomic_t ip_refcount; + int nbipv4; + __u32 ipv4[NB_IPV4ROOT];/* Process can only bind to these IPs */ + /* The first one is used to connect */ + /* and for bind any service */ + /* The other must be used explicity when */ + /* binding */ + __u32 mask[NB_IPV4ROOT];/* Netmask for each ipv4 */ + /* Used to select the proper source address */ + /* for sockets */ + __u32 v4_bcast; /* Broadcast address used to receive UDP packets */ +}; + + +extern spinlock_t iplist_lock; +extern struct list_head ip_infos; + + +void free_ip_info(struct ip_info *); +struct ip_info *create_ip_info(void); + + +// EXPORT_SYMBOL_GPL(iplist_lock); +// EXPORT_SYMBOL_GPL(ip_infos); + +// EXPORT_SYMBOL_GPL(find_ip_info); + +#endif diff -NurpP --minimal linux-2.6.2-rc1/include/linux/vserver/sched.h linux-2.6.2-rc1-vs0.05.1/include/linux/vserver/sched.h --- linux-2.6.2-rc1/include/linux/vserver/sched.h Thu Jan 1 01:00:00 1970 +++ linux-2.6.2-rc1-vs0.05.1/include/linux/vserver/sched.h Sat Jan 24 06:12:29 2004 @@ -0,0 +1,42 @@ +#ifndef _VX_SCHED_H +#define _VX_SCHED_H + + +#include + +/* sched vserver commands */ + +#define VCMD_set_sched VC_CMD(SYSTEST, 1, 1) + +/* Options - 
these ones enable or disable the CTX_SCHED flag */ +#define TBF_SCHED_ENABLE 0x0001 +#define TBF_SCHED_DISABLE 0x0002 + +struct vcmd_set_sched_v1 { + uint32_t options; + + int32_t fill_rate; + int32_t period; + int32_t fill_level; + int32_t bucket_size; +}; + + +extern int vc_set_sched(uint32_t, void *); + +#include + +/* context sub struct */ + +struct _vx_sched { + spinlock_t tokens_lock; /* lock for this structure */ + + int tokens; /* number of CPU tokens in this context */ + int tokens_fr; /* Fill rate: add X tokens... */ + int tokens_div; /* Divisor: per Y jiffies */ + int tokens_max; /* Limit: no more than N tokens */ + uint32_t tokens_jfy; /* add an integral multiple of Y to this */ +}; + + +#endif diff -NurpP --minimal linux-2.6.2-rc1/include/linux/vserver/signal.h linux-2.6.2-rc1-vs0.05.1/include/linux/vserver/signal.h --- linux-2.6.2-rc1/include/linux/vserver/signal.h Thu Jan 1 01:00:00 1970 +++ linux-2.6.2-rc1-vs0.05.1/include/linux/vserver/signal.h Sat Jan 24 06:02:39 2004 @@ -0,0 +1,20 @@ +#ifndef _VX_SIGNAL_H +#define _VX_SIGNAL_H + + +#include + +/* context signalling */ + +#define VCMD_ctx_kill VC_CMD(PROCTRL, 1, 0) + +struct vcmd_ctx_kill_v0 { + int32_t pid; + int32_t sig; +}; + + +extern int vc_ctx_kill(uint32_t, void *); + + +#endif diff -NurpP --minimal linux-2.6.2-rc1/include/linux/vserver/switch.h linux-2.6.2-rc1-vs0.05.1/include/linux/vserver/switch.h --- linux-2.6.2-rc1/include/linux/vserver/switch.h Thu Jan 1 01:00:00 1970 +++ linux-2.6.2-rc1-vs0.05.1/include/linux/vserver/switch.h Sat Jan 24 05:14:16 2004 @@ -0,0 +1,82 @@ +#ifndef _LINUX_VIRTUAL_H +#define _LINUX_VIRTUAL_H + +#include + +#define VC_CATEGORY(c) (((c) >> 24) & 0x3F) +#define VC_COMMAND(c) (((c) >> 16) & 0xFF) +#define VC_VERSION(c) ((c) & 0xFFF) + +#define VC_CMD(c,i,v) ((((VC_CAT_ ## c) & 0x3F) << 24) \ + | (((i) & 0xFF) << 16) | ((v) & 0xFFF)) + +/* + + Syscall Matrix V2.4 + + |VERSION|CREATE |MODIFY |MIGRATE|CONTROL|EXPERIM| |SPECIAL|SPECIAL| + |STATS |DESTROY|ALTER 
|CHANGE |LIMIT |TEST | | | | + |INFO |SETUP | |MOVE | | | | | | + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + SYSTEM |VERSION| | | | | | |DEVICES| | + HOST | 00| 01| 02| 03| 04| 05| | 06| 07| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + CPU | | | | | | | |SCHED. | | + PROCESS| 08| 09| 10| 11| 12| 13| | 14| 15| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + MEMORY | | | | | | | |SWAP | | + | 16| 17| 18| 19| 20| 21| | 22| 23| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + NETWORK| | | | | | | |SERIAL | | + | 24| 25| 26| 27| 28| 29| | 30| 31| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + DISK | | | | | | | |INODE | | + VFS | 32| 33| 34| 35| 36| 37| | 38| 39| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + OTHER | | | | | | | |VINFO | | + | 40| 41| 42| 43| 44| 45| | 46| 47| + =======+=======+=======+=======+=======+=======+=======+ +=======+=======+ + SPECIAL| | | | | | | | | | + | 48| 49| 50| 51| 52| 53| | 54| 55| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + SPECIAL| | | | |RLIMIT |SYSCALL| | |COMPAT | + | 56| 57| 58| 59| 60|TEST 61| | 62| 63| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + +*/ + +#define VC_CAT_VERSION 0 + +#define VC_CAT_VHOST 2 + +#define VC_CAT_PROCTRL 12 + +#define VC_CAT_SCHED 14 +#define VC_CAT_INODE 38 + +#define VC_CAT_VINFO 46 + +#define VC_CAT_RLIMIT 60 + +#define VC_CAT_SYSTEST 61 +#define VC_CAT_COMPAT 63 + +/* interface version */ + +#define VCI_VERSION 0x00010010 + + +/* query version */ + +#define VCMD_get_version VC_CMD(VERSION, 0, 0) + + +#include + +#define ENOTSUP ENOTSUPP + + +// EXPORT_SYMBOL_GPL(sys_vserver); + + +#endif /* _LINUX_VIRTUAL_H */ diff -NurpP --minimal linux-2.6.2-rc1/include/linux/vserver.h linux-2.6.2-rc1-vs0.05.1/include/linux/vserver.h --- 
linux-2.6.2-rc1/include/linux/vserver.h Thu Jan 1 01:00:00 1970 +++ linux-2.6.2-rc1-vs0.05.1/include/linux/vserver.h Sat Jan 24 05:14:16 2004 @@ -0,0 +1,8 @@ +#ifndef _LINUX_VSERVER_H +#define _LINUX_VSERVER_H + +#include +#include +#include + +#endif diff -NurpP --minimal linux-2.6.2-rc1/include/net/route.h linux-2.6.2-rc1-vs0.05.1/include/net/route.h --- linux-2.6.2-rc1/include/net/route.h Fri Jan 9 07:59:02 2004 +++ linux-2.6.2-rc1-vs0.05.1/include/net/route.h Sat Jan 24 05:46:08 2004 @@ -33,6 +33,7 @@ #include #include #include +#include #ifndef __KERNEL__ #warning This file is not supposed to be used outside of kernel. @@ -160,6 +161,45 @@ static inline int ip_route_connect(struc .dport = dport } } }; int err; + struct ip_info *ip_info = current->ip_info; + if (ip_info) { + __u32 ipv4root = ip_info->ipv4[0]; + if (ipv4root) { + int n = ip_info->nbipv4; + if (src == 0) { + if (n > 1) { + u32 foundsrc; + int i; + err = __ip_route_output_key(rp, &fl); + if (err) + return err; + foundsrc = (*rp)->rt_src; + ip_rt_put(*rp); + for (i=0; i<n; i++){ + u32 mask = ip_info->mask[i]; + u32 ipv4 = ip_info->ipv4[i]; + u32 netipv4 = ipv4 & mask; + if ((foundsrc & mask) == netipv4) { + src = ipv4; + break; + } + } + } + if (src == 0) + src = dst == 0x0100007f + ? 
0x0100007f: ipv4root; + } else { + int i; + for (i=0; i<n; i++){ + if (ip_info->ipv4[i] == src) break; + } + if (i == n) + return -EPERM; + } + if (dst == 0x0100007f && !vx_check(0, VX_ADMIN)) + dst = ipv4root; + } + } if (!dst || !src) { err = __ip_route_output_key(rp, &fl); if (err) diff -NurpP --minimal linux-2.6.2-rc1/include/net/sock.h linux-2.6.2-rc1-vs0.05.1/include/net/sock.h --- linux-2.6.2-rc1/include/net/sock.h Sat Jan 24 03:18:19 2004 +++ linux-2.6.2-rc1-vs0.05.1/include/net/sock.h Sat Jan 24 05:46:08 2004 @@ -50,6 +50,7 @@ #include #include +#include #include #include @@ -109,6 +110,8 @@ struct sock_common { struct hlist_node skc_node; struct hlist_node skc_bind_node; atomic_t skc_refcnt; + xid_t skc_xid; + struct ip_info *skc_ip_info; }; /** @@ -186,6 +189,8 @@ struct sock { #define sk_node __sk_common.skc_node #define sk_bind_node __sk_common.skc_bind_node #define sk_refcnt __sk_common.skc_refcnt +#define sk_xid __sk_common.skc_xid +#define sk_ip_info __sk_common.skc_ip_info volatile unsigned char sk_zapped; unsigned char sk_shutdown; unsigned char sk_use_write_queue; diff -NurpP --minimal linux-2.6.2-rc1/include/net/tcp.h linux-2.6.2-rc1-vs0.05.1/include/net/tcp.h --- linux-2.6.2-rc1/include/net/tcp.h Sat Jan 24 03:18:19 2004 +++ linux-2.6.2-rc1-vs0.05.1/include/net/tcp.h Sat Jan 24 05:46:08 2004 @@ -195,6 +195,8 @@ struct tcp_tw_bucket { #define tw_node __tw_common.skc_node #define tw_bind_node __tw_common.skc_bind_node #define tw_refcnt __tw_common.skc_refcnt +#define tw_xid __tw_common.skc_xid +#define tw_ip_info __tw_common.skc_ip_info volatile unsigned char tw_substate; unsigned char tw_rcv_wscale; __u16 tw_sport; diff -NurpP --minimal linux-2.6.2-rc1/kernel/Makefile linux-2.6.2-rc1-vs0.05.1/kernel/Makefile --- linux-2.6.2-rc1/kernel/Makefile Fri Jan 9 07:59:10 2004 +++ linux-2.6.2-rc1-vs0.05.1/kernel/Makefile Sat Jan 24 05:14:16 2004 @@ -8,6 +8,11 @@ obj-y = sched.o fork.o exec_domain.o signal.o sys.o kmod.o workqueue.o pid.o \ rcupdate.o intermodule.o extable.o 
params.o posix-timers.o +# mod-subdirs := vserver + +subdir-y += vserver +obj-y += vserver/vserver.o + obj-$(CONFIG_FUTEX) += futex.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o obj-$(CONFIG_SMP) += cpu.o diff -NurpP --minimal linux-2.6.2-rc1/kernel/sys.c linux-2.6.2-rc1-vs0.05.1/kernel/sys.c --- linux-2.6.2-rc1/kernel/sys.c Sat Jan 24 03:18:19 2004 +++ linux-2.6.2-rc1-vs0.05.1/kernel/sys.c Sat Jan 24 06:15:34 2004 @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -317,7 +318,7 @@ asmlinkage long sys_setpriority(int whic if (!who) user = current->user; else - user = find_user(who); + user = find_user(vx_current_xid(), who); if (!user) goto out_unlock; @@ -376,7 +377,7 @@ asmlinkage long sys_getpriority(int whic if (!who) user = current->user; else - user = find_user(who); + user = find_user(vx_current_xid(), who); if (!user) goto out_unlock; @@ -617,7 +618,7 @@ static int set_user(uid_t new_ruid, int { struct user_struct *new_user; - new_user = alloc_uid(new_ruid); + new_user = alloc_uid(vx_current_xid(), new_ruid); if (!new_user) return -EAGAIN; diff -NurpP --minimal linux-2.6.2-rc1/kernel/user.c linux-2.6.2-rc1-vs0.05.1/kernel/user.c --- linux-2.6.2-rc1/kernel/user.c Fri Jan 9 07:59:26 2004 +++ linux-2.6.2-rc1-vs0.05.1/kernel/user.c Sat Jan 24 05:45:51 2004 @@ -20,8 +20,8 @@ #define UIDHASH_BITS 8 #define UIDHASH_SZ (1 << UIDHASH_BITS) #define UIDHASH_MASK (UIDHASH_SZ - 1) -#define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK) -#define uidhashentry(uid) (uidhash_table + __uidhashfn((uid))) +#define __uidhashfn(xid,uid) ((((uid) >> UIDHASH_BITS) + ((uid)^(xid))) & UIDHASH_MASK) +#define uidhashentry(xid,uid) (uidhash_table + __uidhashfn((xid),(uid))) static kmem_cache_t *uid_cachep; static struct list_head uidhash_table[UIDHASH_SZ]; @@ -46,7 +46,7 @@ static inline void uid_hash_remove(struc list_del(&up->uidhash_list); } -static inline struct user_struct *uid_hash_find(uid_t uid, struct list_head *hashent) +static inline 
struct user_struct *uid_hash_find(xid_t xid, uid_t uid, struct list_head *hashent) { struct list_head *up; @@ -55,7 +55,7 @@ static inline struct user_struct *uid_ha user = list_entry(up, struct user_struct, uidhash_list); - if(user->uid == uid) { + if(user->uid == uid && user->vx_id == xid) { atomic_inc(&user->__count); return user; } @@ -64,9 +64,9 @@ static inline struct user_struct *uid_ha return NULL; } -struct user_struct *find_user(uid_t uid) +struct user_struct *find_user(xid_t xid, uid_t uid) { - return uid_hash_find(uid, uidhashentry(uid)); + return uid_hash_find(xid, uid, uidhashentry(xid, uid)); } void free_uid(struct user_struct *up) @@ -78,13 +78,13 @@ void free_uid(struct user_struct *up) } } -struct user_struct * alloc_uid(uid_t uid) +struct user_struct * alloc_uid(xid_t xid, uid_t uid) { - struct list_head *hashent = uidhashentry(uid); + struct list_head *hashent = uidhashentry(xid, uid); struct user_struct *up; spin_lock(&uidhash_lock); - up = uid_hash_find(uid, hashent); + up = uid_hash_find(xid, uid, hashent); spin_unlock(&uidhash_lock); if (!up) { @@ -94,6 +94,7 @@ struct user_struct * alloc_uid(uid_t uid if (!new) return NULL; new->uid = uid; + new->vx_id = xid; atomic_set(&new->__count, 1); atomic_set(&new->processes, 0); atomic_set(&new->files, 0); @@ -103,7 +104,7 @@ struct user_struct * alloc_uid(uid_t uid * on adding the same user already.. 
*/ spin_lock(&uidhash_lock); - up = uid_hash_find(uid, hashent); + up = uid_hash_find(xid, uid, hashent); if (up) { kmem_cache_free(uid_cachep, new); } else { @@ -148,7 +149,7 @@ static int __init uid_cache_init(void) /* Insert the root user immediately (init already runs as root) */ spin_lock(&uidhash_lock); - uid_hash_insert(&root_user, uidhashentry(0)); + uid_hash_insert(&root_user, uidhashentry(0,0)); spin_unlock(&uidhash_lock); return 0; diff -NurpP --minimal linux-2.6.2-rc1/kernel/vserver/Makefile linux-2.6.2-rc1-vs0.05.1/kernel/vserver/Makefile --- linux-2.6.2-rc1/kernel/vserver/Makefile Thu Jan 1 01:00:00 1970 +++ linux-2.6.2-rc1-vs0.05.1/kernel/vserver/Makefile Sat Jan 24 06:36:45 2004 @@ -0,0 +1,11 @@ +# +# Makefile for the Linux vserver routines. +# + + +obj-y += vserver.o + +vserver-y := switch.o context.o network.o inode.o limit.o signal.o + +vserver-y += legacy.o + diff -NurpP --minimal linux-2.6.2-rc1/kernel/vserver/context.c linux-2.6.2-rc1-vs0.05.1/kernel/vserver/context.c --- linux-2.6.2-rc1/kernel/vserver/context.c Thu Jan 1 01:00:00 1970 +++ linux-2.6.2-rc1-vs0.05.1/kernel/vserver/context.c Sat Jan 24 06:04:27 2004 @@ -0,0 +1,302 @@ +/* + * linux/kernel/vserver/context.c + * + * Virtual Server: Context Support + * + * Copyright (C) 2003-2004 Herbert Pötzl + * + * V0.01 context helper + * V0.02 vx_ctx_kill syscall command + * V0.03 replaced context_info calls + * V0.04 redesign of struct (de)alloc + * V0.05 rlimit basic implementation + * + */ + +#include +//#include +#include +#include +#include +//#include +#include +//#include +#include + +#include +//#include + + + +/* system functions */ + + +LIST_HEAD(vx_infos); + +spinlock_t vxlist_lock + __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; + + +/* + * struct vx_info allocation and deallocation + */ + +static struct vx_info *alloc_vx_info(int id) +{ + struct vx_info *new = NULL; + int lim; + + vxdprintk("alloc_vx_info(%d)\n", id); + /* would this benefit from a slab cache? 
*/ + new = kmalloc(sizeof(struct vx_info), GFP_KERNEL); + if (!new) + return 0; + + memset (new, 0, sizeof(struct vx_info)); + new->vx_id = id; + INIT_LIST_HEAD(&new->vx_list); + /* rest of init goes here */ + + for (lim=0; lim<RLIM_NLIMITS; lim++) + new->limit.rlim[lim] = RLIM_INFINITY; + + /* scheduling; hard code starting values as constants */ + new->sched.tokens_fr = 1; + new->sched.tokens_div = 4; + new->sched.tokens = HZ * 5; + new->sched.tokens_max = HZ * 10; + new->sched.tokens_jfy = jiffies; + new->sched.tokens_lock = SPIN_LOCK_UNLOCKED; + + new->virt.nr_threads = 1; + // new->virt.bias_cswtch = kstat.context_swtch; + new->virt.bias_jiffies = jiffies; + /* new->virt.bias_idle = init_tasks[0]->times.tms_utime + + init_tasks[0]->times.tms_stime; + */ + down_read(&uts_sem); + new->virt.utsname = system_utsname; + up_read(&uts_sem); + + vxdprintk("alloc_vx_info(%d) = %p\n", id, new); + return new; +} + +void free_vx_info(struct vx_info *vxi) +{ + vxdprintk("free_vx_info(%p)\n", vxi); + kfree(vxi); +} + + +/* + * struct vx_info search by id + * assumes vxlist_lock is held + */ + +static __inline__ struct vx_info *__find_vx_info(int id) +{ + struct vx_info *vxi; + + list_for_each_entry(vxi, &vx_infos, vx_list) + if (vxi->vx_id == id) + return vxi; + return 0; +} + + +/* + * struct vx_info ref stuff + */ + +struct vx_info *find_vx_info(int id) +{ + struct vx_info *vxi; + + spin_lock(&vxlist_lock); + if ((vxi = __find_vx_info(id))) + get_vx_info(vxi); + spin_unlock(&vxlist_lock); + return vxi; +} + + +/* + * find an unused dynamic context id (0 if none is free) + * assumes vxlist_lock is held + */ + +static __inline__ xid_t __vx_dynamic_id(void) +{ + static xid_t seq = MAX_S_CONTEXT; + xid_t barrier = seq; + + do { + if (++seq > MAX_S_CONTEXT) + seq = MIN_D_CONTEXT; + if (!__find_vx_info(seq)) + return seq; + } while (barrier != seq); + return 0; +} + + +struct vx_info *find_or_create_vx_info(int id) +{ + struct vx_info *new, *vxi = NULL; + + vxdprintk("find_or_create_vx_info(%d)\n", id); + if (!(new = 
alloc_vx_info(id))) + return 0; + + spin_lock(&vxlist_lock); + + /* dynamic context requested */ + if (id == VX_DYNAMIC_ID) { + id = __vx_dynamic_id(); + if (!id) { + printk(KERN_ERR "no dynamic context available.\n"); + goto out_unlock; + } + new->vx_id = id; + } + /* existing context requested */ + else if ((vxi = __find_vx_info(id))) { + vxdprintk("find_or_create_vx_info(%d) = %p (found)\n", id, vxi); + get_vx_info(vxi); + goto out_unlock; + } + + /* new context requested */ + vxdprintk("find_or_create_vx_info(%d) = %p (new)\n", id, new); + atomic_set(&new->vx_refcount, 1); + list_add(&new->vx_list, &vx_infos); + vxi = new, new = NULL; + +out_unlock: + spin_unlock(&vxlist_lock); + if (new) + free_vx_info(new); + return vxi; +} + + +#include + + +int vc_task_xid(uint32_t id, void *data) +{ + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + switch (id) { + case 0: + break; + + case -1: + break; + + default: + break; + + } + return 0; +} + + +int vc_vx_info(uint32_t id, void *data) +{ + struct vx_info *vxi; + struct vcmd_vx_info_v0 vc_data; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE)) + return -EPERM; + + vxi = find_vx_info(id); + if (!vxi) + return -ESRCH; + vc_data.xid = vxi->vx_id; /* fill every field: nothing uninitialised may reach user space */ + vc_data.initpid = vxi->vx_initpid; + put_vx_info(vxi); /* drop the reference taken by find_vx_info() */ + return copy_to_user (data, &vc_data, sizeof(vc_data)) ? -EFAULT : 0; +} + + +/* virtual host info names */ + +static char * vx_vhi_name(struct vx_info *vxi, int id) +{ + switch (id) { + case VHIN_CONTEXT: + return vxi->vx_name; + case VHIN_SYSNAME: + return vxi->virt.utsname.sysname; + case VHIN_NODENAME: + return vxi->virt.utsname.nodename; + case VHIN_RELEASE: + return vxi->virt.utsname.release; + case VHIN_VERSION: + return vxi->virt.utsname.version; + case VHIN_MACHINE: + return vxi->virt.utsname.machine; + case VHIN_DOMAINNAME: + return vxi->virt.utsname.domainname; + default: + return NULL; + } +} + +int vc_set_vhi_name(uint32_t id, void *data) +{ + struct vx_info *vxi; + struct vcmd_vx_vhi_name_v0 vc_data; + char *name; + + 
if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + vc_data.name[64] = 0; /* force NUL termination of the user-supplied name */ + vxi = find_vx_info(id); + if (!vxi) + return -ESRCH; + + name = vx_vhi_name(vxi, vc_data.field); + if (name) + memcpy(name, vc_data.name, 65); + put_vx_info(vxi); + return (name ? 0 : -EFAULT); +} + +int vc_get_vhi_name(uint32_t id, void *data) +{ + struct vx_info *vxi; + struct vcmd_vx_vhi_name_v0 vc_data; + char *name; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + vxi = find_vx_info(id); + if (!vxi) + return -ESRCH; + + name = vx_vhi_name(vxi, vc_data.field); + if (!name) + goto out_put; + + memcpy(vc_data.name, name, 65); + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + name = NULL; /* fall through: drop the reference first, then report -EFAULT */ +out_put: + put_vx_info(vxi); + return (name ? 0 : -EFAULT); +} + + + diff -NurpP --minimal linux-2.6.2-rc1/kernel/vserver/inode.c linux-2.6.2-rc1-vs0.05.1/kernel/vserver/inode.c --- linux-2.6.2-rc1/kernel/vserver/inode.c Thu Jan 1 01:00:00 1970 +++ linux-2.6.2-rc1-vs0.05.1/kernel/vserver/inode.c Sat Jan 24 07:54:13 2004 @@ -0,0 +1,174 @@ +/* + * linux/kernel/vserver/inode.c + * + * Virtual Server: File System Support + * + * Copyright (C) 2004 Herbert Pötzl + * + * V0.01 separated from vcontext V0.05 + * + */ + +#include +//#include +//#include +//#include +//#include +//#include +#include +#include +#include +//#include +#include + +#include +#include +//#include + + +int vc_get_iattr(uint32_t id, void *data) +{ + struct super_block *sb; + struct inode *in; + struct vcmd_ctx_iattr_v0 vc_data; + int ret = -ENOSYS; /* body below is compiled out; report "not implemented" */ +#if 0 + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = -ESRCH; + sb = get_super(to_kdev_t(id)); + if (!sb) + goto out; + in = iget(sb, vc_data.ino); + if (!in) + goto out_drop_sb; + + vc_data.xid = in->i_xid; + vc_data.flags = IATTR_XID + 
| (IS_BARRIER(in) ? IATTR_BARRIER : 0) + | (IS_IUNLINK(in) ? IATTR_IUNLINK : 0); + vc_data.mask = IATTR_XID | IATTR_BARRIER | IATTR_IUNLINK; + + if (sb->s_magic == PROC_SUPER_MAGIC) { + vc_data.flags |= (in->u.proc_i.vx_flags & IATTR_FLAGS); + vc_data.mask |= IATTR_FLAGS; + } + + ret = 0; + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + ret = -EFAULT; + iput(in); +out_drop_sb: + drop_super(sb); +out: +#endif + return ret; +} + +int vc_set_iattr(uint32_t id, void *data) +{ + struct super_block *sb; + struct inode *in; + struct vcmd_ctx_iattr_v0 vc_data; + int ret = -ENOSYS; /* body below is compiled out (#if 0): report that instead of returning an uninitialized value */ +#if 0 + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = -ESRCH; + sb = get_super(to_kdev_t(id)); + if (!sb) + goto out; + + ret = -ENOTSUP; + if ((vc_data.mask & IATTR_FLAGS) && (sb->s_magic != PROC_SUPER_MAGIC)) + goto out_drop_sb; + + ret = -ESRCH; + in = iget(sb, vc_data.ino); + if (!in) + goto out_drop_sb; + + lock_kernel(); + if (vc_data.mask & IATTR_XID) + in->i_xid = vc_data.xid; + + if (vc_data.mask & IATTR_FLAGS) { + unsigned int flags = in->u.proc_i.vx_flags; + unsigned int mask = vc_data.mask; + + in->u.proc_i.vx_flags = (flags & ~(mask & IATTR_FLAGS)) + | (vc_data.flags & IATTR_FLAGS); + } + + if (vc_data.mask & IATTR_BARRIER) + in->i_flags = (in->i_flags & ~S_BARRIER) + | ((vc_data.flags & IATTR_BARRIER) ? S_BARRIER : 0); + if (vc_data.mask & IATTR_IUNLINK) + in->i_flags = (in->i_flags & ~S_IUNLINK) + | ((vc_data.flags & IATTR_IUNLINK) ? 
S_IUNLINK : 0); + mark_inode_dirty(in); + unlock_kernel(); + iput(in); +out_drop_sb: + drop_super(sb); +out: +#endif + return ret; +} + + + +#include + +int vx_proc_ioctl(struct inode * inode, struct file * filp, + unsigned int cmd, unsigned long arg) +{ + struct proc_dir_entry *entry; + int error = 0; + int flags; + + if (inode->i_ino < PROC_DYNAMIC_FIRST || + inode->i_ino >= PROC_DYNAMIC_FIRST+PROC_NDYNAMIC) + return -ENOTTY; + + entry = PROC_I(inode)->pde; + + switch(cmd) { + case FIOC_GETXFLG: { + /* fixme: if stealth, return -ENOTTY */ + error = -EPERM; + flags = entry->vx_flags; + if (capable(CAP_CONTEXT)) + error = put_user(flags, (int *) arg); + break; + } + case FIOC_SETXFLG: { + /* fixme: if stealth, return -ENOTTY */ + error = -EPERM; + if (!capable(CAP_CONTEXT)) + break; + error = -EROFS; + if (IS_RDONLY(inode)) + break; + error = -EFAULT; + if (get_user(flags, (int *) arg)) + break; + error = 0; + entry->vx_flags = flags; + break; + } + default: + return -ENOTTY; + } + return error; +} + diff -NurpP --minimal linux-2.6.2-rc1/kernel/vserver/legacy.c linux-2.6.2-rc1-vs0.05.1/kernel/vserver/legacy.c --- linux-2.6.2-rc1/kernel/vserver/legacy.c Thu Jan 1 01:00:00 1970 +++ linux-2.6.2-rc1-vs0.05.1/kernel/vserver/legacy.c Sat Jan 24 05:14:16 2004 @@ -0,0 +1,211 @@ +/* + * linux/kernel/vserver/legacy.c + * + * Virtual Server: Legacy Funtions + * + * Copyright (C) 2001-2003 Jacques Gelinas + * Copyright (C) 2003-2004 Herbert Pötzl + * + * V0.01 broken out from vcontext.c V0.05 + * + */ + +#include +//#include +//#include +//#include +#include +#include +//#include +#include +#include +//#include + +#include +#include + + +static int vx_migrate_user(struct task_struct *p, struct vx_info *vxi) +{ + struct user_struct *new_user, *old_user; + + if (!p || !vxi) + BUG(); + new_user = alloc_uid(vxi->vx_id, p->uid); + if (!new_user) + return -ENOMEM; + + old_user = p->user; + if (new_user != old_user) { + atomic_inc(&new_user->processes); + 
atomic_dec(&old_user->processes); + p->user = new_user; + } + free_uid(old_user); + return 0; +} + +/* + * migrate task to new context + * gets vxi, puts old_vxi on change + */ + +static int vx_migrate_task(struct task_struct *p, struct vx_info *vxi) +{ + struct vx_info *old_vxi = task_get_vx_info(p); + int ret = 0; + + if (!p || !vxi) + BUG(); + + vxdprintk("vx_migrate_task(%p,%p[#%d.%d)\n", p, vxi, + vxi->vx_id, atomic_read(&vxi->vx_refcount)); + spin_lock(&p->alloc_lock); + if (old_vxi == vxi) + goto out; + + if (!(ret = vx_migrate_user(p, vxi))) { + if (old_vxi) { + old_vxi->virt.nr_threads--; + atomic_dec(&old_vxi->limit.res[RLIMIT_NPROC]); + } + vxi->virt.nr_threads++; + atomic_inc(&vxi->limit.res[RLIMIT_NPROC]); + p->vx_info = get_vx_info(vxi); + p->xid = vxi->vx_id; + if (old_vxi) + put_vx_info(old_vxi); + } +out: + spin_unlock(&p->alloc_lock); + put_vx_info(old_vxi); + return ret; +} + + +static int vx_set_initpid(struct vx_info *vxi, int pid) +{ + int ret = 0; + if (vxi->vx_initpid) + ret = -EPERM; + else + vxi->vx_initpid = pid; + return ret; +} + +int vc_new_s_context(uint32_t ctx, void *data) +{ + int ret = -ENOMEM; + struct vcmd_new_s_context_v1 vc_data; + struct vx_info *new_vxi; + + if (copy_from_user(&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + /* legacy hack, will be removed soon */ + if (ctx == -2) { + /* assign flags and initpid */ + if (!current->vx_info) + return -EINVAL; + ret = 0; + if (vc_data.flags & VX_INFO_INIT) + ret = vx_set_initpid(current->vx_info, current->tgid); + if (ret == 0) { + /* We keep the same vx_id, but lower the capabilities */ + current->cap_bset &= (~vc_data.remove_cap); + ret = vx_current_xid(); + current->vx_info->vx_flags |= vc_data.flags; + } + return ret; + } + + if (!vx_check(0, VX_ADMIN) || + !capable(CAP_SYS_ADMIN) || + (current->vx_info && + (current->vx_info->vx_flags & VX_INFO_LOCK))) + return -EPERM; + + if (((ctx > MAX_S_CONTEXT) && (ctx != VX_DYNAMIC_ID)) || + (ctx == 0)) + return -EINVAL; + + 
if ((ctx == VX_DYNAMIC_ID) || (ctx < MIN_D_CONTEXT)) + new_vxi = find_or_create_vx_info(ctx); + else + new_vxi = find_vx_info(ctx); + + if (!new_vxi) + return -EINVAL; + + ret = vx_migrate_task(current, new_vxi); + if (ret == 0) { + current->cap_bset &= (~vc_data.remove_cap); + new_vxi->vx_flags |= vc_data.flags; + if (vc_data.flags & VX_INFO_INIT) + vx_set_initpid(new_vxi, current->tgid); + if (vc_data.flags & VX_INFO_NPROC) + new_vxi->limit.rlim[RLIMIT_NPROC] = + current->rlim[RLIMIT_NPROC].rlim_max; + ret = new_vxi->vx_id; + } + put_vx_info(new_vxi); + return ret; +} + + + +/* set ipv4 root (syscall) */ + +int vc_set_ipv4root(uint32_t nbip, void *data) +{ + int i, err = -EPERM; + struct vcmd_set_ipv4root_v3 vc_data; + struct ip_info *new_ipi, *ipi = current->ip_info; + + if (nbip < 0 || nbip > NB_IPV4ROOT) + return -EINVAL; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + if (!ipi || ipi->ipv4[0] == 0 || capable(CAP_NET_ADMIN)) + // We are allowed to change everything + err = 0; + else if (ipi) { + int found = 0; + + // We are allowed to select a subset of the currently + // installed IP numbers. 
No new one allowed + // We can't change the broadcast address though + for (i=0; i<nbip; i++) { + int j; + __u32 ipip = vc_data.ip_mask_pair[i].ip; + for (j=0; j<ipi->nbipv4; j++) { + if (ipip == ipi->ipv4[j]) { + found++; + break; + } + } + } + if ((found == nbip) && + (vc_data.broadcast == ipi->v4_bcast)) + err = 0; + } + if (err) + return err; + + new_ipi = create_ip_info(); + if (!new_ipi) + return -ENOMEM; /* create_ip_info() only fails when its allocation fails */ + + new_ipi->nbipv4 = nbip; + for (i=0; i<nbip; i++) { + new_ipi->ipv4[i] = vc_data.ip_mask_pair[i].ip; + new_ipi->mask[i] = vc_data.ip_mask_pair[i].mask; + } + new_ipi->v4_bcast = vc_data.broadcast; + current->ip_info = new_ipi; + put_ip_info(ipi); + return 0; +} + + diff -NurpP --minimal linux-2.6.2-rc1/kernel/vserver/limit.c linux-2.6.2-rc1-vs0.05.1/kernel/vserver/limit.c --- linux-2.6.2-rc1/kernel/vserver/limit.c Thu Jan 1 01:00:00 1970 +++ linux-2.6.2-rc1-vs0.05.1/kernel/vserver/limit.c Sat Jan 24 05:54:03 2004 @@ -0,0 +1,116 @@ +/* + * linux/kernel/vserver/limit.c + * + * Virtual Server: Context Limits + * + * Copyright (C) 2004 Herbert Pötzl + * + * V0.01 broken out from vcontext V0.05 + * + */ + +#include +//#include +//#include +//#include +#include +#include +#include +#include +//#include +//#include + +#include +#include + + +static int is_valid_rlimit(int id) +{ + int valid = 0; + + switch (id) { + case RLIMIT_NPROC: + case RLIMIT_AS: + case RLIMIT_RSS: + valid = 1; + break; + } + return valid; +} + +int vc_get_rlimit(uint32_t id, void *data) +{ + struct vx_info *vxi; + struct vcmd_ctx_rlimit_v0 vc_data; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + if (!is_valid_rlimit(vc_data.id)) + return -ENOTSUPP; + + vxi = find_vx_info(id); + if (!vxi) + return -ESRCH; + + if (vc_data.maximum != CRLIM_KEEP) + vc_data.maximum = vxi->limit.rlim[vc_data.id]; + vc_data.minimum = CRLIM_UNSET; + vc_data.softlimit = CRLIM_UNSET; + put_vx_info(vxi); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +int vc_set_rlimit(uint32_t id, void *data) +{ + struct 
vx_info *vxi; + struct vcmd_ctx_rlimit_v0 vc_data; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + if (!is_valid_rlimit(vc_data.id)) + return -ENOTSUPP; + + vxi = find_vx_info(id); + if (!vxi) + return -ESRCH; + + if (vc_data.maximum != CRLIM_KEEP) + vxi->limit.rlim[vc_data.id] = vc_data.maximum; + printk("setting [%d] = %d\n", vc_data.id, (int)vc_data.maximum); + put_vx_info(vxi); + + return 0; +} + +int vc_get_rlimit_mask(uint32_t id, void *data) +{ + static struct vcmd_ctx_rlimit_mask_v0 mask = { + /* minimum */ + 0 + , /* softlimit */ + 0 + , /* maximum */ + (1 << RLIMIT_NPROC) | + (1 << RLIMIT_AS) | + (1 << RLIMIT_RSS) + }; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE)) + return -EPERM; + if (copy_to_user(data, &mask, sizeof(mask))) + return -EFAULT; + return 0; +} + + diff -NurpP --minimal linux-2.6.2-rc1/kernel/vserver/network.c linux-2.6.2-rc1-vs0.05.1/kernel/vserver/network.c --- linux-2.6.2-rc1/kernel/vserver/network.c Thu Jan 1 01:00:00 1970 +++ linux-2.6.2-rc1-vs0.05.1/kernel/vserver/network.c Sat Jan 24 05:46:08 2004 @@ -0,0 +1,83 @@ +/* + * linux/kernel/vserver/network.c + * + * Virtual Server: Network Support + * + * Copyright (C) 2003-2004 Herbert Pötzl + * + * V0.01 broken out from vcontext V0.05 + * V0.05 rlimit basic implementation + * + */ + +#include +//#include +//#include +#include +#include +//#include +#include +//#include +//#include + +#include +//#include + + + +LIST_HEAD(ip_infos); + +spinlock_t iplist_lock + __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; + + +/* + * struct ip_info allocation and deallocation + */ + +static struct ip_info *alloc_ip_info(void) +{ + struct ip_info *new = NULL; + + vxdprintk("alloc_ip_info()\n"); + /* would this benefit from a slab cache? 
*/ + new = kmalloc(sizeof(struct ip_info), GFP_KERNEL); + if (!new) + return 0; + + memset (new, 0, sizeof(struct ip_info)); + /* rest of init goes here */ + + + vxdprintk("alloc_ip_info() = %p\n", new); + return new; +} + +// extern int ip_proc_destroy(struct ip_info *); + +void free_ip_info(struct ip_info *ipi) +{ + vxdprintk("free_ip_info(%p)\n", ipi); +// ip_proc_destroy(ipi); + kfree(ipi); +} + +struct ip_info *create_ip_info(void) +{ + struct ip_info *new; + + vxdprintk("create_ip_info()\n"); + if (!(new = alloc_ip_info())) + return 0; + + spin_lock(&iplist_lock); + + /* new ip info */ + atomic_set(&new->ip_refcount, 1); + list_add(&new->ip_list, &ip_infos); +// ip_proc_create(new); + + spin_unlock(&iplist_lock); + return new; +} + diff -NurpP --minimal linux-2.6.2-rc1/kernel/vserver/signal.c linux-2.6.2-rc1-vs0.05.1/kernel/vserver/signal.c --- linux-2.6.2-rc1/kernel/vserver/signal.c Thu Jan 1 01:00:00 1970 +++ linux-2.6.2-rc1-vs0.05.1/kernel/vserver/signal.c Sat Jan 24 06:37:18 2004 @@ -0,0 +1,85 @@ +/* + * linux/kernel/vserver/signal.c + * + * Virtual Server: Signal Support + * + * Copyright (C) 2003-2004 Herbert Pötzl + * + * V0.01 broken out from vcontext V0.05 + * + */ + +#include +#include + +#include +#include + +#include +#include + + +int vc_ctx_kill(uint32_t id, void *data) +{ + int retval, count=0; + struct vcmd_ctx_kill_v0 vc_data; + struct siginfo info; + struct task_struct *p; + struct vx_info *vxi; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + info.si_signo = vc_data.sig; + info.si_errno = 0; + info.si_code = SI_USER; + info.si_pid = current->pid; + info.si_uid = current->uid; + + vxi = find_vx_info(id); + if (!vxi) + return -ESRCH; + + retval = -ESRCH; + read_lock(&tasklist_lock); + switch (vc_data.pid) { + case -1: + case 0: + for_each_process(p) { + int err = 0; + + if (vx_task_xid(p) != id || p->pid <= 1 || + (vc_data.pid && vxi->vx_initpid == p->pid) || + 
!thread_group_leader(p)) + continue; + + err = send_sig_info(vc_data.sig, &info, p); + ++count; + if (err != -EPERM) + retval = err; + } + break; + + default: + p = find_task_by_pid(vc_data.pid); + if (p) { + if (!thread_group_leader(p)) { + struct task_struct *tg; + + tg = find_task_by_pid(p->tgid); + if (tg) + p = tg; + } + if ((id == -1) || (vx_task_xid(p) == id)) + retval = send_sig_info(vc_data.sig, &info, p); + } + break; + } + read_unlock(&tasklist_lock); + put_vx_info(vxi); + return retval; +} + + diff -NurpP --minimal linux-2.6.2-rc1/kernel/vserver/switch.c linux-2.6.2-rc1-vs0.05.1/kernel/vserver/switch.c --- linux-2.6.2-rc1/kernel/vserver/switch.c Thu Jan 1 01:00:00 1970 +++ linux-2.6.2-rc1-vs0.05.1/kernel/vserver/switch.c Sat Jan 24 05:14:16 2004 @@ -0,0 +1,84 @@ +/* + * linux/kernel/vserver/switch.c + * + * Virtual Server: Syscall Switch + * + * Copyright (C) 2003-2004 Herbert Pötzl + * + * V0.01 syscall switch + * V0.02 added signal to context + * V0.03 added rlimit functions + * V0.04 added iattr, task/xid functions + * + */ + +#include +#include +#include + +#include + + +static inline int +vc_get_version(uint32_t id) +{ + return VCI_VERSION; +} + + +#include +#include +#include +#include +#include +#include + +extern asmlinkage int +sys_vserver(uint32_t cmd, uint32_t id, void *data) +{ + int ret = -ENOTSUP; + + switch (cmd) { + case VCMD_get_version: + ret = vc_get_version(id); + break; + + case VCMD_new_s_context: + ret = vc_new_s_context(id, data); + break; + case VCMD_set_ipv4root: + ret = vc_set_ipv4root(id, data); + break; + + case VCMD_get_rlimit: + ret = vc_get_rlimit(id, data); + break; + case VCMD_set_rlimit: + ret = vc_set_rlimit(id, data); + break; + case VCMD_get_rlimit_mask: + ret = vc_get_rlimit_mask(id, data); + break; + + case VCMD_ctx_kill: + ret = vc_ctx_kill(id, data); + break; + + case VCMD_get_iattr: + ret = vc_get_iattr(id, data); + break; + case VCMD_set_iattr: + ret = vc_set_iattr(id, data); + break; + + case VCMD_task_xid: + 
ret = vc_task_xid(id, data); + break; + case VCMD_vx_info: + ret = vc_vx_info(id, data); + break; + } + return ret; +} + + diff -NurpP --minimal linux-2.6.2-rc1/net/ipv4/af_inet.c linux-2.6.2-rc1-vs0.05.1/net/ipv4/af_inet.c --- linux-2.6.2-rc1/net/ipv4/af_inet.c Sat Jan 24 03:18:20 2004 +++ linux-2.6.2-rc1-vs0.05.1/net/ipv4/af_inet.c Sat Jan 24 05:46:08 2004 @@ -158,6 +158,10 @@ void inet_sock_destruct(struct sock *sk) if (inet->opt) kfree(inet->opt); + + /* reordering required? */ + put_ip_info(sk->sk_ip_info); + sk->sk_ip_info = NULL; dst_release(sk->sk_dst_cache); #ifdef INET_REFCNT_DEBUG atomic_dec(&inet_sock_nr); @@ -397,6 +401,9 @@ static int inet_create(struct socket *so sk->sk_family = PF_INET; sk->sk_protocol = protocol; sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; + + sk->sk_xid = vx_current_xid(); + sk->sk_ip_info = NULL; inet->uc_ttl = -1; inet->mc_loop = 1; @@ -476,6 +483,10 @@ int inet_bind(struct socket *sock, struc unsigned short snum; int chk_addr_ret; int err; + __u32 s_addr; /* Address used for validation */ + __u32 s_addr1; + __u32 s_addr2 = 0xffffffffl; /* Optional address of the socket */ + struct ip_info *ip_info; /* If the socket has its own bind function then use it. 
(RAW) */ if (sk->sk_prot->bind) { @@ -486,7 +497,37 @@ int inet_bind(struct socket *sock, struc if (addr_len < sizeof(struct sockaddr_in)) goto out; - chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr); + s_addr = s_addr1 = addr->sin_addr.s_addr; + ip_info = current->ip_info; + if (ip_info) { + __u32 v4_bcast = ip_info->v4_bcast; + __u32 ipv4root = ip_info->ipv4[0]; + int nbipv4 = ip_info->nbipv4; + if (s_addr == 0) { + s_addr = ipv4root; + if (nbipv4 > 1) + s_addr1 = 0; + else { + s_addr1 = ipv4root; + ip_info = NULL; + } + s_addr2 = v4_bcast; + } else if (s_addr == 0x0100007f) { + s_addr = s_addr1 = ipv4root; + ip_info = NULL; + } else if (s_addr != v4_bcast + && s_addr != ipv4root) { + int i; + for (i=0; i<nbipv4; i++) { + if (s_addr == ip_info->ipv4[i]) + break; + } + if (i == nbipv4) + return -EADDRNOTAVAIL; + ip_info = NULL; + } + } + chk_addr_ret = inet_addr_type(s_addr); /* Not specified by any standard per-se, however it breaks too * many applications when removed. It is unfortunate since @@ -498,7 +539,7 @@ int inet_bind(struct socket *sock, struc err = -EADDRNOTAVAIL; if (!sysctl_ip_nonlocal_bind && !inet->freebind && - addr->sin_addr.s_addr != INADDR_ANY && + s_addr != INADDR_ANY && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) @@ -523,13 +564,18 @@ int inet_bind(struct socket *sock, struc if (sk->sk_state != TCP_CLOSE || inet->num) goto out_release_sock; - inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr; + inet->rcv_saddr = inet->saddr = s_addr1; + inet->rcv_saddr2 = s_addr2; + sk->sk_ip_info = get_ip_info(ip_info); + if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) inet->saddr = 0; /* Use device */ /* Make sure we are allowed to bind here. 
*/ if (sk->sk_prot->get_port(sk, snum)) { inet->saddr = inet->rcv_saddr = 0; + sk->sk_ip_info = NULL; + put_ip_info(ip_info); err = -EADDRINUSE; goto out_release_sock; } diff -NurpP --minimal linux-2.6.2-rc1/net/ipv4/devinet.c linux-2.6.2-rc1-vs0.05.1/net/ipv4/devinet.c --- linux-2.6.2-rc1/net/ipv4/devinet.c Sat Jan 24 03:18:20 2004 +++ linux-2.6.2-rc1-vs0.05.1/net/ipv4/devinet.c Sat Jan 24 05:46:08 2004 @@ -487,6 +487,33 @@ static __inline__ int inet_abc_len(u32 a return rc; } +/* + Check that a device is not member of the ipv4root assigned to the process + Return true if this is the case + + If the process is not bound to specific IP, then it returns 0 (all + interface are fine). +*/ +static int devinet_notiproot (struct in_ifaddr *ifa) +{ + int ret = 0; + struct ip_info *info = current->ip_info; + + if (info && !vx_check(0, VX_ADMIN)) { + int i; + int nbip = info->nbipv4; + __u32 addr = ifa->ifa_local; + ret = 1; + for (i=0; i<nbip; i++) { + if (info->ipv4[i] == addr) { + ret = 0; + break; + } + } + } + return ret; +} + int devinet_ioctl(unsigned int cmd, void *arg) { @@ -594,6 +621,8 @@ int devinet_ioctl(unsigned int cmd, void ret = -EADDRNOTAVAIL; if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS) goto done; + if (ifa != NULL && devinet_notiproot(ifa)) + goto done; switch(cmd) { case SIOCGIFADDR: /* Get interface address */ @@ -723,6 +752,8 @@ static int inet_gifconf(struct net_devic goto out; for (; ifa; ifa = ifa->ifa_next) { + if (devinet_notiproot(ifa)) + continue; if (!buf) { done += sizeof(ifr); continue; @@ -980,6 +1011,8 @@ static int inet_dump_ifaddr(struct sk_bu read_lock(&in_dev->lock); for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; ifa = ifa->ifa_next, ip_idx++) { + if (devinet_notiproot(ifa)) + continue; if (ip_idx < s_ip_idx) continue; if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, diff -NurpP --minimal linux-2.6.2-rc1/net/ipv4/raw.c linux-2.6.2-rc1-vs0.05.1/net/ipv4/raw.c --- linux-2.6.2-rc1/net/ipv4/raw.c Sat Jan 24 03:18:20 2004 +++ 
linux-2.6.2-rc1-vs0.05.1/net/ipv4/raw.c Sat Jan 24 05:46:08 2004 @@ -102,6 +102,38 @@ static void raw_v4_unhash(struct sock *s write_unlock_bh(&raw_v4_lock); } + +/* + Check if an address is in the list +*/ +static inline int raw_addr_in_list ( + u32 rcv_saddr1, + u32 rcv_saddr2, + u32 loc_addr, + struct ip_info *ip_info) +{ + int ret = 0; + if (loc_addr != 0 && + (rcv_saddr1 == loc_addr || rcv_saddr2 == loc_addr)) + ret = 1; + else if (rcv_saddr1 == 0) { + /* Accept any address or only the one in the list */ + if (ip_info == NULL) + ret = 1; + else { + int n = ip_info->nbipv4; + int i; + for (i=0; i<n; i++) { + if (ip_info->ipv4[i] == loc_addr) { + ret = 1; + break; + } + } + } + } + return ret; +} + struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, unsigned long raddr, unsigned long laddr, int dif) @@ -113,7 +145,8 @@ struct sock *__raw_v4_lookup(struct sock if (inet->num == num && !(inet->daddr && inet->daddr != raddr) && - !(inet->rcv_saddr && inet->rcv_saddr != laddr) && + raw_addr_in_list(inet->rcv_saddr, inet->rcv_saddr2, + laddr, sk->sk_ip_info) && !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) goto found; /* gotcha */ } @@ -687,7 +720,8 @@ static struct sock *raw_get_first(struct struct hlist_node *node; sk_for_each(sk, node, &raw_v4_htable[state->bucket]) - if (sk->sk_family == PF_INET) + if (sk->sk_family == PF_INET && + vx_check(sk->sk_xid, VX_WATCH|VX_IDENT)) goto found; } sk = NULL; diff -NurpP --minimal linux-2.6.2-rc1/net/ipv4/tcp_ipv4.c linux-2.6.2-rc1-vs0.05.1/net/ipv4/tcp_ipv4.c --- linux-2.6.2-rc1/net/ipv4/tcp_ipv4.c Fri Jan 9 07:59:19 2004 +++ linux-2.6.2-rc1-vs0.05.1/net/ipv4/tcp_ipv4.c Sat Jan 24 05:46:08 2004 @@ -179,9 +179,52 @@ void tcp_bind_hash(struct sock *sk, stru tcp_sk(sk)->bind_hash = tb; } +/* + Return 1 if addr match the socket IP list + or the socket is INADDR_ANY +*/ +static inline int tcp_in_list (struct sock *sk, u32 addr) +{ + struct ip_info *ip_info = sk->sk_ip_info; + + if (ip_info) { + int n = ip_info->nbipv4; + int i; + + 
for (i=0; i<n; i++) + if (ip_info->ipv4[i] == addr) + return 1; + } + else if (!tcp_v4_rcv_saddr(sk) || tcp_v4_rcv_saddr(sk) == addr) + return 1; + return 0; +} + +/* + Check if the addresses in sk1 conflict with those in sk2 +*/ +int tcp_ipv4_addr_conflict (struct sock *sk1, struct sock *sk2) +{ + if (tcp_v4_rcv_saddr(sk1)) { + /* Bind to one address only */ + return tcp_in_list (sk2, tcp_v4_rcv_saddr(sk1)); + } else if (sk1->sk_ip_info) { + /* A restricted bind(any) */ + struct ip_info *ip_info = sk1->sk_ip_info; + int n = ip_info->nbipv4; + int i; + + for (i=0; i<n; i++) + if (tcp_in_list (sk2, ip_info->ipv4[i])) + return 1; + } else /* A bind(any) do not allow other bind on the same port */ + return 1; + return 0; +} + static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb) { - const u32 sk_rcv_saddr = tcp_v4_rcv_saddr(sk); +// const u32 sk_rcv_saddr = tcp_v4_rcv_saddr(sk); struct sock *sk2; struct hlist_node *node; int reuse = sk->sk_reuse; @@ -194,9 +237,8 @@ static inline int tcp_bind_conflict(stru sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { if (!reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN) { - const u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2); - if (!sk2_rcv_saddr || !sk_rcv_saddr || - sk2_rcv_saddr == sk_rcv_saddr) +// const u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2); + if (tcp_ipv4_addr_conflict(sk, sk2)) break; } } @@ -405,6 +447,34 @@ void tcp_unhash(struct sock *sk) wake_up(&tcp_lhash_wait); } +/* + Check if an address is in the list +*/ +static inline int tcp_addr_in_list ( + u32 rcv_saddr, + u32 daddr, + struct ip_info *ip_info) +{ + if (rcv_saddr == daddr) + return 1; + else if (rcv_saddr == 0) { + /* Accept any address or check the list */ + if (!ip_info) + return 1; + else { + int n = ip_info->nbipv4; + int i; + + for (i=0; i<n; i++) + if (ip_info->ipv4[i] == daddr) + return 1; + } + } + return 0; +} + + + /* Don't inline this cruft. Here are some nice properties to * exploit here. 
The BSD API does not allow a listening TCP * to specify the remote port nor the remote address for the @@ -426,11 +496,10 @@ static struct sock *__tcp_v4_lookup_list __u32 rcv_saddr = inet->rcv_saddr; score = (sk->sk_family == PF_INET ? 1 : 0); - if (rcv_saddr) { - if (rcv_saddr != daddr) - continue; + if (tcp_addr_in_list(rcv_saddr, daddr, sk->sk_ip_info)) score+=2; - } + else + continue; if (sk->sk_bound_dev_if) { if (sk->sk_bound_dev_if != dif) continue; @@ -460,8 +529,8 @@ inline struct sock *tcp_v4_lookup_listen struct inet_opt *inet = inet_sk((sk = __sk_head(head))); if (inet->num == hnum && !sk->sk_node.next && - (!inet->rcv_saddr || inet->rcv_saddr == daddr) && (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && + tcp_addr_in_list(inet->rcv_saddr, daddr, sk->sk_ip_info) && !sk->sk_bound_dev_if) goto sherry_cache; sk = __tcp_v4_lookup_listener(head, daddr, hnum, dif); diff -NurpP --minimal linux-2.6.2-rc1/net/ipv4/tcp_minisocks.c linux-2.6.2-rc1-vs0.05.1/net/ipv4/tcp_minisocks.c --- linux-2.6.2-rc1/net/ipv4/tcp_minisocks.c Fri Jan 9 07:59:55 2004 +++ linux-2.6.2-rc1-vs0.05.1/net/ipv4/tcp_minisocks.c Sat Jan 24 05:46:08 2004 @@ -362,6 +362,9 @@ void tcp_time_wait(struct sock *sk, int tw->tw_ts_recent_stamp = tp->ts_recent_stamp; tw_dead_node_init(tw); + tw->tw_xid = sk->sk_xid; + tw->tw_ip_info = NULL; + #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (tw->tw_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -686,6 +689,7 @@ struct sock *tcp_create_openreq_child(st struct sk_filter *filter; memcpy(newsk, sk, sizeof(struct tcp_sock)); + newsk->sk_ip_info = get_ip_info(sk->sk_ip_info); newsk->sk_state = TCP_SYN_RECV; /* SANITY */ diff -NurpP --minimal linux-2.6.2-rc1/net/ipv4/udp.c linux-2.6.2-rc1-vs0.05.1/net/ipv4/udp.c --- linux-2.6.2-rc1/net/ipv4/udp.c Sat Jan 24 03:18:20 2004 +++ linux-2.6.2-rc1-vs0.05.1/net/ipv4/udp.c Sat Jan 24 05:46:08 2004 @@ -120,6 +120,9 @@ rwlock_t udp_hash_lock = RW_LOCK_UNLOCKE /* Shared by v4/v6 udp. 
*/ int udp_port_rover; +int tcp_ipv4_addr_conflict (struct sock *sk1, struct sock *sk2); + + static int udp_v4_get_port(struct sock *sk, unsigned short snum) { struct hlist_node *node; @@ -179,9 +182,7 @@ gotit: (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - (!inet2->rcv_saddr || - !inet->rcv_saddr || - inet2->rcv_saddr == inet->rcv_saddr) && + tcp_ipv4_addr_conflict(sk2, sk) && (!sk2->sk_reuse || !sk->sk_reuse)) goto fail; } @@ -216,6 +217,17 @@ static void udp_v4_unhash(struct sock *s write_unlock_bh(&udp_hash_lock); } +static int udp_in_list (struct ip_info *ip_info, u32 addr) +{ + int n = ip_info->nbipv4; + int i; + + for (i=0; i<n; i++) + if (ip_info->ipv4[i] == addr) + return 1; + return 0; +} + /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. -DaveM */ @@ -235,6 +247,11 @@ struct sock *udp_v4_lookup_longway(u32 s if (inet->rcv_saddr != daddr) continue; score+=2; + } else if (sk->sk_ip_info) { + if (udp_in_list(sk->sk_ip_info, daddr)) + score+=2; + else + continue; } if (inet->daddr) { if (inet->daddr != saddr) @@ -290,7 +307,8 @@ static inline struct sock *udp_v4_mcast_ if (inet->num != hnum || (inet->daddr && inet->daddr != rmt_addr) || (inet->dport != rmt_port && inet->dport) || - (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || + (inet->rcv_saddr && inet->rcv_saddr != loc_addr && + inet->rcv_saddr2 && inet->rcv_saddr2 != loc_addr) || ipv6_only_sock(s) || (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) continue; @@ -599,6 +617,18 @@ int udp_sendmsg(struct kiocb *iocb, stru .uli_u = { .ports = { .sport = inet->sport, .dport = dport } } }; + struct ip_info *ip_info = current->ip_info; + + if (ip_info != NULL) { + __u32 ipv4root = ip_info->ipv4[0]; + if (ipv4root) { + if (daddr == 0x0100007f && + !vx_check(0, VX_ADMIN)) + daddr = ipv4root; + if (fl.nl_u.ip4_u.saddr == 0) + fl.nl_u.ip4_u.saddr = ipv4root; + } + } err = ip_route_output_flow(&rt, &fl, sk, 
!(msg->msg_flags&MSG_DONTWAIT)); if (err) goto out; diff -NurpP --minimal linux-2.6.2-rc1/net/unix/af_unix.c linux-2.6.2-rc1-vs0.05.1/net/unix/af_unix.c --- linux-2.6.2-rc1/net/unix/af_unix.c Sat Jan 24 03:18:22 2004 +++ linux-2.6.2-rc1-vs0.05.1/net/unix/af_unix.c Sat Jan 24 05:46:08 2004 @@ -120,6 +120,7 @@ #include #include #include +#include int sysctl_unix_max_dgram_qlen = 10; @@ -480,6 +481,7 @@ static struct sock * unix_create1(struct sock_init_data(sock,sk); sk_set_owner(sk, THIS_MODULE); + sk->sk_xid = vx_current_xid(); sk->sk_write_space = unix_write_space; sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen; sk->sk_destruct = unix_sock_destructor; diff -NurpP --minimal linux-2.6.2-rc1/security/commoncap.c linux-2.6.2-rc1-vs0.05.1/security/commoncap.c --- linux-2.6.2-rc1/security/commoncap.c Sat Jan 24 03:18:22 2004 +++ linux-2.6.2-rc1-vs0.05.1/security/commoncap.c Sat Jan 24 07:15:57 2004 @@ -125,7 +125,7 @@ void cap_bprm_compute_creds (struct linu /* Derived from fs/exec.c:compute_creds. */ kernel_cap_t new_permitted, working; - new_permitted = cap_intersect (bprm->cap_permitted, cap_bset); + new_permitted = cap_intersect (bprm->cap_permitted, current->cap_bset); working = cap_intersect (bprm->cap_inheritable, current->cap_inheritable); new_permitted = cap_combine (new_permitted, working);