diff -urNp linux-2.6.17.11/arch/alpha/kernel/module.c linux-2.6.17.11/arch/alpha/kernel/module.c --- linux-2.6.17.11/arch/alpha/kernel/module.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/alpha/kernel/module.c 2006-09-01 16:20:28.000000000 -0400 @@ -177,7 +177,7 @@ apply_relocate_add(Elf64_Shdr *sechdrs, /* The small sections were sorted to the end of the segment. The following should definitely cover them. */ - gp = (u64)me->module_core + me->core_size - 0x8000; + gp = (u64)me->module_core_rw + me->core_size_rw - 0x8000; got = sechdrs[me->arch.gotsecindex].sh_addr; for (i = 0; i < n; i++) { diff -urNp linux-2.6.17.11/arch/alpha/kernel/osf_sys.c linux-2.6.17.11/arch/alpha/kernel/osf_sys.c --- linux-2.6.17.11/arch/alpha/kernel/osf_sys.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/alpha/kernel/osf_sys.c 2006-09-01 16:20:28.000000000 -0400 @@ -1278,6 +1278,10 @@ arch_get_unmapped_area(struct file *filp merely specific addresses, but regions of memory -- perhaps this feature should be incorporated into all ports? */ +#ifdef CONFIG_PAX_RANDMMAP + if (!(current->mm->pax_flags & MF_PAX_RANDMMAP) || !filp) +#endif + if (addr) { addr = arch_get_unmapped_area_1 (PAGE_ALIGN(addr), len, limit); if (addr != (unsigned long) -ENOMEM) @@ -1285,8 +1289,8 @@ arch_get_unmapped_area(struct file *filp } /* Next, try allocating at TASK_UNMAPPED_BASE. 
*/ - addr = arch_get_unmapped_area_1 (PAGE_ALIGN(TASK_UNMAPPED_BASE), - len, limit); + addr = arch_get_unmapped_area_1 (PAGE_ALIGN(current->mm->mmap_base), len, limit); + if (addr != (unsigned long) -ENOMEM) return addr; diff -urNp linux-2.6.17.11/arch/alpha/kernel/ptrace.c linux-2.6.17.11/arch/alpha/kernel/ptrace.c --- linux-2.6.17.11/arch/alpha/kernel/ptrace.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/alpha/kernel/ptrace.c 2006-09-01 16:20:28.000000000 -0400 @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -289,6 +290,9 @@ do_sys_ptrace(long request, long pid, lo goto out; } + if (gr_handle_ptrace(child, request)) + goto out; + if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); goto out; diff -urNp linux-2.6.17.11/arch/alpha/mm/fault.c linux-2.6.17.11/arch/alpha/mm/fault.c --- linux-2.6.17.11/arch/alpha/mm/fault.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/alpha/mm/fault.c 2006-09-01 16:20:28.000000000 -0400 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -56,6 +57,124 @@ __load_new_mm_context(struct mm_struct * __reload_thread(pcb); } +#ifdef CONFIG_PAX_PAGEEXEC +/* + * PaX: decide what to do with offenders (regs->pc = fault address) + * + * returns 1 when task should be killed + * 2 when patched PLT trampoline was detected + * 3 when unpatched PLT trampoline was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#ifdef CONFIG_PAX_EMUPLT + int err; + + do { /* PaX: patched PLT emulation #1 */ + unsigned int ldah, ldq, jmp; + + err = get_user(ldah, (unsigned int *)regs->pc); + err |= get_user(ldq, (unsigned int *)(regs->pc+4)); + err |= get_user(jmp, (unsigned int *)(regs->pc+8)); + + if (err) + break; + + if ((ldah & 0xFFFF0000U) == 0x277B0000U && + (ldq & 0xFFFF0000U) == 0xA77B0000U && + jmp == 0x6BFB0000U) + { + unsigned long r27, addr; + unsigned long addrh = (ldah | 0xFFFFFFFFFFFF0000UL) << 16; + unsigned long addrl = ldq | 
0xFFFFFFFFFFFF0000UL; + + addr = regs->r27 + ((addrh ^ 0x80000000UL) + 0x80000000UL) + ((addrl ^ 0x8000UL) + 0x8000UL); + err = get_user(r27, (unsigned long*)addr); + if (err) + break; + + regs->r27 = r27; + regs->pc = r27; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #2 */ + unsigned int ldah, lda, br; + + err = get_user(ldah, (unsigned int *)regs->pc); + err |= get_user(lda, (unsigned int *)(regs->pc+4)); + err |= get_user(br, (unsigned int *)(regs->pc+8)); + + if (err) + break; + + if ((ldah & 0xFFFF0000U)== 0x277B0000U && + (lda & 0xFFFF0000U) == 0xA77B0000U && + (br & 0xFFE00000U) == 0xC3E00000U) + { + unsigned long addr = br | 0xFFFFFFFFFFE00000UL; + unsigned long addrh = (ldah | 0xFFFFFFFFFFFF0000UL) << 16; + unsigned long addrl = lda | 0xFFFFFFFFFFFF0000UL; + + regs->r27 += ((addrh ^ 0x80000000UL) + 0x80000000UL) + ((addrl ^ 0x8000UL) + 0x8000UL); + regs->pc += 12 + (((addr ^ 0x00100000UL) + 0x00100000UL) << 2); + return 2; + } + } while (0); + + do { /* PaX: unpatched PLT emulation */ + unsigned int br; + + err = get_user(br, (unsigned int *)regs->pc); + + if (!err && (br & 0xFFE00000U) == 0xC3800000U) { + unsigned int br2, ldq, nop, jmp; + unsigned long addr = br | 0xFFFFFFFFFFE00000UL, resolver; + + addr = regs->pc + 4 + (((addr ^ 0x00100000UL) + 0x00100000UL) << 2); + err = get_user(br2, (unsigned int *)addr); + err |= get_user(ldq, (unsigned int *)(addr+4)); + err |= get_user(nop, (unsigned int *)(addr+8)); + err |= get_user(jmp, (unsigned int *)(addr+12)); + err |= get_user(resolver, (unsigned long *)(addr+16)); + + if (err) + break; + + if (br2 == 0xC3600000U && + ldq == 0xA77B000CU && + nop == 0x47FF041FU && + jmp == 0x6B7B0000U) + { + regs->r28 = regs->pc+4; + regs->r27 = addr+16; + regs->pc = resolver; + return 3; + } + } + } while (0); +#endif + + return 1; +} + +void pax_report_insns(void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if 
(get_user(c, (unsigned int*)pc+i)) + printk("???????? "); + else + printk("%08x ", c); + } + printk("\n"); +} +#endif /* * This routine handles page faults. It determines the address, @@ -133,8 +252,29 @@ do_page_fault(unsigned long address, uns good_area: si_code = SEGV_ACCERR; if (cause < 0) { - if (!(vma->vm_flags & VM_EXEC)) + if (!(vma->vm_flags & VM_EXEC)) { + +#ifdef CONFIG_PAX_PAGEEXEC + if (!(mm->pax_flags & MF_PAX_PAGEEXEC) || address != regs->pc) + goto bad_area; + + up_read(&mm->mmap_sem); + switch(pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_EMUPLT + case 2: + case 3: + return; +#endif + + } + pax_report_fault(regs, (void*)regs->pc, (void*)rdusp()); + do_exit(SIGKILL); +#else goto bad_area; +#endif + + } } else if (!cause) { /* Allow reads even for write-only mappings */ if (!(vma->vm_flags & (VM_READ | VM_WRITE))) diff -urNp linux-2.6.17.11/arch/arm/mm/mmap.c linux-2.6.17.11/arch/arm/mm/mmap.c --- linux-2.6.17.11/arch/arm/mm/mmap.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/arm/mm/mmap.c 2006-09-01 16:20:28.000000000 -0400 @@ -62,6 +62,10 @@ arch_get_unmapped_area(struct file *filp if (len > TASK_SIZE) return -ENOMEM; +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP) || !filp) +#endif + if (addr) { if (do_align) addr = COLOUR_ALIGN(addr, pgoff); @@ -76,7 +80,7 @@ arch_get_unmapped_area(struct file *filp if (len > mm->cached_hole_size) { start_addr = addr = mm->free_area_cache; } else { - start_addr = addr = TASK_UNMAPPED_BASE; + start_addr = addr = mm->mmap_base; mm->cached_hole_size = 0; } @@ -93,8 +97,8 @@ full_search: * Start a new search - just in case we missed * some holes. 
*/ - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = addr = TASK_UNMAPPED_BASE; + if (start_addr != mm->mmap_base) { + start_addr = addr = mm->mmap_base; mm->cached_hole_size = 0; goto full_search; } diff -urNp linux-2.6.17.11/arch/i386/boot/compressed/head.S linux-2.6.17.11/arch/i386/boot/compressed/head.S --- linux-2.6.17.11/arch/i386/boot/compressed/head.S 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/boot/compressed/head.S 2006-09-01 16:20:28.000000000 -0400 @@ -39,11 +39,13 @@ startup_32: movl %eax,%gs lss stack_start,%esp + movl 0x000000,%ecx xorl %eax,%eax 1: incl %eax # check that A20 really IS enabled movl %eax,0x000000 # loop forever if it isn't cmpl %eax,0x100000 je 1b + movl %ecx,0x000000 /* * Initialize eflags. Some BIOS's leave bits like NT set. This would diff -urNp linux-2.6.17.11/arch/i386/Kconfig linux-2.6.17.11/arch/i386/Kconfig --- linux-2.6.17.11/arch/i386/Kconfig 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/Kconfig 2006-09-01 16:20:28.000000000 -0400 @@ -948,7 +948,7 @@ config PCI choice prompt "PCI access mode" depends on PCI && !X86_VISWS - default PCI_GOANY + default PCI_GODIRECT ---help--- On PCI systems, the BIOS can be used to detect the PCI devices and determine their configuration. 
However, some old PCI motherboards @@ -980,7 +980,7 @@ endchoice config PCI_BIOS bool - depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY) + depends on !X86_VISWS && PCI && PCI_GOBIOS default y config PCI_DIRECT diff -urNp linux-2.6.17.11/arch/i386/Kconfig.cpu linux-2.6.17.11/arch/i386/Kconfig.cpu --- linux-2.6.17.11/arch/i386/Kconfig.cpu 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/Kconfig.cpu 2006-09-01 16:20:28.000000000 -0400 @@ -251,7 +251,7 @@ config X86_PPRO_FENCE config X86_F00F_BUG bool - depends on M586MMX || M586TSC || M586 || M486 || M386 + depends on (M586MMX || M586TSC || M586 || M486 || M386) && !PAX_KERNEXEC default y config X86_WP_WORKS_OK @@ -281,7 +281,7 @@ config X86_CMPXCHG64 config X86_ALIGNMENT_16 bool - depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 + depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK8 || MK7 || MK6 || MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 default y config X86_GOOD_APIC diff -urNp linux-2.6.17.11/arch/i386/Kconfig.debug linux-2.6.17.11/arch/i386/Kconfig.debug --- linux-2.6.17.11/arch/i386/Kconfig.debug 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/Kconfig.debug 2006-09-01 16:20:28.000000000 -0400 @@ -53,7 +53,7 @@ config DEBUG_PAGEALLOC config DEBUG_RODATA bool "Write protect kernel read-only data structures" - depends on DEBUG_KERNEL + depends on DEBUG_KERNEL && 0 help Mark the kernel read-only data as write-protected in the pagetables, in order to catch accidental (and incorrect) writes to such const diff -urNp linux-2.6.17.11/arch/i386/kernel/acpi/sleep.c linux-2.6.17.11/arch/i386/kernel/acpi/sleep.c --- linux-2.6.17.11/arch/i386/kernel/acpi/sleep.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/acpi/sleep.c 2006-09-01 16:20:28.000000000 -0400 @@ -10,6 
+10,7 @@ #include #include #include +#include /* address in low memory of the wakeup routine. */ unsigned long acpi_wakeup_address = 0; @@ -24,11 +25,22 @@ static void init_low_mapping(pgd_t * pgd { int pgd_ofs = 0; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif + while ((pgd_ofs < pgd_limit) && (pgd_ofs + USER_PTRS_PER_PGD < PTRS_PER_PGD)) { set_pgd(pgd, *(pgd + USER_PTRS_PER_PGD)); pgd_ofs++, pgd++; } + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + flush_tlb_all(); } @@ -55,7 +67,18 @@ int acpi_save_state_mem(void) */ void acpi_restore_state_mem(void) { +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif + zap_low_mappings(); + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + } /** diff -urNp linux-2.6.17.11/arch/i386/kernel/alternative.c linux-2.6.17.11/arch/i386/kernel/alternative.c --- linux-2.6.17.11/arch/i386/kernel/alternative.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/alternative.c 2006-09-01 16:20:28.000000000 -0400 @@ -3,6 +3,7 @@ #include #include #include +#include #define DEBUG 0 #if DEBUG @@ -101,71 +102,128 @@ void apply_alternatives(struct alt_instr struct alt_instr *a; int diff, i, k; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif + DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); for (a = start; a < end; a++) { BUG_ON(a->replacementlen > a->instrlen); if (!boot_cpu_has(a->cpuid)) continue; - memcpy(a->instr, a->replacement, a->replacementlen); + memcpy(a->instr + __KERNEL_TEXT_OFFSET, a->replacement, a->replacementlen); diff = a->instrlen - a->replacementlen; /* Pad the rest with nops */ for (i = a->replacementlen; diff > 0; diff -= k, i += k) { k = diff; if (k > ASM_NOP_MAX) k = ASM_NOP_MAX; - memcpy(a->instr + i, noptable[k], k); + memcpy(a->instr + i + __KERNEL_TEXT_OFFSET, noptable[k], k); } } + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + } static 
void alternatives_smp_save(struct alt_instr *start, struct alt_instr *end) { struct alt_instr *a; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif + DPRINTK("%s: alt table %p-%p\n", __FUNCTION__, start, end); for (a = start; a < end; a++) { memcpy(a->replacement + a->replacementlen, - a->instr, + a->instr + __KERNEL_TEXT_OFFSET, a->instrlen); } + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + } static void alternatives_smp_apply(struct alt_instr *start, struct alt_instr *end) { struct alt_instr *a; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif + for (a = start; a < end; a++) { - memcpy(a->instr, + memcpy(a->instr + __KERNEL_TEXT_OFFSET, a->replacement + a->replacementlen, a->instrlen); } + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + } static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) { - u8 **ptr; + u8 *ptr; + +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; - for (ptr = start; ptr < end; ptr++) { - if (*ptr < text) + pax_open_kernel(cr0); +#endif + + for (; start < end; start++) { + ptr = *start + __KERNEL_TEXT_OFFSET; + if (ptr < text) continue; - if (*ptr > text_end) + if (ptr > text_end) continue; - **ptr = 0xf0; /* lock prefix */ + *ptr = 0xf0; /* lock prefix */ }; + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + } static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) { unsigned char **noptable = find_nop_table(); - u8 **ptr; + u8 *ptr; + +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif - for (ptr = start; ptr < end; ptr++) { - if (*ptr < text) + for (; start < end; start++) { + ptr = *start + __KERNEL_TEXT_OFFSET; + if (ptr < text) continue; - if (*ptr > text_end) + if (ptr > text_end) continue; - **ptr = noptable[1][0]; + *ptr = noptable[1][0]; }; + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + } struct smp_alt_module { diff -urNp 
linux-2.6.17.11/arch/i386/kernel/apic.c linux-2.6.17.11/arch/i386/kernel/apic.c --- linux-2.6.17.11/arch/i386/kernel/apic.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/apic.c 2006-09-01 16:20:28.000000000 -0400 @@ -1176,7 +1176,7 @@ inline void smp_local_timer_interrupt(st { profile_tick(CPU_PROFILING, regs); #ifdef CONFIG_SMP - update_process_times(user_mode_vm(regs)); + update_process_times(user_mode(regs)); #endif /* diff -urNp linux-2.6.17.11/arch/i386/kernel/apm.c linux-2.6.17.11/arch/i386/kernel/apm.c --- linux-2.6.17.11/arch/i386/kernel/apm.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/apm.c 2006-09-01 16:20:28.000000000 -0400 @@ -235,7 +235,7 @@ #include "io_ports.h" extern unsigned long get_cmos_time(void); -extern void machine_real_restart(unsigned char *, int); +extern void machine_real_restart(const unsigned char *, unsigned int); #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) extern int (*console_blank_hook)(int); @@ -589,9 +589,18 @@ static u8 apm_bios_call(u32 func, u32 eb struct desc_struct save_desc_40; struct desc_struct *gdt; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; +#endif + cpus = apm_save_cpus(); cpu = get_cpu(); + +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(cr0); +#endif + gdt = get_cpu_gdt_table(cpu); save_desc_40 = gdt[0x40 / 8]; gdt[0x40 / 8] = bad_bios_desc; @@ -603,6 +612,11 @@ static u8 apm_bios_call(u32 func, u32 eb APM_DO_RESTORE_SEGS; local_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + put_cpu(); apm_restore_cpus(cpus); @@ -633,9 +647,18 @@ static u8 apm_bios_call_simple(u32 func, struct desc_struct save_desc_40; struct desc_struct *gdt; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; +#endif + cpus = apm_save_cpus(); cpu = get_cpu(); + +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(cr0); +#endif + gdt = get_cpu_gdt_table(cpu); save_desc_40 = gdt[0x40 / 8]; gdt[0x40 / 8] = 
bad_bios_desc; @@ -647,6 +670,11 @@ static u8 apm_bios_call_simple(u32 func, APM_DO_RESTORE_SEGS; local_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + put_cpu(); apm_restore_cpus(cpus); return error; @@ -910,7 +938,7 @@ recalc: static void apm_power_off(void) { - unsigned char po_bios_call[] = { + const unsigned char po_bios_call[] = { 0xb8, 0x00, 0x10, /* movw $0x1000,ax */ 0x8e, 0xd0, /* movw ax,ss */ 0xbc, 0x00, 0xf0, /* movw $0xf000,sp */ diff -urNp linux-2.6.17.11/arch/i386/kernel/asm-offsets.c linux-2.6.17.11/arch/i386/kernel/asm-offsets.c --- linux-2.6.17.11/arch/i386/kernel/asm-offsets.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/asm-offsets.c 2006-09-01 16:20:28.000000000 -0400 @@ -68,5 +68,6 @@ void foo(void) sizeof(struct tss_struct)); DEFINE(PAGE_SIZE_asm, PAGE_SIZE); + DEFINE(PTRS_PER_PTE_asm, PTRS_PER_PTE); DEFINE(VSYSCALL_BASE, __fix_to_virt(FIX_VSYSCALL)); } diff -urNp linux-2.6.17.11/arch/i386/kernel/cpu/common.c linux-2.6.17.11/arch/i386/kernel/cpu/common.c --- linux-2.6.17.11/arch/i386/kernel/cpu/common.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/cpu/common.c 2006-09-01 16:20:28.000000000 -0400 @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -19,16 +18,18 @@ #include "cpu.h" -DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); -EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); - DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); static int cachesize_override __cpuinitdata = -1; static int disable_x86_fxsr __cpuinitdata; static int disable_x86_serial_nr __cpuinitdata = 1; + +#ifdef CONFIG_PAX_NOVSYSCALL +static int disable_x86_sep __cpuinitdata = 1; +#else static int disable_x86_sep __cpuinitdata; +#endif struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; @@ -590,11 +591,10 @@ void __init early_cpu_init(void) void __cpuinit cpu_init(void) { 
int cpu = smp_processor_id(); - struct tss_struct * t = &per_cpu(init_tss, cpu); + struct tss_struct * t = init_tss + cpu; struct thread_struct *thread = &current->thread; - struct desc_struct *gdt; + struct desc_struct *gdt = get_cpu_gdt_table(cpu); __u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu); - struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); if (cpu_test_and_set(cpu, cpu_initialized)) { printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); @@ -612,29 +612,11 @@ void __cpuinit cpu_init(void) } /* - * This is a horrible hack to allocate the GDT. The problem - * is that cpu_init() is called really early for the boot CPU - * (and hence needs bootmem) but much later for the secondary - * CPUs, when bootmem will have gone away - */ - if (NODE_DATA(0)->bdata->node_bootmem_map) { - gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE); - /* alloc_bootmem_pages panics on failure, so no check */ - memset(gdt, 0, PAGE_SIZE); - } else { - gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL); - if (unlikely(!gdt)) { - printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu); - for (;;) - local_irq_enable(); - } - } - - /* * Initialize the per-CPU GDT with the boot GDT, * and set up the GDT descriptor: */ - memcpy(gdt, cpu_gdt_table, GDT_SIZE); + if (cpu) + memcpy(gdt, cpu_gdt_table, GDT_SIZE); /* Set up GDT entry for 16bit stack */ *(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |= @@ -642,10 +624,10 @@ void __cpuinit cpu_init(void) ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) | (CPU_16BIT_STACK_SIZE - 1); - cpu_gdt_descr->size = GDT_SIZE - 1; - cpu_gdt_descr->address = (unsigned long)gdt; + cpu_gdt_descr[cpu].size = GDT_SIZE - 1; + cpu_gdt_descr[cpu].address = (unsigned long)gdt; - load_gdt(cpu_gdt_descr); + load_gdt(&cpu_gdt_descr[cpu]); load_idt(&idt_descr); /* @@ -660,7 +642,7 @@ void __cpuinit cpu_init(void) load_esp0(t, thread); set_tss_desc(cpu,t); load_TR_desc(); - load_LDT(&init_mm.context); + _load_LDT(&init_mm.context); #ifdef 
CONFIG_DOUBLEFAULT /* Set up doublefault TSS pointer in the GDT */ @@ -668,7 +650,7 @@ void __cpuinit cpu_init(void) #endif /* Clear %fs and %gs. */ - asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); + asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r"(0)); /* Clear all 6 debug registers: */ set_debugreg(0, 0); diff -urNp linux-2.6.17.11/arch/i386/kernel/crash.c linux-2.6.17.11/arch/i386/kernel/crash.c --- linux-2.6.17.11/arch/i386/kernel/crash.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/crash.c 2006-09-01 16:20:28.000000000 -0400 @@ -105,7 +105,7 @@ static int crash_nmi_callback(struct pt_ return 1; local_irq_disable(); - if (!user_mode_vm(regs)) { + if (!user_mode(regs)) { crash_fixup_ss_esp(&fixed_regs, regs); regs = &fixed_regs; } diff -urNp linux-2.6.17.11/arch/i386/kernel/doublefault.c linux-2.6.17.11/arch/i386/kernel/doublefault.c --- linux-2.6.17.11/arch/i386/kernel/doublefault.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/doublefault.c 2006-09-01 16:20:28.000000000 -0400 @@ -11,7 +11,7 @@ #define DOUBLEFAULT_STACKSIZE (1024) static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE]; -#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE) +#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE-2) #define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + 0x1000000) @@ -56,10 +56,10 @@ struct tss_struct doublefault_tss __cach .eip = (unsigned long) doublefault_fn, .eflags = X86_EFLAGS_SF | 0x2, /* 0x2 bit is always set */ .esp = STACK_START, - .es = __USER_DS, + .es = __KERNEL_DS, .cs = __KERNEL_CS, .ss = __KERNEL_DS, - .ds = __USER_DS, + .ds = __KERNEL_DS, .__cr3 = __pa(swapper_pg_dir) }; diff -urNp linux-2.6.17.11/arch/i386/kernel/efi.c linux-2.6.17.11/arch/i386/kernel/efi.c --- linux-2.6.17.11/arch/i386/kernel/efi.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/efi.c 2006-09-01 16:20:28.000000000 -0400 
@@ -64,82 +64,43 @@ extern void * boot_ioremap(unsigned long static unsigned long efi_rt_eflags; static DEFINE_SPINLOCK(efi_rt_lock); -static pgd_t efi_bak_pg_dir_pointer[2]; +static pgd_t __initdata efi_bak_pg_dir_pointer[KERNEL_PGD_PTRS] __attribute__ ((aligned (4096))); -static void efi_call_phys_prelog(void) +static void __init efi_call_phys_prelog(void) { - unsigned long cr4; - unsigned long temp; - struct Xgt_desc_struct *cpu_gdt_descr; - spin_lock(&efi_rt_lock); local_irq_save(efi_rt_eflags); - cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0); - - /* - * If I don't have PSE, I should just duplicate two entries in page - * directory. If I have PSE, I just need to duplicate one entry in - * page directory. - */ - cr4 = read_cr4(); - - if (cr4 & X86_CR4_PSE) { - efi_bak_pg_dir_pointer[0].pgd = - swapper_pg_dir[pgd_index(0)].pgd; - swapper_pg_dir[0].pgd = - swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; - } else { - efi_bak_pg_dir_pointer[0].pgd = - swapper_pg_dir[pgd_index(0)].pgd; - efi_bak_pg_dir_pointer[1].pgd = - swapper_pg_dir[pgd_index(0x400000)].pgd; - swapper_pg_dir[pgd_index(0)].pgd = - swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; - temp = PAGE_OFFSET + 0x400000; - swapper_pg_dir[pgd_index(0x400000)].pgd = - swapper_pg_dir[pgd_index(temp)].pgd; - } + clone_pgd_range(efi_bak_pg_dir_pointer, swapper_pg_dir, KERNEL_PGD_PTRS); + clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, + USER_PGD_PTRS >= KERNEL_PGD_PTRS ? KERNEL_PGD_PTRS : USER_PGD_PTRS); /* * After the lock is released, the original page table is restored. 
*/ - local_flush_tlb(); + __flush_tlb_all(); - cpu_gdt_descr->address = __pa(cpu_gdt_descr->address); - load_gdt(cpu_gdt_descr); + cpu_gdt_descr[0].address = __pa(cpu_gdt_descr[0].address); + load_gdt((struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0])); } -static void efi_call_phys_epilog(void) +static void __init efi_call_phys_epilog(void) { - unsigned long cr4; - struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0); - - cpu_gdt_descr->address = (unsigned long)__va(cpu_gdt_descr->address); - load_gdt(cpu_gdt_descr); + cpu_gdt_descr[0].address = (unsigned long) __va(cpu_gdt_descr[0].address); + load_gdt(&cpu_gdt_descr[0]); - cr4 = read_cr4(); - - if (cr4 & X86_CR4_PSE) { - swapper_pg_dir[pgd_index(0)].pgd = - efi_bak_pg_dir_pointer[0].pgd; - } else { - swapper_pg_dir[pgd_index(0)].pgd = - efi_bak_pg_dir_pointer[0].pgd; - swapper_pg_dir[pgd_index(0x400000)].pgd = - efi_bak_pg_dir_pointer[1].pgd; - } + clone_pgd_range(swapper_pg_dir, efi_bak_pg_dir_pointer, KERNEL_PGD_PTRS); /* * After the lock is released, the original page table is restored. */ - local_flush_tlb(); + __flush_tlb_all(); local_irq_restore(efi_rt_eflags); spin_unlock(&efi_rt_lock); } -static efi_status_t +static efi_status_t __init phys_efi_set_virtual_address_map(unsigned long memory_map_size, unsigned long descriptor_size, u32 descriptor_version, @@ -155,7 +116,7 @@ phys_efi_set_virtual_address_map(unsigne return status; } -static efi_status_t +static efi_status_t __init phys_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) { efi_status_t status; diff -urNp linux-2.6.17.11/arch/i386/kernel/efi_stub.S linux-2.6.17.11/arch/i386/kernel/efi_stub.S --- linux-2.6.17.11/arch/i386/kernel/efi_stub.S 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/efi_stub.S 2006-09-01 16:20:28.000000000 -0400 @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -22,7 +23,7 @@ * service functions will comply with gcc calling convention, too. 
*/ -.text +__INIT ENTRY(efi_call_phys) /* * 0. The function can only be called in Linux kernel. So CS has been @@ -38,9 +39,7 @@ ENTRY(efi_call_phys) * The mapping of lower virtual memory has been created in prelog and * epilog. */ - movl $1f, %edx - subl $__PAGE_OFFSET, %edx - jmp *%edx + jmp 1f-__PAGE_OFFSET 1: /* @@ -49,14 +48,8 @@ ENTRY(efi_call_phys) * parameter 2, ..., param n. To make things easy, we save the return * address of efi_call_phys in a global variable. */ - popl %edx - movl %edx, saved_return_addr - /* get the function pointer into ECX*/ - popl %ecx - movl %ecx, efi_rt_function_ptr - movl $2f, %edx - subl $__PAGE_OFFSET, %edx - pushl %edx + popl (saved_return_addr) + popl (efi_rt_function_ptr) /* * 3. Clear PG bit in %CR0. @@ -75,9 +68,8 @@ ENTRY(efi_call_phys) /* * 5. Call the physical function. */ - jmp *%ecx + call *(efi_rt_function_ptr-__PAGE_OFFSET) -2: /* * 6. After EFI runtime service returns, control will return to * following instruction. We'd better readjust stack pointer first. @@ -87,37 +79,29 @@ ENTRY(efi_call_phys) /* * 7. Restore PG bit */ - movl %cr0, %edx - orl $0x80000000, %edx - movl %edx, %cr0 - jmp 1f -1: /* * 8. Now restore the virtual mode from flat mode by * adding EIP with PAGE_OFFSET. */ - movl $1f, %edx - jmp *%edx + movl %cr0, %edx + orl $0x80000000, %edx + movl %edx, %cr0 + jmp 1f+__PAGE_OFFSET 1: /* * 9. Balance the stack. And because EAX contain the return value, * we'd better not clobber it. */ - leal efi_rt_function_ptr, %edx - movl (%edx), %ecx - pushl %ecx + pushl (efi_rt_function_ptr) /* - * 10. Push the saved return address onto the stack and return. + * 10. Return to the saved return address. 
*/ - leal saved_return_addr, %edx - movl (%edx), %ecx - pushl %ecx - ret + jmpl *(saved_return_addr) .previous -.data +__INITDATA saved_return_addr: .long 0 efi_rt_function_ptr: diff -urNp linux-2.6.17.11/arch/i386/kernel/entry.S linux-2.6.17.11/arch/i386/kernel/entry.S --- linux-2.6.17.11/arch/i386/kernel/entry.S 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/entry.S 2006-09-01 16:20:28.000000000 -0400 @@ -82,7 +82,7 @@ VM_MASK = 0x00020000 #define resume_kernel restore_nocheck #endif -#define SAVE_ALL \ +#define __SAVE_ALL(_DS) \ cld; \ pushl %es; \ pushl %ds; \ @@ -93,10 +93,24 @@ VM_MASK = 0x00020000 pushl %edx; \ pushl %ecx; \ pushl %ebx; \ - movl $(__USER_DS), %edx; \ + movl $(_DS), %edx; \ movl %edx, %ds; \ movl %edx, %es; +#ifdef CONFIG_PAX_KERNEXEC +#define SAVE_ALL \ + __SAVE_ALL(__KERNEL_DS) \ + movl %cr0, %edx; \ + movl %edx, %esi; \ + orl $0x10000, %edx; \ + xorl %edx, %esi; \ + movl %edx, %cr0; +#elif defined(CONFIG_PAX_NOVSYSCALL) +#define SAVE_ALL __SAVE_ALL(__KERNEL_DS) +#else +#define SAVE_ALL __SAVE_ALL(__USER_DS) +#endif + #define RESTORE_INT_REGS \ popl %ebx; \ popl %ecx; \ @@ -146,7 +160,19 @@ ret_from_intr: movl EFLAGS(%esp), %eax # mix EFLAGS and CS movb CS(%esp), %al testl $(VM_MASK | 3), %eax + +#ifdef CONFIG_PAX_KERNEXEC + jnz resume_userspace + + movl %cr0, %edx + xorl %esi, %edx + movl %edx, %cr0 + jmp resume_kernel +#else jz resume_kernel +#endif + + ENTRY(resume_userspace) cli # make sure we don't miss an interrupt # setting need_resched or sigpending @@ -190,9 +216,17 @@ sysenter_past_esp: * Load the potential sixth argument from user stack. * Careful about security. 
*/ + +#ifdef CONFIG_PAX_MEMORY_UDEREF + pushl $(__USER_DS) + pop %ds +1: movl %ds:(%ebp),%ebp +#else cmpl $__PAGE_OFFSET-3,%ebp jae syscall_fault 1: movl (%ebp),%ebp +#endif + .section __ex_table,"a" .align 4 .long 1b,syscall_fault @@ -213,13 +247,33 @@ sysenter_past_esp: movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx jne syscall_exit_work + +#ifdef CONFIG_PAX_RANDKSTACK + pushl %eax + call pax_randomize_kstack + popl %eax +#endif + /* if something modifies registers it must also disable sysexit */ movl EIP(%esp), %edx movl OLDESP(%esp), %ecx +1: mov DS(%esp), %ds +2: mov ES(%esp), %es xorl %ebp,%ebp sti sysexit +.section .fixup,"ax" +3: movl $0,DS(%esp) + jmp 1b +4: movl $0,ES(%esp) + jmp 2b +.previous +.section __ex_table,"a" + .align 4 + .long 1b,3b + .long 2b,4b +.previous # system call handler stub ENTRY(system_call) @@ -247,6 +301,10 @@ syscall_exit: testw $_TIF_ALLWORK_MASK, %cx # current->work jne syscall_exit_work +#ifdef CONFIG_PAX_RANDKSTACK + call pax_randomize_kstack +#endif + restore_all: movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS # Warning: OLDSS(%esp) contains the wrong/random values if we @@ -402,7 +460,7 @@ syscall_badsys: * Build the entry stubs and pointer table with * some assembler magic. 
*/ -.data +.section .rodata,"a",@progbits ENTRY(interrupt) .text @@ -412,7 +470,7 @@ ENTRY(irq_entries_start) ALIGN 1: pushl $vector-256 jmp common_interrupt -.data +.section .rodata,"a",@progbits .long 1b .text vector=vector+1 @@ -459,10 +517,19 @@ error_code: movl ORIG_EAX(%esp), %edx # get the error code movl %eax, ORIG_EAX(%esp) movl %ecx, ES(%esp) - movl $(__USER_DS), %ecx + movl $(__KERNEL_DS), %ecx movl %ecx, %ds movl %ecx, %es movl %esp,%eax # pt_regs pointer + +#ifdef CONFIG_PAX_KERNEXEC + movl %cr0, %ecx + movl %ecx, %esi + orl $0x10000, %ecx + xorl %ecx, %esi + movl %ecx, %cr0 +#endif + call *%edi jmp ret_from_exception @@ -558,6 +625,13 @@ nmi_stack_correct: xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_nmi + +#ifdef CONFIG_PAX_KERNEXEC + movl %cr0, %edx + xorl %esi, %edx + movl %edx, %cr0 +#endif + jmp restore_all nmi_stack_fixup: @@ -588,6 +662,13 @@ nmi_16bit_stack: FIXUP_ESPFIX_STACK # %eax == %esp xorl %edx,%edx # zero error code call do_nmi + +#ifdef CONFIG_PAX_KERNEXEC + movl %cr0, %edx + xorl %esi, %edx + movl %edx, %cr0 +#endif + RESTORE_REGS lss 12+4(%esp), %esp # back to 16bit stack 1: iret @@ -663,7 +744,6 @@ ENTRY(spurious_interrupt_bug) pushl $do_spurious_interrupt_bug jmp error_code -.section .rodata,"a" #include "syscall_table.S" syscall_table_size=(.-sys_call_table) diff -urNp linux-2.6.17.11/arch/i386/kernel/head.S linux-2.6.17.11/arch/i386/kernel/head.S --- linux-2.6.17.11/arch/i386/kernel/head.S 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/head.S 2006-09-01 16:20:31.000000000 -0400 @@ -46,6 +46,16 @@ */ #define INIT_MAP_BEYOND_END (128*1024) +#ifdef CONFIG_PAX_KERNEXEC +/* PaX: fill first page in .text with int3 to catch NULL derefs in kernel mode */ +.fill 4096,1,0xcc +#endif + +/* + * Real beginning of normal "text" segment + */ +ENTRY(stext) +ENTRY(_stext) /* * 32-bit kernel entrypoint; only used by the boot CPU. 
On entry, @@ -67,6 +77,26 @@ ENTRY(startup_32) movl %eax,%fs movl %eax,%gs +#ifdef CONFIG_PAX_MEMORY_UDEREF + movl $((((__PAGE_OFFSET-1) & 0xf0000000) >> 12) | 0x00c09700),%eax + movl %eax,(cpu_gdt_table - __PAGE_OFFSET + GDT_ENTRY_KERNEL_DS * 8 + 4) + movl $((((__PAGE_OFFSET-1) & 0xf0000000) >> 12) | 0x00c0f300),%eax + movl %eax,(cpu_gdt_table - __PAGE_OFFSET + GDT_ENTRY_DEFAULT_USER_DS * 8 + 4) +#endif + +#ifdef CONFIG_PAX_KERNEXEC + movl $ __KERNEL_TEXT_OFFSET,%eax + movw %ax,(cpu_gdt_table - __PAGE_OFFSET + __KERNEL_CS + 2) + rorl $16,%eax + movb %al,(cpu_gdt_table - __PAGE_OFFSET + __KERNEL_CS + 4) + movb %ah,(cpu_gdt_table - __PAGE_OFFSET + __KERNEL_CS + 7) + + movb %al,(boot_gdt_table - __PAGE_OFFSET + __BOOT_CS + 4) + movb %ah,(boot_gdt_table - __PAGE_OFFSET + __BOOT_CS + 7) + rorl $16,%eax + movw %ax,(boot_gdt_table - __PAGE_OFFSET + __BOOT_CS + 2) +#endif + /* * Clear BSS first so that there are no surprises... * No need to cld as DF is already clear from cld above... @@ -114,24 +144,42 @@ ENTRY(startup_32) * Warning: don't use %esi or the stack in this code. However, %esp * can be used as a GPR if you really need it... 
*/ -page_pde_offset = (__PAGE_OFFSET >> 20); - +#ifdef CONFIG_X86_PAE +page_pde_offset = ((__PAGE_OFFSET >> 21) * (4096 / PTRS_PER_PTE_asm)); +#else +page_pde_offset = ((__PAGE_OFFSET >> 22) * (4096 / PTRS_PER_PTE_asm)); +#endif movl $(pg0 - __PAGE_OFFSET), %edi +#ifdef CONFIG_X86_PAE + movl $(swapper_pm_dir - __PAGE_OFFSET), %edx +#else movl $(swapper_pg_dir - __PAGE_OFFSET), %edx - movl $0x007, %eax /* 0x007 = PRESENT+RW+USER */ +#endif + movl $0x063, %eax /* 0x063 = DIRTY+ACCESSED+PRESENT+RW */ 10: - leal 0x007(%edi),%ecx /* Create PDE entry */ + leal 0x063(%edi),%ecx /* Create PDE entry */ movl %ecx,(%edx) /* Store identity PDE entry */ movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ +#ifdef CONFIG_X86_PAE + movl $0,4(%edx) + movl $0,page_pde_offset+4(%edx) + addl $8,%edx + movl $512, %ecx +#else addl $4,%edx movl $1024, %ecx +#endif 11: stosl +#ifdef CONFIG_X86_PAE + movl $0,(%edi) + addl $4,%edi +#endif addl $0x1000,%eax loop 11b /* End condition: we must map up to and including INIT_MAP_BEYOND_END */ - /* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */ - leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp + /* bytes beyond the end of our own page tables; the +0x063 is the attribute bits */ + leal (INIT_MAP_BEYOND_END+0x063)(%edi),%ebp cmpl %ebp,%eax jb 10b movl %edi,(init_pg_tables_end - __PAGE_OFFSET) @@ -154,6 +202,11 @@ ENTRY(startup_32_smp) movl %eax,%fs movl %eax,%gs + /* This is a secondary processor (AP) */ + xorl %ebx,%ebx + incl %ebx +#endif /* CONFIG_SMP */ + /* * New page tables may be in 4Mbyte page mode and may * be using the global pages. @@ -169,26 +222,27 @@ ENTRY(startup_32_smp) * not yet offset PAGE_OFFSET.. */ #define cr4_bits mmu_cr4_features-__PAGE_OFFSET +3: movl cr4_bits,%edx andl %edx,%edx - jz 6f + jz 5f movl %cr4,%eax # Turn on paging options (PSE,PAE,..) 
orl %edx,%eax movl %eax,%cr4 - btl $5, %eax # check if PAE is enabled - jnc 6f +#ifdef CONFIG_X86_PAE + movl %ebx,%edi /* Check if extended functions are implemented */ movl $0x80000000, %eax cpuid cmpl $0x80000000, %eax - jbe 6f + jbe 4f mov $0x80000001, %eax cpuid /* Execute Disable bit supported? */ btl $20, %edx - jnc 6f + jnc 4f /* Setup EFER (Extended Feature Enable Register) */ movl $0xc0000080, %ecx @@ -197,14 +251,12 @@ ENTRY(startup_32_smp) btsl $11, %eax /* Make changes effective */ wrmsr + btsl $63,__supported_pte_mask-__PAGE_OFFSET -6: - /* This is a secondary processor (AP) */ - xorl %ebx,%ebx - incl %ebx - -3: -#endif /* CONFIG_SMP */ +4: + movl %edi,%ebx +#endif +5: /* * Enable paging @@ -229,9 +281,7 @@ ENTRY(startup_32_smp) #ifdef CONFIG_SMP andl %ebx,%ebx - jz 1f /* Initial CPU cleans BSS */ - jmp checkCPUtype -1: + jnz checkCPUtype /* Initial CPU cleans BSS */ #endif /* CONFIG_SMP */ /* @@ -308,8 +358,6 @@ is386: movl $2,%ecx # set MP ljmp $(__KERNEL_CS),$1f 1: movl $(__KERNEL_DS),%eax # reload all the segment registers movl %eax,%ss # after changing gdt. 
- - movl $(__USER_DS),%eax # DS/ES contains default USER segment movl %eax,%ds movl %eax,%es @@ -383,63 +431,71 @@ rp_sidt: /* This is the default interrupt "handler" :-) */ ALIGN ignore_int: - cld #ifdef CONFIG_PRINTK - pushl %eax - pushl %ecx - pushl %edx - pushl %es - pushl %ds + cld movl $(__KERNEL_DS),%eax movl %eax,%ds movl %eax,%es - pushl 16(%esp) - pushl 24(%esp) - pushl 32(%esp) - pushl 40(%esp) + pushl 12(%esp) + pushl 12(%esp) + pushl 12(%esp) + pushl 12(%esp) pushl $int_msg #ifdef CONFIG_EARLY_PRINTK call early_printk #else call printk #endif - addl $(5*4),%esp - popl %ds - popl %es - popl %edx - popl %ecx - popl %eax #endif - iret +1: hlt + jmp 1b -/* - * Real beginning of normal "text" segment - */ -ENTRY(stext) -ENTRY(_stext) - -/* - * BSS section - */ -.section ".bss.page_aligned","w" +.section .swapper_pg_dir,"a",@progbits ENTRY(swapper_pg_dir) +#ifdef CONFIG_X86_PAE + .long swapper_pm_dir-__PAGE_OFFSET+1 + .long 0 + .long swapper_pm_dir+512*8-__PAGE_OFFSET+1 + .long 0 + .long swapper_pm_dir+512*16-__PAGE_OFFSET+1 + .long 0 + .long swapper_pm_dir+512*24-__PAGE_OFFSET+1 + .long 0 +#else .fill 1024,4,0 +#endif + +#ifdef CONFIG_X86_PAE +.section .swapper_pm_dir,"a",@progbits +ENTRY(swapper_pm_dir) + .fill 512,8,0 + .fill 512,8,0 + .fill 512,8,0 + .fill 512,8,0 +#endif + +.section .empty_zero_page,"a",@progbits ENTRY(empty_zero_page) .fill 4096,1,0 /* - * This starts the data section. - */ -.data + * The IDT has to be page-aligned to simplify the Pentium + * F0 0F bug workaround.. We have a special link segment + * for this. 
+ */ +.section .idt,"a",@progbits +ENTRY(idt_table) + .fill 256,8,0 + +.section .rodata,"a",@progbits +ready: .byte 0 ENTRY(stack_start) - .long init_thread_union+THREAD_SIZE + .long init_thread_union+THREAD_SIZE-8 .long __BOOT_DS -ready: .byte 0 - int_msg: - .asciz "Unknown interrupt or fault at EIP %p %p %p\n" + .asciz "Unknown interrupt, stack: %p %p %p %p\n" /* * The IDT and GDT 'descriptors' are a strange 48-bit object @@ -465,10 +521,12 @@ idt_descr: # boot GDT descriptor (later on used by CPU#0): .word 0 # 32 bit align gdt_desc.address -cpu_gdt_descr: +ENTRY(cpu_gdt_descr) .word GDT_ENTRIES*8-1 .long cpu_gdt_table + .fill NR_CPUS*8-6,1,0 # space for the other GDT descriptors + /* * The boot_gdt_table must mirror the equivalent in setup.S and is * used only for booting. @@ -476,13 +534,13 @@ cpu_gdt_descr: .align L1_CACHE_BYTES ENTRY(boot_gdt_table) .fill GDT_ENTRY_BOOT_CS,8,0 - .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ - .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ + .quad 0x00cf9b000000ffff /* kernel 4GB code at 0x00000000 */ + .quad 0x00cf93000000ffff /* kernel 4GB data at 0x00000000 */ /* * The Global Descriptor Table contains 28 quadwords, per-CPU. 
*/ - .align L1_CACHE_BYTES + .align PAGE_SIZE_asm ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* NULL descriptor */ .quad 0x0000000000000000 /* 0x0b reserved */ @@ -497,10 +555,10 @@ ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* 0x53 reserved */ .quad 0x0000000000000000 /* 0x5b reserved */ - .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ - .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ - .quad 0x00cffa000000ffff /* 0x73 user 4GB code at 0x00000000 */ - .quad 0x00cff2000000ffff /* 0x7b user 4GB data at 0x00000000 */ + .quad 0x00cf9b000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ + .quad 0x00cf93000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ + .quad 0x00cffb000000ffff /* 0x73 user 4GB code at 0x00000000 */ + .quad 0x00cff3000000ffff /* 0x7b user 4GB data at 0x00000000 */ .quad 0x0000000000000000 /* 0x80 TSS descriptor */ .quad 0x0000000000000000 /* 0x88 LDT descriptor */ @@ -510,24 +568,30 @@ ENTRY(cpu_gdt_table) * They code segments and data segments have fixed 64k limits, * the transfer segment sizes are set at run time. */ - .quad 0x00409a000000ffff /* 0x90 32-bit code */ - .quad 0x00009a000000ffff /* 0x98 16-bit code */ - .quad 0x000092000000ffff /* 0xa0 16-bit data */ - .quad 0x0000920000000000 /* 0xa8 16-bit data */ - .quad 0x0000920000000000 /* 0xb0 16-bit data */ + .quad 0x00409b000000ffff /* 0x90 32-bit code */ + .quad 0x00009b000000ffff /* 0x98 16-bit code */ + .quad 0x000093000000ffff /* 0xa0 16-bit data */ + .quad 0x0000930000000000 /* 0xa8 16-bit data */ + .quad 0x0000930000000000 /* 0xb0 16-bit data */ /* * The APM segments have byte granularity and their bases * are set at run time. All have 64k limits. 
*/ - .quad 0x00409a000000ffff /* 0xb8 APM CS code */ - .quad 0x00009a000000ffff /* 0xc0 APM CS 16 code (16 bit) */ - .quad 0x004092000000ffff /* 0xc8 APM DS data */ + .quad 0x00409b000000ffff /* 0xb8 APM CS code */ + .quad 0x00009b000000ffff /* 0xc0 APM CS 16 code (16 bit) */ + .quad 0x004093000000ffff /* 0xc8 APM DS data */ - .quad 0x0000920000000000 /* 0xd0 - ESPFIX 16-bit SS */ + .quad 0x0000930000000000 /* 0xd0 - ESPFIX 16-bit SS */ .quad 0x0000000000000000 /* 0xd8 - unused */ .quad 0x0000000000000000 /* 0xe0 - unused */ .quad 0x0000000000000000 /* 0xe8 - unused */ .quad 0x0000000000000000 /* 0xf0 - unused */ .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */ + /* Be sure this is zeroed to avoid false validations in Xen */ + .fill PAGE_SIZE_asm / 8 - GDT_ENTRIES,8,0 + +#ifdef CONFIG_SMP + .fill (NR_CPUS-1) * (PAGE_SIZE_asm / 8),8,0 /* other CPU's GDT */ +#endif diff -urNp linux-2.6.17.11/arch/i386/kernel/i386_ksyms.c linux-2.6.17.11/arch/i386/kernel/i386_ksyms.c --- linux-2.6.17.11/arch/i386/kernel/i386_ksyms.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/i386_ksyms.c 2006-09-01 16:20:28.000000000 -0400 @@ -3,12 +3,16 @@ #include #include +EXPORT_SYMBOL_GPL(cpu_gdt_table); + EXPORT_SYMBOL(__down_failed); EXPORT_SYMBOL(__down_failed_interruptible); EXPORT_SYMBOL(__down_failed_trylock); EXPORT_SYMBOL(__up_wakeup); /* Networking helper routines. */ EXPORT_SYMBOL(csum_partial_copy_generic); +EXPORT_SYMBOL(csum_partial_copy_generic_to_user); +EXPORT_SYMBOL(csum_partial_copy_generic_from_user); EXPORT_SYMBOL(__get_user_1); EXPORT_SYMBOL(__get_user_2); diff -urNp linux-2.6.17.11/arch/i386/kernel/init_task.c linux-2.6.17.11/arch/i386/kernel/init_task.c --- linux-2.6.17.11/arch/i386/kernel/init_task.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/init_task.c 2006-09-01 16:20:28.000000000 -0400 @@ -42,5 +42,5 @@ EXPORT_SYMBOL(init_task); * per-CPU TSS segments. 
Threads are completely 'soft' on Linux, * no more per-task TSS's. */ -DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS; +struct tss_struct init_tss[NR_CPUS] ____cacheline_internodealigned_in_smp = { [0 ... NR_CPUS-1] = INIT_TSS }; diff -urNp linux-2.6.17.11/arch/i386/kernel/ioport.c linux-2.6.17.11/arch/i386/kernel/ioport.c --- linux-2.6.17.11/arch/i386/kernel/ioport.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/ioport.c 2006-09-01 16:20:28.000000000 -0400 @@ -16,6 +16,7 @@ #include #include #include +#include /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) @@ -64,9 +65,16 @@ asmlinkage long sys_ioperm(unsigned long if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) return -EINVAL; +#ifdef CONFIG_GRKERNSEC_IO + if (turn_on) { + gr_handle_ioperm(); +#else if (turn_on && !capable(CAP_SYS_RAWIO)) +#endif return -EPERM; - +#ifdef CONFIG_GRKERNSEC_IO + } +#endif /* * If it's the first ioperm() call in this thread's lifetime, set the * IO bitmap up. ioperm() is much less timing critical than clone(), @@ -88,7 +96,7 @@ asmlinkage long sys_ioperm(unsigned long * because the ->io_bitmap_max value must match the bitmap * contents: */ - tss = &per_cpu(init_tss, get_cpu()); + tss = init_tss + get_cpu(); set_bitmap(t->io_bitmap_ptr, from, num, !turn_on); @@ -142,8 +150,13 @@ asmlinkage long sys_iopl(unsigned long u return -EINVAL; /* Trying to gain more privileges? 
*/ if (level > old) { +#ifdef CONFIG_GRKERNSEC_IO + gr_handle_iopl(); + return -EPERM; +#else if (!capable(CAP_SYS_RAWIO)) return -EPERM; +#endif } t->iopl = level << 12; regs->eflags = (regs->eflags & ~X86_EFLAGS_IOPL) | t->iopl; diff -urNp linux-2.6.17.11/arch/i386/kernel/irq.c linux-2.6.17.11/arch/i386/kernel/irq.c --- linux-2.6.17.11/arch/i386/kernel/irq.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/irq.c 2006-09-01 16:20:28.000000000 -0400 @@ -91,7 +91,7 @@ fastcall unsigned int do_IRQ(struct pt_r int arg1, arg2, ebx; /* build the stack frame on the IRQ stack */ - isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); + isp = (u32*) ((char*)irqctx + sizeof(*irqctx)) - 2; irqctx->tinfo.task = curctx->tinfo.task; irqctx->tinfo.previous_esp = current_stack_pointer; @@ -119,10 +119,10 @@ fastcall unsigned int do_IRQ(struct pt_r * gcc's 3.0 and earlier don't handle that correctly. */ static char softirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__aligned__(THREAD_SIZE))); + __attribute__((__aligned__(THREAD_SIZE), __section__(".bss.page_aligned"))); static char hardirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__aligned__(THREAD_SIZE))); + __attribute__((__aligned__(THREAD_SIZE), __section__(".bss.page_aligned"))); /* * allocate per-cpu stacks for hardirq and for softirq processing @@ -182,7 +182,7 @@ asmlinkage void do_softirq(void) irqctx->tinfo.previous_esp = current_stack_pointer; /* build the stack frame on the softirq stack */ - isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); + isp = (u32*) ((char*)irqctx + sizeof(*irqctx)) - 2; asm volatile( " xchgl %%ebx,%%esp \n" diff -urNp linux-2.6.17.11/arch/i386/kernel/ldt.c linux-2.6.17.11/arch/i386/kernel/ldt.c --- linux-2.6.17.11/arch/i386/kernel/ldt.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/ldt.c 2006-09-01 16:20:28.000000000 -0400 @@ -103,6 +103,19 @@ int init_new_context(struct task_struct retval = copy_ldt(&mm->context, &old_mm->context); 
up(&old_mm->context.sem); } + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) + if (!mm->context.user_cs_limit) { + mm->context.user_cs_base = 0UL; + mm->context.user_cs_limit = ~0UL; + +#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_SMP) + cpus_clear(mm->context.cpu_user_cs_mask); +#endif + + } +#endif + return retval; } @@ -160,7 +173,7 @@ static int read_default_ldt(void __user { int err; unsigned long size; - void *address; + const void *address; err = 0; address = &default_ldt[0]; @@ -215,6 +228,13 @@ static int write_ldt(void __user * ptr, } } +#ifdef CONFIG_PAX_SEGMEXEC + if ((mm->pax_flags & MF_PAX_SEGMEXEC) && (ldt_info.contents & MODIFY_LDT_CONTENTS_CODE)) { + error = -EINVAL; + goto out_unlock; + } +#endif + entry_1 = LDT_entry_a(&ldt_info); entry_2 = LDT_entry_b(&ldt_info); if (oldmode) diff -urNp linux-2.6.17.11/arch/i386/kernel/module.c linux-2.6.17.11/arch/i386/kernel/module.c --- linux-2.6.17.11/arch/i386/kernel/module.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/module.c 2006-09-01 16:20:28.000000000 -0400 @@ -21,6 +21,7 @@ #include #include #include +#include #if 0 #define DEBUGP printk @@ -32,9 +33,30 @@ void *module_alloc(unsigned long size) { if (size == 0) return NULL; + +#ifdef CONFIG_PAX_KERNEXEC + return vmalloc(size); +#else return vmalloc_exec(size); +#endif + } +#ifdef CONFIG_PAX_KERNEXEC +void *module_alloc_exec(unsigned long size) +{ + struct vm_struct *area; + + if (size == 0) + return NULL; + + area = __get_vm_area(size, 0, (unsigned long)&MODULES_VADDR, (unsigned long)&MODULES_END); + if (area) + return area->addr; + + return NULL; +} +#endif /* Free memory returned from module_alloc */ void module_free(struct module *mod, void *module_region) @@ -44,6 +66,45 @@ void module_free(struct module *mod, voi table entries. 
*/ } +#ifdef CONFIG_PAX_KERNEXEC +void module_free_exec(struct module *mod, void *module_region) +{ + struct vm_struct **p, *tmp; + + if (!module_region) + return; + + if ((PAGE_SIZE-1) & (unsigned long)module_region) { + printk(KERN_ERR "Trying to module_free_exec() bad address (%p)\n", module_region); + WARN_ON(1); + return; + } + + write_lock(&vmlist_lock); + for (p = &vmlist ; (tmp = *p) != NULL ;p = &tmp->next) + if (tmp->addr == module_region) + break; + + if (tmp) { + unsigned long cr0; + + pax_open_kernel(cr0); + memset(tmp->addr, 0xCC, tmp->size); + pax_close_kernel(cr0); + + *p = tmp->next; + kfree(tmp); + } + write_unlock(&vmlist_lock); + + if (!tmp) { + printk(KERN_ERR "Trying to module_free_exec() nonexistent vm area (%p)\n", + module_region); + WARN_ON(1); + } +} +#endif + /* We don't need anything special. */ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, @@ -62,14 +123,16 @@ int apply_relocate(Elf32_Shdr *sechdrs, unsigned int i; Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr; Elf32_Sym *sym; - uint32_t *location; + uint32_t *plocation, location; DEBUGP("Applying relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { /* This is where to make the change */ - location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr - + rel[i].r_offset; + plocation = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + rel[i].r_offset; + location = (uint32_t)plocation; + if (sechdrs[sechdrs[relsec].sh_info].sh_flags & SHF_EXECINSTR) + plocation = (void *)plocation + __KERNEL_TEXT_OFFSET; /* This is the symbol it is referring to. Note that all undefined symbols have been resolved. 
*/ sym = (Elf32_Sym *)sechdrs[symindex].sh_addr @@ -78,11 +141,11 @@ int apply_relocate(Elf32_Shdr *sechdrs, switch (ELF32_R_TYPE(rel[i].r_info)) { case R_386_32: /* We add the value into the location given */ - *location += sym->st_value; + *plocation += sym->st_value; break; case R_386_PC32: /* Add the value, subtract its postition */ - *location += sym->st_value - (uint32_t)location; + *plocation += sym->st_value - location; break; default: printk(KERN_ERR "module %s: Unknown relocation: %u\n", diff -urNp linux-2.6.17.11/arch/i386/kernel/process.c linux-2.6.17.11/arch/i386/kernel/process.c --- linux-2.6.17.11/arch/i386/kernel/process.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/process.c 2006-09-01 16:20:28.000000000 -0400 @@ -69,7 +69,7 @@ EXPORT_SYMBOL(boot_option_idle_override) */ unsigned long thread_saved_pc(struct task_struct *tsk) { - return ((unsigned long *)tsk->thread.esp)[3]; + return tsk->thread.eip; } /* @@ -296,7 +296,7 @@ void show_regs(struct pt_regs * regs) 0xffff & regs->xcs,regs->eip, smp_processor_id()); print_symbol("EIP is at %s\n", regs->eip); - if (user_mode_vm(regs)) + if (user_mode(regs)) printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); printk(" EFLAGS: %08lx %s (%s %.*s)\n", regs->eflags, print_tainted(), system_utsname.release, @@ -345,8 +345,8 @@ int kernel_thread(int (*fn)(void *), voi regs.ebx = (unsigned long) fn; regs.edx = (unsigned long) arg; - regs.xds = __USER_DS; - regs.xes = __USER_DS; + regs.xds = __KERNEL_DS; + regs.xes = __KERNEL_DS; regs.orig_eax = -1; regs.eip = (unsigned long) kernel_thread_helper; regs.xcs = __KERNEL_CS; @@ -369,7 +369,7 @@ void exit_thread(void) /* The process may have allocated an io port bitmap... nuke it. 
*/ if (unlikely(NULL != t->io_bitmap_ptr)) { int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss = init_tss + cpu; kfree(t->io_bitmap_ptr); t->io_bitmap_ptr = NULL; @@ -389,6 +389,9 @@ void flush_thread(void) { struct task_struct *tsk = current; + __asm__("mov %0,%%fs\n" + "mov %0,%%gs\n" + : : "r" (0) : "memory"); memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* @@ -421,7 +424,7 @@ int copy_thread(int nr, unsigned long cl struct task_struct *tsk; int err; - childregs = task_pt_regs(p); + childregs = task_stack_page(p) + THREAD_SIZE - sizeof(struct pt_regs) - 8; *childregs = *regs; childregs->eax = 0; childregs->esp = esp; @@ -464,6 +467,11 @@ int copy_thread(int nr, unsigned long cl if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) goto out; +#ifdef CONFIG_PAX_SEGMEXEC + if ((current->mm->pax_flags & MF_PAX_SEGMEXEC) && (info.contents & MODIFY_LDT_CONTENTS_CODE)) + goto out; +#endif + desc = p->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; desc->a = LDT_entry_a(&info); desc->b = LDT_entry_b(&info); @@ -628,7 +636,11 @@ struct task_struct fastcall * __switch_t struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss = init_tss + cpu; + +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; +#endif /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ @@ -651,11 +663,23 @@ struct task_struct fastcall * __switch_t savesegment(fs, prev->fs); savesegment(gs, prev->gs); +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(cr0); +#endif + +#ifdef CONFIG_PAX_MEMORY_UDEREF + __set_fs(get_fs(), cpu); +#endif + /* * Load the per-thread Thread-Local Storage descriptor. */ load_TLS(next, cpu); +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + /* * Restore %fs and %gs if needed. 
* @@ -809,8 +833,18 @@ asmlinkage int sys_set_thread_area(struc struct desc_struct *desc; int cpu, idx; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; +#endif + if (copy_from_user(&info, u_info, sizeof(info))) return -EFAULT; + +#ifdef CONFIG_PAX_SEGMEXEC + if ((current->mm->pax_flags & MF_PAX_SEGMEXEC) && (info.contents & MODIFY_LDT_CONTENTS_CODE)) + return -EINVAL; +#endif + idx = info.entry_number; /* @@ -842,8 +876,17 @@ asmlinkage int sys_set_thread_area(struc desc->a = LDT_entry_a(&info); desc->b = LDT_entry_b(&info); } + +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(cr0); +#endif + load_TLS(t, cpu); +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + put_cpu(); return 0; @@ -899,9 +942,27 @@ asmlinkage int sys_get_thread_area(struc return 0; } -unsigned long arch_align_stack(unsigned long sp) +#ifdef CONFIG_PAX_RANDKSTACK +asmlinkage void pax_randomize_kstack(void) { - if (randomize_va_space) - sp -= get_random_int() % 8192; - return sp & ~0xf; + struct tss_struct *tss = init_tss + smp_processor_id(); + unsigned long time; + + if (!randomize_va_space) + return; + + rdtscl(time); + + /* P4 seems to return a 0 LSB, ignore it */ +#ifdef CONFIG_MPENTIUM4 + time &= 0x1EUL; + time <<= 2; +#else + time &= 0xFUL; + time <<= 3; +#endif + + tss->esp0 ^= time; + current->thread.esp0 = tss->esp0; } +#endif diff -urNp linux-2.6.17.11/arch/i386/kernel/ptrace.c linux-2.6.17.11/arch/i386/kernel/ptrace.c --- linux-2.6.17.11/arch/i386/kernel/ptrace.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/ptrace.c 2006-09-01 16:20:28.000000000 -0400 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -342,6 +343,11 @@ ptrace_set_thread_area(struct task_struc if (copy_from_user(&info, user_desc, sizeof(info))) return -EFAULT; +#ifdef CONFIG_PAX_SEGMEXEC + if ((child->mm->pax_flags & MF_PAX_SEGMEXEC) && (info.contents & MODIFY_LDT_CONTENTS_CODE)) + return -EINVAL; +#endif + if (idx < GDT_ENTRY_TLS_MIN || idx > 
GDT_ENTRY_TLS_MAX) return -EINVAL; @@ -432,6 +438,17 @@ long arch_ptrace(struct task_struct *chi if(addr == (long) &dummy->u_debugreg[5]) break; if(addr < (long) &dummy->u_debugreg[4] && ((unsigned long) data) >= TASK_SIZE-3) break; + +#ifdef CONFIG_GRKERNSEC + if(addr >= (long) &dummy->u_debugreg[0] && + addr <= (long) &dummy->u_debugreg[3]){ + long reg = (addr - (long) &dummy->u_debugreg[0]) >> 2; + long type = (child->thread.debugreg[7] >> (DR_CONTROL_SHIFT + 4*reg)) & 3; + long align = (child->thread.debugreg[7] >> (DR_CONTROL_SHIFT + 2 + 4*reg)) & 3; + if((type & 1) && (data & align)) + break; + } +#endif /* Sanity-check data. Take one half-byte at once with * check = (val >> (16 + 4*i)) & 0xf. It contains the @@ -645,7 +662,7 @@ void send_sigtrap(struct task_struct *ts info.si_code = TRAP_BRKPT; /* User-mode eip? */ - info.si_addr = user_mode_vm(regs) ? (void __user *) regs->eip : NULL; + info.si_addr = user_mode(regs) ? (void __user *) regs->eip : NULL; /* Send us the fakey SIGTRAP */ force_sig_info(SIGTRAP, &info, tsk); diff -urNp linux-2.6.17.11/arch/i386/kernel/reboot.c linux-2.6.17.11/arch/i386/kernel/reboot.c --- linux-2.6.17.11/arch/i386/kernel/reboot.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/reboot.c 2006-09-01 16:20:28.000000000 -0400 @@ -25,7 +25,7 @@ void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); -static int reboot_mode; +static unsigned short reboot_mode; static int reboot_thru_bios; #ifdef CONFIG_SMP @@ -138,18 +138,18 @@ core_initcall(reboot_init); doesn't work with at least one type of 486 motherboard. It is easy to stop this code working; hence the copious comments. 
*/ -static unsigned long long +static const unsigned long long real_mode_gdt_entries [3] = { 0x0000000000000000ULL, /* Null descriptor */ - 0x00009a000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ - 0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ + 0x00009b000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ + 0x000093000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ }; static struct { unsigned short size __attribute__ ((packed)); - unsigned long long * base __attribute__ ((packed)); + const unsigned long long * base __attribute__ ((packed)); } real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, real_mode_gdt_entries }, real_mode_idt = { 0x3ff, NULL }, @@ -175,7 +175,7 @@ no_idt = { 0, NULL }; More could be done here to set up the registers as if a CPU reset had occurred; hopefully real BIOSs don't assume much. */ -static unsigned char real_mode_switch [] = +static const unsigned char real_mode_switch [] = { 0x66, 0x0f, 0x20, 0xc0, /* movl %cr0,%eax */ 0x66, 0x83, 0xe0, 0x11, /* andl $0x00000011,%eax */ @@ -189,7 +189,7 @@ static unsigned char real_mode_switch [] 0x24, 0x10, /* f: andb $0x10,al */ 0x66, 0x0f, 0x22, 0xc0 /* movl %eax,%cr0 */ }; -static unsigned char jump_to_bios [] = +static const unsigned char jump_to_bios [] = { 0xea, 0x00, 0x00, 0xff, 0xff /* ljmp $0xffff,$0x0000 */ }; @@ -199,10 +199,14 @@ static unsigned char jump_to_bios [] = * specified by the code and length parameters. * We assume that length will aways be less that 100! */ -void machine_real_restart(unsigned char *code, int length) +void machine_real_restart(const unsigned char *code, unsigned int length) { unsigned long flags; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; +#endif + local_irq_disable(); /* Write zero to CMOS register number 0x0f, which the BIOS POST @@ -223,8 +227,16 @@ void machine_real_restart(unsigned char from the kernel segment. This assumes the kernel segment starts at virtual address PAGE_OFFSET. 
*/ - memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, - sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS); +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(cr0); +#endif + + clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, + USER_PGD_PTRS >= KERNEL_PGD_PTRS ? KERNEL_PGD_PTRS : USER_PGD_PTRS); + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif /* * Use `swapper_pg_dir' as our page directory. @@ -237,7 +249,7 @@ void machine_real_restart(unsigned char REBOOT.COM programs, and the previous reset routine did this too. */ - *((unsigned short *)0x472) = reboot_mode; + __put_user(reboot_mode, (unsigned short __user *)0x472); /* For the switch to real mode, copy some code to low memory. It has to be in the first 64k because it is running in 16-bit mode, and it @@ -245,9 +257,9 @@ void machine_real_restart(unsigned char off paging. Copy it near the end of the first page, out of the way of BIOS variables. */ - memcpy ((void *) (0x1000 - sizeof (real_mode_switch) - 100), + flags = __copy_to_user ((void __user *) (0x1000 - sizeof (real_mode_switch) - 100), real_mode_switch, sizeof (real_mode_switch)); - memcpy ((void *) (0x1000 - 100), code, length); + flags = __copy_to_user ((void __user *) (0x1000 - 100), code, length); /* Set up the IDT for real mode. 
*/ @@ -329,7 +341,7 @@ void machine_emergency_restart(void) __asm__ __volatile__("int3"); } /* rebooting needs to touch the page at absolute addr 0 */ - *((unsigned short *)__va(0x472)) = reboot_mode; + __put_user(reboot_mode, (unsigned short __user *)0x472); for (;;) { mach_reboot_fixups(); /* for board specific fixups */ mach_reboot(); diff -urNp linux-2.6.17.11/arch/i386/kernel/setup.c linux-2.6.17.11/arch/i386/kernel/setup.c --- linux-2.6.17.11/arch/i386/kernel/setup.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/setup.c 2006-09-01 16:20:28.000000000 -0400 @@ -88,7 +88,11 @@ struct cpuinfo_x86 new_cpu_data __initda struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; EXPORT_SYMBOL(boot_cpu_data); +#ifdef CONFIG_X86_PAE +unsigned long mmu_cr4_features = X86_CR4_PAE; +#else unsigned long mmu_cr4_features; +#endif #ifdef CONFIG_ACPI int acpi_disabled = 0; @@ -1493,14 +1497,14 @@ void __init setup_arch(char **cmdline_p) if (!MOUNT_ROOT_RDONLY) root_mountflags &= ~MS_RDONLY; - init_mm.start_code = (unsigned long) _text; - init_mm.end_code = (unsigned long) _etext; + init_mm.start_code = (unsigned long) _text + __KERNEL_TEXT_OFFSET; + init_mm.end_code = (unsigned long) _etext + __KERNEL_TEXT_OFFSET; init_mm.end_data = (unsigned long) _edata; init_mm.brk = init_pg_tables_end + PAGE_OFFSET; - code_resource.start = virt_to_phys(_text); - code_resource.end = virt_to_phys(_etext)-1; - data_resource.start = virt_to_phys(_etext); + code_resource.start = virt_to_phys(_text + __KERNEL_TEXT_OFFSET); + code_resource.end = virt_to_phys(_etext + __KERNEL_TEXT_OFFSET)-1; + data_resource.start = virt_to_phys(_data); data_resource.end = virt_to_phys(_edata)-1; parse_cmdline_early(cmdline_p); diff -urNp linux-2.6.17.11/arch/i386/kernel/signal.c linux-2.6.17.11/arch/i386/kernel/signal.c --- linux-2.6.17.11/arch/i386/kernel/signal.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/signal.c 2006-09-01 
16:20:28.000000000 -0400 @@ -351,7 +351,17 @@ static int setup_frame(int sig, struct k goto give_sigsegv; } +#ifdef CONFIG_PAX_NOVSYSCALL + restorer = frame->retcode; +#else restorer = &__kernel_sigreturn; + +#ifdef CONFIG_PAX_SEGMEXEC + if (current->mm->pax_flags & MF_PAX_SEGMEXEC) + restorer -= SEGMEXEC_TASK_SIZE; +#endif +#endif + if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; @@ -447,7 +457,18 @@ static int setup_rt_frame(int sig, struc goto give_sigsegv; /* Set up to return from userspace. */ + +#ifdef CONFIG_PAX_NOVSYSCALL + restorer = frame->retcode; +#else restorer = &__kernel_rt_sigreturn; + +#ifdef CONFIG_PAX_SEGMEXEC + if (current->mm->pax_flags & MF_PAX_SEGMEXEC) + restorer -= SEGMEXEC_TASK_SIZE; +#endif +#endif + if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; err |= __put_user(restorer, &frame->pretcode); @@ -580,7 +601,7 @@ static void fastcall do_signal(struct pt * before reaching here, so testing against kernel * CS suffices. */ - if (!user_mode(regs)) + if (!user_mode_novm(regs)) return; if (test_thread_flag(TIF_RESTORE_SIGMASK)) diff -urNp linux-2.6.17.11/arch/i386/kernel/smpboot.c linux-2.6.17.11/arch/i386/kernel/smpboot.c --- linux-2.6.17.11/arch/i386/kernel/smpboot.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/smpboot.c 2006-09-01 16:20:28.000000000 -0400 @@ -1069,7 +1069,7 @@ static int __cpuinit __smp_prepare_cpu(i /* init low mem mapping */ clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, - KERNEL_PGD_PTRS); + USER_PGD_PTRS >= KERNEL_PGD_PTRS ? 
KERNEL_PGD_PTRS : USER_PGD_PTRS); flush_tlb_all(); schedule_work(&task); wait_for_completion(&done); diff -urNp linux-2.6.17.11/arch/i386/kernel/syscall_table.S linux-2.6.17.11/arch/i386/kernel/syscall_table.S --- linux-2.6.17.11/arch/i386/kernel/syscall_table.S 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/syscall_table.S 2006-09-01 16:20:28.000000000 -0400 @@ -1,3 +1,4 @@ +.section .rodata,"a",@progbits ENTRY(sys_call_table) .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ .long sys_exit diff -urNp linux-2.6.17.11/arch/i386/kernel/sysenter.c linux-2.6.17.11/arch/i386/kernel/sysenter.c --- linux-2.6.17.11/arch/i386/kernel/sysenter.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/sysenter.c 2006-09-01 16:20:28.000000000 -0400 @@ -24,7 +24,7 @@ extern asmlinkage void sysenter_entry(vo void enable_sep_cpu(void) { int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss = init_tss + cpu; if (!boot_cpu_has(X86_FEATURE_SEP)) { put_cpu(); @@ -48,6 +48,7 @@ extern const char vsyscall_sysenter_star int __init sysenter_setup(void) { +#ifndef CONFIG_PAX_NOVSYSCALL void *page = (void *)get_zeroed_page(GFP_ATOMIC); __set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY_EXEC); @@ -62,6 +63,7 @@ int __init sysenter_setup(void) memcpy(page, &vsyscall_sysenter_start, &vsyscall_sysenter_end - &vsyscall_sysenter_start); +#endif return 0; } diff -urNp linux-2.6.17.11/arch/i386/kernel/sys_i386.c linux-2.6.17.11/arch/i386/kernel/sys_i386.c --- linux-2.6.17.11/arch/i386/kernel/sys_i386.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/sys_i386.c 2006-09-01 16:20:28.000000000 -0400 @@ -100,6 +100,191 @@ out: return err; } +unsigned long +arch_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + 
unsigned long start_addr, task_size = TASK_SIZE; + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + task_size = SEGMEXEC_TASK_SIZE; +#endif + + if (len > task_size) + return -ENOMEM; + +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP) || !filp) +#endif + + if (addr) { + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (task_size - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + if (len > mm->cached_hole_size) { + start_addr = addr = mm->free_area_cache; + } else { + start_addr = addr = mm->mmap_base; + mm->cached_hole_size = 0; + } + +#ifdef CONFIG_PAX_PAGEEXEC + if ((mm->pax_flags & MF_PAX_PAGEEXEC) && (flags & MAP_EXECUTABLE) && start_addr >= mm->mmap_base) { + start_addr = 0x00110000UL; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + start_addr += mm->delta_mmap & 0x03FFF000UL; +#endif + + if (mm->start_brk <= start_addr && start_addr < mm->mmap_base) + start_addr = addr = mm->mmap_base; + else + addr = start_addr; + } +#endif + +full_search: + for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { + /* At this point: (!vma || addr < vma->vm_end). */ + if (task_size - len < addr) { + /* + * Start a new search - just in case we missed + * some holes. 
+ */ + if (start_addr != mm->mmap_base) { + start_addr = addr = mm->mmap_base; + mm->cached_hole_size = 0; + goto full_search; + } + return -ENOMEM; + } + if (!vma || addr + len <= vma->vm_start) { + /* + * Remember the place where we stopped the search: + */ + mm->free_area_cache = addr + len; + return addr; + } + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + addr = vma->vm_end; + if (mm->start_brk <= addr && addr < mm->mmap_base) { + start_addr = addr = mm->mmap_base; + goto full_search; + } + } +} + +unsigned long +arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + const unsigned long len, const unsigned long pgoff, + const unsigned long flags) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + unsigned long base = mm->mmap_base, addr = addr0, task_size = TASK_SIZE; + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + task_size = SEGMEXEC_TASK_SIZE; +#endif + + /* requested length too big for entire address space */ + if (len > task_size) + return -ENOMEM; + +#ifdef CONFIG_PAX_PAGEEXEC + if ((mm->pax_flags & MF_PAX_PAGEEXEC) && (flags & MAP_EXECUTABLE)) + goto bottomup; +#endif + +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP) || !filp) +#endif + + /* requesting a specific address */ + if (addr) { + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (task_size - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + + /* check if free_area_cache is useful for us */ + if (len <= mm->cached_hole_size) { + mm->cached_hole_size = 0; + mm->free_area_cache = mm->mmap_base; + } + + /* either no address requested or can't fit in requested address hole */ + addr = mm->free_area_cache; + + /* make sure it can fit in the remaining address space */ + if (addr > len) { + vma = find_vma(mm, addr-len); + if (!vma || addr <= vma->vm_start) + /* remember the address as a hint for next time */ + return 
(mm->free_area_cache = addr-len); + } + + if (mm->mmap_base < len) + goto bottomup; + + addr = mm->mmap_base-len; + + do { + /* + * Lookup failure means no vma is above this address, + * else if new region fits below vma->vm_start, + * return with success: + */ + vma = find_vma(mm, addr); + if (!vma || addr+len <= vma->vm_start) + /* remember the address as a hint for next time */ + return (mm->free_area_cache = addr); + + /* remember the largest hole we saw so far */ + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + + /* try just below the current vma->vm_start */ + addr = vma->vm_start-len; + } while (len < vma->vm_start); + +bottomup: + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + mm->mmap_base = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + + mm->free_area_cache = mm->mmap_base; + mm->cached_hole_size = ~0UL; + addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); + /* + * Restore the topdown base: + */ + mm->mmap_base = base; + mm->free_area_cache = base; + mm->cached_hole_size = ~0UL; + + return addr; +} struct sel_arg_struct { unsigned long n; diff -urNp linux-2.6.17.11/arch/i386/kernel/traps.c linux-2.6.17.11/arch/i386/kernel/traps.c --- linux-2.6.17.11/arch/i386/kernel/traps.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/traps.c 2006-09-01 16:20:31.000000000 -0400 @@ -28,6 +28,7 @@ #include #include #include +#include #ifdef CONFIG_EISA #include @@ -58,18 +59,13 @@ asmlinkage int system_call(void); -struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, +const struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } }; /* Do we ignore FPU interrupts ? 
*/ char ignore_fpu_irq = 0; -/* - * The IDT has to be page-aligned to simplify the Pentium - * F0 0F bug workaround.. We have a special link segment - * for this. - */ -struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, }; +extern struct desc_struct idt_table[256]; asmlinkage void divide_error(void); asmlinkage void debug(void); @@ -144,18 +140,22 @@ static inline unsigned long print_contex { unsigned long addr; int printed = 0; /* nr of entries already printed on current line */ + int i = kstack_depth_to_print; #ifdef CONFIG_FRAME_POINTER while (valid_stack_ptr(tinfo, (void *)ebp)) { addr = *(unsigned long *)(ebp + 4); printed = print_addr_and_symbol(addr, log_lvl, printed); ebp = *(unsigned long *)ebp; + --i; } #else while (valid_stack_ptr(tinfo, stack)) { addr = *stack++; - if (__kernel_text_address(addr)) + if (__kernel_text_address(addr + __KERNEL_TEXT_OFFSET)) { printed = print_addr_and_symbol(addr, log_lvl, printed); + --i; + } } #endif if (printed) @@ -249,7 +249,7 @@ void show_registers(struct pt_regs *regs esp = (unsigned long) (&regs->esp); savesegment(ss, ss); - if (user_mode_vm(regs)) { + if (user_mode(regs)) { in_kernel = 0; esp = regs->esp; ss = regs->xss & 0xffff; @@ -276,13 +276,15 @@ void show_registers(struct pt_regs *regs */ if (in_kernel) { u8 __user *eip; + mm_segment_t old_fs = get_fs(); printk("\n" KERN_EMERG "Stack: "); show_stack_log_lvl(NULL, (unsigned long *)esp, KERN_EMERG); printk(KERN_EMERG "Code: "); - eip = (u8 __user *)regs->eip - 43; + set_fs(KERNEL_DS); + eip = (u8 __user *)regs->eip - 43 + __KERNEL_TEXT_OFFSET; for (i = 0; i < 64; i++, eip++) { unsigned char c; @@ -290,11 +292,12 @@ void show_registers(struct pt_regs *regs printk(" Bad EIP value."); break; } - if (eip == (u8 __user *)regs->eip) + if (eip == (u8 __user *)regs->eip + __KERNEL_TEXT_OFFSET) printk("<%02x> ", c); else printk("%02x ", c); } + set_fs(old_fs); } printk("\n"); } @@ -306,29 +309,34 @@ static void handle_BUG(struct pt_regs 
*r char *file; char c; unsigned long eip; + mm_segment_t old_fs = get_fs(); - eip = regs->eip; + eip = regs->eip + __KERNEL_TEXT_OFFSET; + set_fs(KERNEL_DS); if (eip < PAGE_OFFSET) goto no_bug; if (__get_user(ud2, (unsigned short __user *)eip)) goto no_bug; if (ud2 != 0x0b0f) goto no_bug; - if (__get_user(line, (unsigned short __user *)(eip + 2))) + if (__get_user(line, (unsigned short __user *)(eip + 7))) goto bug; - if (__get_user(file, (char * __user *)(eip + 4)) || - (unsigned long)file < PAGE_OFFSET || __get_user(c, file)) + if (__get_user(file, (char * __user *)(eip + 3)) || file < _text + __KERNEL_TEXT_OFFSET) + goto bug; + if (__get_user(c, file)) file = "<bad filename>"; printk(KERN_EMERG "------------[ cut here ]------------\n"); printk(KERN_EMERG "kernel BUG at %s:%d!\n", file, line); no_bug: + set_fs(old_fs); return; /* Here we know it was a BUG but file-n-line is unavailable */ bug: + set_fs(old_fs); printk(KERN_EMERG "Kernel BUG\n"); } @@ -430,7 +438,7 @@ void die(const char * str, struct pt_reg static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) { - if (!user_mode_vm(regs)) + if (!user_mode(regs)) die(str, regs, err); } @@ -448,7 +456,7 @@ static void __kprobes do_trap(int trapnr goto trap_signal; } - if (!user_mode(regs)) + if (!user_mode_novm(regs)) goto kernel_trap; trap_signal: { @@ -536,7 +544,7 @@ fastcall void __kprobes do_general_prote long error_code) { int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss = &init_tss[cpu]; struct thread_struct *thread = &current->thread; /* @@ -572,9 +580,25 @@ fastcall void __kprobes do_general_prote if (regs->eflags & VM_MASK) goto gp_in_vm86; - if (!user_mode(regs)) + if (!user_mode_novm(regs)) goto gp_in_kernel; +#ifdef CONFIG_PAX_PAGEEXEC + if (current->mm && (current->mm->pax_flags & MF_PAX_PAGEEXEC)) { + struct mm_struct *mm = current->mm; + unsigned long limit; + + down_write(&mm->mmap_sem); + limit = mm->context.user_cs_limit; + if (limit < 
TASK_SIZE) { + track_exec_limit(mm, limit, TASK_SIZE, PROT_EXEC); + up_write(&mm->mmap_sem); + return; + } + up_write(&mm->mmap_sem); + } +#endif + current->thread.error_code = error_code; current->thread.trap_no = 13; force_sig(SIGSEGV, current); @@ -590,6 +614,13 @@ gp_in_kernel: if (notify_die(DIE_GPF, "general protection fault", regs, error_code, 13, SIGSEGV) == NOTIFY_STOP) return; + +#ifdef CONFIG_PAX_KERNEXEC + if ((regs->xcs & 0xFFFF) == __KERNEL_CS) + die("PAX: suspicious general protection fault", regs, error_code); + else +#endif + die("general protection fault", regs, error_code); } } @@ -663,7 +694,7 @@ void die_nmi (struct pt_regs *regs, cons /* If we are in kernel we are probably nested up pretty bad * and might aswell get out now while we still can. */ - if (!user_mode_vm(regs)) { + if (!user_mode(regs)) { current->thread.trap_no = 2; crash_kexec(regs); } @@ -816,7 +847,7 @@ fastcall void __kprobes do_debug(struct * check for kernel mode by just checking the CPL * of CS. 
*/ - if (!user_mode(regs)) + if (!user_mode_novm(regs)) goto clear_TF_reenable; } @@ -1106,7 +1137,19 @@ do { \ */ void set_intr_gate(unsigned int n, void *addr) { + +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif + _set_gate(idt_table+n,14,0,addr,__KERNEL_CS); + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + } /* diff -urNp linux-2.6.17.11/arch/i386/kernel/vm86.c linux-2.6.17.11/arch/i386/kernel/vm86.c --- linux-2.6.17.11/arch/i386/kernel/vm86.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/vm86.c 2006-09-01 16:20:28.000000000 -0400 @@ -123,7 +123,7 @@ struct pt_regs * fastcall save_v86_state do_exit(SIGSEGV); } - tss = &per_cpu(init_tss, get_cpu()); + tss = init_tss + get_cpu(); current->thread.esp0 = current->thread.saved_esp0; current->thread.sysenter_cs = __KERNEL_CS; load_esp0(tss, &current->thread); @@ -297,7 +297,7 @@ static void do_sys_vm86(struct kernel_vm savesegment(fs, tsk->thread.saved_fs); savesegment(gs, tsk->thread.saved_gs); - tss = &per_cpu(init_tss, get_cpu()); + tss = init_tss + get_cpu(); tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; if (cpu_has_sep) tsk->thread.sysenter_cs = 0; diff -urNp linux-2.6.17.11/arch/i386/kernel/vmlinux.lds.S linux-2.6.17.11/arch/i386/kernel/vmlinux.lds.S --- linux-2.6.17.11/arch/i386/kernel/vmlinux.lds.S 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/kernel/vmlinux.lds.S 2006-09-01 16:20:28.000000000 -0400 @@ -4,10 +4,19 @@ #define LOAD_OFFSET __PAGE_OFFSET +#include + #include #include #include #include +#include + +#ifdef CONFIG_X86_PAE +#define PMD_SHIFT 21 +#else +#define PMD_SHIFT 22 +#endif OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") OUTPUT_ARCH(i386) @@ -16,59 +25,14 @@ jiffies = jiffies_64; SECTIONS { . 
= __KERNEL_START; - phys_startup_32 = startup_32 - LOAD_OFFSET; - /* read-only */ - _text = .; /* Text and read-only data */ - .text : AT(ADDR(.text) - LOAD_OFFSET) { - *(.text) - SCHED_TEXT - LOCK_TEXT - KPROBES_TEXT - *(.fixup) - *(.gnu.warning) - } = 0x9090 - - _etext = .; /* End of text section */ - - . = ALIGN(16); /* Exception table */ - __start___ex_table = .; - __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) } - __stop___ex_table = .; - - RODATA + phys_startup_32 = startup_32 - LOAD_OFFSET + __KERNEL_TEXT_OFFSET; - /* writeable */ - .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ - *(.data) - CONSTRUCTORS + .text.startup : AT(ADDR(.text.startup) - LOAD_OFFSET) { + BYTE(0xEA) /* jmp far */ + LONG(phys_startup_32) + SHORT(__BOOT_CS) } - . = ALIGN(4096); - __nosave_begin = .; - .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } - . = ALIGN(4096); - __nosave_end = .; - - . = ALIGN(4096); - .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { - *(.data.idt) - } - - . = ALIGN(32); - .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { - *(.data.cacheline_aligned) - } - - /* rarely changed data like cpu maps */ - . = ALIGN(32); - .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) } - _edata = .; /* End of data section */ - - . = ALIGN(THREAD_SIZE); /* init_task */ - .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { - *(.data.init_task) - } - /* might get freed after init */ . = ALIGN(4096); __smp_alt_begin = .; @@ -92,11 +56,6 @@ SECTIONS /* will be freed after init */ . = ALIGN(4096); /* Init code and data */ __init_begin = .; - .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { - _sinittext = .; - *(.init.text) - _einittext = .; - } .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { *(.init.data) } . 
= ALIGN(16); __setup_start = .; @@ -128,9 +87,7 @@ SECTIONS .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { *(.altinstr_replacement) } - /* .exit.text is discard at runtime, not link time, to deal with references - from .altinstructions and .eh_frame */ - .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) } + .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) } . = ALIGN(4096); __initramfs_start = .; @@ -140,10 +97,108 @@ SECTIONS __per_cpu_start = .; .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) } __per_cpu_end = .; + + /* read-only */ + . = ALIGN(4096); - __init_end = .; + .init.text (. - __KERNEL_TEXT_OFFSET) : AT(ADDR(.init.text) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { + _sinittext = .; + *(.init.text) + _einittext = .; + } + + /* .exit.text is discard at runtime, not link time, to deal with references + from .altinstructions and .eh_frame */ + .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { *(.exit.text) } + +#ifdef CONFIG_PAX_KERNEXEC + .text.align : AT(ADDR(.text.align) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { + . = ALIGN(__KERNEL_TEXT_OFFSET - LOAD_OFFSET) - 1; + BYTE(0) + } +#else + . = ALIGN(4096); +#endif + + __init_end = . + __KERNEL_TEXT_OFFSET; /* freed after init ends here */ - + + _text = .; /* Text and read-only data */ + .text : AT(ADDR(.text) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { + *(.text) + SCHED_TEXT + LOCK_TEXT + KPROBES_TEXT + *(.fixup) + *(.gnu.warning) + } = 0x9090 + + _etext = .; /* End of text section */ + . += __KERNEL_TEXT_OFFSET; + . = ALIGN(16); /* Exception table */ + __start___ex_table = .; + __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) } + __stop___ex_table = .; + + . = ALIGN(4096); + .rodata.page_aligned : AT(ADDR(.rodata.page_aligned) - LOAD_OFFSET) { + *(.empty_zero_page) + +#ifdef CONFIG_X86_PAE + *(.swapper_pm_dir) +#endif + + *(.swapper_pg_dir) + *(.idt) + } + + RODATA + +#ifdef CONFIG_PAX_KERNEXEC + . 
= ALIGN(4096); + MODULES_VADDR = .; + + .module.text : AT(ADDR(.module.text) - LOAD_OFFSET) { + . += (4 * 1024 * 1024); + . = ALIGN(1 << PMD_SHIFT) - 1; + BYTE(0) + } + + MODULES_END = .; +#else + . = ALIGN(32); +#endif + + /* writeable */ + .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ + _data = .; + *(.data) + CONSTRUCTORS + } + + . = ALIGN(4096); + __nosave_begin = .; + .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } + . = ALIGN(4096); + __nosave_end = .; + + . = ALIGN(32); + .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { + *(.data.cacheline_aligned) + } + + /* rarely changed data like cpu maps */ + . = ALIGN(32); + .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) } + + . = ALIGN(THREAD_SIZE); /* init_task */ + .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { + *(.data.init_task) + } + + _edata = .; /* End of data section */ + + . = ALIGN(4096); __bss_start = .; /* BSS */ .bss.page_aligned : AT(ADDR(.bss.page_aligned) - LOAD_OFFSET) { *(.bss.page_aligned) diff -urNp linux-2.6.17.11/arch/i386/lib/checksum.S linux-2.6.17.11/arch/i386/lib/checksum.S --- linux-2.6.17.11/arch/i386/lib/checksum.S 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/lib/checksum.S 2006-09-01 16:20:28.000000000 -0400 @@ -27,7 +27,8 @@ #include #include - +#include + /* * computes a partial checksum, e.g. 
for TCP/UDP fragments */ @@ -281,12 +282,23 @@ unsigned int csum_partial_copy_generic ( .align 4 .globl csum_partial_copy_generic - +.globl csum_partial_copy_generic_to_user +.globl csum_partial_copy_generic_from_user + #ifndef CONFIG_X86_USE_PPRO_CHECKSUM #define ARGBASE 16 #define FP 12 - + +csum_partial_copy_generic_to_user: + pushl $(__USER_DS) + popl %es + jmp csum_partial_copy_generic + +csum_partial_copy_generic_from_user: + pushl $(__USER_DS) + popl %ds + csum_partial_copy_generic: subl $4,%esp pushl %edi @@ -305,7 +317,7 @@ csum_partial_copy_generic: jmp 4f SRC(1: movw (%esi), %bx ) addl $2, %esi -DST( movw %bx, (%edi) ) +DST( movw %bx, %es:(%edi) ) addl $2, %edi addw %bx, %ax adcl $0, %eax @@ -317,30 +329,30 @@ DST( movw %bx, (%edi) ) SRC(1: movl (%esi), %ebx ) SRC( movl 4(%esi), %edx ) adcl %ebx, %eax -DST( movl %ebx, (%edi) ) +DST( movl %ebx, %es:(%edi) ) adcl %edx, %eax -DST( movl %edx, 4(%edi) ) +DST( movl %edx, %es:4(%edi) ) SRC( movl 8(%esi), %ebx ) SRC( movl 12(%esi), %edx ) adcl %ebx, %eax -DST( movl %ebx, 8(%edi) ) +DST( movl %ebx, %es:8(%edi) ) adcl %edx, %eax -DST( movl %edx, 12(%edi) ) +DST( movl %edx, %es:12(%edi) ) SRC( movl 16(%esi), %ebx ) SRC( movl 20(%esi), %edx ) adcl %ebx, %eax -DST( movl %ebx, 16(%edi) ) +DST( movl %ebx, %es:16(%edi) ) adcl %edx, %eax -DST( movl %edx, 20(%edi) ) +DST( movl %edx, %es:20(%edi) ) SRC( movl 24(%esi), %ebx ) SRC( movl 28(%esi), %edx ) adcl %ebx, %eax -DST( movl %ebx, 24(%edi) ) +DST( movl %ebx, %es:24(%edi) ) adcl %edx, %eax -DST( movl %edx, 28(%edi) ) +DST( movl %edx, %es:28(%edi) ) lea 32(%esi), %esi lea 32(%edi), %edi @@ -354,7 +366,7 @@ DST( movl %edx, 28(%edi) ) shrl $2, %edx # This clears CF SRC(3: movl (%esi), %ebx ) adcl %ebx, %eax -DST( movl %ebx, (%edi) ) +DST( movl %ebx, %es:(%edi) ) lea 4(%esi), %esi lea 4(%edi), %edi dec %edx @@ -366,12 +378,12 @@ DST( movl %ebx, (%edi) ) jb 5f SRC( movw (%esi), %cx ) leal 2(%esi), %esi -DST( movw %cx, (%edi) ) +DST( movw %cx, %es:(%edi) ) leal 2(%edi), %edi 
je 6f shll $16,%ecx SRC(5: movb (%esi), %cl ) -DST( movb %cl, (%edi) ) +DST( movb %cl, %es:(%edi) ) 6: addl %ecx, %eax adcl $0, %eax 7: @@ -382,7 +394,7 @@ DST( movb %cl, (%edi) ) 6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr - movl $-EFAULT, (%ebx) + movl $-EFAULT, %ss:(%ebx) # zero the complete destination - computing the rest # is too much work @@ -395,11 +407,15 @@ DST( movb %cl, (%edi) ) 6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr - movl $-EFAULT,(%ebx) + movl $-EFAULT,%ss:(%ebx) jmp 5000b .previous + pushl %ss + popl %ds + pushl %ss + popl %es popl %ebx popl %esi popl %edi @@ -411,17 +427,28 @@ DST( movb %cl, (%edi) ) /* Version for PentiumII/PPro */ #define ROUND1(x) \ + nop; nop; nop; \ SRC(movl x(%esi), %ebx ) ; \ addl %ebx, %eax ; \ - DST(movl %ebx, x(%edi) ) ; + DST(movl %ebx, %es:x(%edi)); #define ROUND(x) \ + nop; nop; nop; \ SRC(movl x(%esi), %ebx ) ; \ adcl %ebx, %eax ; \ - DST(movl %ebx, x(%edi) ) ; + DST(movl %ebx, %es:x(%edi)); #define ARGBASE 12 - + +csum_partial_copy_generic_to_user: + pushl $(__USER_DS) + popl %es + jmp csum_partial_copy_generic + +csum_partial_copy_generic_from_user: + pushl $(__USER_DS) + popl %ds + csum_partial_copy_generic: pushl %ebx pushl %edi @@ -440,7 +467,7 @@ csum_partial_copy_generic: subl %ebx, %edi lea -1(%esi),%edx andl $-32,%edx - lea 3f(%ebx,%ebx), %ebx + lea 3f(%ebx,%ebx,2), %ebx testl %esi, %esi jmp *%ebx 1: addl $64,%esi @@ -461,19 +488,19 @@ csum_partial_copy_generic: jb 5f SRC( movw (%esi), %dx ) leal 2(%esi), %esi -DST( movw %dx, (%edi) ) +DST( movw %dx, %es:(%edi) ) leal 2(%edi), %edi je 6f shll $16,%edx 5: SRC( movb (%esi), %dl ) -DST( movb %dl, (%edi) ) +DST( movb %dl, %es:(%edi) ) 6: addl %edx, %eax adcl $0, %eax 7: .section .fixup, "ax" 6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr - movl $-EFAULT, (%ebx) + movl $-EFAULT, %ss:(%ebx) # zero the complete destination (computing the rest is too much work) movl ARGBASE+8(%esp),%edi # dst movl ARGBASE+12(%esp),%ecx # len @@ -481,10 +508,14 @@ DST( 
movb %dl, (%edi) ) rep; stosb jmp 7b 6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr - movl $-EFAULT, (%ebx) + movl $-EFAULT, %ss:(%ebx) jmp 7b .previous + pushl %ss + popl %ds + pushl %ss + popl %es popl %esi popl %edi popl %ebx diff -urNp linux-2.6.17.11/arch/i386/lib/getuser.S linux-2.6.17.11/arch/i386/lib/getuser.S --- linux-2.6.17.11/arch/i386/lib/getuser.S 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/lib/getuser.S 2006-09-01 16:20:28.000000000 -0400 @@ -9,6 +9,7 @@ * return value. */ #include +#include /* @@ -30,8 +31,12 @@ __get_user_1: GET_THREAD_INFO(%edx) cmpl TI_addr_limit(%edx),%eax jae bad_get_user + pushl $(__USER_DS) + popl %ds 1: movzbl (%eax),%edx xorl %eax,%eax + pushl %ss + pop %ds ret .align 4 @@ -42,7 +47,11 @@ __get_user_2: GET_THREAD_INFO(%edx) cmpl TI_addr_limit(%edx),%eax jae bad_get_user + pushl $(__USER_DS) + popl %ds 2: movzwl -1(%eax),%edx + pushl %ss + pop %ds xorl %eax,%eax ret @@ -54,11 +63,17 @@ __get_user_4: GET_THREAD_INFO(%edx) cmpl TI_addr_limit(%edx),%eax jae bad_get_user + pushl $(__USER_DS) + popl %ds 3: movl -3(%eax),%edx + pushl %ss + pop %ds xorl %eax,%eax ret bad_get_user: + pushl %ss + pop %ds xorl %edx,%edx movl $-14,%eax ret diff -urNp linux-2.6.17.11/arch/i386/lib/mmx.c linux-2.6.17.11/arch/i386/lib/mmx.c --- linux-2.6.17.11/arch/i386/lib/mmx.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/lib/mmx.c 2006-09-01 16:20:28.000000000 -0400 @@ -48,14 +48,30 @@ void *_mmx_memcpy(void *to, const void * " prefetch 256(%0)\n" "2: \n" ".section .fixup, \"ax\"\n" - "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + "3: \n" + +#ifdef CONFIG_PAX_KERNEXEC + " cli\n" + " movl %%cr0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " orl $0x00010000, %%eax\n" + " movl %%eax, %%cr0\n" + " sti\n" +#endif + " jmp 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" " .align 4\n" " .long 1b, 3b\n" 
".previous" - : : "r" (from) ); + : : "r" (from) : "ax"); for(; i>5; i--) @@ -79,14 +95,30 @@ void *_mmx_memcpy(void *to, const void * " movq %%mm2, 48(%1)\n" " movq %%mm3, 56(%1)\n" ".section .fixup, \"ax\"\n" - "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + "3:\n" + +#ifdef CONFIG_PAX_KERNEXEC + " cli\n" + " movl %%cr0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " orl $0x00010000, %%eax\n" + " movl %%eax, %%cr0\n" + " sti\n" +#endif + " jmp 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" " .align 4\n" " .long 1b, 3b\n" ".previous" - : : "r" (from), "r" (to) : "memory"); + : : "r" (from), "r" (to) : "memory", "ax"); from+=64; to+=64; } @@ -179,14 +211,30 @@ static void fast_copy_page(void *to, voi " prefetch 256(%0)\n" "2: \n" ".section .fixup, \"ax\"\n" - "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + "3: \n" + +#ifdef CONFIG_PAX_KERNEXEC + " cli\n" + " movl %%cr0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " orl $0x00010000, %%eax\n" + " movl %%eax, %%cr0\n" + " sti\n" +#endif + " jmp 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" " .align 4\n" " .long 1b, 3b\n" ".previous" - : : "r" (from) ); + : : "r" (from) : "ax"); for(i=0; i<(4096-320)/64; i++) { @@ -209,14 +257,30 @@ static void fast_copy_page(void *to, voi " movq 56(%0), %%mm7\n" " movntq %%mm7, 56(%1)\n" ".section .fixup, \"ax\"\n" - "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + "3:\n" + +#ifdef CONFIG_PAX_KERNEXEC + " cli\n" + " movl %%cr0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " orl $0x00010000, %%eax\n" + " movl %%eax, %%cr0\n" + " sti\n" +#endif + " jmp 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" " .align 4\n" " .long 1b, 3b\n" ".previous" - : : "r" (from), "r" 
(to) : "memory"); + : : "r" (from), "r" (to) : "memory", "ax"); from+=64; to+=64; } @@ -309,14 +373,30 @@ static void fast_copy_page(void *to, voi " prefetch 256(%0)\n" "2: \n" ".section .fixup, \"ax\"\n" - "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + "3: \n" + +#ifdef CONFIG_PAX_KERNEXEC + " cli\n" + " movl %%cr0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " orl $0x00010000, %%eax\n" + " movl %%eax, %%cr0\n" + " sti\n" +#endif + " jmp 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" " .align 4\n" " .long 1b, 3b\n" ".previous" - : : "r" (from) ); + : : "r" (from) : "ax"); for(i=0; i<4096/64; i++) { @@ -339,14 +419,30 @@ static void fast_copy_page(void *to, voi " movq %%mm2, 48(%1)\n" " movq %%mm3, 56(%1)\n" ".section .fixup, \"ax\"\n" - "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + "3:\n" + +#ifdef CONFIG_PAX_KERNEXEC + " cli\n" + " movl %%cr0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " orl $0x00010000, %%eax\n" + " movl %%eax, %%cr0\n" + " sti\n" +#endif + " jmp 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" " .align 4\n" " .long 1b, 3b\n" ".previous" - : : "r" (from), "r" (to) : "memory"); + : : "r" (from), "r" (to) : "memory", "ax"); from+=64; to+=64; } diff -urNp linux-2.6.17.11/arch/i386/lib/putuser.S linux-2.6.17.11/arch/i386/lib/putuser.S --- linux-2.6.17.11/arch/i386/lib/putuser.S 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/lib/putuser.S 2006-09-01 16:20:28.000000000 -0400 @@ -9,6 +9,7 @@ * return value. 
*/ #include +#include /* @@ -33,7 +34,11 @@ __put_user_1: ENTER cmpl TI_addr_limit(%ebx),%ecx jae bad_put_user + pushl $(__USER_DS) + popl %ds 1: movb %al,(%ecx) + pushl %ss + popl %ds xorl %eax,%eax EXIT @@ -45,7 +50,11 @@ __put_user_2: subl $1,%ebx cmpl %ebx,%ecx jae bad_put_user + pushl $(__USER_DS) + popl %ds 2: movw %ax,(%ecx) + pushl %ss + popl %ds xorl %eax,%eax EXIT @@ -57,7 +66,11 @@ __put_user_4: subl $3,%ebx cmpl %ebx,%ecx jae bad_put_user + pushl $(__USER_DS) + popl %ds 3: movl %eax,(%ecx) + pushl %ss + popl %ds xorl %eax,%eax EXIT @@ -69,12 +82,18 @@ __put_user_8: subl $7,%ebx cmpl %ebx,%ecx jae bad_put_user + pushl $(__USER_DS) + popl %ds 4: movl %eax,(%ecx) 5: movl %edx,4(%ecx) + pushl %ss + popl %ds xorl %eax,%eax EXIT bad_put_user: + pushl %ss + popl %ds movl $-14,%eax EXIT diff -urNp linux-2.6.17.11/arch/i386/lib/usercopy.c linux-2.6.17.11/arch/i386/lib/usercopy.c --- linux-2.6.17.11/arch/i386/lib/usercopy.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/lib/usercopy.c 2006-09-01 16:20:28.000000000 -0400 @@ -33,6 +33,7 @@ do { \ int __d0, __d1, __d2; \ might_sleep(); \ __asm__ __volatile__( \ + " movw %w10,%%ds\n" \ " testl %1,%1\n" \ " jz 2f\n" \ "0: lodsb\n" \ @@ -43,6 +44,8 @@ do { \ " jnz 0b\n" \ "1: subl %1,%0\n" \ "2:\n" \ + " pushl %%ss\n" \ + " popl %%ds\n" \ ".section .fixup,\"ax\"\n" \ "3: movl %5,%0\n" \ " jmp 2b\n" \ @@ -53,7 +56,8 @@ do { \ ".previous" \ : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ "=&D" (__d2) \ - : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ + : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst),\ + "r"(__USER_DS) \ : "memory"); \ } while (0) @@ -123,10 +127,13 @@ do { \ int __d0; \ might_sleep(); \ __asm__ __volatile__( \ + " movw %w6,%%es\n" \ "0: rep; stosl\n" \ " movl %2,%0\n" \ "1: rep; stosb\n" \ "2:\n" \ + " pushl %%ss\n" \ + " popl %%es\n" \ ".section .fixup,\"ax\"\n" \ "3: lea 0(%2,%0,4),%0\n" \ " jmp 2b\n" \ @@ -137,7 +144,8 @@ do { \ " .long 1b,2b\n" \ 
".previous" \ : "=&c"(size), "=&D" (__d0) \ - : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \ + : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0), \ + "r"(__USER_DS)); \ } while (0) /** @@ -198,14 +206,17 @@ long strnlen_user(const char __user *s, might_sleep(); __asm__ __volatile__( + " movw %w8,%%es\n" " testl %0, %0\n" " jz 3f\n" - " andl %0,%%ecx\n" + " movl %0,%%ecx\n" "0: repne; scasb\n" " setne %%al\n" " subl %%ecx,%0\n" " addl %0,%%eax\n" "1:\n" + " pushl %%ss\n" + " popl %%es\n" ".section .fixup,\"ax\"\n" "2: xorl %%eax,%%eax\n" " jmp 1b\n" @@ -217,7 +228,7 @@ long strnlen_user(const char __user *s, " .long 0b,2b\n" ".previous" :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp) - :"0" (n), "1" (s), "2" (0), "3" (mask) + :"0" (n), "1" (s), "2" (0), "3" (mask), "r" (__USER_DS) :"cc"); return res & mask; } @@ -229,6 +240,7 @@ __copy_user_intel(void __user *to, const { int d0, d1; __asm__ __volatile__( + " movw %w6, %%es\n" " .align 2,0x90\n" "1: movl 32(%4), %%eax\n" " cmpl $67, %0\n" @@ -237,36 +249,36 @@ __copy_user_intel(void __user *to, const " .align 2,0x90\n" "3: movl 0(%4), %%eax\n" "4: movl 4(%4), %%edx\n" - "5: movl %%eax, 0(%3)\n" - "6: movl %%edx, 4(%3)\n" + "5: movl %%eax, %%es:0(%3)\n" + "6: movl %%edx, %%es:4(%3)\n" "7: movl 8(%4), %%eax\n" "8: movl 12(%4),%%edx\n" - "9: movl %%eax, 8(%3)\n" - "10: movl %%edx, 12(%3)\n" + "9: movl %%eax, %%es:8(%3)\n" + "10: movl %%edx, %%es:12(%3)\n" "11: movl 16(%4), %%eax\n" "12: movl 20(%4), %%edx\n" - "13: movl %%eax, 16(%3)\n" - "14: movl %%edx, 20(%3)\n" + "13: movl %%eax, %%es:16(%3)\n" + "14: movl %%edx, %%es:20(%3)\n" "15: movl 24(%4), %%eax\n" "16: movl 28(%4), %%edx\n" - "17: movl %%eax, 24(%3)\n" - "18: movl %%edx, 28(%3)\n" + "17: movl %%eax, %%es:24(%3)\n" + "18: movl %%edx, %%es:28(%3)\n" "19: movl 32(%4), %%eax\n" "20: movl 36(%4), %%edx\n" - "21: movl %%eax, 32(%3)\n" - "22: movl %%edx, 36(%3)\n" + "21: movl %%eax, %%es:32(%3)\n" + "22: movl %%edx, %%es:36(%3)\n" "23: movl 40(%4), %%eax\n" "24: 
movl 44(%4), %%edx\n" - "25: movl %%eax, 40(%3)\n" - "26: movl %%edx, 44(%3)\n" + "25: movl %%eax, %%es:40(%3)\n" + "26: movl %%edx, %%es:44(%3)\n" "27: movl 48(%4), %%eax\n" "28: movl 52(%4), %%edx\n" - "29: movl %%eax, 48(%3)\n" - "30: movl %%edx, 52(%3)\n" + "29: movl %%eax, %%es:48(%3)\n" + "30: movl %%edx, %%es:52(%3)\n" "31: movl 56(%4), %%eax\n" "32: movl 60(%4), %%edx\n" - "33: movl %%eax, 56(%3)\n" - "34: movl %%edx, 60(%3)\n" + "33: movl %%eax, %%es:56(%3)\n" + "34: movl %%edx, %%es:60(%3)\n" " addl $-64, %0\n" " addl $64, %4\n" " addl $64, %3\n" @@ -280,6 +292,8 @@ __copy_user_intel(void __user *to, const "36: movl %%eax, %0\n" "37: rep; movsb\n" "100:\n" + " pushl %%ss\n" + " popl %%es\n" ".section .fixup,\"ax\"\n" "101: lea 0(%%eax,%0,4),%0\n" " jmp 100b\n" @@ -326,7 +340,7 @@ __copy_user_intel(void __user *to, const " .long 99b,101b\n" ".previous" : "=&c"(size), "=&D" (d0), "=&S" (d1) - : "1"(to), "2"(from), "0"(size) + : "1"(to), "2"(from), "0"(size), "r"(__USER_DS) : "eax", "edx", "memory"); return size; } @@ -336,6 +350,7 @@ __copy_user_zeroing_intel(void *to, cons { int d0, d1; __asm__ __volatile__( + " movw %w6, %%ds\n" " .align 2,0x90\n" "0: movl 32(%4), %%eax\n" " cmpl $67, %0\n" @@ -344,36 +359,36 @@ __copy_user_zeroing_intel(void *to, cons " .align 2,0x90\n" "2: movl 0(%4), %%eax\n" "21: movl 4(%4), %%edx\n" - " movl %%eax, 0(%3)\n" - " movl %%edx, 4(%3)\n" + " movl %%eax, %%es:0(%3)\n" + " movl %%edx, %%es:4(%3)\n" "3: movl 8(%4), %%eax\n" "31: movl 12(%4),%%edx\n" - " movl %%eax, 8(%3)\n" - " movl %%edx, 12(%3)\n" + " movl %%eax, %%es:8(%3)\n" + " movl %%edx, %%es:12(%3)\n" "4: movl 16(%4), %%eax\n" "41: movl 20(%4), %%edx\n" - " movl %%eax, 16(%3)\n" - " movl %%edx, 20(%3)\n" + " movl %%eax, %%es:16(%3)\n" + " movl %%edx, %%es:20(%3)\n" "10: movl 24(%4), %%eax\n" "51: movl 28(%4), %%edx\n" - " movl %%eax, 24(%3)\n" - " movl %%edx, 28(%3)\n" + " movl %%eax, %%es:24(%3)\n" + " movl %%edx, %%es:28(%3)\n" "11: movl 32(%4), %%eax\n" "61: movl 
36(%4), %%edx\n" - " movl %%eax, 32(%3)\n" - " movl %%edx, 36(%3)\n" + " movl %%eax, %%es:32(%3)\n" + " movl %%edx, %%es:36(%3)\n" "12: movl 40(%4), %%eax\n" "71: movl 44(%4), %%edx\n" - " movl %%eax, 40(%3)\n" - " movl %%edx, 44(%3)\n" + " movl %%eax, %%es:40(%3)\n" + " movl %%edx, %%es:44(%3)\n" "13: movl 48(%4), %%eax\n" "81: movl 52(%4), %%edx\n" - " movl %%eax, 48(%3)\n" - " movl %%edx, 52(%3)\n" + " movl %%eax, %%es:48(%3)\n" + " movl %%edx, %%es:52(%3)\n" "14: movl 56(%4), %%eax\n" "91: movl 60(%4), %%edx\n" - " movl %%eax, 56(%3)\n" - " movl %%edx, 60(%3)\n" + " movl %%eax, %%es:56(%3)\n" + " movl %%edx, %%es:60(%3)\n" " addl $-64, %0\n" " addl $64, %4\n" " addl $64, %3\n" @@ -387,6 +402,8 @@ __copy_user_zeroing_intel(void *to, cons " movl %%eax,%0\n" "7: rep; movsb\n" "8:\n" + " pushl %%ss\n" + " popl %%ds\n" ".section .fixup,\"ax\"\n" "9: lea 0(%%eax,%0,4),%0\n" "16: pushl %0\n" @@ -421,7 +438,7 @@ __copy_user_zeroing_intel(void *to, cons " .long 7b,16b\n" ".previous" : "=&c"(size), "=&D" (d0), "=&S" (d1) - : "1"(to), "2"(from), "0"(size) + : "1"(to), "2"(from), "0"(size), "r"(__USER_DS) : "eax", "edx", "memory"); return size; } @@ -441,6 +458,7 @@ __copy_user_intel(void __user *to, const do { \ int __d0, __d1, __d2; \ __asm__ __volatile__( \ + " movw %w8,%%es\n" \ " cmp $7,%0\n" \ " jbe 1f\n" \ " movl %1,%0\n" \ @@ -456,6 +474,8 @@ do { \ " movl %3,%0\n" \ "1: rep; movsb\n" \ "2:\n" \ + " pushl %%ss\n" \ + " popl %%es\n" \ ".section .fixup,\"ax\"\n" \ "5: addl %3,%0\n" \ " jmp 2b\n" \ @@ -469,7 +489,7 @@ do { \ " .long 1b,2b\n" \ ".previous" \ : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ - : "3"(size), "0"(size), "1"(to), "2"(from) \ + : "3"(size), "0"(size), "1"(to), "2"(from), "r"(__USER_DS)\ : "memory"); \ } while (0) @@ -477,6 +497,7 @@ do { \ do { \ int __d0, __d1, __d2; \ __asm__ __volatile__( \ + " movw %w8,%%ds\n" \ " cmp $7,%0\n" \ " jbe 1f\n" \ " movl %1,%0\n" \ @@ -492,6 +513,8 @@ do { \ " movl %3,%0\n" \ "1: rep; movsb\n" \ "2:\n" 
\ + " pushl %%ss\n" \ + " popl %%ds\n" \ ".section .fixup,\"ax\"\n" \ "5: addl %3,%0\n" \ " jmp 6f\n" \ @@ -511,7 +534,7 @@ do { \ " .long 1b,6b\n" \ ".previous" \ : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ - : "3"(size), "0"(size), "1"(to), "2"(from) \ + : "3"(size), "0"(size), "1"(to), "2"(from), "r"(__USER_DS)\ : "memory"); \ } while (0) @@ -640,3 +663,45 @@ copy_from_user(void *to, const void __us return n; } EXPORT_SYMBOL(copy_from_user); + +#ifdef CONFIG_PAX_MEMORY_UDEREF +void __set_fs(mm_segment_t x, int cpu) +{ + unsigned long limit = x.seg; + + current_thread_info()->addr_limit = x; + if (likely(limit)) { + limit -= 1UL; + limit >>= 12; + } + + get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_DS].a = (limit & 0xFFFFUL); + get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_DS].b = (limit & 0xF0000UL) | 0xC0F300UL; +} + +void set_fs(mm_segment_t x) +{ + int cpu = get_cpu(); + +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif + + __set_fs(x, cpu); + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + + put_cpu_no_resched(); +} +#else +void set_fs(mm_segment_t x) +{ + current_thread_info()->addr_limit = x; +} +#endif + +EXPORT_SYMBOL(set_fs); diff -urNp linux-2.6.17.11/arch/i386/mach-voyager/voyager_smp.c linux-2.6.17.11/arch/i386/mach-voyager/voyager_smp.c --- linux-2.6.17.11/arch/i386/mach-voyager/voyager_smp.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/mach-voyager/voyager_smp.c 2006-09-01 16:20:28.000000000 -0400 @@ -1295,7 +1295,7 @@ smp_local_timer_interrupt(struct pt_regs per_cpu(prof_counter, cpu); } - update_process_times(user_mode_vm(regs)); + update_process_times(user_mode(regs)); } if( ((1< -#undef CONFIG_X86_PAE #include #include #include #include #include -/* - * I'm cheating here. It is known that the two boot PTE pages are - * allocated next to each other. I'm pretending that they're just - * one big array. 
- */ - -#define BOOT_PTE_PTRS (PTRS_PER_PTE*2) -#define boot_pte_index(address) \ - (((address) >> PAGE_SHIFT) & (BOOT_PTE_PTRS - 1)) - -static inline boot_pte_t* boot_vaddr_to_pte(void *address) -{ - boot_pte_t* boot_pg = (boot_pte_t*)pg0; - return &boot_pg[boot_pte_index((unsigned long)address)]; -} /* * This is only for a caller who is clever enough to page-align * phys_addr and virtual_source, and who also has a preference * about which virtual address from which to steal ptes */ -static void __boot_ioremap(unsigned long phys_addr, unsigned long nrpages, - void* virtual_source) +static void __init __boot_ioremap(unsigned long phys_addr, unsigned int nrpages, + char* virtual_source) { - boot_pte_t* pte; - int i; - char *vaddr = virtual_source; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t* pte; + unsigned int i; + unsigned long vaddr = (unsigned long)virtual_source; + + pgd = pgd_offset_k(vaddr); + pud = pud_offset(pgd, vaddr); + pmd = pmd_offset(pud, vaddr); + pte = pte_offset_kernel(pmd, vaddr); - pte = boot_vaddr_to_pte(virtual_source); for (i=0; i < nrpages; i++, phys_addr += PAGE_SIZE, pte++) { set_pte(pte, pfn_pte(phys_addr>>PAGE_SHIFT, PAGE_KERNEL)); - __flush_tlb_one(&vaddr[i*PAGE_SIZE]); + __flush_tlb_one(&virtual_source[i*PAGE_SIZE]); } } diff -urNp linux-2.6.17.11/arch/i386/mm/extable.c linux-2.6.17.11/arch/i386/mm/extable.c --- linux-2.6.17.11/arch/i386/mm/extable.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/mm/extable.c 2006-09-01 16:20:28.000000000 -0400 @@ -12,7 +12,7 @@ int fixup_exception(struct pt_regs *regs const struct exception_table_entry *fixup; #ifdef CONFIG_PNPBIOS - if (unlikely((regs->xcs & ~15) == (GDT_ENTRY_PNPBIOS_BASE << 3))) + if (unlikely(!(regs->eflags & VM_MASK) && ((regs->xcs & 0xFFFCU) == PNP_CS32 || (regs->xcs & 0xFFFCU) == PNP_CS16))) { extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp; extern u32 pnp_bios_is_utter_crap; diff -urNp linux-2.6.17.11/arch/i386/mm/fault.c 
linux-2.6.17.11/arch/i386/mm/fault.c --- linux-2.6.17.11/arch/i386/mm/fault.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/mm/fault.c 2006-09-01 16:20:28.000000000 -0400 @@ -22,6 +22,9 @@ #include #include #include +#include +#include +#include #include #include @@ -82,11 +85,13 @@ static inline unsigned long get_segment_ /* Unlikely, but must come before segment checks. */ if (unlikely((regs->eflags & VM_MASK) != 0)) - return eip + (seg << 4); + return (eip & 0xFFFF) + (seg << 4); /* By far the most common cases. */ - if (likely(seg == __USER_CS || seg == __KERNEL_CS)) + if (likely(seg == __USER_CS)) return eip; + if (likely(seg == __KERNEL_CS)) + return eip + __KERNEL_TEXT_OFFSET; /* Check the segment exists, is within the current LDT/GDT size, that kernel/user (ring 0..3) has the appropriate privilege, @@ -214,6 +219,30 @@ static noinline void force_sig_info_faul fastcall void do_invalid_op(struct pt_regs *, unsigned long); +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) +static int pax_handle_fetch_fault(struct pt_regs *regs); +#endif + +#ifdef CONFIG_PAX_PAGEEXEC +static inline pmd_t * pax_get_pmd(struct mm_struct *mm, unsigned long address) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset(mm, address); + if (!pgd_present(*pgd)) + return NULL; + pud = pud_offset(pgd, address); + if (!pud_present(*pud)) + return NULL; + pmd = pmd_offset(pud, address); + if (!pmd_present(*pmd)) + return NULL; + return pmd; +} +#endif + static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) { unsigned index = pgd_index(address); @@ -295,13 +324,20 @@ fastcall void __kprobes do_page_fault(st struct mm_struct *mm; struct vm_area_struct * vma; unsigned long address; - unsigned long page; int write, si_code; +#ifdef CONFIG_PAX_PAGEEXEC + pmd_t *pmd; + pte_t *pte; + spinlock_t *ptl; + unsigned char pte_mask; +#endif + /* get the address */ address = read_cr2(); tsk = current; + mm = tsk->mm; si_code = 
SEGV_MAPERR; @@ -340,14 +376,12 @@ fastcall void __kprobes do_page_fault(st if (regs->eflags & (X86_EFLAGS_IF|VM_MASK)) local_irq_enable(); - mm = tsk->mm; - /* * If we're in an interrupt, have no user context or are running in an * atomic region then we must not take the fault.. */ if (in_atomic() || !mm) - goto bad_area_nosemaphore; + goto bad_area_nopax; /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the @@ -367,10 +401,101 @@ fastcall void __kprobes do_page_fault(st if (!down_read_trylock(&mm->mmap_sem)) { if ((error_code & 4) == 0 && !search_exception_tables(regs->eip)) - goto bad_area_nosemaphore; + goto bad_area_nopax; down_read(&mm->mmap_sem); } +#ifdef CONFIG_PAX_PAGEEXEC + if (unlikely((error_code & 5) != 5 || + (regs->eflags & X86_EFLAGS_VM) || + !(mm->pax_flags & MF_PAX_PAGEEXEC))) + goto not_pax_fault; + + /* PaX: it's our fault, let's handle it if we can */ + + /* PaX: take a look at read faults before acquiring any locks */ + if (unlikely(!(error_code & 2) && (regs->eip == address))) { + /* instruction fetch attempt from a protected page in user mode */ + up_read(&mm->mmap_sem); + switch (pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_EMUTRAMP + case 2: + return; +#endif + + } + pax_report_fault(regs, (void*)regs->eip, (void*)regs->esp); + do_exit(SIGKILL); + } + + pmd = pax_get_pmd(mm, address); + if (unlikely(!pmd)) + goto not_pax_fault; + + pte = pte_offset_map_lock(mm, pmd, address, &ptl); + if (unlikely(!(pte_val(*pte) & _PAGE_PRESENT) || pte_user(*pte))) { + pte_unmap_unlock(pte, ptl); + goto not_pax_fault; + } + + if (unlikely((error_code & 2) && !pte_write(*pte))) { + /* write attempt to a protected page in user mode */ + pte_unmap_unlock(pte, ptl); + goto not_pax_fault; + } + +#ifdef CONFIG_SMP + if (likely(address > get_limit(regs->xcs) && cpu_isset(smp_processor_id(), mm->context.cpu_user_cs_mask))) +#else + if (likely(address > get_limit(regs->xcs))) 
+#endif + { + set_pte(pte, pte_mkread(*pte)); + __flush_tlb_one(address); + pte_unmap_unlock(pte, ptl); + up_read(&mm->mmap_sem); + return; + } + + pte_mask = _PAGE_ACCESSED | _PAGE_USER | ((error_code & 2) << (_PAGE_BIT_DIRTY-1)); + + /* + * PaX: fill DTLB with user rights and retry + */ + __asm__ __volatile__ ( + "movw %w4,%%ds\n" + "orb %2,%%ss:(%1)\n" +#if defined(CONFIG_M586) || defined(CONFIG_M586TSC) +/* + * PaX: let this uncommented 'invlpg' remind us on the behaviour of Intel's + * (and AMD's) TLBs. namely, they do not cache PTEs that would raise *any* + * page fault when examined during a TLB load attempt. this is true not only + * for PTEs holding a non-present entry but also present entries that will + * raise a page fault (such as those set up by PaX, or the copy-on-write + * mechanism). in effect it means that we do *not* need to flush the TLBs + * for our target pages since their PTEs are simply not in the TLBs at all. + + * the best thing in omitting it is that we gain around 15-20% speed in the + * fast path of the page fault handler and can get rid of tracing since we + * can no longer flush unintended entries. 
+ */ + "invlpg (%0)\n" +#endif + "testb $0,(%0)\n" + "xorb %3,%%ss:(%1)\n" + "pushl %%ss\n" + "popl %%ds\n" + : + : "q" (address), "r" (pte), "q" (pte_mask), "i" (_PAGE_USER), "r" (__USER_DS) + : "memory", "cc"); + pte_unmap_unlock(pte, ptl); + up_read(&mm->mmap_sem); + return; + +not_pax_fault: +#endif + vma = find_vma(mm, address); if (!vma) goto bad_area; @@ -456,6 +581,37 @@ bad_area: up_read(&mm->mmap_sem); bad_area_nosemaphore: + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) + if (mm && (error_code & 4) && !(regs->eflags & X86_EFLAGS_VM)) { + +#ifdef CONFIG_PAX_PAGEEXEC + if ((mm->pax_flags & MF_PAX_PAGEEXEC) && !(error_code & 3) && (regs->eip == address)) { + pax_report_fault(regs, (void*)regs->eip, (void*)regs->esp); + do_exit(SIGKILL); + } +#endif + +#ifdef CONFIG_PAX_SEGMEXEC + if ((mm->pax_flags & MF_PAX_SEGMEXEC) && !(error_code & 3) && (regs->eip + SEGMEXEC_TASK_SIZE == address)) { + + switch (pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_EMUTRAMP + case 2: + return; +#endif + + } + pax_report_fault(regs, (void*)regs->eip, (void*)regs->esp); + do_exit(SIGKILL); + } +#endif + + } +#endif + +bad_area_nopax: /* User mode accesses just cause a SIGSEGV */ if (error_code & 4) { /* @@ -523,6 +679,21 @@ no_context: if (address < PAGE_SIZE) printk(KERN_ALERT "BUG: unable to handle kernel NULL " "pointer dereference"); + +#ifdef CONFIG_PAX_KERNEXEC +#ifdef CONFIG_MODULES + else if (init_mm.start_code <= address && address < (unsigned long)MODULES_END) +#else + else if (init_mm.start_code <= address && address < init_mm.end_code) +#endif + if (tsk->signal->curr_ip) + printk(KERN_ERR "PAX: From %u.%u.%u.%u: %s:%d, uid/euid: %u/%u, attempted to modify kernel code", + NIPQUAD(tsk->signal->curr_ip), tsk->comm, tsk->pid, tsk->uid, tsk->euid); + else + printk(KERN_ERR "PAX: %s:%d, uid/euid: %u/%u, attempted to modify kernel code", + tsk->comm, tsk->pid, tsk->uid, tsk->euid); +#endif + else printk(KERN_ALERT "BUG: unable to handle kernel 
paging" " request"); @@ -530,24 +701,34 @@ no_context: printk(KERN_ALERT " printing eip:\n"); printk("%08lx\n", regs->eip); } - page = read_cr3(); - page = ((unsigned long *) __va(page))[address >> 22]; - if (oops_may_print()) - printk(KERN_ALERT "*pde = %08lx\n", page); - /* - * We must not directly access the pte in the highpte - * case, the page table might be allocated in highmem. - * And lets rather not kmap-atomic the pte, just in case - * it's allocated already. - */ + + if (oops_may_print()) { + unsigned long index = pgd_index(address); + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + pgd = index + (pgd_t *)__va(read_cr3()); + printk(KERN_ALERT "*pgd = %*llx\n", sizeof(*pgd), (unsigned long long)pgd_val(*pgd)); + if (pgd_present(*pgd)) { + pud = pud_offset(pgd, address); + pmd = pmd_offset(pud, address); + printk(KERN_ALERT "*pmd = %*llx\n", sizeof(*pmd), (unsigned long long)pmd_val(*pmd)); + /* + * We must not directly access the pte in the highpte + * case, the page table might be allocated in highmem. + * And lets rather not kmap-atomic the pte, just in case + * it's allocated already. 
+ */ #ifndef CONFIG_HIGHPTE - if ((page & 1) && oops_may_print()) { - page &= PAGE_MASK; - address &= 0x003ff000; - page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT]; - printk(KERN_ALERT "*pte = %08lx\n", page); - } + if (pmd_present(*pmd) && !pmd_large(*pmd)) { + pte = pte_offset_kernel(pmd, address); + printk(KERN_ALERT "*pte = %*llx\n", sizeof(*pte), (unsigned long long)pte_val(*pte)); + } #endif + } + } tsk->thread.cr2 = address; tsk->thread.trap_no = 14; tsk->thread.error_code = error_code; @@ -624,3 +805,105 @@ void vmalloc_sync_all(void) } } #endif + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) +/* + * PaX: decide what to do with offenders (regs->eip = fault address) + * + * returns 1 when task should be killed + * 2 when gcc trampoline was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#ifdef CONFIG_PAX_EMUTRAMP + static const unsigned char trans[8] = {6, 1, 2, 0, 13, 5, 3, 4}; + int err; +#endif + + if (regs->eflags & X86_EFLAGS_VM) + return 1; + +#ifdef CONFIG_PAX_EMUTRAMP + if (!(current->mm->pax_flags & MF_PAX_EMUTRAMP)) + return 1; + + do { /* PaX: gcc trampoline emulation #1 */ + unsigned char mov1, mov2; + unsigned short jmp; + unsigned long addr1, addr2; + + err = get_user(mov1, (unsigned char __user *)regs->eip); + err |= get_user(addr1, (unsigned long __user *)(regs->eip + 1)); + err |= get_user(mov2, (unsigned char __user *)(regs->eip + 5)); + err |= get_user(addr2, (unsigned long __user *)(regs->eip + 6)); + err |= get_user(jmp, (unsigned short __user *)(regs->eip + 10)); + + if (err) + break; + + if ((mov1 & 0xF8) == 0xB8 && + (mov2 & 0xF8) == 0xB8 && + (mov1 & 0x07) != (mov2 & 0x07) && + (jmp & 0xF8FF) == 0xE0FF && + (mov2 & 0x07) == ((jmp>>8) & 0x07)) + { + ((unsigned long *)regs)[trans[mov1 & 0x07]] = addr1; + ((unsigned long *)regs)[trans[mov2 & 0x07]] = addr2; + regs->eip = addr2; + return 2; + } + } while (0); + + do { /* PaX: gcc trampoline emulation #2 */ + unsigned char mov, 
jmp; + unsigned long addr1, addr2; + + err = get_user(mov, (unsigned char __user *)regs->eip); + err |= get_user(addr1, (unsigned long __user *)(regs->eip + 1)); + err |= get_user(jmp, (unsigned char __user *)(regs->eip + 5)); + err |= get_user(addr2, (unsigned long __user *)(regs->eip + 6)); + + if (err) + break; + + if ((mov & 0xF8) == 0xB8 && + jmp == 0xE9) + { + ((unsigned long *)regs)[trans[mov & 0x07]] = addr1; + regs->eip += addr2 + 10; + return 2; + } + } while (0); +#endif + + return 1; /* PaX in action */ +} +#endif + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) +void pax_report_insns(void *pc, void *sp) +{ + long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 20; i++) { + unsigned char c; + if (get_user(c, (unsigned char __user *)pc+i)) + printk("?? "); + else + printk("%02x ", c); + } + printk("\n"); + + printk(KERN_ERR "PAX: bytes at SP-4: "); + for (i = -1; i < 20; i++) { + unsigned long c; + if (get_user(c, (unsigned long __user *)sp+i)) + printk("???????? "); + else + printk("%08lx ", c); + } + printk("\n"); +} +#endif diff -urNp linux-2.6.17.11/arch/i386/mm/hugetlbpage.c linux-2.6.17.11/arch/i386/mm/hugetlbpage.c --- linux-2.6.17.11/arch/i386/mm/hugetlbpage.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/mm/hugetlbpage.c 2006-09-01 16:20:28.000000000 -0400 @@ -121,7 +121,12 @@ static unsigned long hugetlb_get_unmappe { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - unsigned long start_addr; + unsigned long start_addr, task_size = TASK_SIZE; + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + task_size = SEGMEXEC_TASK_SIZE; +#endif if (len > mm->cached_hole_size) { start_addr = mm->free_area_cache; @@ -135,7 +140,7 @@ full_search: for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { /* At this point: (!vma || addr < vma->vm_end). 
*/ - if (TASK_SIZE - len < addr) { + if (task_size - len < addr) { /* * Start a new search - just in case we missed * some holes. @@ -163,9 +168,8 @@ static unsigned long hugetlb_get_unmappe { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev_vma; - unsigned long base = mm->mmap_base, addr = addr0; + unsigned long base = mm->mmap_base, addr; unsigned long largest_hole = mm->cached_hole_size; - int first_time = 1; /* don't allow allocations above current base */ if (mm->free_area_cache > base) @@ -175,7 +179,7 @@ static unsigned long hugetlb_get_unmappe largest_hole = 0; mm->free_area_cache = base; } -try_again: + /* make sure it can fit in the remaining address space */ if (mm->free_area_cache < len) goto fail; @@ -217,16 +221,6 @@ try_again: fail: /* - * if hint left us with no space for the requested - * mapping then try again: - */ - if (first_time) { - mm->free_area_cache = base; - largest_hole = 0; - first_time = 0; - goto try_again; - } - /* * A failed mmap() very likely causes application failure, * so fall back to the bottom-up function here. 
This scenario * can happen with large stack limits and large mmap() @@ -252,16 +246,23 @@ hugetlb_get_unmapped_area(struct file *f { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; + unsigned long task_size = TASK_SIZE; if (len & ~HPAGE_MASK) return -EINVAL; - if (len > TASK_SIZE) + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + task_size = SEGMEXEC_TASK_SIZE; +#endif + + if (len > task_size || addr > task_size - len) return -ENOMEM; if (addr) { addr = ALIGN(addr, HPAGE_SIZE); vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && + if (task_size - len >= addr && (!vma || addr + len <= vma->vm_start)) return addr; } diff -urNp linux-2.6.17.11/arch/i386/mm/init.c linux-2.6.17.11/arch/i386/mm/init.c --- linux-2.6.17.11/arch/i386/mm/init.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/mm/init.c 2006-09-01 16:20:28.000000000 -0400 @@ -41,6 +41,7 @@ #include #include #include +#include unsigned int __VMALLOC_RESERVE = 128 << 20; @@ -50,30 +51,6 @@ unsigned long highstart_pfn, highend_pfn static int noinline do_test_wp_bit(void); /* - * Creates a middle page table and puts a pointer to it in the - * given global directory entry. This only returns the gd entry - * in non-PAE compilation mode, since the middle layer is folded. - */ -static pmd_t * __init one_md_table_init(pgd_t *pgd) -{ - pud_t *pud; - pmd_t *pmd_table; - -#ifdef CONFIG_X86_PAE - pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); - set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); - pud = pud_offset(pgd, 0); - if (pmd_table != pmd_offset(pud, 0)) - BUG(); -#else - pud = pud_offset(pgd, 0); - pmd_table = pmd_offset(pud, 0); -#endif - - return pmd_table; -} - -/* * Create a page table and place a pointer to it in a middle page * directory entry. 
*/ @@ -81,7 +58,11 @@ static pte_t * __init one_page_table_ini { if (pmd_none(*pmd)) { pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) + set_pmd(pmd, __pmd(__pa(page_table) | _KERNPG_TABLE)); +#else set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); +#endif if (page_table != pte_offset_kernel(pmd, 0)) BUG(); @@ -116,8 +97,6 @@ static void __init page_table_range_init pgd = pgd_base + pgd_idx; for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { - if (pgd_none(*pgd)) - one_md_table_init(pgd); pud = pud_offset(pgd, vaddr); pmd = pmd_offset(pud, vaddr); for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { @@ -130,11 +109,22 @@ static void __init page_table_range_init } } -static inline int is_kernel_text(unsigned long addr) +static inline int is_kernel_text(unsigned long start, unsigned long end) { - if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end) - return 1; - return 0; + unsigned long etext; + +#if defined(CONFIG_MODULES) && defined(CONFIG_PAX_KERNEXEC) + etext = (unsigned long)&MODULES_END - __KERNEL_TEXT_OFFSET; +#else + etext = (unsigned long)&_etext; +#endif + + if ((start > etext + __KERNEL_TEXT_OFFSET || + end <= (unsigned long)_stext + __KERNEL_TEXT_OFFSET) && + (start > (unsigned long)_einittext + __KERNEL_TEXT_OFFSET || + end <= (unsigned long)_sinittext + __KERNEL_TEXT_OFFSET)) + return 0; + return 1; } /* @@ -146,26 +136,24 @@ static void __init kernel_physical_mappi { unsigned long pfn; pgd_t *pgd; + pud_t *pud; pmd_t *pmd; pte_t *pte; - int pgd_idx, pmd_idx, pte_ofs; + unsigned int pgd_idx, pmd_idx, pte_ofs; pgd_idx = pgd_index(PAGE_OFFSET); pgd = pgd_base + pgd_idx; pfn = 0; - for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) { - pmd = one_md_table_init(pgd); - if (pfn >= max_low_pfn) - continue; + for (; pgd_idx < PTRS_PER_PGD && pfn < max_low_pfn; pgd++, pgd_idx++) { + pud = pud_offset(pgd, 0); + pmd = 
pmd_offset(pud, 0); for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) { - unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET; + unsigned long address = pfn * PAGE_SIZE + PAGE_OFFSET; /* Map with big pages if possible, otherwise create normal page tables. */ if (cpu_has_pse) { - unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1; - - if (is_kernel_text(address) || is_kernel_text(address2)) + if (is_kernel_text(address, address + PMD_SIZE)) set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC)); else set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE)); @@ -174,7 +162,7 @@ static void __init kernel_physical_mappi pte = one_page_table_init(pmd); for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) { - if (is_kernel_text(address)) + if (is_kernel_text(address, address + PAGE_SIZE)) set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); else set_pte(pte, pfn_pte(pfn, PAGE_KERNEL)); @@ -341,13 +329,6 @@ static void __init pagetable_init (void) unsigned long vaddr; pgd_t *pgd_base = swapper_pg_dir; -#ifdef CONFIG_X86_PAE - int i; - /* Init entries of the first-level page table to the zero page */ - for (i = 0; i < PTRS_PER_PGD; i++) - set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); -#endif - /* Enable PSE if available */ if (cpu_has_pse) { set_in_cr4(X86_CR4_PSE); @@ -371,17 +352,6 @@ static void __init pagetable_init (void) page_table_range_init(vaddr, 0, pgd_base); permanent_kmaps_init(pgd_base); - -#ifdef CONFIG_X86_PAE - /* - * Add low memory identity-mappings - SMP needs it when - * starting up on an AP from real-mode. In the non-PAE - * case we already have these mappings through head.S. - * All user-space mappings are explicitly cleared after - * SMP startup. 
- */ - set_pgd(&pgd_base[0], pgd_base[USER_PTRS_PER_PGD]); -#endif } #ifdef CONFIG_SUSPEND_SHARED @@ -423,7 +393,6 @@ void zap_low_mappings (void) flush_tlb_all(); } -static int disable_nx __initdata = 0; u64 __supported_pte_mask __read_mostly = ~_PAGE_NX; /* @@ -437,11 +406,9 @@ u64 __supported_pte_mask __read_mostly = void __init noexec_setup(const char *str) { if (!strncmp(str, "on",2) && cpu_has_nx) { - __supported_pte_mask |= _PAGE_NX; - disable_nx = 0; + nx_enabled = 1; } else if (!strncmp(str,"off",3)) { - disable_nx = 1; - __supported_pte_mask &= ~_PAGE_NX; + nx_enabled = 0; } } @@ -450,17 +417,13 @@ int nx_enabled = 0; static void __init set_nx(void) { - unsigned int v[4], l, h; + if (!nx_enabled && cpu_has_nx) { + unsigned l, h; - if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { - cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); - if ((v[3] & (1 << 20)) && !disable_nx) { - rdmsr(MSR_EFER, l, h); - l |= EFER_NX; - wrmsr(MSR_EFER, l, h); - nx_enabled = 1; - __supported_pte_mask |= _PAGE_NX; - } + __supported_pte_mask &= ~_PAGE_NX; + rdmsr(MSR_EFER, l, h); + l &= ~EFER_NX; + wrmsr(MSR_EFER, l, h); } } @@ -512,14 +475,6 @@ void __init paging_init(void) load_cr3(swapper_pg_dir); -#ifdef CONFIG_X86_PAE - /* - * We will bail out later - printk doesn't work right now so - * the user would just see a hanging kernel. 
- */ - if (cpu_has_pae) - set_in_cr4(X86_CR4_PAE); -#endif __flush_tlb_all(); kmap_init(); @@ -611,7 +566,7 @@ void __init mem_init(void) set_highmem_pages_init(bad_ppro); codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; + datasize = (unsigned long) &_edata - (unsigned long) &_data; initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT); @@ -628,10 +583,6 @@ void __init mem_init(void) (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) ); -#ifdef CONFIG_X86_PAE - if (!cpu_has_pae) - panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!"); -#endif if (boot_cpu_data.wp_works_ok < 0) test_wp_bit(); @@ -761,6 +712,37 @@ void free_init_pages(char *what, unsigne void free_initmem(void) { + +#ifdef CONFIG_PAX_KERNEXEC + /* PaX: limit KERNEL_CS to actual size */ + unsigned long addr, limit; + int cpu; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + +#ifdef CONFIG_MODULES + limit = (unsigned long)&MODULES_END - __KERNEL_TEXT_OFFSET; +#else + limit = (unsigned long)&_etext; +#endif + limit = (limit - 1UL) >> PAGE_SHIFT; + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + get_cpu_gdt_table(cpu)[GDT_ENTRY_KERNEL_CS].a = (get_cpu_gdt_table(cpu)[GDT_ENTRY_KERNEL_CS].a & 0xFFFF0000UL) | (limit & 0x0FFFFUL); + get_cpu_gdt_table(cpu)[GDT_ENTRY_KERNEL_CS].b = (get_cpu_gdt_table(cpu)[GDT_ENTRY_KERNEL_CS].b & 0xFFF0FFFFUL) | (limit & 0xF0000UL); + } + + /* PaX: make KERNEL_CS read-only */ + for (addr = __KERNEL_TEXT_OFFSET; addr < (unsigned long)&_data; addr += PMD_SIZE) { + pgd = pgd_offset_k(addr); + pud = pud_offset(pgd, addr); + pmd = pmd_offset(pud, addr); + set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_RW)); + } + flush_tlb_all(); +#endif + free_init_pages("unused kernel memory", (unsigned long)(&__init_begin), (unsigned long)(&__init_end)); diff -urNp linux-2.6.17.11/arch/i386/mm/mmap.c linux-2.6.17.11/arch/i386/mm/mmap.c --- 
linux-2.6.17.11/arch/i386/mm/mmap.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/mm/mmap.c 2006-09-01 16:20:28.000000000 -0400 @@ -34,12 +34,18 @@ * Leave an at least ~128 MB hole. */ #define MIN_GAP (128*1024*1024) -#define MAX_GAP (TASK_SIZE/6*5) +#define MAX_GAP (task_size/6*5) static inline unsigned long mmap_base(struct mm_struct *mm) { unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; unsigned long random_factor = 0; + unsigned long task_size = TASK_SIZE; + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + task_size = SEGMEXEC_TASK_SIZE; +#endif if (current->flags & PF_RANDOMIZE) random_factor = get_random_int() % (1024*1024); @@ -49,7 +55,7 @@ static inline unsigned long mmap_base(st else if (gap > MAX_GAP) gap = MAX_GAP; - return PAGE_ALIGN(TASK_SIZE - gap - random_factor); + return PAGE_ALIGN(task_size - gap - random_factor); } /* @@ -66,10 +72,22 @@ void arch_pick_mmap_layout(struct mm_str (current->personality & ADDR_COMPAT_LAYOUT) || current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) { mm->mmap_base = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + mm->get_unmapped_area = arch_get_unmapped_area; mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(mm); + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base -= mm->delta_mmap + mm->delta_stack; +#endif + mm->get_unmapped_area = arch_get_unmapped_area_topdown; mm->unmap_area = arch_unmap_area_topdown; } diff -urNp linux-2.6.17.11/arch/i386/mm/pageattr.c linux-2.6.17.11/arch/i386/mm/pageattr.c --- linux-2.6.17.11/arch/i386/mm/pageattr.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/mm/pageattr.c 2006-09-01 16:20:28.000000000 -0400 @@ -14,6 +14,7 @@ #include #include #include +#include static DEFINE_SPINLOCK(cpa_lock); static struct list_head df_list = LIST_HEAD_INIT(df_list); @@ -84,7 +85,18 
@@ static void set_pmd_pte(pte_t *kpte, uns struct page *page; unsigned long flags; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif + set_pte_atomic(kpte, pte); /* change init_mm */ + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + if (PTRS_PER_PMD > 1) return; @@ -111,7 +123,7 @@ static inline void revert_page(struct pa pte_t *linear; ref_prot = - ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext) + ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext + __KERNEL_TEXT_OFFSET) ? PAGE_KERNEL_LARGE_EXEC : PAGE_KERNEL_LARGE; linear = (pte_t *) @@ -143,7 +155,7 @@ __change_page_attr(struct page *page, pg struct page *split; ref_prot = - ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext) + ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext + __KERNEL_TEXT_OFFSET) ? PAGE_KERNEL_EXEC : PAGE_KERNEL; split = split_large_page(address, prot, ref_prot); if (!split) diff -urNp linux-2.6.17.11/arch/i386/oprofile/backtrace.c linux-2.6.17.11/arch/i386/oprofile/backtrace.c --- linux-2.6.17.11/arch/i386/oprofile/backtrace.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/oprofile/backtrace.c 2006-09-01 16:20:28.000000000 -0400 @@ -116,7 +116,7 @@ x86_backtrace(struct pt_regs * const reg head = (struct frame_head *)regs->ebp; #endif - if (!user_mode_vm(regs)) { + if (!user_mode(regs)) { while (depth-- && valid_kernel_stack(head, regs)) head = dump_kernel_backtrace(head); return; diff -urNp linux-2.6.17.11/arch/i386/power/cpu.c linux-2.6.17.11/arch/i386/power/cpu.c --- linux-2.6.17.11/arch/i386/power/cpu.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/i386/power/cpu.c 2006-09-01 16:20:28.000000000 -0400 @@ -62,7 +62,7 @@ static void do_fpu_end(void) static void fix_processor_context(void) { int cpu = smp_processor_id(); - struct tss_struct * t = &per_cpu(init_tss, cpu); + struct tss_struct * t = init_tss + cpu; set_tss_desc(cpu,t); /* This just modifies memory; should not be necessary. But... 
This is necessary, because 386 hardware has concept of busy TSS or some similar stupidity. */ diff -urNp linux-2.6.17.11/arch/ia64/ia32/binfmt_elf32.c linux-2.6.17.11/arch/ia64/ia32/binfmt_elf32.c --- linux-2.6.17.11/arch/ia64/ia32/binfmt_elf32.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/ia64/ia32/binfmt_elf32.c 2006-09-01 16:20:28.000000000 -0400 @@ -46,6 +46,17 @@ randomize_stack_top(unsigned long stack_ #define elf_read_implies_exec(ex, have_pt_gnu_stack) (!(have_pt_gnu_stack)) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE(tsk) ((tsk)->personality == PER_LINUX32 ? 0x08048000UL : 0x4000000000000000UL) + +#define PAX_DELTA_MMAP_LSB(tsk) IA32_PAGE_SHIFT +#define PAX_DELTA_MMAP_LEN(tsk) ((tsk)->personality == PER_LINUX32 ? 16 : 3*PAGE_SHIFT - IA32_PAGE_SHIFT) +#define PAX_DELTA_EXEC_LSB(tsk) IA32_PAGE_SHIFT +#define PAX_DELTA_EXEC_LEN(tsk) ((tsk)->personality == PER_LINUX32 ? 16 : 3*PAGE_SHIFT - IA32_PAGE_SHIFT) +#define PAX_DELTA_STACK_LSB(tsk) IA32_PAGE_SHIFT +#define PAX_DELTA_STACK_LEN(tsk) ((tsk)->personality == PER_LINUX32 ? 
16 : 3*PAGE_SHIFT - IA32_PAGE_SHIFT) +#endif + /* Ugly but avoids duplication */ #include "../../../fs/binfmt_elf.c" diff -urNp linux-2.6.17.11/arch/ia64/ia32/ia32priv.h linux-2.6.17.11/arch/ia64/ia32/ia32priv.h --- linux-2.6.17.11/arch/ia64/ia32/ia32priv.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/ia64/ia32/ia32priv.h 2006-09-01 16:20:28.000000000 -0400 @@ -305,7 +305,14 @@ struct old_linux32_dirent { #define ELF_DATA ELFDATA2LSB #define ELF_ARCH EM_386 -#define IA32_STACK_TOP IA32_PAGE_OFFSET +#ifdef CONFIG_PAX_RANDUSTACK +#define __IA32_DELTA_STACK (current->mm->delta_stack) +#else +#define __IA32_DELTA_STACK 0UL +#endif + +#define IA32_STACK_TOP (IA32_PAGE_OFFSET - __IA32_DELTA_STACK) + #define IA32_GATE_OFFSET IA32_PAGE_OFFSET #define IA32_GATE_END IA32_PAGE_OFFSET + PAGE_SIZE diff -urNp linux-2.6.17.11/arch/ia64/kernel/module.c linux-2.6.17.11/arch/ia64/kernel/module.c --- linux-2.6.17.11/arch/ia64/kernel/module.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/ia64/kernel/module.c 2006-09-01 16:20:28.000000000 -0400 @@ -322,7 +322,7 @@ module_alloc (unsigned long size) void module_free (struct module *mod, void *module_region) { - if (mod->arch.init_unw_table && module_region == mod->module_init) { + if (mod->arch.init_unw_table && module_region == mod->module_init_rx) { unw_remove_unwind_table(mod->arch.init_unw_table); mod->arch.init_unw_table = NULL; } @@ -500,15 +500,39 @@ module_frob_arch_sections (Elf_Ehdr *ehd } static inline int +in_init_rx (const struct module *mod, uint64_t addr) +{ + return addr - (uint64_t) mod->module_init_rx < mod->init_size_rx; +} + +static inline int +in_init_rw (const struct module *mod, uint64_t addr) +{ + return addr - (uint64_t) mod->module_init_rw < mod->init_size_rw; +} + +static inline int in_init (const struct module *mod, uint64_t addr) { - return addr - (uint64_t) mod->module_init < mod->init_size; + return in_init_rx(mod, addr) || in_init_rw(mod, addr); +} + +static inline int
+in_core_rx (const struct module *mod, uint64_t addr) +{ + return addr - (uint64_t) mod->module_core_rx < mod->core_size_rx; +} + +static inline int +in_core_rw (const struct module *mod, uint64_t addr) +{ + return addr - (uint64_t) mod->module_core_rw < mod->core_size_rw; } static inline int in_core (const struct module *mod, uint64_t addr) { - return addr - (uint64_t) mod->module_core < mod->core_size; + return in_core_rx(mod, addr) || in_core_rw(mod, addr); } static inline int @@ -692,7 +716,14 @@ do_reloc (struct module *mod, uint8_t r_ break; case RV_BDREL: - val -= (uint64_t) (in_init(mod, val) ? mod->module_init : mod->module_core); + if (in_init_rx(mod, val)) + val -= (uint64_t) mod->module_init_rx; + else if (in_init_rw(mod, val)) + val -= (uint64_t) mod->module_init_rw; + else if (in_core_rx(mod, val)) + val -= (uint64_t) mod->module_core_rx; + else if (in_core_rw(mod, val)) + val -= (uint64_t) mod->module_core_rw; break; case RV_LTV: @@ -826,15 +857,15 @@ apply_relocate_add (Elf64_Shdr *sechdrs, * addresses have been selected... */ uint64_t gp; - if (mod->core_size > MAX_LTOFF) + if (mod->core_size_rx + mod->core_size_rw > MAX_LTOFF) /* * This takes advantage of fact that SHF_ARCH_SMALL gets allocated * at the end of the module.
*/ - gp = mod->core_size - MAX_LTOFF / 2; + gp = mod->core_size_rx + mod->core_size_rw - MAX_LTOFF / 2; else - gp = mod->core_size / 2; - gp = (uint64_t) mod->module_core + ((gp + 7) & -8); + gp = (mod->core_size_rx + mod->core_size_rw) / 2; + gp = (uint64_t) mod->module_core_rx + ((gp + 7) & -8); mod->arch.gp = gp; DEBUGP("%s: placing gp at 0x%lx\n", __FUNCTION__, gp); } diff -urNp linux-2.6.17.11/arch/ia64/kernel/ptrace.c linux-2.6.17.11/arch/ia64/kernel/ptrace.c --- linux-2.6.17.11/arch/ia64/kernel/ptrace.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/ia64/kernel/ptrace.c 2006-09-01 16:20:28.000000000 -0400 @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -1451,6 +1452,9 @@ sys_ptrace (long request, pid_t pid, uns if (pid == 1) /* no messing around with init! */ goto out_tsk; + if (gr_handle_ptrace(child, request)) + goto out_tsk; + if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); goto out_tsk; diff -urNp linux-2.6.17.11/arch/ia64/kernel/sys_ia64.c linux-2.6.17.11/arch/ia64/kernel/sys_ia64.c --- linux-2.6.17.11/arch/ia64/kernel/sys_ia64.c 2006-08-23 19:30:00.000000000 -0400 +++ linux-2.6.17.11/arch/ia64/kernel/sys_ia64.c 2006-09-01 16:20:28.000000000 -0400 @@ -38,6 +38,13 @@ arch_get_unmapped_area (struct file *fil if (REGION_NUMBER(addr) == RGN_HPAGE) addr = 0; #endif + +#ifdef CONFIG_PAX_RANDMMAP + if ((mm->pax_flags & MF_PAX_RANDMMAP) && addr && filp) + addr = mm->free_area_cache; + else +#endif + if (!addr) addr = mm->free_area_cache; @@ -56,9 +63,9 @@ arch_get_unmapped_area (struct file *fil for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { /* At this point: (!vma || addr < vma->vm_end). */ if (TASK_SIZE - len < addr || RGN_MAP_LIMIT - len < REGION_OFFSET(addr)) { - if (start_addr != TASK_UNMAPPED_BASE) { + if (start_addr != mm->mmap_base) { /* Start a new search --- just in case we missed some holes. 
*/ - addr = TASK_UNMAPPED_BASE; + addr = mm->mmap_base; goto full_search; } return -ENOMEM; diff -urNp linux-2.6.17.11/arch/ia64/mm/fault.c linux-2.6.17.11/arch/ia64/mm/fault.c --- linux-2.6.17.11/arch/ia64/mm/fault.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/ia64/mm/fault.c 2006-09-01 16:20:28.000000000 -0400 @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -52,6 +53,23 @@ mapped_kernel_page_is_present (unsigned return pte_present(pte); } +#ifdef CONFIG_PAX_PAGEEXEC +void pax_report_insns(void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 8; i++) { + unsigned int c; + if (get_user(c, (unsigned int*)pc+i)) + printk("???????? "); + else + printk("%08x ", c); + } + printk("\n"); +} +#endif + void __kprobes ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs) { @@ -117,9 +135,23 @@ ia64_do_page_fault (unsigned long addres | (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT) | (((isr >> IA64_ISR_R_BIT) & 1UL) << VM_READ_BIT)); - if ((vma->vm_flags & mask) != mask) + if ((vma->vm_flags & mask) != mask) { + +#ifdef CONFIG_PAX_PAGEEXEC + if (!(vma->vm_flags & VM_EXEC) && (mask & VM_EXEC)) { + if (!(mm->pax_flags & MF_PAX_PAGEEXEC) || address != regs->cr_iip) + goto bad_area; + + up_read(&mm->mmap_sem); + pax_report_fault(regs, (void*)regs->cr_iip, (void*)regs->r12); + do_exit(SIGKILL); + } +#endif + goto bad_area; + } + survive: /* * If for any reason at all we couldn't handle the fault, make diff -urNp linux-2.6.17.11/arch/ia64/mm/init.c linux-2.6.17.11/arch/ia64/mm/init.c --- linux-2.6.17.11/arch/ia64/mm/init.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/ia64/mm/init.c 2006-09-01 16:20:28.000000000 -0400 @@ -20,8 +20,8 @@ #include #include #include +#include -#include #include #include #include diff -urNp linux-2.6.17.11/arch/mips/kernel/binfmt_elfn32.c linux-2.6.17.11/arch/mips/kernel/binfmt_elfn32.c --- 
linux-2.6.17.11/arch/mips/kernel/binfmt_elfn32.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/mips/kernel/binfmt_elfn32.c 2006-09-01 16:20:28.000000000 -0400 @@ -50,6 +50,17 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_N #undef ELF_ET_DYN_BASE #define ELF_ET_DYN_BASE (TASK32_SIZE / 3 * 2) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE(tsk) (((tsk)->thread.mflags & MF_32BIT_ADDR) ? 0x00400000UL : 0x00400000UL) + +#define PAX_DELTA_MMAP_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_MMAP_LEN(tsk) (((tsk)->thread.mflags & MF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) +#define PAX_DELTA_EXEC_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_EXEC_LEN(tsk) (((tsk)->thread.mflags & MF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) +#define PAX_DELTA_STACK_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_STACK_LEN(tsk) (((tsk)->thread.mflags & MF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) +#endif + #include #include #include diff -urNp linux-2.6.17.11/arch/mips/kernel/binfmt_elfo32.c linux-2.6.17.11/arch/mips/kernel/binfmt_elfo32.c --- linux-2.6.17.11/arch/mips/kernel/binfmt_elfo32.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/mips/kernel/binfmt_elfo32.c 2006-09-01 16:20:28.000000000 -0400 @@ -52,6 +52,17 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_N #undef ELF_ET_DYN_BASE #define ELF_ET_DYN_BASE (TASK32_SIZE / 3 * 2) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE(tsk) (((tsk)->thread.mflags & MF_32BIT_ADDR) ? 0x00400000UL : 0x00400000UL) + +#define PAX_DELTA_MMAP_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_MMAP_LEN(tsk) (((tsk)->thread.mflags & MF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) +#define PAX_DELTA_EXEC_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_EXEC_LEN(tsk) (((tsk)->thread.mflags & MF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) +#define PAX_DELTA_STACK_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_STACK_LEN(tsk) (((tsk)->thread.mflags & MF_32BIT_ADDR) ? 
27-PAGE_SHIFT : 36-PAGE_SHIFT) +#endif + #include #include #include diff -urNp linux-2.6.17.11/arch/mips/kernel/syscall.c linux-2.6.17.11/arch/mips/kernel/syscall.c --- linux-2.6.17.11/arch/mips/kernel/syscall.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/mips/kernel/syscall.c 2006-09-01 16:20:28.000000000 -0400 @@ -90,6 +90,11 @@ unsigned long arch_get_unmapped_area(str do_color_align = 0; if (filp || (flags & MAP_SHARED)) do_color_align = 1; + +#ifdef CONFIG_PAX_RANDMMAP + if (!(current->mm->pax_flags & MF_PAX_RANDMMAP) || !filp) +#endif + if (addr) { if (do_color_align) addr = COLOUR_ALIGN(addr, pgoff); @@ -100,7 +105,7 @@ unsigned long arch_get_unmapped_area(str (!vmm || addr + len <= vmm->vm_start)) return addr; } - addr = TASK_UNMAPPED_BASE; + addr = current->mm->mmap_base; if (do_color_align) addr = COLOUR_ALIGN(addr, pgoff); else diff -urNp linux-2.6.17.11/arch/mips/mm/fault.c linux-2.6.17.11/arch/mips/mm/fault.c --- linux-2.6.17.11/arch/mips/mm/fault.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/mips/mm/fault.c 2006-09-01 16:20:28.000000000 -0400 @@ -27,6 +27,23 @@ #include #include /* For VMALLOC_END */ +#ifdef CONFIG_PAX_PAGEEXEC +void pax_report_insns(void *pc) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int*)pc+i)) + printk("???????? "); + else + printk("%08x ", c); + } + printk("\n"); +} +#endif + /* * This routine handles page faults. 
It determines the address, * and the problem, and then passes it off to one of the appropriate diff -urNp linux-2.6.17.11/arch/parisc/kernel/module.c linux-2.6.17.11/arch/parisc/kernel/module.c --- linux-2.6.17.11/arch/parisc/kernel/module.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/parisc/kernel/module.c 2006-09-01 16:20:28.000000000 -0400 @@ -72,16 +72,38 @@ /* three functions to determine where in the module core * or init pieces the location is */ +static inline int is_init_rx(struct module *me, void *loc) +{ + return (loc >= me->module_init_rx && + loc < (me->module_init_rx + me->init_size_rx)); +} + +static inline int is_init_rw(struct module *me, void *loc) +{ + return (loc >= me->module_init_rw && + loc < (me->module_init_rw + me->init_size_rw)); +} + static inline int is_init(struct module *me, void *loc) { - return (loc >= me->module_init && - loc <= (me->module_init + me->init_size)); + return is_init_rx(me, loc) || is_init_rw(me, loc); +} + +static inline int is_core_rx(struct module *me, void *loc) +{ + return (loc >= me->module_core_rx && + loc < (me->module_core_rx + me->core_size_rx)); +} + +static inline int is_core_rw(struct module *me, void *loc) +{ + return (loc >= me->module_core_rw && + loc < (me->module_core_rw + me->core_size_rw)); } static inline int is_core(struct module *me, void *loc) { - return (loc >= me->module_core && - loc <= (me->module_core + me->core_size)); + return is_core_rx(me, loc) || is_core_rw(me, loc); } static inline int is_local(struct module *me, void *loc) @@ -289,21 +311,21 @@ int module_frob_arch_sections(CONST Elf_ } /* align things a bit */ - me->core_size = ALIGN(me->core_size, 16); - me->arch.got_offset = me->core_size; - me->core_size += gots * sizeof(struct got_entry); - - me->core_size = ALIGN(me->core_size, 16); - me->arch.fdesc_offset = me->core_size; - me->core_size += fdescs * sizeof(Elf_Fdesc); - - me->core_size = ALIGN(me->core_size, 16); - me->arch.stub_offset = me->core_size; - 
me->core_size += stubs * sizeof(struct stub_entry); - - me->init_size = ALIGN(me->init_size, 16); - me->arch.init_stub_offset = me->init_size; - me->init_size += init_stubs * sizeof(struct stub_entry); + me->core_size_rw = ALIGN(me->core_size_rw, 16); + me->arch.got_offset = me->core_size_rw; + me->core_size_rw += gots * sizeof(struct got_entry); + + me->core_size_rw = ALIGN(me->core_size_rw, 16); + me->arch.fdesc_offset = me->core_size_rw; + me->core_size_rw += fdescs * sizeof(Elf_Fdesc); + + me->core_size_rx = ALIGN(me->core_size_rx, 16); + me->arch.stub_offset = me->core_size_rx; + me->core_size_rx += stubs * sizeof(struct stub_entry); + + me->init_size_rx = ALIGN(me->init_size_rx, 16); + me->arch.init_stub_offset = me->init_size_rx; + me->init_size_rx += init_stubs * sizeof(struct stub_entry); me->arch.got_max = gots; me->arch.fdesc_max = fdescs; @@ -323,7 +345,7 @@ static Elf64_Word get_got(struct module BUG_ON(value == 0); - got = me->module_core + me->arch.got_offset; + got = me->module_core_rw + me->arch.got_offset; for (i = 0; got[i].addr; i++) if (got[i].addr == value) goto out; @@ -341,7 +363,7 @@ static Elf64_Word get_got(struct module #ifdef __LP64__ static Elf_Addr get_fdesc(struct module *me, unsigned long value) { - Elf_Fdesc *fdesc = me->module_core + me->arch.fdesc_offset; + Elf_Fdesc *fdesc = me->module_core_rw + me->arch.fdesc_offset; if (!value) { printk(KERN_ERR "%s: zero OPD requested!\n", me->name); @@ -359,7 +381,7 @@ static Elf_Addr get_fdesc(struct module /* Create new one */ fdesc->addr = value; - fdesc->gp = (Elf_Addr)me->module_core + me->arch.got_offset; + fdesc->gp = (Elf_Addr)me->module_core_rw + me->arch.got_offset; return (Elf_Addr)fdesc; } #endif /* __LP64__ */ @@ -373,12 +395,12 @@ static Elf_Addr get_stub(struct module * if(init_section) { i = me->arch.init_stub_count++; BUG_ON(me->arch.init_stub_count > me->arch.init_stub_max); - stub = me->module_init + me->arch.init_stub_offset + + stub = me->module_init_rx + 
me->arch.init_stub_offset + i * sizeof(struct stub_entry); } else { i = me->arch.stub_count++; BUG_ON(me->arch.stub_count > me->arch.stub_max); - stub = me->module_core + me->arch.stub_offset + + stub = me->module_core_rx + me->arch.stub_offset + i * sizeof(struct stub_entry); } @@ -721,7 +743,7 @@ register_unwind_table(struct module *me, table = (unsigned char *)sechdrs[me->arch.unwind_section].sh_addr; end = table + sechdrs[me->arch.unwind_section].sh_size; - gp = (Elf_Addr)me->module_core + me->arch.got_offset; + gp = (Elf_Addr)me->module_core_rw + me->arch.got_offset; DEBUGP("register_unwind_table(), sect = %d at 0x%p - 0x%p (gp=0x%lx)\n", me->arch.unwind_section, table, end, gp); diff -urNp linux-2.6.17.11/arch/parisc/kernel/ptrace.c linux-2.6.17.11/arch/parisc/kernel/ptrace.c --- linux-2.6.17.11/arch/parisc/kernel/ptrace.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/parisc/kernel/ptrace.c 2006-09-01 16:20:28.000000000 -0400 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff -urNp linux-2.6.17.11/arch/parisc/kernel/sys_parisc.c linux-2.6.17.11/arch/parisc/kernel/sys_parisc.c --- linux-2.6.17.11/arch/parisc/kernel/sys_parisc.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/parisc/kernel/sys_parisc.c 2006-09-01 16:20:28.000000000 -0400 @@ -105,7 +105,7 @@ unsigned long arch_get_unmapped_area(str if (len > TASK_SIZE) return -ENOMEM; if (!addr) - addr = TASK_UNMAPPED_BASE; + addr = current->mm->mmap_base; if (filp) { addr = get_shared_area(filp->f_mapping, addr, len, pgoff); diff -urNp linux-2.6.17.11/arch/parisc/kernel/traps.c linux-2.6.17.11/arch/parisc/kernel/traps.c --- linux-2.6.17.11/arch/parisc/kernel/traps.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/parisc/kernel/traps.c 2006-09-01 16:20:28.000000000 -0400 @@ -712,9 +712,7 @@ void handle_interruption(int code, struc down_read(&current->mm->mmap_sem); vma = find_vma(current->mm,regs->iaoq[0]); - if (vma && (regs->iaoq[0] >=
vma->vm_start) - && (vma->vm_flags & VM_EXEC)) { - + if (vma && (regs->iaoq[0] >= vma->vm_start)) { fault_address = regs->iaoq[0]; fault_space = regs->iasq[0]; diff -urNp linux-2.6.17.11/arch/parisc/mm/fault.c linux-2.6.17.11/arch/parisc/mm/fault.c --- linux-2.6.17.11/arch/parisc/mm/fault.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/parisc/mm/fault.c 2006-09-01 16:20:28.000000000 -0400 @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include #include @@ -57,7 +59,7 @@ DEFINE_PER_CPU(struct exception_data, ex static unsigned long parisc_acctyp(unsigned long code, unsigned int inst) { - if (code == 6 || code == 16) + if (code == 6 || code == 7 || code == 16) return VM_EXEC; switch (inst & 0xf0000000) { @@ -143,6 +145,116 @@ parisc_acctyp(unsigned long code, unsign } #endif +#ifdef CONFIG_PAX_PAGEEXEC +/* + * PaX: decide what to do with offenders (instruction_pointer(regs) = fault address) + * + * returns 1 when task should be killed + * 2 when rt_sigreturn trampoline was detected + * 3 when unpatched PLT trampoline was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#ifdef CONFIG_PAX_EMUPLT + int err; + + do { /* PaX: unpatched PLT emulation */ + unsigned int bl, depwi; + + err = get_user(bl, (unsigned int*)instruction_pointer(regs)); + err |= get_user(depwi, (unsigned int*)(instruction_pointer(regs)+4)); + + if (err) + break; + + if (bl == 0xEA9F1FDDU && depwi == 0xD6801C1EU) { + unsigned int ldw, bv, ldw2, addr = instruction_pointer(regs)-12; + + err = get_user(ldw, (unsigned int*)addr); + err |= get_user(bv, (unsigned int*)(addr+4)); + err |= get_user(ldw2, (unsigned int*)(addr+8)); + + if (err) + break; + + if (ldw == 0x0E801096U && + bv == 0xEAC0C000U && + ldw2 == 0x0E881095U) + { + unsigned int resolver, map; + + err = get_user(resolver, (unsigned int*)(instruction_pointer(regs)+8)); + err |= get_user(map, (unsigned int*)(instruction_pointer(regs)+12)); + if (err) + break; + + regs->gr[20] = 
instruction_pointer(regs)+8; + regs->gr[21] = map; + regs->gr[22] = resolver; + regs->iaoq[0] = resolver | 3UL; + regs->iaoq[1] = regs->iaoq[0] + 4; + return 3; + } + } + } while (0); +#endif + +#ifdef CONFIG_PAX_EMUTRAMP + +#ifndef CONFIG_PAX_EMUSIGRT + if (!(current->mm->pax_flags & MF_PAX_EMUTRAMP)) + return 1; +#endif + + do { /* PaX: rt_sigreturn emulation */ + unsigned int ldi1, ldi2, bel, nop; + + err = get_user(ldi1, (unsigned int *)instruction_pointer(regs)); + err |= get_user(ldi2, (unsigned int *)(instruction_pointer(regs)+4)); + err |= get_user(bel, (unsigned int *)(instruction_pointer(regs)+8)); + err |= get_user(nop, (unsigned int *)(instruction_pointer(regs)+12)); + + if (err) + break; + + if ((ldi1 == 0x34190000U || ldi1 == 0x34190002U) && + ldi2 == 0x3414015AU && + bel == 0xE4008200U && + nop == 0x08000240U) + { + regs->gr[25] = (ldi1 & 2) >> 1; + regs->gr[20] = __NR_rt_sigreturn; + regs->gr[31] = regs->iaoq[1] + 16; + regs->sr[0] = regs->iasq[1]; + regs->iaoq[0] = 0x100UL; + regs->iaoq[1] = regs->iaoq[0] + 4; + regs->iasq[0] = regs->sr[2]; + regs->iasq[1] = regs->sr[2]; + return 2; + } + } while (0); +#endif + + return 1; +} + +void pax_report_insns(void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int*)pc+i)) + printk("???????? 
"); + else + printk("%08x ", c); + } + printk("\n"); +} +#endif + void do_page_fault(struct pt_regs *regs, unsigned long code, unsigned long address) { @@ -168,8 +280,33 @@ good_area: acc_type = parisc_acctyp(code,regs->iir); - if ((vma->vm_flags & acc_type) != acc_type) + if ((vma->vm_flags & acc_type) != acc_type) { + +#ifdef CONFIG_PAX_PAGEEXEC + if ((mm->pax_flags & MF_PAX_PAGEEXEC) && (acc_type & VM_EXEC) && + (address & ~3UL) == instruction_pointer(regs)) + { + up_read(&mm->mmap_sem); + switch(pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_EMUPLT + case 3: + return; +#endif + +#ifdef CONFIG_PAX_EMUTRAMP + case 2: + return; +#endif + + } + pax_report_fault(regs, (void*)instruction_pointer(regs), (void*)regs->gr[30]); + do_exit(SIGKILL); + } +#endif + goto bad_area; + } /* * If for any reason at all we couldn't handle the fault, make diff -urNp linux-2.6.17.11/arch/powerpc/kernel/module_32.c linux-2.6.17.11/arch/powerpc/kernel/module_32.c --- linux-2.6.17.11/arch/powerpc/kernel/module_32.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/powerpc/kernel/module_32.c 2006-09-01 16:20:28.000000000 -0400 @@ -123,7 +123,7 @@ int module_frob_arch_sections(Elf32_Ehdr me->arch.core_plt_section = i; } if (!me->arch.core_plt_section || !me->arch.init_plt_section) { - printk("Module doesn't contain .plt or .init.plt sections.\n"); + printk("Module %s doesn't contain .plt or .init.plt sections.\n", me->name); return -ENOEXEC; } @@ -164,11 +164,16 @@ static uint32_t do_plt_call(void *locati DEBUGP("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location); /* Init, or core PLT? 
*/ - if (location >= mod->module_core - && location < mod->module_core + mod->core_size) + if ((location >= mod->module_core_rx && location < mod->module_core_rx + mod->core_size_rx) || + (location >= mod->module_core_rw && location < mod->module_core_rw + mod->core_size_rw)) entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr; - else + else if ((location >= mod->module_init_rx && location < mod->module_init_rx + mod->init_size_rx) || + (location >= mod->module_init_rw && location < mod->module_init_rw + mod->init_size_rw)) entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr; + else { + printk(KERN_ERR "%s: invalid R_PPC_REL24 entry found\n", mod->name); + return ~0UL; + } /* Find this entry, or if that fails, the next avail. entry */ while (entry->jump[0]) { diff -urNp linux-2.6.17.11/arch/powerpc/mm/fault.c linux-2.6.17.11/arch/powerpc/mm/fault.c --- linux-2.6.17.11/arch/powerpc/mm/fault.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/powerpc/mm/fault.c 2006-09-01 16:20:28.000000000 -0400 @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -103,6 +104,38 @@ static void do_dabr(struct pt_regs *regs } #endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/ +#ifdef CONFIG_PAX_PAGEEXEC +/* + * PaX: decide what to do with offenders (regs->nip = fault address) + * + * returns 1 when task should be killed + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#if defined(CONFIG_PAX_EMUPLT) || defined(CONFIG_PAX_EMUSIGRT) + int err; +#endif + + return 1; +} + +void pax_report_insns(void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int*)pc+i)) + printk("???????? "); + else + printk("%08x ", c); + } + printk("\n"); +} +#endif + /* * For 600- and 800-family processors, the error_code parameter is DSISR * for a data fault, SRR1 for an instruction fault. 
For 400-family processors @@ -335,6 +368,19 @@ bad_area: bad_area_nosemaphore: /* User mode accesses cause a SIGSEGV */ if (user_mode(regs)) { + +#ifdef CONFIG_PAX_PAGEEXEC + if (mm->pax_flags & MF_PAX_PAGEEXEC) { + if (is_exec && (error_code & DSISR_PROTFAULT)) { + switch (pax_handle_fetch_fault(regs)) { + } + + pax_report_fault(regs, (void*)regs->nip, (void*)regs->gpr[1]); + do_exit(SIGKILL); + } + } +#endif + _exception(SIGSEGV, regs, code, address); return 0; } diff -urNp linux-2.6.17.11/arch/powerpc/mm/mmap.c linux-2.6.17.11/arch/powerpc/mm/mmap.c --- linux-2.6.17.11/arch/powerpc/mm/mmap.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/powerpc/mm/mmap.c 2006-09-01 16:20:28.000000000 -0400 @@ -74,10 +74,22 @@ void arch_pick_mmap_layout(struct mm_str */ if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + mm->get_unmapped_area = arch_get_unmapped_area; mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(); + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base -= mm->delta_mmap; +#endif + mm->get_unmapped_area = arch_get_unmapped_area_topdown; mm->unmap_area = arch_unmap_area_topdown; } diff -urNp linux-2.6.17.11/arch/ppc/mm/fault.c linux-2.6.17.11/arch/ppc/mm/fault.c --- linux-2.6.17.11/arch/ppc/mm/fault.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/ppc/mm/fault.c 2006-09-01 16:20:28.000000000 -0400 @@ -26,6 +26,11 @@ #include #include #include +#include +#include +#include +#include +#include #include #include @@ -49,6 +54,364 @@ unsigned long pte_misses; /* updated by unsigned long pte_errors; /* updated by do_page_fault() */ unsigned int probingmem; +#ifdef CONFIG_PAX_EMUSIGRT +void pax_syscall_close(struct vm_area_struct * vma) +{ + vma->vm_mm->call_syscall = 0UL; +} + +static struct page* pax_syscall_nopage(struct vm_area_struct *vma, unsigned long address, int 
*type) +{ + struct page* page; + unsigned int *kaddr; + + page = alloc_page(GFP_HIGHUSER); + if (!page) + return NOPAGE_OOM; + + kaddr = kmap(page); + memset(kaddr, 0, PAGE_SIZE); + kaddr[0] = 0x44000002U; /* sc */ + __flush_dcache_icache(kaddr); + kunmap(page); + if (type) + *type = VM_FAULT_MAJOR; + return page; +} + +static struct vm_operations_struct pax_vm_ops = { + .close = pax_syscall_close, + .nopage = pax_syscall_nopage, +}; + +static int pax_insert_vma(struct vm_area_struct *vma, unsigned long addr) +{ + int ret; + + memset(vma, 0, sizeof(*vma)); + vma->vm_mm = current->mm; + vma->vm_start = addr; + vma->vm_end = addr + PAGE_SIZE; + vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC; + vma->vm_page_prot = protection_map[vma->vm_flags & 0x0f]; + vma->vm_ops = &pax_vm_ops; + + ret = insert_vm_struct(current->mm, vma); + if (ret) + return ret; + + ++current->mm->total_vm; + return 0; +} +#endif + +#ifdef CONFIG_PAX_PAGEEXEC +/* + * PaX: decide what to do with offenders (regs->nip = fault address) + * + * returns 1 when task should be killed + * 2 when patched GOT trampoline was detected + * 3 when patched PLT trampoline was detected + * 4 when unpatched PLT trampoline was detected + * 5 when sigreturn trampoline was detected + * 7 when rt_sigreturn trampoline was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#if defined(CONFIG_PAX_EMUPLT) || defined(CONFIG_PAX_EMUSIGRT) + int err; +#endif + +#ifdef CONFIG_PAX_EMUPLT + do { /* PaX: patched GOT emulation */ + unsigned int blrl; + + err = get_user(blrl, (unsigned int*)regs->nip); + + if (!err && blrl == 0x4E800021U) { + unsigned long temp = regs->nip; + + regs->nip = regs->link & 0xFFFFFFFCUL; + regs->link = temp + 4UL; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #1 */ + unsigned int b; + + err = get_user(b, (unsigned int *)regs->nip); + + if (!err && (b & 0xFC000003U) == 0x48000000U) { + regs->nip += (((b | 0xFC000000UL) ^ 0x02000000UL) + 
0x02000000UL); + return 3; + } + } while (0); + + do { /* PaX: unpatched PLT emulation #1 */ + unsigned int li, b; + + err = get_user(li, (unsigned int *)regs->nip); + err |= get_user(b, (unsigned int *)(regs->nip+4)); + + if (!err && (li & 0xFFFF0000U) == 0x39600000U && (b & 0xFC000003U) == 0x48000000U) { + unsigned int rlwinm, add, li2, addis2, mtctr, li3, addis3, bctr; + unsigned long addr = b | 0xFC000000UL; + + addr = regs->nip + 4 + ((addr ^ 0x02000000UL) + 0x02000000UL); + err = get_user(rlwinm, (unsigned int*)addr); + err |= get_user(add, (unsigned int*)(addr+4)); + err |= get_user(li2, (unsigned int*)(addr+8)); + err |= get_user(addis2, (unsigned int*)(addr+12)); + err |= get_user(mtctr, (unsigned int*)(addr+16)); + err |= get_user(li3, (unsigned int*)(addr+20)); + err |= get_user(addis3, (unsigned int*)(addr+24)); + err |= get_user(bctr, (unsigned int*)(addr+28)); + + if (err) + break; + + if (rlwinm == 0x556C083CU && + add == 0x7D6C5A14U && + (li2 & 0xFFFF0000U) == 0x39800000U && + (addis2 & 0xFFFF0000U) == 0x3D8C0000U && + mtctr == 0x7D8903A6U && + (li3 & 0xFFFF0000U) == 0x39800000U && + (addis3 & 0xFFFF0000U) == 0x3D8C0000U && + bctr == 0x4E800420U) + { + regs->gpr[PT_R11] = 3 * (((li | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + regs->gpr[PT_R12] = (((li3 | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + regs->gpr[PT_R12] += (addis3 & 0xFFFFU) << 16; + regs->ctr = (((li2 | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + regs->ctr += (addis2 & 0xFFFFU) << 16; + regs->nip = regs->ctr; + return 4; + } + } + } while (0); + +#if 0 + do { /* PaX: unpatched PLT emulation #2 */ + unsigned int lis, lwzu, b, bctr; + + err = get_user(lis, (unsigned int *)regs->nip); + err |= get_user(lwzu, (unsigned int *)(regs->nip+4)); + err |= get_user(b, (unsigned int *)(regs->nip+8)); + err |= get_user(bctr, (unsigned int *)(regs->nip+12)); + + if (err) + break; + + if ((lis & 0xFFFF0000U) == 0x39600000U && + (lwzu & 0xU) == 0xU && + (b & 0xFC000003U) == 0x48000000U 
&& + bctr == 0x4E800420U) + { + unsigned int addis, addi, rlwinm, add, li2, addis2, mtctr, li3, addis3, bctr; + unsigned long addr = b | 0xFC000000UL; + + addr = regs->nip + 12 + ((addr ^ 0x02000000UL) + 0x02000000UL); + err = get_user(addis, (unsigned int*)addr); + err |= get_user(addi, (unsigned int*)(addr+4)); + err |= get_user(rlwinm, (unsigned int*)(addr+8)); + err |= get_user(add, (unsigned int*)(addr+12)); + err |= get_user(li2, (unsigned int*)(addr+16)); + err |= get_user(addis2, (unsigned int*)(addr+20)); + err |= get_user(mtctr, (unsigned int*)(addr+24)); + err |= get_user(li3, (unsigned int*)(addr+28)); + err |= get_user(addis3, (unsigned int*)(addr+32)); + err |= get_user(bctr, (unsigned int*)(addr+36)); + + if (err) + break; + + if ((addis & 0xFFFF0000U) == 0x3D6B0000U && + (addi & 0xFFFF0000U) == 0x396B0000U && + rlwinm == 0x556C083CU && + add == 0x7D6C5A14U && + (li2 & 0xFFFF0000U) == 0x39800000U && + (addis2 & 0xFFFF0000U) == 0x3D8C0000U && + mtctr == 0x7D8903A6U && + (li3 & 0xFFFF0000U) == 0x39800000U && + (addis3 & 0xFFFF0000U) == 0x3D8C0000U && + bctr == 0x4E800420U) + { + regs->gpr[PT_R11] = + regs->gpr[PT_R11] = 3 * (((li | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + regs->gpr[PT_R12] = (((li3 | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + regs->gpr[PT_R12] += (addis3 & 0xFFFFU) << 16; + regs->ctr = (((li2 | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + regs->ctr += (addis2 & 0xFFFFU) << 16; + regs->nip = regs->ctr; + return 4; + } + } + } while (0); +#endif + + do { /* PaX: unpatched PLT emulation #3 */ + unsigned int li, b; + + err = get_user(li, (unsigned int *)regs->nip); + err |= get_user(b, (unsigned int *)(regs->nip+4)); + + if (!err && (li & 0xFFFF0000U) == 0x39600000U && (b & 0xFC000003U) == 0x48000000U) { + unsigned int addis, lwz, mtctr, bctr; + unsigned long addr = b | 0xFC000000UL; + + addr = regs->nip + 4 + ((addr ^ 0x02000000UL) + 0x02000000UL); + err = get_user(addis, (unsigned int*)addr); + err |= get_user(lwz, 
(unsigned int*)(addr+4)); + err |= get_user(mtctr, (unsigned int*)(addr+8)); + err |= get_user(bctr, (unsigned int*)(addr+12)); + + if (err) + break; + + if ((addis & 0xFFFF0000U) == 0x3D6B0000U && + (lwz & 0xFFFF0000U) == 0x816B0000U && + mtctr == 0x7D6903A6U && + bctr == 0x4E800420U) + { + unsigned int r11; + + addr = (addis << 16) + (((li | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + addr += (((lwz | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + + err = get_user(r11, (unsigned int*)addr); + if (err) + break; + + regs->gpr[PT_R11] = r11; + regs->ctr = r11; + regs->nip = r11; + return 4; + } + } + } while (0); +#endif + +#ifdef CONFIG_PAX_EMUSIGRT + do { /* PaX: sigreturn emulation */ + unsigned int li, sc; + + err = get_user(li, (unsigned int *)regs->nip); + err |= get_user(sc, (unsigned int *)(regs->nip+4)); + + if (!err && li == 0x38000000U + __NR_sigreturn && sc == 0x44000002U) { + struct vm_area_struct *vma; + unsigned long call_syscall; + + down_read(&current->mm->mmap_sem); + call_syscall = current->mm->call_syscall; + up_read(&current->mm->mmap_sem); + if (likely(call_syscall)) + goto emulate; + + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + + down_write(&current->mm->mmap_sem); + if (current->mm->call_syscall) { + call_syscall = current->mm->call_syscall; + up_write(&current->mm->mmap_sem); + if (vma) kmem_cache_free(vm_area_cachep, vma); + goto emulate; + } + + call_syscall = get_unmapped_area(NULL, 0UL, PAGE_SIZE, 0UL, MAP_PRIVATE); + if (!vma || (call_syscall & ~PAGE_MASK)) { + up_write(&current->mm->mmap_sem); + if (vma) kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + if (pax_insert_vma(vma, call_syscall)) { + up_write(&current->mm->mmap_sem); + kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + current->mm->call_syscall = call_syscall; + up_write(&current->mm->mmap_sem); + +emulate: + regs->gpr[PT_R0] = __NR_sigreturn; + regs->nip = call_syscall; + return 5; + } + } while (0); + + do { /* PaX: rt_sigreturn emulation */ + unsigned int li, sc; + + err =
get_user(li, (unsigned int *)regs->nip); + err |= get_user(sc, (unsigned int *)(regs->nip+4)); + + if (!err && li == 0x38000000U + __NR_rt_sigreturn && sc == 0x44000002U) { + struct vm_area_struct *vma; + unsigned int call_syscall; + + down_read(&current->mm->mmap_sem); + call_syscall = current->mm->call_syscall; + up_read(&current->mm->mmap_sem); + if (likely(call_syscall)) + goto rt_emulate; + + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + + down_write(&current->mm->mmap_sem); + if (current->mm->call_syscall) { + call_syscall = current->mm->call_syscall; + up_write(&current->mm->mmap_sem); + if (vma) kmem_cache_free(vm_area_cachep, vma); + goto rt_emulate; + } + + call_syscall = get_unmapped_area(NULL, 0UL, PAGE_SIZE, 0UL, MAP_PRIVATE); + if (!vma || (call_syscall & ~PAGE_MASK)) { + up_write(&current->mm->mmap_sem); + if (vma) kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + if (pax_insert_vma(vma, call_syscall)) { + up_write(&current->mm->mmap_sem); + kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + current->mm->call_syscall = call_syscall; + up_write(&current->mm->mmap_sem); + +rt_emulate: + regs->gpr[PT_R0] = __NR_rt_sigreturn; + regs->nip = call_syscall; + return 6; + } + } while (0); +#endif + + return 1; +} + +void pax_report_insns(void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int*)pc+i)) + printk("???????? "); + else + printk("%08x ", c); + } + printk("\n"); +} +#endif + /* * Check whether the instruction at regs->nip is a store using * an update addressing form which will update r1. @@ -109,7 +472,7 @@ int do_page_fault(struct pt_regs *regs, * indicate errors in DSISR but can validly be set in SRR1.
*/ if (TRAP(regs) == 0x400) - error_code &= 0x48200000; + error_code &= 0x58200000; else is_write = error_code & 0x02000000; #endif /* CONFIG_4xx || CONFIG_BOOKE */ @@ -204,15 +567,14 @@ good_area: pte_t *ptep; pmd_t *pmdp; -#if 0 +#if 1 /* It would be nice to actually enforce the VM execute permission on CPUs which can do so, but far too much stuff in userspace doesn't get the permissions right, so we let any page be executed for now. */ if (! (vma->vm_flags & VM_EXEC)) goto bad_area; -#endif - +#else /* Since 4xx/Book-E supports per-page execute permission, * we lazily flush dcache to icache. */ ptep = NULL; @@ -235,6 +597,7 @@ good_area: pte_unmap_unlock(ptep, ptl); } #endif +#endif /* a read */ } else { /* protection fault */ @@ -280,6 +643,33 @@ bad_area: /* User mode accesses cause a SIGSEGV */ if (user_mode(regs)) { + +#ifdef CONFIG_PAX_PAGEEXEC + if (mm->pax_flags & MF_PAX_PAGEEXEC) { + if ((TRAP(regs) == 0x400) && (regs->nip == address)) { + switch (pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_EMUPLT + case 2: + case 3: + case 4: + return 0; +#endif + +#ifdef CONFIG_PAX_EMUSIGRT + case 5: + case 6: + return 0; +#endif + + } + + pax_report_fault(regs, (void*)regs->nip, (void*)regs->gpr[1]); + do_exit(SIGKILL); + } + } +#endif + _exception(SIGSEGV, regs, code, address); return 0; } diff -urNp linux-2.6.17.11/arch/s390/kernel/module.c linux-2.6.17.11/arch/s390/kernel/module.c --- linux-2.6.17.11/arch/s390/kernel/module.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/s390/kernel/module.c 2006-09-01 16:20:28.000000000 -0400 @@ -164,11 +164,11 @@ module_frob_arch_sections(Elf_Ehdr *hdr, /* Increase core size by size of got & plt and set start offsets for got and plt. 
*/ - me->core_size = ALIGN(me->core_size, 4); - me->arch.got_offset = me->core_size; - me->core_size += me->arch.got_size; - me->arch.plt_offset = me->core_size; - me->core_size += me->arch.plt_size; + me->core_size_rw = ALIGN(me->core_size_rw, 4); + me->arch.got_offset = me->core_size_rw; + me->core_size_rw += me->arch.got_size; + me->arch.plt_offset = me->core_size_rx; + me->core_size_rx += me->arch.plt_size; return 0; } @@ -254,7 +254,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base if (info->got_initialized == 0) { Elf_Addr *gotent; - gotent = me->module_core + me->arch.got_offset + + gotent = me->module_core_rw + me->arch.got_offset + info->got_offset; *gotent = val; info->got_initialized = 1; @@ -278,7 +278,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base else if (r_type == R_390_GOTENT || r_type == R_390_GOTPLTENT) *(unsigned int *) loc = - (val + (Elf_Addr) me->module_core - loc) >> 1; + (val + (Elf_Addr) me->module_core_rw - loc) >> 1; else if (r_type == R_390_GOT64 || r_type == R_390_GOTPLT64) *(unsigned long *) loc = val; @@ -292,7 +292,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */ if (info->plt_initialized == 0) { unsigned int *ip; - ip = me->module_core + me->arch.plt_offset + + ip = me->module_core_rx + me->arch.plt_offset + info->plt_offset; #ifndef CONFIG_64BIT ip[0] = 0x0d105810; /* basr 1,0; l 1,6(1); br 1 */ @@ -314,7 +314,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base val = me->arch.plt_offset - me->arch.got_offset + info->plt_offset + rela->r_addend; else - val = (Elf_Addr) me->module_core + + val = (Elf_Addr) me->module_core_rx + me->arch.plt_offset + info->plt_offset + rela->r_addend - loc; if (r_type == R_390_PLT16DBL) @@ -334,7 +334,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base case R_390_GOTOFF32: /* 32 bit offset to GOT. */ case R_390_GOTOFF64: /* 64 bit offset to GOT. 
*/ val = val + rela->r_addend - - ((Elf_Addr) me->module_core + me->arch.got_offset); + ((Elf_Addr) me->module_core_rw + me->arch.got_offset); if (r_type == R_390_GOTOFF16) *(unsigned short *) loc = val; else if (r_type == R_390_GOTOFF32) @@ -344,7 +344,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base break; case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */ case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. */ - val = (Elf_Addr) me->module_core + me->arch.got_offset + + val = (Elf_Addr) me->module_core_rw + me->arch.got_offset + rela->r_addend - loc; if (r_type == R_390_GOTPC) *(unsigned int *) loc = val; diff -urNp linux-2.6.17.11/arch/sparc/kernel/ptrace.c linux-2.6.17.11/arch/sparc/kernel/ptrace.c --- linux-2.6.17.11/arch/sparc/kernel/ptrace.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/sparc/kernel/ptrace.c 2006-09-01 16:20:28.000000000 -0400 @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -305,6 +306,11 @@ asmlinkage void do_ptrace(struct pt_regs goto out_tsk; } + if (gr_handle_ptrace(child, request)) { + pt_error_return(regs, EPERM); + goto out_tsk; + } + if ((current->personality == PER_SUNOS && request == PTRACE_SUNATTACH) || (current->personality != PER_SUNOS && request == PTRACE_ATTACH)) { if (ptrace_attach(child)) { diff -urNp linux-2.6.17.11/arch/sparc/kernel/sys_sparc.c linux-2.6.17.11/arch/sparc/kernel/sys_sparc.c --- linux-2.6.17.11/arch/sparc/kernel/sys_sparc.c 2006-08-23 19:30:00.000000000 -0400 +++ linux-2.6.17.11/arch/sparc/kernel/sys_sparc.c 2006-09-01 16:20:28.000000000 -0400 @@ -57,7 +57,7 @@ unsigned long arch_get_unmapped_area(str if (ARCH_SUN4C_SUN4 && len > 0x20000000) return -ENOMEM; if (!addr) - addr = TASK_UNMAPPED_BASE; + addr = current->mm->mmap_base; if (flags & MAP_SHARED) addr = COLOUR_ALIGN(addr); diff -urNp linux-2.6.17.11/arch/sparc/Makefile linux-2.6.17.11/arch/sparc/Makefile --- linux-2.6.17.11/arch/sparc/Makefile 2006-08-07 00:18:54.000000000 -0400 +++ 
linux-2.6.17.11/arch/sparc/Makefile 2006-09-01 16:20:28.000000000 -0400 @@ -34,7 +34,7 @@ libs-y += arch/sparc/prom/ arch/sparc/li # Renaming is done to avoid confusing pattern matching rules in 2.5.45 (multy-) INIT_Y := $(patsubst %/, %/built-in.o, $(init-y)) CORE_Y := $(core-y) -CORE_Y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ +CORE_Y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ grsecurity/ CORE_Y := $(patsubst %/, %/built-in.o, $(CORE_Y)) DRIVERS_Y := $(patsubst %/, %/built-in.o, $(drivers-y)) NET_Y := $(patsubst %/, %/built-in.o, $(net-y)) diff -urNp linux-2.6.17.11/arch/sparc/mm/fault.c linux-2.6.17.11/arch/sparc/mm/fault.c --- linux-2.6.17.11/arch/sparc/mm/fault.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/sparc/mm/fault.c 2006-09-01 16:20:28.000000000 -0400 @@ -21,6 +21,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -217,6 +221,252 @@ static unsigned long compute_si_addr(str return safe_compute_effective_address(regs, insn); } +#ifdef CONFIG_PAX_PAGEEXEC +void pax_emuplt_close(struct vm_area_struct * vma) +{ + vma->vm_mm->call_dl_resolve = 0UL; +} + +static struct page* pax_emuplt_nopage(struct vm_area_struct *vma, unsigned long address, int *type) +{ + struct page* page; + unsigned int *kaddr; + + page = alloc_page(GFP_HIGHUSER); + if (!page) + return NOPAGE_OOM; + + kaddr = kmap(page); + memset(kaddr, 0, PAGE_SIZE); + kaddr[0] = 0x9DE3BFA8U; /* save */ + flush_dcache_page(page); + kunmap(page); + if (type) + *type = VM_FAULT_MAJOR; + + return page; +} + +static struct vm_operations_struct pax_vm_ops = { + .close = pax_emuplt_close, + .nopage = pax_emuplt_nopage, +}; + +static int pax_insert_vma(struct vm_area_struct *vma, unsigned long addr) +{ + int ret; + + memset(vma, 0, sizeof(*vma)); + vma->vm_mm = current->mm; + vma->vm_start = addr; + vma->vm_end = addr + PAGE_SIZE; + vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC; + vma->vm_page_prot = 
protection_map[vma->vm_flags & 0x0f]; + vma->vm_ops = &pax_vm_ops; + + ret = insert_vm_struct(current->mm, vma); + if (ret) + return ret; + + ++current->mm->total_vm; + return 0; +} + +/* + * PaX: decide what to do with offenders (regs->pc = fault address) + * + * returns 1 when task should be killed + * 2 when patched PLT trampoline was detected + * 3 when unpatched PLT trampoline was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#ifdef CONFIG_PAX_EMUPLT + int err; + + do { /* PaX: patched PLT emulation #1 */ + unsigned int sethi1, sethi2, jmpl; + + err = get_user(sethi1, (unsigned int*)regs->pc); + err |= get_user(sethi2, (unsigned int*)(regs->pc+4)); + err |= get_user(jmpl, (unsigned int*)(regs->pc+8)); + + if (err) + break; + + if ((sethi1 & 0xFFC00000U) == 0x03000000U && + (sethi2 & 0xFFC00000U) == 0x03000000U && + (jmpl & 0xFFFFE000U) == 0x81C06000U) + { + unsigned int addr; + + regs->u_regs[UREG_G1] = (sethi2 & 0x003FFFFFU) << 10; + addr = regs->u_regs[UREG_G1]; + addr += (((jmpl | 0xFFFFE000U) ^ 0x00001000U) + 0x00001000U); + regs->pc = addr; + regs->npc = addr+4; + return 2; + } + } while (0); + + { /* PaX: patched PLT emulation #2 */ + unsigned int ba; + + err = get_user(ba, (unsigned int*)regs->pc); + + if (!err && (ba & 0xFFC00000U) == 0x30800000U) { + unsigned int addr; + + addr = regs->pc + ((((ba | 0xFFC00000U) ^ 0x00200000U) + 0x00200000U) << 2); + regs->pc = addr; + regs->npc = addr+4; + return 2; + } + } + + do { /* PaX: patched PLT emulation #3 */ + unsigned int sethi, jmpl, nop; + + err = get_user(sethi, (unsigned int*)regs->pc); + err |= get_user(jmpl, (unsigned int*)(regs->pc+4)); + err |= get_user(nop, (unsigned int*)(regs->pc+8)); + + if (err) + break; + + if ((sethi & 0xFFC00000U) == 0x03000000U && + (jmpl & 0xFFFFE000U) == 0x81C06000U && + nop == 0x01000000U) + { + unsigned int addr; + + addr = (sethi & 0x003FFFFFU) << 10; + regs->u_regs[UREG_G1] = addr; + addr += (((jmpl | 0xFFFFE000U) ^ 0x00001000U) + 
0x00001000U); + regs->pc = addr; + regs->npc = addr+4; + return 2; + } + } while (0); + + do { /* PaX: unpatched PLT emulation step 1 */ + unsigned int sethi, ba, nop; + + err = get_user(sethi, (unsigned int*)regs->pc); + err |= get_user(ba, (unsigned int*)(regs->pc+4)); + err |= get_user(nop, (unsigned int*)(regs->pc+8)); + + if (err) + break; + + if ((sethi & 0xFFC00000U) == 0x03000000U && + ((ba & 0xFFC00000U) == 0x30800000U || (ba & 0xFFF80000U) == 0x30680000U) && + nop == 0x01000000U) + { + unsigned int addr, save, call; + + if ((ba & 0xFFC00000U) == 0x30800000U) + addr = regs->pc + 4 + ((((ba | 0xFFC00000U) ^ 0x00200000U) + 0x00200000U) << 2); + else + addr = regs->pc + 4 + ((((ba | 0xFFF80000U) ^ 0x00040000U) + 0x00040000U) << 2); + + err = get_user(save, (unsigned int*)addr); + err |= get_user(call, (unsigned int*)(addr+4)); + err |= get_user(nop, (unsigned int*)(addr+8)); + if (err) + break; + + if (save == 0x9DE3BFA8U && + (call & 0xC0000000U) == 0x40000000U && + nop == 0x01000000U) + { + struct vm_area_struct *vma; + unsigned long call_dl_resolve; + + down_read(&current->mm->mmap_sem); + call_dl_resolve = current->mm->call_dl_resolve; + up_read(&current->mm->mmap_sem); + if (likely(call_dl_resolve)) + goto emulate; + + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + + down_write(&current->mm->mmap_sem); + if (current->mm->call_dl_resolve) { + call_dl_resolve = current->mm->call_dl_resolve; + up_write(&current->mm->mmap_sem); + if (vma) kmem_cache_free(vm_area_cachep, vma); + goto emulate; + } + + call_dl_resolve = get_unmapped_area(NULL, 0UL, PAGE_SIZE, 0UL, MAP_PRIVATE); + if (!vma || (call_dl_resolve & ~PAGE_MASK)) { + up_write(&current->mm->mmap_sem); + if (vma) kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + if (pax_insert_vma(vma, call_dl_resolve)) { + up_write(&current->mm->mmap_sem); + kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + current->mm->call_dl_resolve = call_dl_resolve; + up_write(&current->mm->mmap_sem); + +emulate: + regs->u_regs[UREG_G1] = (sethi &
0x003FFFFFU) << 10; + regs->pc = call_dl_resolve; + regs->npc = addr+4; + return 3; + } + } + } while (0); + + do { /* PaX: unpatched PLT emulation step 2 */ + unsigned int save, call, nop; + + err = get_user(save, (unsigned int*)(regs->pc-4)); + err |= get_user(call, (unsigned int*)regs->pc); + err |= get_user(nop, (unsigned int*)(regs->pc+4)); + if (err) + break; + + if (save == 0x9DE3BFA8U && + (call & 0xC0000000U) == 0x40000000U && + nop == 0x01000000U) + { + unsigned int dl_resolve = regs->pc + ((((call | 0xC0000000U) ^ 0x20000000U) + 0x20000000U) << 2); + + regs->u_regs[UREG_RETPC] = regs->pc; + regs->pc = dl_resolve; + regs->npc = dl_resolve+4; + return 3; + } + } while (0); +#endif + + return 1; +} + +void pax_report_insns(void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int*)pc+i)) + printk("???????? "); + else + printk("%08x ", c); + } + printk("\n"); +} +#endif + asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, unsigned long address) { @@ -280,6 +530,24 @@ good_area: if(!(vma->vm_flags & VM_WRITE)) goto bad_area; } else { + +#ifdef CONFIG_PAX_PAGEEXEC + if ((mm->pax_flags & MF_PAX_PAGEEXEC) && text_fault && !(vma->vm_flags & VM_EXEC)) { + up_read(&mm->mmap_sem); + switch (pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_EMUPLT + case 2: + case 3: + return; +#endif + + } + pax_report_fault(regs, (void*)regs->pc, (void*)regs->u_regs[UREG_FP]); + do_exit(SIGKILL); + } +#endif + /* Allow reads even for write-only mappings */ if(!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; diff -urNp linux-2.6.17.11/arch/sparc/mm/init.c linux-2.6.17.11/arch/sparc/mm/init.c --- linux-2.6.17.11/arch/sparc/mm/init.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/sparc/mm/init.c 2006-09-01 16:20:28.000000000 -0400 @@ -333,17 +333,17 @@ void __init paging_init(void) /* Initialize the protection map with 
non-constant, MMU dependent values. */ protection_map[0] = PAGE_NONE; - protection_map[1] = PAGE_READONLY; - protection_map[2] = PAGE_COPY; - protection_map[3] = PAGE_COPY; + protection_map[1] = PAGE_READONLY_NOEXEC; + protection_map[2] = PAGE_COPY_NOEXEC; + protection_map[3] = PAGE_COPY_NOEXEC; protection_map[4] = PAGE_READONLY; protection_map[5] = PAGE_READONLY; protection_map[6] = PAGE_COPY; protection_map[7] = PAGE_COPY; protection_map[8] = PAGE_NONE; - protection_map[9] = PAGE_READONLY; - protection_map[10] = PAGE_SHARED; - protection_map[11] = PAGE_SHARED; + protection_map[9] = PAGE_READONLY_NOEXEC; + protection_map[10] = PAGE_SHARED_NOEXEC; + protection_map[11] = PAGE_SHARED_NOEXEC; protection_map[12] = PAGE_READONLY; protection_map[13] = PAGE_READONLY; protection_map[14] = PAGE_SHARED; diff -urNp linux-2.6.17.11/arch/sparc/mm/srmmu.c linux-2.6.17.11/arch/sparc/mm/srmmu.c --- linux-2.6.17.11/arch/sparc/mm/srmmu.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/sparc/mm/srmmu.c 2006-09-01 16:20:28.000000000 -0400 @@ -2161,6 +2161,13 @@ void __init ld_mmu_srmmu(void) BTFIXUPSET_INT(page_shared, pgprot_val(SRMMU_PAGE_SHARED)); BTFIXUPSET_INT(page_copy, pgprot_val(SRMMU_PAGE_COPY)); BTFIXUPSET_INT(page_readonly, pgprot_val(SRMMU_PAGE_RDONLY)); + +#ifdef CONFIG_PAX_PAGEEXEC + BTFIXUPSET_INT(page_shared_noexec, pgprot_val(SRMMU_PAGE_SHARED_NOEXEC)); + BTFIXUPSET_INT(page_copy_noexec, pgprot_val(SRMMU_PAGE_COPY_NOEXEC)); + BTFIXUPSET_INT(page_readonly_noexec, pgprot_val(SRMMU_PAGE_RDONLY_NOEXEC)); +#endif + BTFIXUPSET_INT(page_kernel, pgprot_val(SRMMU_PAGE_KERNEL)); page_kernel = pgprot_val(SRMMU_PAGE_KERNEL); diff -urNp linux-2.6.17.11/arch/sparc64/kernel/ptrace.c linux-2.6.17.11/arch/sparc64/kernel/ptrace.c --- linux-2.6.17.11/arch/sparc64/kernel/ptrace.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/sparc64/kernel/ptrace.c 2006-09-01 16:20:28.000000000 -0400 @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ 
-218,6 +219,11 @@ asmlinkage void do_ptrace(struct pt_regs goto out_tsk; } + if (gr_handle_ptrace(child, (long)request)) { + pt_error_return(regs, EPERM); + goto out_tsk; + } + if ((current->personality == PER_SUNOS && request == PTRACE_SUNATTACH) || (current->personality != PER_SUNOS && request == PTRACE_ATTACH)) { if (ptrace_attach(child)) { diff -urNp linux-2.6.17.11/arch/sparc64/kernel/sys_sparc.c linux-2.6.17.11/arch/sparc64/kernel/sys_sparc.c --- linux-2.6.17.11/arch/sparc64/kernel/sys_sparc.c 2006-08-23 19:30:00.000000000 -0400 +++ linux-2.6.17.11/arch/sparc64/kernel/sys_sparc.c 2006-09-01 16:20:28.000000000 -0400 @@ -141,6 +141,10 @@ unsigned long arch_get_unmapped_area(str if (filp || (flags & MAP_SHARED)) do_color_align = 1; +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP) || !filp) +#endif + if (addr) { if (do_color_align) addr = COLOUR_ALIGN(addr, pgoff); @@ -154,9 +158,9 @@ unsigned long arch_get_unmapped_area(str } if (len > mm->cached_hole_size) { - start_addr = addr = mm->free_area_cache; + start_addr = addr = mm->free_area_cache; } else { - start_addr = addr = TASK_UNMAPPED_BASE; + start_addr = addr = mm->mmap_base; mm->cached_hole_size = 0; } @@ -176,8 +180,8 @@ full_search: vma = find_vma(mm, VA_EXCLUDE_END); } if (unlikely(task_size < addr)) { - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = addr = TASK_UNMAPPED_BASE; + if (start_addr != mm->mmap_base) { + start_addr = addr = mm->mmap_base; mm->cached_hole_size = 0; goto full_search; } @@ -380,6 +384,12 @@ void arch_pick_mmap_layout(struct mm_str current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY || sysctl_legacy_va_layout) { mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + mm->get_unmapped_area = arch_get_unmapped_area; mm->unmap_area = arch_unmap_area; } else { @@ -394,6 +404,12 @@ void arch_pick_mmap_layout(struct mm_str gap = (task_size / 
6 * 5); mm->mmap_base = PAGE_ALIGN(task_size - gap - random_factor); + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base -= mm->delta_mmap + mm->delta_stack; +#endif + mm->get_unmapped_area = arch_get_unmapped_area_topdown; mm->unmap_area = arch_unmap_area_topdown; } diff -urNp linux-2.6.17.11/arch/sparc64/mm/fault.c linux-2.6.17.11/arch/sparc64/mm/fault.c --- linux-2.6.17.11/arch/sparc64/mm/fault.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/sparc64/mm/fault.c 2006-09-01 16:20:28.000000000 -0400 @@ -19,6 +19,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -253,6 +257,369 @@ cannot_handle: unhandled_fault (address, current, regs); } +#ifdef CONFIG_PAX_PAGEEXEC +#ifdef CONFIG_PAX_EMUPLT +static void pax_emuplt_close(struct vm_area_struct * vma) +{ + vma->vm_mm->call_dl_resolve = 0UL; +} + +static struct page* pax_emuplt_nopage(struct vm_area_struct *vma, unsigned long address, int *type) +{ + struct page* page; + unsigned int *kaddr; + + page = alloc_page(GFP_HIGHUSER); + if (!page) + return NOPAGE_OOM; + + kaddr = kmap(page); + memset(kaddr, 0, PAGE_SIZE); + kaddr[0] = 0x9DE3BFA8U; /* save */ + flush_dcache_page(page); + kunmap(page); + if (type) + *type = VM_FAULT_MAJOR; + return page; +} + +static struct vm_operations_struct pax_vm_ops = { + .close = pax_emuplt_close, + .nopage = pax_emuplt_nopage, +}; + +static int pax_insert_vma(struct vm_area_struct *vma, unsigned long addr) +{ + int ret; + + memset(vma, 0, sizeof(*vma)); + vma->vm_mm = current->mm; + vma->vm_start = addr; + vma->vm_end = addr + PAGE_SIZE; + vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC; + vma->vm_page_prot = protection_map[vma->vm_flags & 0x0f]; + vma->vm_ops = &pax_vm_ops; + + ret = insert_vm_struct(current->mm, vma); + if (ret) + return ret; + + ++current->mm->total_vm; + return 0; +} +#endif + +/* + * PaX: decide what to do with offenders (regs->tpc = fault address) + * + * returns 
1 when task should be killed + * 2 when patched PLT trampoline was detected + * 3 when unpatched PLT trampoline was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#ifdef CONFIG_PAX_EMUPLT + int err; + + do { /* PaX: patched PLT emulation #1 */ + unsigned int sethi1, sethi2, jmpl; + + err = get_user(sethi1, (unsigned int*)regs->tpc); + err |= get_user(sethi2, (unsigned int*)(regs->tpc+4)); + err |= get_user(jmpl, (unsigned int*)(regs->tpc+8)); + + if (err) + break; + + if ((sethi1 & 0xFFC00000U) == 0x03000000U && + (sethi2 & 0xFFC00000U) == 0x03000000U && + (jmpl & 0xFFFFE000U) == 0x81C06000U) + { + unsigned long addr; + + regs->u_regs[UREG_G1] = (sethi2 & 0x003FFFFFU) << 10; + addr = regs->u_regs[UREG_G1]; + addr += (((jmpl | 0xFFFFFFFFFFFFE000UL) ^ 0x00001000UL) + 0x00001000UL); + regs->tpc = addr; + regs->tnpc = addr+4; + return 2; + } + } while (0); + + { /* PaX: patched PLT emulation #2 */ + unsigned int ba; + + err = get_user(ba, (unsigned int*)regs->tpc); + + if (!err && (ba & 0xFFC00000U) == 0x30800000U) { + unsigned long addr; + + addr = regs->tpc + ((((ba | 0xFFFFFFFFFFC00000UL) ^ 0x00200000UL) + 0x00200000UL) << 2); + regs->tpc = addr; + regs->tnpc = addr+4; + return 2; + } + } + + do { /* PaX: patched PLT emulation #3 */ + unsigned int sethi, jmpl, nop; + + err = get_user(sethi, (unsigned int*)regs->tpc); + err |= get_user(jmpl, (unsigned int*)(regs->tpc+4)); + err |= get_user(nop, (unsigned int*)(regs->tpc+8)); + + if (err) + break; + + if ((sethi & 0xFFC00000U) == 0x03000000U && + (jmpl & 0xFFFFE000U) == 0x81C06000U && + nop == 0x01000000U) + { + unsigned long addr; + + addr = (sethi & 0x003FFFFFU) << 10; + regs->u_regs[UREG_G1] = addr; + addr += (((jmpl | 0xFFFFFFFFFFFFE000UL) ^ 0x00001000UL) + 0x00001000UL); + regs->tpc = addr; + regs->tnpc = addr+4; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #4 */ + unsigned int mov1, call, mov2; + + err = get_user(mov1, (unsigned int*)regs->tpc); + err |= 
get_user(call, (unsigned int*)(regs->tpc+4)); + err |= get_user(mov2, (unsigned int*)(regs->tpc+8)); + + if (err) + break; + + if (mov1 == 0x8210000FU && + (call & 0xC0000000U) == 0x40000000U && + mov2 == 0x9E100001U) + { + unsigned long addr; + + regs->u_regs[UREG_G1] = regs->u_regs[UREG_RETPC]; + addr = regs->tpc + 4 + ((((call | 0xFFFFFFFFC0000000UL) ^ 0x20000000UL) + 0x20000000UL) << 2); + regs->tpc = addr; + regs->tnpc = addr+4; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #5 */ + unsigned int sethi1, sethi2, or1, or2, sllx, jmpl, nop; + + err = get_user(sethi1, (unsigned int*)regs->tpc); + err |= get_user(sethi2, (unsigned int*)(regs->tpc+4)); + err |= get_user(or1, (unsigned int*)(regs->tpc+8)); + err |= get_user(or2, (unsigned int*)(regs->tpc+12)); + err |= get_user(sllx, (unsigned int*)(regs->tpc+16)); + err |= get_user(jmpl, (unsigned int*)(regs->tpc+20)); + err |= get_user(nop, (unsigned int*)(regs->tpc+24)); + + if (err) + break; + + if ((sethi1 & 0xFFC00000U) == 0x03000000U && + (sethi2 & 0xFFC00000U) == 0x0B000000U && + (or1 & 0xFFFFE000U) == 0x82106000U && + (or2 & 0xFFFFE000U) == 0x8A116000U && + sllx == 0x83287020 && + jmpl == 0x81C04005U && + nop == 0x01000000U) + { + unsigned long addr; + + regs->u_regs[UREG_G1] = ((sethi1 & 0x003FFFFFU) << 10) | (or1 & 0x000003FFU); + regs->u_regs[UREG_G1] <<= 32; + regs->u_regs[UREG_G5] = ((sethi2 & 0x003FFFFFU) << 10) | (or2 & 0x000003FFU); + addr = regs->u_regs[UREG_G1] + regs->u_regs[UREG_G5]; + regs->tpc = addr; + regs->tnpc = addr+4; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #6 */ + unsigned int sethi1, sethi2, sllx, or, jmpl, nop; + + err = get_user(sethi1, (unsigned int*)regs->tpc); + err |= get_user(sethi2, (unsigned int*)(regs->tpc+4)); + err |= get_user(sllx, (unsigned int*)(regs->tpc+8)); + err |= get_user(or, (unsigned int*)(regs->tpc+12)); + err |= get_user(jmpl, (unsigned int*)(regs->tpc+16)); + err |= get_user(nop, (unsigned 
int*)(regs->tpc+20)); + + if (err) + break; + + if ((sethi1 & 0xFFC00000U) == 0x03000000U && + (sethi2 & 0xFFC00000U) == 0x0B000000U && + sllx == 0x83287020 && + (or & 0xFFFFE000U) == 0x8A116000U && + jmpl == 0x81C04005U && + nop == 0x01000000U) + { + unsigned long addr; + + regs->u_regs[UREG_G1] = (sethi1 & 0x003FFFFFU) << 10; + regs->u_regs[UREG_G1] <<= 32; + regs->u_regs[UREG_G5] = ((sethi2 & 0x003FFFFFU) << 10) | (or & 0x3FFU); + addr = regs->u_regs[UREG_G1] + regs->u_regs[UREG_G5]; + regs->tpc = addr; + regs->tnpc = addr+4; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #7 */ + unsigned int sethi, ba, nop; + + err = get_user(sethi, (unsigned int*)regs->tpc); + err |= get_user(ba, (unsigned int*)(regs->tpc+4)); + err |= get_user(nop, (unsigned int*)(regs->tpc+8)); + + if (err) + break; + + if ((sethi & 0xFFC00000U) == 0x03000000U && + (ba & 0xFFF00000U) == 0x30600000U && + nop == 0x01000000U) + { + unsigned long addr; + + addr = (sethi & 0x003FFFFFU) << 10; + regs->u_regs[UREG_G1] = addr; + addr = regs->tpc + ((((ba | 0xFFFFFFFFFFF80000UL) ^ 0x00040000UL) + 0x00040000UL) << 2); + regs->tpc = addr; + regs->tnpc = addr+4; + return 2; + } + } while (0); + + do { /* PaX: unpatched PLT emulation step 1 */ + unsigned int sethi, ba, nop; + + err = get_user(sethi, (unsigned int*)regs->tpc); + err |= get_user(ba, (unsigned int*)(regs->tpc+4)); + err |= get_user(nop, (unsigned int*)(regs->tpc+8)); + + if (err) + break; + + if ((sethi & 0xFFC00000U) == 0x03000000U && + ((ba & 0xFFC00000U) == 0x30800000U || (ba & 0xFFF80000U) == 0x30680000U) && + nop == 0x01000000U) + { + unsigned long addr; + unsigned int save, call; + + if ((ba & 0xFFC00000U) == 0x30800000U) + addr = regs->tpc + 4 + ((((ba | 0xFFFFFFFFFFC00000UL) ^ 0x00200000UL) + 0x00200000UL) << 2); + else + addr = regs->tpc + 4 + ((((ba | 0xFFFFFFFFFFF80000UL) ^ 0x00040000UL) + 0x00040000UL) << 2); + + err = get_user(save, (unsigned int*)addr); + err |= get_user(call, (unsigned int*)(addr+4)); + 
err |= get_user(nop, (unsigned int*)(addr+8)); + if (err) + break; + + if (save == 0x9DE3BFA8U && + (call & 0xC0000000U) == 0x40000000U && + nop == 0x01000000U) + { + struct vm_area_struct *vma; + unsigned long call_dl_resolve; + + down_read(&current->mm->mmap_sem); + call_dl_resolve = current->mm->call_dl_resolve; + up_read(&current->mm->mmap_sem); + if (likely(call_dl_resolve)) + goto emulate; + + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + + down_write(&current->mm->mmap_sem); + if (current->mm->call_dl_resolve) { + call_dl_resolve = current->mm->call_dl_resolve; + up_write(&current->mm->mmap_sem); + if (vma) kmem_cache_free(vm_area_cachep, vma); + goto emulate; + } + + call_dl_resolve = get_unmapped_area(NULL, 0UL, PAGE_SIZE, 0UL, MAP_PRIVATE); + if (!vma || (call_dl_resolve & ~PAGE_MASK)) { + up_write(&current->mm->mmap_sem); + if (vma) kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + if (pax_insert_vma(vma, call_dl_resolve)) { + up_write(&current->mm->mmap_sem); + kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + current->mm->call_dl_resolve = call_dl_resolve; + up_write(&current->mm->mmap_sem); + +emulate: + regs->u_regs[UREG_G1] = (sethi & 0x003FFFFFU) << 10; + regs->tpc = call_dl_resolve; + regs->tnpc = addr+4; + return 3; + } + } + } while (0); + + do { /* PaX: unpatched PLT emulation step 2 */ + unsigned int save, call, nop; + + err = get_user(save, (unsigned int*)(regs->tpc-4)); + err |= get_user(call, (unsigned int*)regs->tpc); + err |= get_user(nop, (unsigned int*)(regs->tpc+4)); + if (err) + break; + + if (save == 0x9DE3BFA8U && + (call & 0xC0000000U) == 0x40000000U && + nop == 0x01000000U) + { + unsigned long dl_resolve = regs->tpc + ((((call | 0xFFFFFFFFC0000000UL) ^ 0x20000000UL) + 0x20000000UL) << 2); + + regs->u_regs[UREG_RETPC] = regs->tpc; + regs->tpc = dl_resolve; + regs->tnpc = dl_resolve+4; + return 3; + } + } while (0); +#endif + + return 1; +} + +void pax_report_insns(void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: ");
+ for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int*)pc+i)) + printk("???????? "); + else + printk("%08x ", c); + } + printk("\n"); +} +#endif + asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) { struct mm_struct *mm = current->mm; @@ -295,8 +662,10 @@ asmlinkage void __kprobes do_sparc64_fau goto intr_or_no_mm; if (test_thread_flag(TIF_32BIT)) { - if (!(regs->tstate & TSTATE_PRIV)) + if (!(regs->tstate & TSTATE_PRIV)) { regs->tpc &= 0xffffffff; + regs->tnpc &= 0xffffffff; + } address &= 0xffffffff; } @@ -313,6 +682,29 @@ asmlinkage void __kprobes do_sparc64_fau if (!vma) goto bad_area; +#ifdef CONFIG_PAX_PAGEEXEC + /* PaX: detect ITLB misses on non-exec pages */ + if ((mm->pax_flags & MF_PAX_PAGEEXEC) && vma->vm_start <= address && + !(vma->vm_flags & VM_EXEC) && (fault_code & FAULT_CODE_ITLB)) + { + if (address != regs->tpc) + goto good_area; + + up_read(&mm->mmap_sem); + switch (pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_EMUPLT + case 2: + case 3: + return; +#endif + + } + pax_report_fault(regs, (void*)regs->tpc, (void*)(regs->u_regs[UREG_FP] + STACK_BIAS)); + do_exit(SIGKILL); + } +#endif + /* Pure DTLB misses do not tell us whether the fault causing * load/store/atomic was a write or not, it only says that there * was no match. So in such a case we (carefully) read the diff -urNp linux-2.6.17.11/arch/v850/kernel/module.c linux-2.6.17.11/arch/v850/kernel/module.c --- linux-2.6.17.11/arch/v850/kernel/module.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/v850/kernel/module.c 2006-09-01 16:20:28.000000000 -0400 @@ -150,8 +150,8 @@ static uint32_t do_plt_call (void *locat tramp[1] = ((val >> 16) & 0xffff) + 0x610000; /* ...; jmp r1 */ /* Init, or core PLT? 
*/ - if (location >= mod->module_core - && location < mod->module_core + mod->core_size) + if (location >= mod->module_core_rx + && location < mod->module_core_rx + mod->core_size_rx) entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr; else entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr; diff -urNp linux-2.6.17.11/arch/x86_64/boot/compressed/head.S linux-2.6.17.11/arch/x86_64/boot/compressed/head.S --- linux-2.6.17.11/arch/x86_64/boot/compressed/head.S 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/x86_64/boot/compressed/head.S 2006-09-01 16:20:28.000000000 -0400 @@ -41,11 +41,13 @@ startup_32: movl %eax,%gs lss stack_start,%esp + movl 0x000000,%ecx xorl %eax,%eax 1: incl %eax # check that A20 really IS enabled movl %eax,0x000000 # loop forever if it isn't cmpl %eax,0x100000 je 1b + movl %ecx,0x000000 /* * Initialize eflags. Some BIOS's leave bits like NT set. This would diff -urNp linux-2.6.17.11/arch/x86_64/ia32/ia32_binfmt.c linux-2.6.17.11/arch/x86_64/ia32/ia32_binfmt.c --- linux-2.6.17.11/arch/x86_64/ia32/ia32_binfmt.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/x86_64/ia32/ia32_binfmt.c 2006-09-01 16:20:28.000000000 -0400 @@ -186,6 +186,17 @@ struct elf_prpsinfo //#include #include +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE(tsk) 0x08048000UL + +#define PAX_DELTA_MMAP_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_MMAP_LEN(tsk) 16 +#define PAX_DELTA_EXEC_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_EXEC_LEN(tsk) 16 +#define PAX_DELTA_STACK_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_STACK_LEN(tsk) 16 +#endif + typedef struct user_i387_ia32_struct elf_fpregset_t; typedef struct user32_fxsr_struct elf_fpxregset_t; diff -urNp linux-2.6.17.11/arch/x86_64/ia32/mmap32.c linux-2.6.17.11/arch/x86_64/ia32/mmap32.c --- linux-2.6.17.11/arch/x86_64/ia32/mmap32.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/x86_64/ia32/mmap32.c 2006-09-01 16:20:28.000000000 -0400 @@ -68,10 +68,22 @@ void 
ia32_pick_mmap_layout(struct mm_str (current->personality & ADDR_COMPAT_LAYOUT) || current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) { mm->mmap_base = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + mm->get_unmapped_area = arch_get_unmapped_area; mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(mm); + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base -= mm->delta_mmap + mm->delta_stack; +#endif + mm->get_unmapped_area = arch_get_unmapped_area_topdown; mm->unmap_area = arch_unmap_area_topdown; } diff -urNp linux-2.6.17.11/arch/x86_64/kernel/process.c linux-2.6.17.11/arch/x86_64/kernel/process.c --- linux-2.6.17.11/arch/x86_64/kernel/process.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/x86_64/kernel/process.c 2006-09-01 16:20:28.000000000 -0400 @@ -831,9 +831,3 @@ int dump_task_regs(struct task_struct *t return 1; } -unsigned long arch_align_stack(unsigned long sp) -{ - if (randomize_va_space) - sp -= get_random_int() % 8192; - return sp & ~0xf; -} diff -urNp linux-2.6.17.11/arch/x86_64/kernel/ptrace.c linux-2.6.17.11/arch/x86_64/kernel/ptrace.c --- linux-2.6.17.11/arch/x86_64/kernel/ptrace.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/x86_64/kernel/ptrace.c 2006-09-01 16:20:28.000000000 -0400 @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff -urNp linux-2.6.17.11/arch/x86_64/kernel/setup64.c linux-2.6.17.11/arch/x86_64/kernel/setup64.c --- linux-2.6.17.11/arch/x86_64/kernel/setup64.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/x86_64/kernel/setup64.c 2006-09-01 16:20:28.000000000 -0400 @@ -38,7 +38,6 @@ struct desc_ptr idt_descr = { 256 * 16 - char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); unsigned long __supported_pte_mask __read_mostly = ~0UL; -static int do_not_nx __cpuinitdata = 0; /* noexec=on|off 
Control non executable mappings for 64bit processes. @@ -50,16 +49,14 @@ int __init nonx_setup(char *str) { if (!strncmp(str, "on", 2)) { __supported_pte_mask |= _PAGE_NX; - do_not_nx = 0; } else if (!strncmp(str, "off", 3)) { - do_not_nx = 1; __supported_pte_mask &= ~_PAGE_NX; } return 1; } __setup("noexec=", nonx_setup); /* parsed early actually */ -int force_personality32 = 0; +int force_personality32; /* noexec32=on|off Control non executable heap for 32bit processes. @@ -173,7 +170,7 @@ void __cpuinit check_efer(void) unsigned long efer; rdmsrl(MSR_EFER, efer); - if (!(efer & EFER_NX) || do_not_nx) { + if (!(efer & EFER_NX)) { __supported_pte_mask &= ~_PAGE_NX; } } diff -urNp linux-2.6.17.11/arch/x86_64/kernel/sys_x86_64.c linux-2.6.17.11/arch/x86_64/kernel/sys_x86_64.c --- linux-2.6.17.11/arch/x86_64/kernel/sys_x86_64.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/x86_64/kernel/sys_x86_64.c 2006-09-01 16:20:28.000000000 -0400 @@ -66,8 +66,8 @@ out: return error; } -static void find_start_end(unsigned long flags, unsigned long *begin, - unsigned long *end) +static void find_start_end(struct mm_struct *mm, unsigned long flags, + unsigned long *begin, unsigned long *end) { if (!test_thread_flag(TIF_IA32) && (flags & MAP_32BIT)) { /* This is usually used needed to map code in small @@ -80,7 +80,7 @@ static void find_start_end(unsigned long *begin = 0x40000000; *end = 0x80000000; } else { - *begin = TASK_UNMAPPED_BASE; + *begin = mm->mmap_base; *end = TASK_SIZE; } } @@ -94,11 +94,15 @@ arch_get_unmapped_area(struct file *filp unsigned long start_addr; unsigned long begin, end; - find_start_end(flags, &begin, &end); + find_start_end(mm, flags, &begin, &end); if (len > end) return -ENOMEM; +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP) || !filp) +#endif + if (addr) { addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); diff -urNp linux-2.6.17.11/arch/x86_64/mm/fault.c linux-2.6.17.11/arch/x86_64/mm/fault.c --- 
linux-2.6.17.11/arch/x86_64/mm/fault.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/x86_64/mm/fault.c 2006-09-01 16:20:28.000000000 -0400 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -294,6 +295,33 @@ static int vmalloc_fault(unsigned long a return 0; } +#ifdef CONFIG_PAX_PAGEEXEC +void pax_report_insns(void *pc, void *sp) +{ + long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 20; i++) { + unsigned char c; + if (get_user(c, (unsigned char __user *)pc+i)) + printk("?? "); + else + printk("%02x ", c); + } + printk("\n"); + + printk(KERN_ERR "PAX: bytes at SP-8: "); + for (i = -1; i < 10; i++) { + unsigned long c; + if (get_user(c, (unsigned long __user *)sp+i)) + printk("???????????????? "); + else + printk("%016lx ", c); + } + printk("\n"); +} +#endif + int page_fault_trace = 0; int exception_trace = 1; @@ -423,6 +451,8 @@ asmlinkage void __kprobes do_page_fault( good_area: info.si_code = SEGV_ACCERR; write = 0; + if ((error_code & PF_INSTR) && !(vma->vm_flags & VM_EXEC)) + goto bad_area; switch (error_code & (PF_PROT|PF_WRITE)) { default: /* 3: write, present */ /* fall through */ @@ -489,7 +519,14 @@ bad_area_nosemaphore: tsk->comm, tsk->pid, address, regs->rip, regs->rsp, error_code); } - + +#ifdef CONFIG_PAX_PAGEEXEC + if (mm && (mm->pax_flags & MF_PAX_PAGEEXEC) && (error_code & 16)) { + pax_report_fault(regs, (void*)regs->rip, (void*)regs->rsp); + do_exit(SIGKILL); + } +#endif + tsk->thread.cr2 = address; /* Kernel addresses are always protection faults */ tsk->thread.error_code = error_code | (address >= TASK_SIZE); diff -urNp linux-2.6.17.11/arch/x86_64/mm/mmap.c linux-2.6.17.11/arch/x86_64/mm/mmap.c --- linux-2.6.17.11/arch/x86_64/mm/mmap.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/arch/x86_64/mm/mmap.c 2006-09-01 16:20:28.000000000 -0400 @@ -24,6 +24,12 @@ void arch_pick_mmap_layout(struct mm_str unsigned rnd = get_random_int() & 0xfffffff; mm->mmap_base += ((unsigned long)rnd) 
<< PAGE_SHIFT; } + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + mm->get_unmapped_area = arch_get_unmapped_area; mm->unmap_area = arch_unmap_area; } diff -urNp linux-2.6.17.11/Documentation/dontdiff linux-2.6.17.11/Documentation/dontdiff --- linux-2.6.17.11/Documentation/dontdiff 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/Documentation/dontdiff 2006-09-01 16:20:28.000000000 -0400 @@ -55,7 +55,7 @@ aic7*seq.h* aicasm aicdb.h* asm -asm-offsets.* +asm-offsets.h asm_offsets.* autoconf.h* bbootsect diff -urNp linux-2.6.17.11/drivers/char/agp/frontend.c linux-2.6.17.11/drivers/char/agp/frontend.c --- linux-2.6.17.11/drivers/char/agp/frontend.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/drivers/char/agp/frontend.c 2006-09-01 16:20:28.000000000 -0400 @@ -841,7 +841,7 @@ static int agpioc_reserve_wrap(struct ag if (copy_from_user(&reserve, arg, sizeof(struct agp_region))) return -EFAULT; - if ((unsigned) reserve.seg_count >= ~0U/sizeof(struct agp_segment)) + if ((unsigned) reserve.seg_count >= ~0U/sizeof(struct agp_segment_priv)) return -EFAULT; client = agp_find_client_by_pid(reserve.pid); diff -urNp linux-2.6.17.11/drivers/char/keyboard.c linux-2.6.17.11/drivers/char/keyboard.c --- linux-2.6.17.11/drivers/char/keyboard.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/drivers/char/keyboard.c 2006-09-01 16:20:28.000000000 -0400 @@ -618,6 +618,16 @@ static void k_spec(struct vc_data *vc, u kbd->kbdmode == VC_MEDIUMRAW) && value != KVAL(K_SAK)) return; /* SAK is allowed even in raw mode */ + +#if defined(CONFIG_GRKERNSEC_PROC) || defined(CONFIG_GRKERNSEC_PROC_MEMMAP) + { + void *func = fn_handler[value]; + if (func == fn_show_state || func == fn_show_ptregs || + func == fn_show_mem) + return; + } +#endif + fn_handler[value](vc, regs); } diff -urNp linux-2.6.17.11/drivers/char/mem.c linux-2.6.17.11/drivers/char/mem.c --- linux-2.6.17.11/drivers/char/mem.c 2006-08-07 
00:18:54.000000000 -0400 +++ linux-2.6.17.11/drivers/char/mem.c 2006-09-01 16:20:28.000000000 -0400 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -36,6 +37,10 @@ # include #endif +#ifdef CONFIG_GRKERNSEC +extern struct file_operations grsec_fops; +#endif + /* * Architectures vary in how they handle caching for addresses * outside of main memory. @@ -175,6 +180,11 @@ static ssize_t write_mem(struct file * f if (!valid_phys_addr_range(p, count)) return -EFAULT; +#ifdef CONFIG_GRKERNSEC_KMEM + gr_handle_mem_write(); + return -EPERM; +#endif + written = 0; #ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED @@ -251,6 +261,11 @@ static int mmap_mem(struct file * file, size, vma->vm_page_prot); +#ifdef CONFIG_GRKERNSEC_KMEM + if (gr_handle_mem_mmap(vma->vm_pgoff << PAGE_SHIFT, vma)) + return -EPERM; +#endif + /* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */ if (remap_pfn_range(vma, vma->vm_start, @@ -478,6 +493,11 @@ static ssize_t write_kmem(struct file * ssize_t written; char * kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */ +#ifdef CONFIG_GRKERNSEC_KMEM + gr_handle_kmem_write(); + return -EPERM; +#endif + if (p < (unsigned long) high_memory) { wrote = count; @@ -618,7 +638,23 @@ static inline size_t read_zero_pagealign count = size; zap_page_range(vma, addr, count, NULL); - zeromap_page_range(vma, addr, count, PAGE_COPY); + zeromap_page_range(vma, addr, count, vma->vm_page_prot); + +#ifdef CONFIG_PAX_SEGMEXEC + if (vma->vm_flags & VM_MIRROR) { + unsigned long addr_m; + struct vm_area_struct * vma_m; + + addr_m = vma->vm_start + vma->vm_mirror; + vma_m = find_vma(mm, addr_m); + if (vma_m && vma_m->vm_start == addr_m && (vma_m->vm_flags & VM_MIRROR)) { + addr_m = addr + vma->vm_mirror; + zap_page_range(vma_m, addr_m, count, NULL); + } else + printk(KERN_ERR "PAX: VMMIRROR: read_zero bug, %08lx, %08lx\n", + addr, vma->vm_start); + } +#endif size -= count; buf += count; @@ -767,6 +803,16 @@ static loff_t memory_lseek(struct file 
* static int open_port(struct inode * inode, struct file * filp) { +#ifdef CONFIG_GRKERNSEC_KMEM + gr_handle_open_port(); + return -EPERM; +#endif + + return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; +} + +static int open_mem(struct inode * inode, struct file * filp) +{ return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; } @@ -774,7 +820,6 @@ static int open_port(struct inode * inod #define full_lseek null_lseek #define write_zero write_null #define read_full read_zero -#define open_mem open_port #define open_kmem open_mem #define open_oldmem open_mem @@ -897,6 +942,11 @@ static int memory_open(struct inode * in filp->f_op = &oldmem_fops; break; #endif +#ifdef CONFIG_GRKERNSEC + case 13: + filp->f_op = &grsec_fops; + break; +#endif default: return -ENXIO; } @@ -929,6 +979,9 @@ static const struct { #ifdef CONFIG_CRASH_DUMP {12,"oldmem", S_IRUSR | S_IWUSR | S_IRGRP, &oldmem_fops}, #endif +#ifdef CONFIG_GRKERNSEC + {13,"grsec", S_IRUSR | S_IWUGO, &grsec_fops}, +#endif }; static struct class *mem_class; diff -urNp linux-2.6.17.11/drivers/char/random.c linux-2.6.17.11/drivers/char/random.c --- linux-2.6.17.11/drivers/char/random.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/drivers/char/random.c 2006-09-01 16:20:28.000000000 -0400 @@ -249,8 +249,13 @@ /* * Configuration information */ +#ifdef CONFIG_GRKERNSEC_RANDNET +#define INPUT_POOL_WORDS 512 +#define OUTPUT_POOL_WORDS 128 +#else #define INPUT_POOL_WORDS 128 #define OUTPUT_POOL_WORDS 32 +#endif #define SEC_XFER_SIZE 512 /* @@ -287,10 +292,17 @@ static struct poolinfo { int poolwords; int tap1, tap2, tap3, tap4, tap5; } poolinfo_table[] = { +#ifdef CONFIG_GRKERNSEC_RANDNET + /* x^512 + x^411 + x^308 + x^208 +x^104 + x + 1 -- 225 */ + { 512, 411, 308, 208, 104, 1 }, + /* x^128 + x^103 + x^76 + x^51 + x^25 + x + 1 -- 105 */ + { 128, 103, 76, 51, 25, 1 }, +#else /* x^128 + x^103 + x^76 + x^51 +x^25 + x + 1 -- 105 */ { 128, 103, 76, 51, 25, 1 }, /* x^32 + x^26 + x^20 + x^14 + x^7 + x + 1 -- 15 */ { 32, 26, 20, 14, 7, 1 
}, +#endif #if 0 /* x^2048 + x^1638 + x^1231 + x^819 + x^411 + x + 1 -- 115 */ { 2048, 1638, 1231, 819, 411, 1 }, @@ -1658,3 +1670,25 @@ randomize_range(unsigned long start, uns return 0; return PAGE_ALIGN(get_random_int() % range + start); } + +#if defined(CONFIG_PAX_ASLR) || defined(CONFIG_GRKERNSEC) +unsigned long pax_get_random_long(void) +{ + static time_t rekey_time; + static __u32 secret[12]; + time_t t; + + /* + * Pick a random secret every REKEY_INTERVAL seconds. + */ + t = get_seconds(); + if (!rekey_time || (t - rekey_time) > REKEY_INTERVAL) { + rekey_time = t; + get_random_bytes(secret, sizeof(secret)); + } + + secret[1] = half_md4_transform(secret+8, secret); + secret[0] = half_md4_transform(secret+8, secret); + return *(unsigned long *)secret; +} +#endif diff -urNp linux-2.6.17.11/drivers/char/vt_ioctl.c linux-2.6.17.11/drivers/char/vt_ioctl.c --- linux-2.6.17.11/drivers/char/vt_ioctl.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/drivers/char/vt_ioctl.c 2006-09-01 16:20:28.000000000 -0400 @@ -96,6 +96,12 @@ do_kdsk_ioctl(int cmd, struct kbentry __ case KDSKBENT: if (!perm) return -EPERM; + +#ifdef CONFIG_GRKERNSEC + if (!capable(CAP_SYS_TTY_CONFIG)) + return -EPERM; +#endif + if (!i && v == K_NOSUCHMAP) { /* disallocate map */ key_map = key_maps[s]; @@ -236,6 +242,13 @@ do_kdgkb_ioctl(int cmd, struct kbsentry goto reterr; } +#ifdef CONFIG_GRKERNSEC + if (!capable(CAP_SYS_TTY_CONFIG)) { + ret = -EPERM; + goto reterr; + } +#endif + q = func_table[i]; first_free = funcbufptr + (funcbufsize - funcbufleft); for (j = i+1; j < MAX_NR_FUNC && !func_table[j]; j++) diff -urNp linux-2.6.17.11/drivers/ieee1394/hosts.c linux-2.6.17.11/drivers/ieee1394/hosts.c --- linux-2.6.17.11/drivers/ieee1394/hosts.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/drivers/ieee1394/hosts.c 2006-09-01 16:20:28.000000000 -0400 @@ -75,6 +75,7 @@ static int dummy_isoctl(struct hpsb_iso } static struct hpsb_host_driver dummy_driver = { + .name = "dummy", 
.transmit_packet = dummy_transmit_packet, .devctl = dummy_devctl, .isoctl = dummy_isoctl diff -urNp linux-2.6.17.11/drivers/ieee1394/ohci1394.c linux-2.6.17.11/drivers/ieee1394/ohci1394.c --- linux-2.6.17.11/drivers/ieee1394/ohci1394.c 2006-08-23 19:30:00.000000000 -0400 +++ linux-2.6.17.11/drivers/ieee1394/ohci1394.c 2006-09-01 16:20:28.000000000 -0400 @@ -162,9 +162,9 @@ printk(level "%s: " fmt "\n" , OHCI1394_ printk(level "%s: fw-host%d: " fmt "\n" , OHCI1394_DRIVER_NAME, ohci->host->id , ## args) /* Module Parameters */ -static int phys_dma = 1; +static int phys_dma = 0; module_param(phys_dma, int, 0644); -MODULE_PARM_DESC(phys_dma, "Enable physical dma (default = 1)."); +MODULE_PARM_DESC(phys_dma, "Enable physical dma (default = 0)."); static void dma_trm_tasklet(unsigned long data); static void dma_trm_reset(struct dma_trm_ctx *d); diff -urNp linux-2.6.17.11/drivers/mtd/devices/doc2001.c linux-2.6.17.11/drivers/mtd/devices/doc2001.c --- linux-2.6.17.11/drivers/mtd/devices/doc2001.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/drivers/mtd/devices/doc2001.c 2006-09-01 16:20:28.000000000 -0400 @@ -423,6 +423,8 @@ static int doc_read_ecc (struct mtd_info /* Don't allow read past end of device */ if (from >= this->totlen) return -EINVAL; + if (!len) + return -EINVAL; /* Don't allow a single read to cross a 512-byte block boundary */ if (from + len > ((from | 0x1ff) + 1)) diff -urNp linux-2.6.17.11/drivers/net/pcnet32.c linux-2.6.17.11/drivers/net/pcnet32.c --- linux-2.6.17.11/drivers/net/pcnet32.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/drivers/net/pcnet32.c 2006-09-01 16:20:28.000000000 -0400 @@ -81,7 +81,7 @@ static int cards_found; /* * VLB I/O addresses */ -static unsigned int pcnet32_portlist[] __initdata = +static unsigned int pcnet32_portlist[] __devinitdata = { 0x300, 0x320, 0x340, 0x360, 0 }; static int pcnet32_debug = 0; diff -urNp linux-2.6.17.11/drivers/pci/proc.c linux-2.6.17.11/drivers/pci/proc.c --- 
linux-2.6.17.11/drivers/pci/proc.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/drivers/pci/proc.c 2006-09-01 16:20:28.000000000 -0400 @@ -473,7 +473,15 @@ static int __init pci_proc_init(void) { struct proc_dir_entry *entry; struct pci_dev *dev = NULL; +#ifdef CONFIG_GRKERNSEC_PROC_ADD +#ifdef CONFIG_GRKERNSEC_PROC_USER + proc_bus_pci_dir = proc_mkdir_mode("pci", S_IRUSR | S_IXUSR, proc_bus); +#elif CONFIG_GRKERNSEC_PROC_USERGROUP + proc_bus_pci_dir = proc_mkdir_mode("pci", S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP, proc_bus); +#endif +#else proc_bus_pci_dir = proc_mkdir("pci", proc_bus); +#endif entry = create_proc_entry("devices", 0, proc_bus_pci_dir); if (entry) entry->proc_fops = &proc_bus_pci_dev_operations; diff -urNp linux-2.6.17.11/drivers/pnp/pnpbios/bioscalls.c linux-2.6.17.11/drivers/pnp/pnpbios/bioscalls.c --- linux-2.6.17.11/drivers/pnp/pnpbios/bioscalls.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/drivers/pnp/pnpbios/bioscalls.c 2006-09-01 16:20:28.000000000 -0400 @@ -65,7 +65,7 @@ set_base(gdt[(selname) >> 3], (u32)(addr set_limit(gdt[(selname) >> 3], size); \ } while(0) -static struct desc_struct bad_bios_desc = { 0, 0x00409200 }; +static struct desc_struct bad_bios_desc = { 0, 0x00409300 }; /* * At some point we want to use this stack frame pointer to unwind @@ -93,6 +93,10 @@ static inline u16 call_pnp_bios(u16 func struct desc_struct save_desc_40; int cpu; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; +#endif + /* * PnP BIOSes are generally not terribly re-entrant. * Also, don't rely on them to save everything correctly. @@ -107,6 +111,10 @@ static inline u16 call_pnp_bios(u16 func /* On some boxes IRQ's during PnP BIOS calls are deadly. 
*/ spin_lock_irqsave(&pnp_bios_lock, flags); +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(cr0); +#endif + /* The lock prevents us bouncing CPU here */ if (ts1_size) Q2_SET_SEL(smp_processor_id(), PNP_TS1, ts1_base, ts1_size); @@ -142,9 +150,14 @@ static inline u16 call_pnp_bios(u16 func "i" (0) : "memory" ); - spin_unlock_irqrestore(&pnp_bios_lock, flags); get_cpu_gdt_table(cpu)[0x40 / 8] = save_desc_40; + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + + spin_unlock_irqrestore(&pnp_bios_lock, flags); put_cpu(); /* If we get here and this is set then the PnP BIOS faulted on us. */ diff -urNp linux-2.6.17.11/drivers/video/i810/i810_main.c linux-2.6.17.11/drivers/video/i810/i810_main.c --- linux-2.6.17.11/drivers/video/i810/i810_main.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/drivers/video/i810/i810_main.c 2006-09-01 16:20:28.000000000 -0400 @@ -1508,7 +1508,7 @@ static int i810fb_cursor(struct fb_info int size = ((cursor->image.width + 7) >> 3) * cursor->image.height; int i; - u8 *data = kmalloc(64 * 8, GFP_ATOMIC); + u8 *data = kmalloc(64 * 8, GFP_KERNEL); if (data == NULL) return -ENOMEM; diff -urNp linux-2.6.17.11/drivers/video/vesafb.c linux-2.6.17.11/drivers/video/vesafb.c --- linux-2.6.17.11/drivers/video/vesafb.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/drivers/video/vesafb.c 2006-09-01 16:20:28.000000000 -0400 @@ -267,7 +267,7 @@ static int __init vesafb_probe(struct pl size_remap = size_total; vesafb_fix.smem_len = size_remap; -#ifndef __i386__ +#if !defined(__i386__) || defined(CONFIG_PAX_KERNEXEC) screen_info.vesapm_seg = 0; #endif diff -urNp linux-2.6.17.11/fs/binfmt_aout.c linux-2.6.17.11/fs/binfmt_aout.c --- linux-2.6.17.11/fs/binfmt_aout.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/fs/binfmt_aout.c 2006-09-01 16:20:28.000000000 -0400 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -124,10 +125,12 @@ static int aout_core_dump(long signr, st /* If the size of the dump 
file exceeds the rlimit, then see what would happen if we wrote the stack, but not the data area. */ #ifdef __sparc__ + gr_learn_resource(current, RLIMIT_CORE, dump.u_dsize+dump.u_ssize, 1); if ((dump.u_dsize+dump.u_ssize) > current->signal->rlim[RLIMIT_CORE].rlim_cur) dump.u_dsize = 0; #else + gr_learn_resource(current, RLIMIT_CORE, (dump.u_dsize+dump.u_ssize+1) * PAGE_SIZE, 1); if ((dump.u_dsize+dump.u_ssize+1) * PAGE_SIZE > current->signal->rlim[RLIMIT_CORE].rlim_cur) dump.u_dsize = 0; @@ -135,10 +138,12 @@ static int aout_core_dump(long signr, st /* Make sure we have enough room to write the stack and data areas. */ #ifdef __sparc__ + gr_learn_resource(current, RLIMIT_CORE, dump.u_ssize, 1); if ((dump.u_ssize) > current->signal->rlim[RLIMIT_CORE].rlim_cur) dump.u_ssize = 0; #else + gr_learn_resource(current, RLIMIT_CORE, (dump.u_ssize+1) * PAGE_SIZE, 1); if ((dump.u_ssize+1) * PAGE_SIZE > current->signal->rlim[RLIMIT_CORE].rlim_cur) dump.u_ssize = 0; @@ -288,6 +293,8 @@ static int load_aout_binary(struct linux rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur; if (rlim >= RLIM_INFINITY) rlim = ~0; + + gr_learn_resource(current, RLIMIT_DATA, ex.a_data + ex.a_bss, 1); if (ex.a_data + ex.a_bss > rlim) return -ENOMEM; @@ -320,6 +327,28 @@ static int load_aout_binary(struct linux current->mm->mmap = NULL; compute_creds(bprm); current->flags &= ~PF_FORKNOEXEC; + +#if defined(CONFIG_PAX_NOEXEC) || defined(CONFIG_PAX_ASLR) + current->mm->pax_flags = 0UL; +#endif + +#ifdef CONFIG_PAX_PAGEEXEC + if (!(N_FLAGS(ex) & F_PAX_PAGEEXEC)) { + current->mm->pax_flags |= MF_PAX_PAGEEXEC; + +#ifdef CONFIG_PAX_EMUTRAMP + if (N_FLAGS(ex) & F_PAX_EMUTRAMP) + current->mm->pax_flags |= MF_PAX_EMUTRAMP; +#endif + +#ifdef CONFIG_PAX_MPROTECT + if (!(N_FLAGS(ex) & F_PAX_MPROTECT)) + current->mm->pax_flags |= MF_PAX_MPROTECT; +#endif + + } +#endif + #ifdef __sparc__ if (N_MAGIC(ex) == NMAGIC) { loff_t pos = fd_offset; @@ -415,7 +444,7 @@ static int load_aout_binary(struct linux 
down_write(&current->mm->mmap_sem); error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data, - PROT_READ | PROT_WRITE | PROT_EXEC, + PROT_READ | PROT_WRITE, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE, fd_offset + ex.a_text); up_write(&current->mm->mmap_sem); diff -urNp linux-2.6.17.11/fs/binfmt_elf.c linux-2.6.17.11/fs/binfmt_elf.c --- linux-2.6.17.11/fs/binfmt_elf.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/fs/binfmt_elf.c 2006-09-01 16:20:28.000000000 -0400 @@ -40,11 +40,16 @@ #include #include #include +#include #include #include #include +#ifdef CONFIG_PAX_SEGMEXEC +#include +#endif + #include static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs); @@ -92,6 +97,8 @@ static struct linux_binfmt elf_format = static int set_brk(unsigned long start, unsigned long end) { + unsigned long e = end; + start = ELF_PAGEALIGN(start); end = ELF_PAGEALIGN(end); if (end > start) { @@ -102,7 +109,7 @@ static int set_brk(unsigned long start, if (BAD_ADDR(addr)) return addr; } - current->mm->start_brk = current->mm->brk = end; + current->mm->start_brk = current->mm->brk = e; return 0; } @@ -318,10 +325,9 @@ static unsigned long load_elf_interp(str { struct elf_phdr *elf_phdata; struct elf_phdr *eppnt; - unsigned long load_addr = 0; - int load_addr_set = 0; + unsigned long load_addr = 0, min_addr, max_addr, task_size = TASK_SIZE; unsigned long last_bss = 0, elf_bss = 0; - unsigned long error = ~0UL; + unsigned long error = -EINVAL; int retval, i, size; /* First of all, some simple consistency checks */ @@ -360,42 +366,64 @@ goto out_close; } +#ifdef CONFIG_PAX_SEGMEXEC + if (current->mm->pax_flags & MF_PAX_SEGMEXEC) + task_size = SEGMEXEC_TASK_SIZE; +#endif + eppnt = elf_phdata; + min_addr = task_size; + max_addr = 0; + error = -ENOMEM; + for (i=0; i<interp_elf_ex->e_phnum; i++, eppnt++) { - if (eppnt->p_type == PT_LOAD) { - int elf_type = MAP_PRIVATE | MAP_DENYWRITE; - int elf_prot = 0; - unsigned long vaddr = 0; - unsigned long k, map_addr; - - if 
(eppnt->p_flags & PF_R) elf_prot = PROT_READ; - if (eppnt->p_flags & PF_W) elf_prot |= PROT_WRITE; - if (eppnt->p_flags & PF_X) elf_prot |= PROT_EXEC; - vaddr = eppnt->p_vaddr; - if (interp_elf_ex->e_type == ET_EXEC || load_addr_set) - elf_type |= MAP_FIXED; - - map_addr = elf_map(interpreter, load_addr + vaddr, eppnt, elf_prot, elf_type); - error = map_addr; - if (BAD_ADDR(map_addr)) - goto out_close; - - if (!load_addr_set && interp_elf_ex->e_type == ET_DYN) { - load_addr = map_addr - ELF_PAGESTART(vaddr); - load_addr_set = 1; - } + if (eppnt->p_type != PT_LOAD) + continue; /* * Check to see if the section's size will overflow the * allowed task size. Note that p_filesz must always be * <= p_memsize so it is only necessary to check p_memsz. */ - k = load_addr + eppnt->p_vaddr; - if (BAD_ADDR(k) || eppnt->p_filesz > eppnt->p_memsz || - eppnt->p_memsz > TASK_SIZE || TASK_SIZE - eppnt->p_memsz < k) { - error = -ENOMEM; + if (eppnt->p_filesz > eppnt->p_memsz || eppnt->p_vaddr >= eppnt->p_vaddr + eppnt->p_memsz) + goto out_close; + + if (min_addr > ELF_PAGESTART(eppnt->p_vaddr)) + min_addr = ELF_PAGESTART(eppnt->p_vaddr); + if (max_addr < ELF_PAGEALIGN(eppnt->p_vaddr + eppnt->p_memsz)) + max_addr = ELF_PAGEALIGN(eppnt->p_vaddr + eppnt->p_memsz); + } + if (min_addr >= max_addr) goto out_close; - } + + eppnt = elf_phdata; + for (i=0; i<interp_elf_ex->e_phnum; i++, eppnt++) { + int elf_type = MAP_PRIVATE | MAP_DENYWRITE | MAP_FIXED; + int elf_prot = 0; + unsigned long vaddr; + unsigned long k, map_addr; + + if (eppnt->p_type != PT_LOAD) + continue; + + if (eppnt->p_flags & PF_R) elf_prot = PROT_READ; + if (eppnt->p_flags & PF_W) elf_prot |= PROT_WRITE; + if (eppnt->p_flags & PF_X) elf_prot |= PROT_EXEC; + vaddr = eppnt->p_vaddr; + + if (!load_addr && interp_elf_ex->e_type == ET_DYN) { + load_addr = get_unmapped_area(interpreter, 0, max_addr - min_addr, 0, MAP_PRIVATE | MAP_EXECUTABLE); + + if (load_addr > task_size) + goto out_close; + + load_addr -= min_addr; + } + + map_addr = 
elf_map(interpreter, load_addr + vaddr, eppnt, elf_prot, elf_type); + error = map_addr; + if (BAD_ADDR(map_addr)) + goto out_close; /* * Find the end of the file mapping for this phdr, and keep @@ -412,7 +440,6 @@ k = load_addr + eppnt->p_memsz + eppnt->p_vaddr; if (k > last_bss) last_bss = k; - } } /* @@ -449,7 +476,7 @@ out: static unsigned long load_aout_interp(struct exec * interp_ex, struct file * interpreter) { - unsigned long text_data, elf_entry = ~0UL; + unsigned long text_data, elf_entry = -EINVAL; char __user * addr; loff_t offset; @@ -493,6 +520,180 @@ out: return elf_entry; } +#if (defined(CONFIG_PAX_EI_PAX) || defined(CONFIG_PAX_PT_PAX_FLAGS)) && defined(CONFIG_PAX_SOFTMODE) +static unsigned long pax_parse_softmode(const struct elf_phdr * const elf_phdata) +{ + unsigned long pax_flags = 0UL; + +#ifdef CONFIG_PAX_PAGEEXEC + if (elf_phdata->p_flags & PF_PAGEEXEC) + pax_flags |= MF_PAX_PAGEEXEC; +#endif + +#ifdef CONFIG_PAX_SEGMEXEC + if (elf_phdata->p_flags & PF_SEGMEXEC) + pax_flags |= MF_PAX_SEGMEXEC; +#endif + +#ifdef CONFIG_PAX_DEFAULT_PAGEEXEC + if (pax_flags & MF_PAX_PAGEEXEC) + pax_flags &= ~MF_PAX_SEGMEXEC; +#endif + +#ifdef CONFIG_PAX_DEFAULT_SEGMEXEC + if (pax_flags & MF_PAX_SEGMEXEC) + pax_flags &= ~MF_PAX_PAGEEXEC; +#endif + +#ifdef CONFIG_PAX_EMUTRAMP + if (elf_phdata->p_flags & PF_EMUTRAMP) + pax_flags |= MF_PAX_EMUTRAMP; +#endif + +#ifdef CONFIG_PAX_MPROTECT + if (elf_phdata->p_flags & PF_MPROTECT) + pax_flags |= MF_PAX_MPROTECT; +#endif + +#if defined(CONFIG_PAX_RANDMMAP) || defined(CONFIG_PAX_RANDUSTACK) + if (randomize_va_space && (elf_phdata->p_flags & PF_RANDMMAP)) + pax_flags |= MF_PAX_RANDMMAP; +#endif + + return pax_flags; +} +#endif + +#ifdef CONFIG_PAX_PT_PAX_FLAGS +static unsigned long pax_parse_hardmode(const struct elf_phdr * const elf_phdata) +{ + unsigned long pax_flags = 0UL; + +#ifdef CONFIG_PAX_PAGEEXEC + if (!(elf_phdata->p_flags & PF_NOPAGEEXEC)) + pax_flags |= MF_PAX_PAGEEXEC; +#endif + +#ifdef CONFIG_PAX_SEGMEXEC + 
if (!(elf_phdata->p_flags & PF_NOSEGMEXEC)) + pax_flags |= MF_PAX_SEGMEXEC; +#endif + +#ifdef CONFIG_PAX_DEFAULT_PAGEEXEC + if (pax_flags & MF_PAX_PAGEEXEC) + pax_flags &= ~MF_PAX_SEGMEXEC; +#endif + +#ifdef CONFIG_PAX_DEFAULT_SEGMEXEC + if (pax_flags & MF_PAX_SEGMEXEC) + pax_flags &= ~MF_PAX_PAGEEXEC; +#endif + +#ifdef CONFIG_PAX_EMUTRAMP + if (!(elf_phdata->p_flags & PF_NOEMUTRAMP)) + pax_flags |= MF_PAX_EMUTRAMP; +#endif + +#ifdef CONFIG_PAX_MPROTECT + if (!(elf_phdata->p_flags & PF_NOMPROTECT)) + pax_flags |= MF_PAX_MPROTECT; +#endif + +#if defined(CONFIG_PAX_RANDMMAP) || defined(CONFIG_PAX_RANDUSTACK) + if (randomize_va_space && !(elf_phdata->p_flags & PF_NORANDMMAP)) + pax_flags |= MF_PAX_RANDMMAP; +#endif + + return pax_flags; +} +#endif + +#ifdef CONFIG_PAX_EI_PAX +static unsigned long pax_parse_ei_pax(const struct elfhdr * const elf_ex) +{ + unsigned long pax_flags = 0UL; + +#ifdef CONFIG_PAX_PAGEEXEC + if (!(elf_ex->e_ident[EI_PAX] & EF_PAX_PAGEEXEC)) + pax_flags |= MF_PAX_PAGEEXEC; +#endif + +#ifdef CONFIG_PAX_SEGMEXEC + if (!(elf_ex->e_ident[EI_PAX] & EF_PAX_SEGMEXEC)) + pax_flags |= MF_PAX_SEGMEXEC; +#endif + +#ifdef CONFIG_PAX_DEFAULT_PAGEEXEC + if (pax_flags & MF_PAX_PAGEEXEC) + pax_flags &= ~MF_PAX_SEGMEXEC; +#endif + +#ifdef CONFIG_PAX_DEFAULT_SEGMEXEC + if (pax_flags & MF_PAX_SEGMEXEC) + pax_flags &= ~MF_PAX_PAGEEXEC; +#endif + +#ifdef CONFIG_PAX_EMUTRAMP + if ((pax_flags & (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) && (elf_ex->e_ident[EI_PAX] & EF_PAX_EMUTRAMP)) + pax_flags |= MF_PAX_EMUTRAMP; +#endif + +#ifdef CONFIG_PAX_MPROTECT + if ((pax_flags & (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) && !(elf_ex->e_ident[EI_PAX] & EF_PAX_MPROTECT)) + pax_flags |= MF_PAX_MPROTECT; +#endif + +#ifdef CONFIG_PAX_ASLR + if (randomize_va_space && !(elf_ex->e_ident[EI_PAX] & EF_PAX_RANDMMAP)) + pax_flags |= MF_PAX_RANDMMAP; +#endif + + return pax_flags; +} +#endif + +#if defined(CONFIG_PAX_EI_PAX) || defined(CONFIG_PAX_PT_PAX_FLAGS) +static long pax_parse_elf_flags(const 
struct elfhdr * const elf_ex, const struct elf_phdr * const elf_phdata) +{ + unsigned long pax_flags = 0UL; + +#ifdef CONFIG_PAX_PT_PAX_FLAGS + unsigned long i; +#endif + +#ifdef CONFIG_PAX_EI_PAX + pax_flags = pax_parse_ei_pax(elf_ex); +#endif + +#ifdef CONFIG_PAX_PT_PAX_FLAGS + for (i = 0UL; i < elf_ex->e_phnum; i++) + if (elf_phdata[i].p_type == PT_PAX_FLAGS) { + if (((elf_phdata[i].p_flags & PF_PAGEEXEC) && (elf_phdata[i].p_flags & PF_NOPAGEEXEC)) || + ((elf_phdata[i].p_flags & PF_SEGMEXEC) && (elf_phdata[i].p_flags & PF_NOSEGMEXEC)) || + ((elf_phdata[i].p_flags & PF_EMUTRAMP) && (elf_phdata[i].p_flags & PF_NOEMUTRAMP)) || + ((elf_phdata[i].p_flags & PF_MPROTECT) && (elf_phdata[i].p_flags & PF_NOMPROTECT)) || + ((elf_phdata[i].p_flags & PF_RANDMMAP) && (elf_phdata[i].p_flags & PF_NORANDMMAP))) + return -EINVAL; + +#ifdef CONFIG_PAX_SOFTMODE + if (pax_softmode) + pax_flags = pax_parse_softmode(&elf_phdata[i]); + else +#endif + + pax_flags = pax_parse_hardmode(&elf_phdata[i]); + break; + } +#endif + + if (0 > pax_check_flags(&pax_flags)) + return -EINVAL; + + current->mm->pax_flags = pax_flags; + return 0; +} +#endif + /* * These are the functions used to load ELF style executables and shared * libraries. There is no binary dependent code anywhere else. 
@@ -529,7 +730,7 @@ static int load_elf_binary(struct linux_ char * elf_interpreter = NULL; unsigned int interpreter_type = INTERPRETER_NONE; unsigned char ibcs2_interpreter = 0; - unsigned long error; + unsigned long error = 0; struct elf_phdr * elf_ppnt, *elf_phdata; unsigned long elf_bss, elf_brk; int elf_exec_fileno; @@ -547,6 +748,7 @@ static int load_elf_binary(struct linux_ struct elfhdr interp_elf_ex; struct exec interp_ex; } *loc; + unsigned long task_size = TASK_SIZE; loc = kmalloc(sizeof(*loc), GFP_KERNEL); if (!loc) { @@ -772,14 +974,88 @@ static int load_elf_binary(struct linux_ current->mm->end_code = 0; current->mm->mmap = NULL; current->flags &= ~PF_FORKNOEXEC; + +#if defined(CONFIG_PAX_NOEXEC) || defined(CONFIG_PAX_ASLR) + current->mm->pax_flags = 0UL; +#endif + +#ifdef CONFIG_PAX_DLRESOLVE + current->mm->call_dl_resolve = 0UL; +#endif + +#if defined(CONFIG_PPC32) && defined(CONFIG_PAX_EMUSIGRT) + current->mm->call_syscall = 0UL; +#endif + +#ifdef CONFIG_PAX_ASLR + current->mm->delta_mmap = 0UL; + current->mm->delta_exec = 0UL; + current->mm->delta_stack = 0UL; +#endif + current->mm->def_flags = def_flags; +#if defined(CONFIG_PAX_EI_PAX) || defined(CONFIG_PAX_PT_PAX_FLAGS) + if (0 > pax_parse_elf_flags(&loc->elf_ex, elf_phdata)) { + send_sig(SIGKILL, current, 0); + goto out_free_dentry; + } +#endif + +#ifdef CONFIG_PAX_HAVE_ACL_FLAGS + pax_set_initial_flags(bprm); +#elif defined(CONFIG_PAX_HOOK_ACL_FLAGS) + if (pax_set_initial_flags_func) + (pax_set_initial_flags_func)(bprm); +#endif + +#ifdef CONFIG_ARCH_TRACK_EXEC_LIMIT + if (current->mm->pax_flags & MF_PAX_PAGEEXEC) + current->mm->context.user_cs_limit = PAGE_SIZE; +#endif + +#ifdef CONFIG_PAX_SEGMEXEC + if (current->mm->pax_flags & MF_PAX_SEGMEXEC) { + int cpu = get_cpu(); + + current->mm->context.user_cs_base = SEGMEXEC_TASK_SIZE; + current->mm->context.user_cs_limit = -SEGMEXEC_TASK_SIZE; + set_user_cs(current->mm, cpu); + put_cpu(); + task_size = SEGMEXEC_TASK_SIZE; + } +#endif + +#ifdef 
CONFIG_PAX_ASLR + if (current->mm->pax_flags & MF_PAX_RANDMMAP) { +#define pax_delta_mask(delta, lsb, len) (((delta) & ((1UL << (len)) - 1)) << (lsb)) + + current->mm->delta_mmap = pax_delta_mask(pax_get_random_long(), PAX_DELTA_MMAP_LSB(current), PAX_DELTA_MMAP_LEN(current)); + current->mm->delta_exec = pax_delta_mask(pax_get_random_long(), PAX_DELTA_EXEC_LSB(current), PAX_DELTA_EXEC_LEN(current)); + current->mm->delta_stack = pax_delta_mask(pax_get_random_long(), PAX_DELTA_STACK_LSB(current), PAX_DELTA_STACK_LEN(current)); + } +#endif + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) + if (current->mm->pax_flags & (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) + executable_stack = EXSTACK_DEFAULT; +#endif + /* Do this immediately, since STACK_TOP as used in setup_arg_pages may depend on the personality. */ SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter); + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) + if (!(current->mm->pax_flags & (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC))) +#endif + if (elf_read_implies_exec(loc->elf_ex, executable_stack)) current->personality |= READ_IMPLIES_EXEC; +#ifdef CONFIG_PAX_ASLR + if (!(current->mm->pax_flags & MF_PAX_RANDMMAP)) +#endif + if ( !(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) current->flags |= PF_RANDOMIZE; arch_pick_mmap_layout(current->mm); @@ -851,6 +1127,15 @@ static int load_elf_binary(struct linux_ base, as well as whatever program they might try to exec. This is because the brk will follow the loader, and is not movable. 
*/ load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); + +#ifdef CONFIG_PAX_RANDMMAP + /* PaX: randomize base address at the default exe base if requested */ + if (current->mm->pax_flags & MF_PAX_RANDMMAP) { + load_bias = ELF_PAGESTART(PAX_ELF_ET_DYN_BASE(current) - vaddr + current->mm->delta_exec); + elf_flags |= MAP_FIXED; + } +#endif + } error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags); @@ -878,8 +1163,8 @@ static int load_elf_binary(struct linux_ * <= p_memsz so it is only necessary to check p_memsz. */ if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz || - elf_ppnt->p_memsz > TASK_SIZE || - TASK_SIZE - elf_ppnt->p_memsz < k) { + elf_ppnt->p_memsz > task_size || + task_size - elf_ppnt->p_memsz < k) { /* set_brk can never work. Avoid overflows. */ send_sig(SIGKILL, current, 0); goto out_free_dentry; @@ -907,6 +1192,12 @@ static int load_elf_binary(struct linux_ start_data += load_bias; end_data += load_bias; +#ifdef CONFIG_PAX_RANDMMAP + if (current->mm->pax_flags & MF_PAX_RANDMMAP) + elf_brk += PAGE_SIZE + pax_delta_mask(pax_get_random_long(), 4, PAGE_SHIFT); +#undef pax_delta_mask +#endif + /* Calling set_brk effectively mmaps the pages that we need * for the bss and break sections. We must do this before * mapping in the interpreter, to make sure it doesn't wind @@ -1159,7 +1450,7 @@ static int dump_seek(struct file *file, * * I think we should skip something. But I am not sure how. H.J. 
*/ -static int maydump(struct vm_area_struct *vma) +static int maydump(struct vm_area_struct *vma, long signr) { /* Do not dump I/O mapped devices or special mappings */ if (vma->vm_flags & (VM_IO | VM_RESERVED)) @@ -1170,7 +1461,7 @@ static int maydump(struct vm_area_struct return vma->vm_file->f_dentry->d_inode->i_nlink == 0; /* If it hasn't been written to, don't write it out */ - if (!vma->anon_vma) + if (signr != SIGKILL && !vma->anon_vma) return 0; return 1; @@ -1224,8 +1515,11 @@ static int writenote(struct memelfnote * #undef DUMP_SEEK #define DUMP_WRITE(addr, nr) \ + do { \ + gr_learn_resource(current, RLIMIT_CORE, size + (nr), 1); \ if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \ - goto end_coredump; + goto end_coredump; \ + } while (0) #define DUMP_SEEK(off) \ if (!dump_seek(file, (off))) \ goto end_coredump; @@ -1575,7 +1869,7 @@ static int elf_core_dump(long signr, str phdr.p_offset = offset; phdr.p_vaddr = vma->vm_start; phdr.p_paddr = 0; - phdr.p_filesz = maydump(vma) ? sz : 0; + phdr.p_filesz = maydump(vma, signr) ? sz : 0; phdr.p_memsz = sz; offset += phdr.p_filesz; phdr.p_flags = vma->vm_flags & VM_READ ?
PF_R : 0; @@ -1608,7 +1902,7 @@ static int elf_core_dump(long signr, str for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) { unsigned long addr; - if (!maydump(vma)) + if (!maydump(vma, signr)) continue; for (addr = vma->vm_start; @@ -1627,6 +1921,7 @@ static int elf_core_dump(long signr, str void *kaddr; flush_cache_page(vma, addr, page_to_pfn(page)); kaddr = kmap(page); + gr_learn_resource(current, RLIMIT_CORE, size + PAGE_SIZE, 1); if ((size += PAGE_SIZE) > limit || !dump_write(file, kaddr, PAGE_SIZE)) { diff -urNp linux-2.6.17.11/fs/binfmt_flat.c linux-2.6.17.11/fs/binfmt_flat.c --- linux-2.6.17.11/fs/binfmt_flat.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/fs/binfmt_flat.c 2006-09-01 16:20:28.000000000 -0400 @@ -553,7 +553,9 @@ static int load_flat_file(struct linux_b realdatastart = (unsigned long) -ENOMEM; printk("Unable to allocate RAM for process data, errno %d\n", (int)-datapos); + down_write(&current->mm->mmap_sem); do_munmap(current->mm, textpos, text_len); + up_write(&current->mm->mmap_sem); ret = realdatastart; goto err; } @@ -575,8 +577,10 @@ static int load_flat_file(struct linux_b } if (result >= (unsigned long)-4096) { printk("Unable to read data+bss, errno %d\n", (int)-result); + down_write(&current->mm->mmap_sem); do_munmap(current->mm, textpos, text_len); do_munmap(current->mm, realdatastart, data_len + extra); + up_write(&current->mm->mmap_sem); ret = result; goto err; } @@ -640,8 +644,10 @@ static int load_flat_file(struct linux_b } if (result >= (unsigned long)-4096) { printk("Unable to read code+data+bss, errno %d\n",(int)-result); + down_write(&current->mm->mmap_sem); do_munmap(current->mm, textpos, text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long)); + up_write(&current->mm->mmap_sem); ret = result; goto err; } diff -urNp linux-2.6.17.11/fs/binfmt_misc.c linux-2.6.17.11/fs/binfmt_misc.c --- linux-2.6.17.11/fs/binfmt_misc.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/fs/binfmt_misc.c 2006-09-01 16:20:28.000000000 -0400
@@ -112,9 +112,11 @@ static int load_misc_binary(struct linux struct files_struct *files = NULL; retval = -ENOEXEC; - if (!enabled) + if (!enabled || bprm->misc) goto _ret; + bprm->misc++; + /* to keep locking time low, we copy the interpreter string */ read_lock(&entries_lock); fmt = check_file(bprm); diff -urNp linux-2.6.17.11/fs/buffer.c linux-2.6.17.11/fs/buffer.c --- linux-2.6.17.11/fs/buffer.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/fs/buffer.c 2006-09-01 16:20:28.000000000 -0400 @@ -42,6 +42,7 @@ #include #include #include +#include static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); static void invalidate_bh_lrus(void); @@ -2167,6 +2168,7 @@ static int __generic_cont_expand(struct err = -EFBIG; limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; + gr_learn_resource(current, RLIMIT_FSIZE, (unsigned long) size, 1); if (limit != RLIM_INFINITY && size > (loff_t)limit) { send_sig(SIGXFSZ, current, 0); goto out; diff -urNp linux-2.6.17.11/fs/compat.c linux-2.6.17.11/fs/compat.c --- linux-2.6.17.11/fs/compat.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/fs/compat.c 2006-09-01 16:20:28.000000000 -0400 @@ -46,6 +46,7 @@ #include #include #include +#include #include /* siocdevprivate_ioctl */ @@ -1498,6 +1499,11 @@ int compat_do_execve(char * filename, struct file *file; int retval; int i; +#ifdef CONFIG_GRKERNSEC + struct file *old_exec_file; + struct acl_subject_label *old_acl; + struct rlimit old_rlim[RLIM_NLIMITS]; +#endif retval = -ENOMEM; bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); @@ -1515,6 +1521,15 @@ int compat_do_execve(char * filename, bprm->file = file; bprm->filename = filename; bprm->interp = filename; + + gr_learn_resource(current, RLIMIT_NPROC, atomic_read(&current->user->processes), 1); + retval = -EAGAIN; + if (gr_handle_nproc()) + goto out_file; + retval = -EACCES; + if (!gr_acl_handle_execve(file->f_dentry, file->f_vfsmnt)) + goto out_file; + bprm->mm = mm_alloc(); retval = -ENOMEM; if (!bprm->mm) @@
-1553,10 +1568,39 @@ int compat_do_execve(char * filename, if (retval < 0) goto out; + if (!gr_tpe_allow(file)) { + retval = -EACCES; + goto out; + } + + if (gr_check_crash_exec(file)) { + retval = -EACCES; + goto out; + } + + gr_log_chroot_exec(file->f_dentry, file->f_vfsmnt); + + gr_handle_exec_args(bprm, (char __user * __user *)argv); + +#ifdef CONFIG_GRKERNSEC + old_acl = current->acl; + memcpy(old_rlim, current->signal->rlim, sizeof(old_rlim)); + old_exec_file = current->exec_file; + get_file(file); + current->exec_file = file; +#endif + + gr_set_proc_label(file->f_dentry, file->f_vfsmnt); + retval = search_binary_handler(bprm, regs); if (retval >= 0) { free_arg_pages(bprm); +#ifdef CONFIG_GRKERNSEC + if (old_exec_file) + fput(old_exec_file); +#endif + /* execve success */ security_bprm_free(bprm); acct_update_integrals(current); @@ -1564,6 +1608,13 @@ int compat_do_execve(char * filename, return retval; } +#ifdef CONFIG_GRKERNSEC + current->acl = old_acl; + memcpy(current->signal->rlim, old_rlim, sizeof(old_rlim)); + fput(current->exec_file); + current->exec_file = old_exec_file; +#endif + out: /* Something went wrong, return the inode and free the argument pages*/ for (i = 0 ; i < MAX_ARG_PAGES ; i++) { diff -urNp linux-2.6.17.11/fs/dcache.c linux-2.6.17.11/fs/dcache.c --- linux-2.6.17.11/fs/dcache.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/fs/dcache.c 2006-09-01 16:20:28.000000000 -0400 @@ -1399,7 +1399,7 @@ already_unhashed: * * "buflen" should be positive. Caller holds the dcache_lock. 
*/ -static char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt, +char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt, struct dentry *root, struct vfsmount *rootmnt, char *buffer, int buflen) { diff -urNp linux-2.6.17.11/fs/exec.c linux-2.6.17.11/fs/exec.c --- linux-2.6.17.11/fs/exec.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/fs/exec.c 2006-09-01 16:20:28.000000000 -0400 @@ -51,6 +51,8 @@ #include #include #include +#include +#include #include #include @@ -69,6 +71,15 @@ EXPORT_SYMBOL(suid_dumpable); static struct linux_binfmt *formats; static DEFINE_RWLOCK(binfmt_lock); +#ifdef CONFIG_PAX_SOFTMODE +unsigned int pax_softmode; +#endif + +#ifdef CONFIG_PAX_HOOK_ACL_FLAGS +void (*pax_set_initial_flags_func)(struct linux_binprm * bprm); +EXPORT_SYMBOL(pax_set_initial_flags_func); +#endif + int register_binfmt(struct linux_binfmt * fmt) { struct linux_binfmt ** tmp = &formats; @@ -314,6 +325,10 @@ void install_arg_page(struct vm_area_str if (unlikely(anon_vma_prepare(vma))) goto out; +#ifdef CONFIG_PAX_SEGMEXEC + if (page_count(page) == 1) +#endif + flush_dcache_page(page); pte = get_locked_pte(mm, address, &ptl); if (!pte) @@ -323,9 +338,21 @@ void install_arg_page(struct vm_area_str goto out; } inc_mm_counter(mm, anon_rss); + +#ifdef CONFIG_PAX_SEGMEXEC + if (page_count(page) == 1) +#endif + lru_cache_add_active(page); set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte( page, vma->vm_page_prot)))); + +#ifdef CONFIG_PAX_SEGMEXEC + if (page_count(page) != 1) + page_add_anon_rmap(page, vma, address); + else +#endif + page_add_new_anon_rmap(page, vma, address); pte_unmap_unlock(pte, ptl); @@ -348,6 +375,10 @@ int setup_arg_pages(struct linux_binprm int i, ret; long arg_size; +#ifdef CONFIG_PAX_SEGMEXEC + struct vm_area_struct *mpnt_m = NULL; +#endif + #ifdef CONFIG_STACK_GROWSUP /* Move the argument and environment strings to the bottom of the * stack space. 
@@ -406,11 +437,19 @@ int setup_arg_pages(struct linux_binprm bprm->loader += stack_base; bprm->exec += stack_base; - mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + mpnt = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL); if (!mpnt) return -ENOMEM; - memset(mpnt, 0, sizeof(*mpnt)); +#ifdef CONFIG_PAX_SEGMEXEC + if ((mm->pax_flags & MF_PAX_SEGMEXEC) && (VM_STACK_FLAGS & VM_MAYEXEC)) { + mpnt_m = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL); + if (!mpnt_m) { + kmem_cache_free(vm_area_cachep, mpnt); + return -ENOMEM; + } + } +#endif down_write(&mm->mmap_sem); { @@ -432,14 +471,51 @@ int setup_arg_pages(struct linux_binprm else mpnt->vm_flags = VM_STACK_FLAGS; mpnt->vm_flags |= mm->def_flags; - mpnt->vm_page_prot = protection_map[mpnt->vm_flags & 0x7]; + +#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_X86_32) + if (!(mm->pax_flags & MF_PAX_PAGEEXEC)) + mpnt->vm_page_prot = protection_map[(mpnt->vm_flags | VM_EXEC) & (VM_READ|VM_WRITE|VM_EXEC)]; + else +#endif + + mpnt->vm_page_prot = protection_map[mpnt->vm_flags & (VM_READ|VM_WRITE|VM_EXEC)]; if ((ret = insert_vm_struct(mm, mpnt))) { up_write(&mm->mmap_sem); kmem_cache_free(vm_area_cachep, mpnt); + +#ifdef CONFIG_PAX_SEGMEXEC + if (mpnt_m) + kmem_cache_free(vm_area_cachep, mpnt_m); +#endif + return ret; } vx_vmpages_sub(mm, mm->total_vm - vma_pages(mpnt)); mm->stack_vm = mm->total_vm; + +#ifdef CONFIG_PAX_SEGMEXEC + if (mpnt_m) { + *mpnt_m = *mpnt; + if (!(mpnt->vm_flags & VM_EXEC)) { + mpnt_m->vm_flags &= ~(VM_READ | VM_WRITE | VM_EXEC); + mpnt_m->vm_page_prot = PAGE_NONE; + } + mpnt_m->vm_start += SEGMEXEC_TASK_SIZE; + mpnt_m->vm_end += SEGMEXEC_TASK_SIZE; + if ((ret = insert_vm_struct(mm, mpnt_m))) { + up_write(&mm->mmap_sem); + kmem_cache_free(vm_area_cachep, mpnt_m); + return ret; + } + mpnt_m->vm_flags |= VM_MIRROR; + mpnt->vm_flags |= VM_MIRROR; + mpnt_m->vm_mirror = mpnt->vm_start - mpnt_m->vm_start; + mpnt->vm_mirror = mpnt_m->vm_start - mpnt->vm_start; + mpnt_m->vm_pgoff = mpnt->vm_pgoff; + 
mm->total_vm += vma_pages(mpnt_m); + } +#endif + } for (i = 0 ; i < MAX_ARG_PAGES ; i++) { @@ -447,6 +523,14 @@ int setup_arg_pages(struct linux_binprm if (page) { bprm->page[i] = NULL; install_arg_page(mpnt, page, stack_base); + +#ifdef CONFIG_PAX_SEGMEXEC + if (mpnt_m) { + page_cache_get(page); + install_arg_page(mpnt_m, page, stack_base + SEGMEXEC_TASK_SIZE); + } +#endif + } stack_base += PAGE_SIZE; } @@ -1150,6 +1234,11 @@ int do_execve(char * filename, struct file *file; int retval; int i; +#ifdef CONFIG_GRKERNSEC + struct file *old_exec_file; + struct acl_subject_label *old_acl; + struct rlimit old_rlim[RLIM_NLIMITS]; +#endif retval = -ENOMEM; bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); @@ -1161,10 +1250,32 @@ int do_execve(char * filename, if (IS_ERR(file)) goto out_kfree; + gr_learn_resource(current, RLIMIT_NPROC, atomic_read(&current->user->processes), 1); + + /* bprm is already allocated: unwind through out_kfree rather than returning, which would leak it */ + if (gr_handle_nproc()) { + allow_write_access(file); + fput(file); + retval = -EAGAIN; + goto out_kfree; + } + + if (!gr_acl_handle_execve(file->f_dentry, file->f_vfsmnt)) { + allow_write_access(file); + fput(file); + retval = -EACCES; + goto out_kfree; + } + sched_exec(); bprm->p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); +#ifdef CONFIG_PAX_RANDUSTACK + if (randomize_va_space) + bprm->p -= (pax_get_random_long() & ~(sizeof(void *)-1)) & ~PAGE_MASK; +#endif + bprm->file = file; bprm->filename = filename; bprm->interp = filename; @@ -1206,8 +1317,38 @@ int do_execve(char * filename, if (retval < 0) goto out; + if (!gr_tpe_allow(file)) { + retval = -EACCES; + goto out; + } + + if (gr_check_crash_exec(file)) { + retval = -EACCES; + goto out; + } + + gr_log_chroot_exec(file->f_dentry, file->f_vfsmnt); + + gr_handle_exec_args(bprm, argv); + +#ifdef CONFIG_GRKERNSEC + old_acl = current->acl; + memcpy(old_rlim, current->signal->rlim, sizeof(old_rlim)); + old_exec_file = current->exec_file; + get_file(file); + current->exec_file = file; +#endif + + retval = gr_set_proc_label(file->f_dentry, file->f_vfsmnt); + if (retval < 0) + goto out_fail; + retval =
search_binary_handler(bprm,regs); if (retval >= 0) { +#ifdef CONFIG_GRKERNSEC + if (old_exec_file) + fput(old_exec_file); +#endif free_arg_pages(bprm); /* execve success */ @@ -1217,6 +1355,14 @@ int do_execve(char * filename, return retval; } +out_fail: +#ifdef CONFIG_GRKERNSEC + current->acl = old_acl; + memcpy(current->signal->rlim, old_rlim, sizeof(old_rlim)); + fput(current->exec_file); + current->exec_file = old_exec_file; +#endif + out: /* Something went wrong, return the inode and free the argument pages*/ for (i = 0 ; i < MAX_ARG_PAGES ; i++) { @@ -1377,6 +1523,114 @@ static void format_corename(char *corena *out_ptr = 0; } +int pax_check_flags(unsigned long * flags) +{ + int retval = 0; + +#if !defined(__i386__) || !defined(CONFIG_PAX_SEGMEXEC) + if (*flags & MF_PAX_SEGMEXEC) + { + *flags &= ~MF_PAX_SEGMEXEC; + retval = -EINVAL; + } +#endif + + if ((*flags & MF_PAX_PAGEEXEC) + +#ifdef CONFIG_PAX_PAGEEXEC + && (*flags & MF_PAX_SEGMEXEC) +#endif + + ) + { + *flags &= ~MF_PAX_PAGEEXEC; + retval = -EINVAL; + } + + if ((*flags & MF_PAX_MPROTECT) + +#ifdef CONFIG_PAX_MPROTECT + && !(*flags & (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) +#endif + + ) + { + *flags &= ~MF_PAX_MPROTECT; + retval = -EINVAL; + } + + if ((*flags & MF_PAX_EMUTRAMP) + +#ifdef CONFIG_PAX_EMUTRAMP + && !(*flags & (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) +#endif + + ) + { + *flags &= ~MF_PAX_EMUTRAMP; + retval = -EINVAL; + } + + return retval; +} + +EXPORT_SYMBOL(pax_check_flags); + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) +void pax_report_fault(struct pt_regs *regs, void *pc, void *sp) +{ + struct task_struct *tsk = current; + struct mm_struct *mm = current->mm; + char* buffer_exec = (char*)__get_free_page(GFP_ATOMIC); + char* buffer_fault = (char*)__get_free_page(GFP_ATOMIC); + char* path_exec=NULL; + char* path_fault=NULL; + unsigned long start=0UL, end=0UL, offset=0UL; + + if (buffer_exec && buffer_fault) { + struct vm_area_struct* vma, * vma_exec=NULL, * vma_fault=NULL; + 
+ down_read(&mm->mmap_sem); + vma = mm->mmap; + while (vma && (!vma_exec || !vma_fault)) { + if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file) + vma_exec = vma; + if (vma->vm_start <= (unsigned long)pc && (unsigned long)pc < vma->vm_end) + vma_fault = vma; + vma = vma->vm_next; + } + if (vma_exec) { + path_exec = d_path(vma_exec->vm_file->f_dentry, vma_exec->vm_file->f_vfsmnt, buffer_exec, PAGE_SIZE); + if (IS_ERR(path_exec)) + path_exec = ""; + } + if (vma_fault) { + start = vma_fault->vm_start; + end = vma_fault->vm_end; + offset = vma_fault->vm_pgoff << PAGE_SHIFT; + if (vma_fault->vm_file) { + path_fault = d_path(vma_fault->vm_file->f_dentry, vma_fault->vm_file->f_vfsmnt, buffer_fault, PAGE_SIZE); + if (IS_ERR(path_fault)) + path_fault = ""; + } else + path_fault = ""; + } + up_read(&mm->mmap_sem); + } + if (tsk->signal->curr_ip) + printk(KERN_ERR "PAX: From %u.%u.%u.%u: execution attempt in: %s, %08lx-%08lx %08lx\n", NIPQUAD(tsk->signal->curr_ip), path_fault, start, end, offset); + else + printk(KERN_ERR "PAX: execution attempt in: %s, %08lx-%08lx %08lx\n", path_fault, start, end, offset); + printk(KERN_ERR "PAX: terminating task: %s(%s):%d, uid/euid: %u/%u, " + "PC: %p, SP: %p\n", path_exec, tsk->comm, tsk->pid, + tsk->uid, tsk->euid, pc, sp); + free_page((unsigned long)buffer_exec); + free_page((unsigned long)buffer_fault); + pax_report_insns(pc, sp); + do_coredump(SIGKILL, SIGKILL, regs); +} +#endif + static void zap_threads (struct mm_struct *mm) { struct task_struct *g, *p; @@ -1494,6 +1748,10 @@ int do_coredump(long signr, int exit_cod */ clear_thread_flag(TIF_SIGPENDING); + if (signr == SIGKILL || signr == SIGILL) + gr_handle_brute_attach(current); + + gr_learn_resource(current, RLIMIT_CORE, binfmt->min_coredump, 1); if (current->signal->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump) goto fail_unlock; diff -urNp linux-2.6.17.11/fs/fcntl.c linux-2.6.17.11/fs/fcntl.c --- linux-2.6.17.11/fs/fcntl.c 2006-08-07 00:18:54.000000000 -0400 +++ 
linux-2.6.17.11/fs/fcntl.c 2006-09-01 16:20:28.000000000 -0400 @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -64,6 +65,7 @@ static int locate_fd(struct files_struct struct fdtable *fdt; error = -EINVAL; + gr_learn_resource(current, RLIMIT_NOFILE, orig_start, 0); if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) goto out; @@ -84,6 +86,7 @@ repeat: } error = -EMFILE; + gr_learn_resource(current, RLIMIT_NOFILE, newfd, 0); if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) goto out; if (!vx_files_avail(1)) @@ -145,6 +148,8 @@ asmlinkage long sys_dup2(unsigned int ol struct files_struct * files = current->files; struct fdtable *fdt; + gr_learn_resource(current, RLIMIT_NOFILE, newfd, 0); + spin_lock(&files->file_lock); if (!(file = fcheck(oldfd))) goto out_unlock; @@ -434,7 +439,8 @@ static inline int sigio_perm(struct task return (((fown->euid == 0) || (fown->euid == p->suid) || (fown->euid == p->uid) || (fown->uid == p->suid) || (fown->uid == p->uid)) && - !security_file_send_sigiotask(p, fown, sig)); + !security_file_send_sigiotask(p, fown, sig) && + !gr_check_protected_task(p) && !gr_pid_is_chrooted(p)); } static void send_sigio_to_task(struct task_struct *p, diff -urNp linux-2.6.17.11/fs/Kconfig linux-2.6.17.11/fs/Kconfig --- linux-2.6.17.11/fs/Kconfig 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/fs/Kconfig 2006-09-01 16:20:28.000000000 -0400 @@ -796,7 +796,7 @@ config PROC_FS config PROC_KCORE bool "/proc/kcore support" if !ARM - depends on PROC_FS && MMU + depends on PROC_FS && MMU && !GRKERNSEC_PROC_ADD config PROC_VMCORE bool "/proc/vmcore support (EXPERIMENTAL)" diff -urNp linux-2.6.17.11/fs/namei.c linux-2.6.17.11/fs/namei.c --- linux-2.6.17.11/fs/namei.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/fs/namei.c 2006-09-01 16:20:28.000000000 -0400 @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -635,6 +636,13 @@ static inline int do_follow_link(struct err = 
security_inode_follow_link(path->dentry, nd); if (err) goto loop; + + if (gr_handle_follow_link(path->dentry->d_parent->d_inode, + path->dentry->d_inode, path->dentry, nd->mnt)) { + err = -EACCES; + goto loop; + } + current->link_count++; current->total_link_count++; nd->depth++; @@ -997,11 +1005,18 @@ return_reval: break; } return_base: + if (!gr_acl_handle_hidden_file(nd->dentry, nd->mnt)) { + path_release(nd); + return -ENOENT; + } return 0; out_dput: dput_path(&next, nd); break; } + if (!gr_acl_handle_hidden_file(nd->dentry, nd->mnt)) + err = -ENOENT; + path_release(nd); return_err: return err; @@ -1661,6 +1676,17 @@ int open_namei(int dfd, const char *path nd, flag); if (error) return error; + + if (gr_handle_rawio(nd->dentry->d_inode)) { + error = -EPERM; + goto exit; + } + + if (!gr_acl_handle_open(nd->dentry, nd->mnt, flag)) { + error = -EACCES; + goto exit; + } + goto ok; } @@ -1701,9 +1727,16 @@ do_last: /* Negative dentry, just create the file */ if (!path.dentry->d_inode) { + if (!gr_acl_handle_creat(path.dentry, nd->dentry, nd->mnt, flag, mode)) { + error = -EACCES; + mutex_unlock(&dir->d_inode->i_mutex); + goto exit_dput; + } if (!IS_POSIXACL(dir->d_inode)) mode &= ~current->fs->umask; error = vfs_create(dir->d_inode, path.dentry, mode, nd); + if (!error) + gr_handle_create(path.dentry, nd->mnt); mutex_unlock(&dir->d_inode->i_mutex); dput(nd->dentry); nd->dentry = path.dentry; @@ -1718,6 +1751,23 @@ do_last: /* * It already exists. 
*/ + + if (gr_handle_rawio(path.dentry->d_inode)) { + mutex_unlock(&dir->d_inode->i_mutex); + error = -EPERM; + goto exit_dput; + } + if (!gr_acl_handle_open(path.dentry, nd->mnt, flag)) { + mutex_unlock(&dir->d_inode->i_mutex); + error = -EACCES; + goto exit_dput; + } + if (gr_handle_fifo(path.dentry, nd->mnt, dir, flag, acc_mode)) { + mutex_unlock(&dir->d_inode->i_mutex); + error = -EACCES; + goto exit_dput; + } + mutex_unlock(&dir->d_inode->i_mutex); error = -EEXIST; @@ -1784,6 +1834,13 @@ do_link: error = security_inode_follow_link(path.dentry, nd); if (error) goto exit_dput; + + if (gr_handle_follow_link(path.dentry->d_parent->d_inode, path.dentry->d_inode, + path.dentry, nd->mnt)) { + error = -EACCES; + goto exit_dput; + } + error = __do_follow_link(&path, nd); if (error) { /* Does someone understand code flow here? Or it is only @@ -1911,6 +1968,22 @@ asmlinkage long sys_mknodat(int dfd, con if (!IS_POSIXACL(nd.dentry->d_inode)) mode &= ~current->fs->umask; if (!IS_ERR(dentry)) { + if (gr_handle_chroot_mknod(dentry, nd.mnt, mode)) { + error = -EPERM; + dput(dentry); + mutex_unlock(&nd.dentry->d_inode->i_mutex); + path_release(&nd); + goto out; + } + + if (!gr_acl_handle_mknod(dentry, nd.dentry, nd.mnt, mode)) { + error = -EACCES; + dput(dentry); + mutex_unlock(&nd.dentry->d_inode->i_mutex); + path_release(&nd); + goto out; + } + switch (mode & S_IFMT) { case 0: case S_IFREG: error = vfs_create(nd.dentry->d_inode,dentry,mode,&nd); @@ -1929,6 +2002,10 @@ asmlinkage long sys_mknodat(int dfd, con default: error = -EINVAL; } + + if (!error) + gr_handle_create(dentry, nd.mnt); + dput(dentry); } mutex_unlock(&nd.dentry->d_inode->i_mutex); @@ -1984,10 +2061,19 @@ asmlinkage long sys_mkdirat(int dfd, con dentry = lookup_create(&nd, 1); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { + error = 0; if (!IS_POSIXACL(nd.dentry->d_inode)) mode &= ~current->fs->umask; - error = vfs_mkdir(nd.dentry->d_inode, dentry, - mode, &nd); + + if (!gr_acl_handle_mkdir(dentry, 
nd.dentry, nd.mnt)) + error = -EACCES; + + if (!error) + error = vfs_mkdir(nd.dentry->d_inode, dentry, + mode, &nd); + if (!error) + gr_handle_create(dentry, nd.mnt); + dput(dentry); } mutex_unlock(&nd.dentry->d_inode->i_mutex); @@ -2072,6 +2158,8 @@ static long do_rmdir(int dfd, const char char * name; struct dentry *dentry; struct nameidata nd; + ino_t saved_ino = 0; + dev_t saved_dev = 0; name = getname(pathname); if(IS_ERR(name)) @@ -2096,7 +2184,21 @@ static long do_rmdir(int dfd, const char dentry = lookup_hash(&nd); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { - error = vfs_rmdir(nd.dentry->d_inode, dentry, &nd); + error = 0; + if (dentry->d_inode) { + if (dentry->d_inode->i_nlink <= 1) { + saved_ino = dentry->d_inode->i_ino; + saved_dev = dentry->d_inode->i_sb->s_dev; + } + + if (!gr_acl_handle_rmdir(dentry, nd.mnt)) + error = -EACCES; + } + + if (!error) + error = vfs_rmdir(nd.dentry->d_inode, dentry, &nd); + if (!error && (saved_dev || saved_ino)) + gr_handle_delete(saved_ino, saved_dev); dput(dentry); } mutex_unlock(&nd.dentry->d_inode->i_mutex); @@ -2156,6 +2258,8 @@ static long do_unlinkat(int dfd, const c struct dentry *dentry; struct nameidata nd; struct inode *inode = NULL; + ino_t saved_ino = 0; + dev_t saved_dev = 0; name = getname(pathname); if(IS_ERR(name)) @@ -2171,13 +2275,26 @@ static long do_unlinkat(int dfd, const c dentry = lookup_hash(&nd); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { + error = 0; /* Why not before? 
Because we want correct error value */ if (nd.last.name[nd.last.len]) goto slashes; inode = dentry->d_inode; - if (inode) + if (inode) { + if (inode->i_nlink <= 1) { + saved_ino = inode->i_ino; + saved_dev = inode->i_sb->s_dev; + } + + if (!gr_acl_handle_unlink(dentry, nd.mnt)) + error = -EACCES; + atomic_inc(&inode->i_count); - error = vfs_unlink(nd.dentry->d_inode, dentry, &nd); + } + if (!error) + error = vfs_unlink(nd.dentry->d_inode, dentry, &nd); + if (!error && (saved_ino || saved_dev)) + gr_handle_delete(saved_ino, saved_dev); exit2: dput(dentry); } @@ -2256,8 +2373,15 @@ asmlinkage long sys_symlinkat(const char dentry = lookup_create(&nd, 0); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { - error = vfs_symlink(nd.dentry->d_inode, dentry, - from, S_IALLUGO, &nd); + error = 0; + if (!gr_acl_handle_symlink(dentry, nd.dentry, nd.mnt, from)) + error = -EACCES; + + if (!error) + error = vfs_symlink(nd.dentry->d_inode, dentry, + from, S_IALLUGO, &nd); + if (!error) + gr_handle_create(dentry, nd.mnt); dput(dentry); } mutex_unlock(&nd.dentry->d_inode->i_mutex); @@ -2350,8 +2474,20 @@ asmlinkage long sys_linkat(int olddfd, c new_dentry = lookup_create(&nd, 0); error = PTR_ERR(new_dentry); if (!IS_ERR(new_dentry)) { - error = vfs_link(old_nd.dentry, nd.dentry->d_inode, - new_dentry, &nd); + error = 0; + if (gr_handle_hardlink(old_nd.dentry, old_nd.mnt, + old_nd.dentry->d_inode, + old_nd.dentry->d_inode->i_mode, to)) + error = -EPERM; + if (!gr_acl_handle_link(new_dentry, nd.dentry, nd.mnt, + old_nd.dentry, old_nd.mnt, to)) + error = -EACCES; + if (!error) + error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry, + &nd); + if (!error) + gr_handle_create(new_dentry, nd.mnt); + dput(new_dentry); } mutex_unlock(&nd.dentry->d_inode->i_mutex); @@ -2580,8 +2716,16 @@ static int do_rename(int olddfd, const c if (new_dentry == trap) goto exit5; - error = vfs_rename(old_dir->d_inode, old_dentry, + error = gr_acl_handle_rename(new_dentry, newnd.dentry, newnd.mnt, + 
old_dentry, old_dir->d_inode, oldnd.mnt, + newname); + + if (!error) + error = vfs_rename(old_dir->d_inode, old_dentry, new_dir->d_inode, new_dentry); + if (!error) + gr_handle_rename(old_dir->d_inode, newnd.dentry->d_inode, old_dentry, + new_dentry, oldnd.mnt, new_dentry->d_inode ? 1 : 0); exit5: dput(new_dentry); exit4: diff -urNp linux-2.6.17.11/fs/namespace.c linux-2.6.17.11/fs/namespace.c --- linux-2.6.17.11/fs/namespace.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/fs/namespace.c 2006-09-01 16:20:28.000000000 -0400 @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include #include "pnode.h" @@ -655,6 +657,8 @@ static int do_umount(struct vfsmount *mn DQUOT_OFF(sb->s_dqh); retval = do_remount_sb(sb, MS_RDONLY, NULL, 0); unlock_kernel(); + + gr_log_remount(mnt->mnt_devname, retval); } up_write(&sb->s_umount); return retval; @@ -675,6 +679,9 @@ static int do_umount(struct vfsmount *mn security_sb_umount_busy(mnt); up_write(&namespace_sem); release_mounts(&umount_list); + + gr_log_unmount(mnt->mnt_devname, retval); + return retval; } @@ -1425,6 +1432,11 @@ long do_mount(char *dev_name, char *dir_ if (retval) goto dput_out; + if (gr_handle_chroot_mount(nd.dentry, nd.mnt, dev_name)) { + retval = -EPERM; + goto dput_out; + } + if (flags & MS_REMOUNT) retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags, data_page, tag); @@ -1439,6 +1451,9 @@ long do_mount(char *dev_name, char *dir_ dev_name, data_page); dput_out: path_release(&nd); + + gr_log_mount(dev_name, dir_name, retval); + return retval; } @@ -1691,6 +1706,9 @@ asmlinkage long sys_pivot_root(const cha if (!capable(CAP_SYS_ADMIN)) return -EPERM; + if (gr_handle_chroot_pivot()) + return -EPERM; + lock_kernel(); error = __user_walk(new_root, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, diff -urNp linux-2.6.17.11/fs/open.c linux-2.6.17.11/fs/open.c --- linux-2.6.17.11/fs/open.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/fs/open.c 2006-09-01 16:20:28.000000000 -0400 @@ 
-31,6 +31,7 @@ #include #include #include +#include #include @@ -212,6 +213,9 @@ int do_truncate(struct dentry *dentry, l if (length < 0) return -EINVAL; + if (filp && !gr_acl_handle_truncate(dentry, filp->f_vfsmnt)) + return -EACCES; + newattrs.ia_size = length; newattrs.ia_valid = ATTR_SIZE | time_attrs; if (filp) { @@ -412,6 +416,12 @@ asmlinkage long sys_utime(char __user * (error = vfs_permission(&nd, MAY_WRITE)) != 0) goto dput_and_out; } + + if (!gr_acl_handle_utime(nd.dentry, nd.mnt)) { + error = -EACCES; + goto dput_and_out; + } + mutex_lock(&inode->i_mutex); error = notify_change(nd.dentry, &newattrs); mutex_unlock(&inode->i_mutex); @@ -465,6 +475,12 @@ long do_utimes(int dfd, char __user *fil (error = vfs_permission(&nd, MAY_WRITE)) != 0) goto dput_and_out; } + + if (!gr_acl_handle_utime(nd.dentry, nd.mnt)) { + error = -EACCES; + goto dput_and_out; + } + mutex_lock(&inode->i_mutex); error = notify_change(nd.dentry, &newattrs); mutex_unlock(&inode->i_mutex); @@ -532,6 +548,10 @@ asmlinkage long sys_faccessat(int dfd, c && (IS_RDONLY(nd.dentry->d_inode) || MNT_IS_RDONLY(nd.mnt)) && !special_file(nd.dentry->d_inode->i_mode)) res = -EROFS; + + if (!res && !gr_acl_handle_access(nd.dentry, nd.mnt, mode)) + res = -EACCES; + path_release(&nd); } @@ -560,6 +580,8 @@ asmlinkage long sys_chdir(const char __u if (error) goto dput_and_out; + gr_log_chdir(nd.dentry, nd.mnt); + set_fs_pwd(current->fs, nd.mnt, nd.dentry); dput_and_out: @@ -590,6 +612,13 @@ asmlinkage long sys_fchdir(unsigned int goto out_putf; error = file_permission(file, MAY_EXEC); + + if (!error && !gr_chroot_fchdir(dentry, mnt)) + error = -EPERM; + + if (!error) + gr_log_chdir(dentry, mnt); + if (!error) set_fs_pwd(current->fs, mnt, dentry); out_putf: @@ -615,8 +644,16 @@ asmlinkage long sys_chroot(const char __ if (!capable(CAP_SYS_CHROOT)) goto dput_and_out; + if (gr_handle_chroot_chroot(nd.dentry, nd.mnt)) + goto dput_and_out; + set_fs_root(current->fs, nd.mnt, nd.dentry); set_fs_altroot(); + + 
gr_handle_chroot_caps(current); + + gr_handle_chroot_chdir(nd.dentry, nd.mnt); + error = 0; dput_and_out: path_release(&nd); @@ -647,9 +684,22 @@ asmlinkage long sys_fchmod(unsigned int err = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) goto out_putf; + + if (!gr_acl_handle_fchmod(dentry, file->f_vfsmnt, mode)) { + err = -EACCES; + goto out_putf; + } + mutex_lock(&inode->i_mutex); if (mode == (mode_t) -1) mode = inode->i_mode; + + if (gr_handle_chroot_chmod(dentry, file->f_vfsmnt, mode)) { + err = -EPERM; + mutex_unlock(&inode->i_mutex); + goto out_putf; + } + newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; err = notify_change(dentry, &newattrs); @@ -682,9 +732,21 @@ asmlinkage long sys_fchmodat(int dfd, co if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) goto dput_and_out; + if (!gr_acl_handle_chmod(nd.dentry, nd.mnt, mode)) { + error = -EACCES; + goto dput_and_out; + }; + mutex_lock(&inode->i_mutex); if (mode == (mode_t) -1) mode = inode->i_mode; + + if (gr_handle_chroot_chmod(nd.dentry, nd.mnt, mode)) { + error = -EACCES; + mutex_unlock(&inode->i_mutex); + goto dput_and_out; + } + newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; error = notify_change(nd.dentry, &newattrs); @@ -719,6 +781,12 @@ static int chown_common(struct dentry * error = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) goto out; + + if (!gr_acl_handle_chown(dentry, mnt)) { + error = -EACCES; + goto out; + } + newattrs.ia_valid = ATTR_CTIME; if (user != (uid_t) -1) { newattrs.ia_valid |= ATTR_UID; @@ -1002,6 +1070,7 @@ repeat: * N.B. For clone tasks sharing a files structure, this test * will limit the total number of files that can be opened. 
*/ + gr_learn_resource(current, RLIMIT_NOFILE, fd, 0); if (fd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) goto out; diff -urNp linux-2.6.17.11/fs/pipe.c linux-2.6.17.11/fs/pipe.c --- linux-2.6.17.11/fs/pipe.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/fs/pipe.c 2006-09-01 16:20:28.000000000 -0400 @@ -842,7 +842,7 @@ void free_pipe_info(struct inode *inode) inode->i_pipe = NULL; } -static struct vfsmount *pipe_mnt __read_mostly; +struct vfsmount *pipe_mnt __read_mostly; static int pipefs_delete_dentry(struct dentry *dentry) { return 1; diff -urNp linux-2.6.17.11/fs/proc/array.c linux-2.6.17.11/fs/proc/array.c --- linux-2.6.17.11/fs/proc/array.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/fs/proc/array.c 2006-09-01 16:20:28.000000000 -0400 @@ -306,6 +306,21 @@ static inline char *task_cap(struct task (unsigned)vx_info_mbcap(vxi, p->cap_effective)); } +#if defined(CONFIG_PAX_NOEXEC) || defined(CONFIG_PAX_ASLR) +static inline char *task_pax(struct task_struct *p, char *buffer) +{ + if (p->mm) + return buffer + sprintf(buffer, "PaX:\t%c%c%c%c%c\n", + p->mm->pax_flags & MF_PAX_PAGEEXEC ? 'P' : 'p', + p->mm->pax_flags & MF_PAX_EMUTRAMP ? 'E' : 'e', + p->mm->pax_flags & MF_PAX_MPROTECT ? 'M' : 'm', + p->mm->pax_flags & MF_PAX_RANDMMAP ? 'R' : 'r', + p->mm->pax_flags & MF_PAX_SEGMEXEC ? 
'S' : 's'); + else + return buffer + sprintf(buffer, "PaX:\t-----\n"); +} +#endif + int proc_pid_status(struct task_struct *task, char * buffer) { char * orig = buffer; @@ -370,9 +385,20 @@ int proc_pid_status(struct task_struct * #if defined(CONFIG_S390) buffer = task_show_regs(task, buffer); #endif + +#if defined(CONFIG_PAX_NOEXEC) || defined(CONFIG_PAX_ASLR) + buffer = task_pax(task, buffer); +#endif + return buffer - orig; } +#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP +#define PAX_RAND_FLAGS(_mm) (_mm != NULL && _mm != current->mm && \ + (_mm->pax_flags & MF_PAX_RANDMMAP || \ + _mm->pax_flags & MF_PAX_SEGMEXEC)) +#endif + static int do_task_stat(struct task_struct *task, char * buffer, int whole) { unsigned long vsize, eip, esp, wchan = ~0UL; @@ -461,6 +487,19 @@ static int do_task_stat(struct task_stru stime = task->stime; } +#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP + if (PAX_RAND_FLAGS(mm)) { + eip = 0; + esp = 0; + wchan = 0; + } +#endif +#ifdef CONFIG_GRKERNSEC_HIDESYM + wchan = 0; + eip =0; + esp =0; +#endif + /* scale priority and nice values from timeslices to -20..20 */ /* to make it look like a "normal" Unix priority/nice value */ priority = task_prio(task); @@ -511,9 +550,15 @@ static int do_task_stat(struct task_stru vsize, mm ? get_mm_rss(mm) : 0, rsslim, +#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP + PAX_RAND_FLAGS(mm) ? 1 : (mm ? mm->start_code : 0), + PAX_RAND_FLAGS(mm) ? 1 : (mm ? mm->end_code : 0), + PAX_RAND_FLAGS(mm) ? 0 : (mm ? mm->start_stack : 0), +#else mm ? mm->start_code : 0, mm ? mm->end_code : 0, mm ? mm->start_stack : 0, +#endif esp, eip, /* The signal information here is obsolete. 
@@ -559,3 +604,14 @@ int proc_pid_statm(struct task_struct *t return sprintf(buffer,"%d %d %d %d %d %d %d\n", size, resident, shared, text, lib, data, 0); } + +#ifdef CONFIG_GRKERNSEC_PROC_IPADDR +int proc_pid_ipaddr(struct task_struct *task, char * buffer) +{ + int len; + + len = sprintf(buffer, "%u.%u.%u.%u\n", NIPQUAD(task->signal->curr_ip)); + return len; +} +#endif + diff -urNp linux-2.6.17.11/fs/proc/base.c linux-2.6.17.11/fs/proc/base.c --- linux-2.6.17.11/fs/proc/base.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/fs/proc/base.c 2006-09-01 16:20:28.000000000 -0400 @@ -75,6 +75,7 @@ #include #include #include +#include #include "internal.h" /* @@ -130,6 +131,9 @@ enum pid_directory_inos { #ifdef CONFIG_AUDITSYSCALL PROC_TGID_LOGINUID, #endif +#ifdef CONFIG_GRKERNSEC_PROC_IPADDR + PROC_TGID_IPADDR, +#endif PROC_TGID_OOM_SCORE, PROC_TGID_OOM_ADJUST, PROC_TID_INO, @@ -211,6 +215,9 @@ static struct pid_entry tgid_base_stuff[ E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO), E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO), E(PROC_TGID_MOUNTSTATS, "mountstats", S_IFREG|S_IRUSR), +#ifdef CONFIG_GRKERNSEC_PROC_IPADDR + E(PROC_TGID_IPADDR, "ipaddr", S_IFREG|S_IRUSR), +#endif #ifdef CONFIG_MMU E(PROC_TGID_SMAPS, "smaps", S_IFREG|S_IRUGO), #endif @@ -421,7 +428,7 @@ static int proc_task_root_link(struct in (task->parent == current && \ (task->ptrace & PT_PTRACED) && \ (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \ - security_ptrace(current,task) == 0)) + security_ptrace(current,task) == 0 && !gr_handle_proc_ptrace(task))) static int proc_pid_environ(struct task_struct *task, char * buffer) { @@ -607,9 +614,25 @@ static int proc_check_root(struct inode static int proc_permission(struct inode *inode, int mask, struct nameidata *nd) { + int ret = -EACCES; + struct task_struct *task; + if (generic_permission(inode, mask, NULL) != 0) - return -EACCES; - return proc_check_root(inode); + goto out; + + ret = proc_check_root(inode); + if (ret) + goto out; 
+ + task = proc_task(inode); + + if (!task) + goto out; + + ret = gr_acl_handle_procpidmem(task); + +out: + return ret; } static int proc_setattr(struct dentry *dentry, struct iattr *attr) @@ -1423,6 +1446,9 @@ static struct inode *proc_pid_make_inode } /* procfs is xid tagged */ inode->i_tag = (tag_t)vx_task_xid(task); +#ifdef CONFIG_GRKERNSEC_PROC_USERGROUP + inode->i_gid = CONFIG_GRKERNSEC_PROC_GID; +#endif security_task_to_inode(task, inode); out: @@ -1455,7 +1481,9 @@ static int pid_revalidate(struct dentry if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) { inode->i_uid = task->euid; +#ifndef CONFIG_GRKERNSEC_PROC_USERGROUP inode->i_gid = task->egid; +#endif } else { inode->i_uid = 0; inode->i_gid = 0; @@ -1791,6 +1819,12 @@ static struct dentry *proc_pident_lookup inode->i_fop = &proc_info_file_operations; ei->op.proc_read = proc_pid_status; break; +#ifdef CONFIG_GRKERNSEC_PROC_IPADDR + case PROC_TGID_IPADDR: + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_pid_ipaddr; + break; +#endif case PROC_TID_STAT: inode->i_fop = &proc_info_file_operations; ei->op.proc_read = proc_tid_stat; @@ -2140,11 +2174,34 @@ struct dentry *proc_pid_lookup(struct in if (!proc_pid_visible(task, tgid)) goto out_drop_task; + if (gr_check_hidden_task(task)) { + put_task_struct(task); + goto out; + } + +#if defined(CONFIG_GRKERNSEC_PROC_USER) || defined(CONFIG_GRKERNSEC_PROC_USERGROUP) + if (current->uid && (task->uid != current->uid) +#ifdef CONFIG_GRKERNSEC_PROC_USERGROUP + && !in_group_p(CONFIG_GRKERNSEC_PROC_GID) +#endif + ) { + put_task_struct(task); + goto out; + } +#endif + inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); if (!inode) goto out_drop_task; +#ifdef CONFIG_GRKERNSEC_PROC_USER + inode->i_mode = S_IFDIR|S_IRUSR|S_IXUSR; +#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) + inode->i_mode = S_IFDIR|S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP; + inode->i_gid = CONFIG_GRKERNSEC_PROC_GID; +#else 
inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; +#endif inode->i_op = &proc_tgid_base_inode_operations; inode->i_fop = &proc_tgid_base_operations; inode->i_flags|=S_IMMUTABLE; @@ -2243,6 +2300,9 @@ out: static int get_tgid_list(int index, unsigned long version, unsigned int *tgids) { struct task_struct *p; +#if defined(CONFIG_GRKERNSEC_PROC_USER) || defined(CONFIG_GRKERNSEC_PROC_USERGROUP) + struct task_struct *tmp = current; +#endif int nr_tgids = 0; index--; @@ -2267,6 +2327,18 @@ static int get_tgid_list(int index, unsi /* check for context visibility */ if (!proc_pid_visible(p, tgid)) continue; + if (gr_pid_is_chrooted(p)) + continue; + if (gr_check_hidden_task(p)) + continue; +#if defined(CONFIG_GRKERNSEC_PROC_USER) || defined(CONFIG_GRKERNSEC_PROC_USERGROUP) + if (tmp->uid && (p->uid != tmp->uid) +#ifdef CONFIG_GRKERNSEC_PROC_USERGROUP + && !in_group_p(CONFIG_GRKERNSEC_PROC_GID) +#endif + ) + continue; +#endif if (--index >= 0) continue; tgids[nr_tgids] = vx_map_tgid(tgid); diff -urNp linux-2.6.17.11/fs/proc/inode.c linux-2.6.17.11/fs/proc/inode.c --- linux-2.6.17.11/fs/proc/inode.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/fs/proc/inode.c 2006-09-01 16:20:28.000000000 -0400 @@ -169,7 +169,11 @@ struct inode *proc_get_inode(struct supe if (de->mode) { inode->i_mode = de->mode; inode->i_uid = de->uid; +#ifdef CONFIG_GRKERNSEC_PROC_USERGROUP + inode->i_gid = CONFIG_GRKERNSEC_PROC_GID; +#else inode->i_gid = de->gid; +#endif } if (de->vx_flags) PROC_I(inode)->vx_flags = de->vx_flags; diff -urNp linux-2.6.17.11/fs/proc/internal.h linux-2.6.17.11/fs/proc/internal.h --- linux-2.6.17.11/fs/proc/internal.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/fs/proc/internal.h 2006-09-01 16:20:28.000000000 -0400 @@ -36,6 +36,9 @@ extern int proc_tid_stat(struct task_str extern int proc_tgid_stat(struct task_struct *, char *); extern int proc_pid_status(struct task_struct *, char *); extern int proc_pid_statm(struct task_struct *, char *); +#ifdef 
CONFIG_GRKERNSEC_PROC_IPADDR +extern int proc_pid_ipaddr(struct task_struct*,char*); +#endif void free_proc_entry(struct proc_dir_entry *de); diff -urNp linux-2.6.17.11/fs/proc/proc_misc.c linux-2.6.17.11/fs/proc/proc_misc.c --- linux-2.6.17.11/fs/proc/proc_misc.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/fs/proc/proc_misc.c 2006-09-01 16:20:28.000000000 -0400 @@ -651,6 +651,8 @@ void create_seq_entry(char *name, mode_t void __init proc_misc_init(void) { struct proc_dir_entry *entry; + int gr_mode = 0; + static struct { char *name; int (*read_proc)(char*,char**,off_t,int,int*,void*); @@ -666,7 +668,9 @@ void __init proc_misc_init(void) {"stram", stram_read_proc}, #endif {"filesystems", filesystems_read_proc}, +#ifndef CONFIG_GRKERNSEC_PROC_ADD {"cmdline", cmdline_read_proc}, +#endif {"locks", locks_read_proc}, {"execdomains", execdomains_read_proc}, {NULL,} @@ -674,19 +678,36 @@ void __init proc_misc_init(void) for (p = simple_ones; p->name; p++) create_proc_read_entry(p->name, 0, NULL, p->read_proc, NULL); +#ifdef CONFIG_GRKERNSEC_PROC_USER + gr_mode = S_IRUSR; +#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) + gr_mode = S_IRUSR | S_IRGRP; +#endif +#ifdef CONFIG_GRKERNSEC_PROC_ADD + create_proc_read_entry("cmdline", gr_mode, NULL, &cmdline_read_proc, NULL); +#endif + proc_symlink("mounts", NULL, "self/mounts"); /* And now for trickier ones */ entry = create_proc_entry("kmsg", S_IRUSR, &proc_root); if (entry) entry->proc_fops = &proc_kmsg_operations; +#ifdef CONFIG_GRKERNSEC_PROC_ADD + create_seq_entry("devices", gr_mode, &proc_devinfo_operations); +#else create_seq_entry("devices", 0, &proc_devinfo_operations); +#endif create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations); create_seq_entry("partitions", 0, &proc_partitions_operations); create_seq_entry("stat", 0, &proc_stat_operations); create_seq_entry("interrupts", 0, &proc_interrupts_operations); #ifdef CONFIG_SLAB +#ifdef CONFIG_GRKERNSEC_PROC_ADD + 
create_seq_entry("slabinfo",S_IWUSR|gr_mode,&proc_slabinfo_operations); +#else create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations); +#endif #ifdef CONFIG_DEBUG_SLAB_LEAK create_seq_entry("slab_allocators", 0 ,&proc_slabstats_operations); #endif @@ -701,7 +722,7 @@ void __init proc_misc_init(void) #ifdef CONFIG_SCHEDSTATS create_seq_entry("schedstat", 0, &proc_schedstat_operations); #endif -#ifdef CONFIG_PROC_KCORE +#if defined(CONFIG_PROC_KCORE) && !defined(CONFIG_GRKERNSEC_PROC_ADD) proc_root_kcore = create_proc_entry("kcore", S_IRUSR, NULL); if (proc_root_kcore) { proc_root_kcore->proc_fops = &proc_kcore_operations; diff -urNp linux-2.6.17.11/fs/proc/root.c linux-2.6.17.11/fs/proc/root.c --- linux-2.6.17.11/fs/proc/root.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/fs/proc/root.c 2006-09-01 16:20:28.000000000 -0400 @@ -53,7 +53,13 @@ void __init proc_root_init(void) return; } proc_misc_init(); +#ifdef CONFIG_GRKERNSEC_PROC_USER + proc_net = proc_mkdir_mode("net", S_IRUSR | S_IXUSR, NULL); +#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) + proc_net = proc_mkdir_mode("net", S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP, NULL); +#else proc_net = proc_mkdir("net", NULL); +#endif proc_net_stat = proc_mkdir("net/stat", NULL); #ifdef CONFIG_SYSVIPC @@ -77,7 +83,15 @@ void __init proc_root_init(void) #ifdef CONFIG_PROC_DEVICETREE proc_device_tree_init(); #endif +#ifdef CONFIG_GRKERNSEC_PROC_ADD +#ifdef CONFIG_GRKERNSEC_PROC_USER + proc_bus = proc_mkdir_mode("bus", S_IRUSR | S_IXUSR, NULL); +#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) + proc_bus = proc_mkdir_mode("bus", S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP, NULL); +#endif +#else proc_bus = proc_mkdir("bus", NULL); +#endif proc_vx_init(); } diff -urNp linux-2.6.17.11/fs/proc/task_mmu.c linux-2.6.17.11/fs/proc/task_mmu.c --- linux-2.6.17.11/fs/proc/task_mmu.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/fs/proc/task_mmu.c 2006-09-01 16:20:28.000000000 -0400 @@ -43,15 +43,27 @@ char 
*task_mem(struct mm_struct *mm, cha "VmStk:\t%8lu kB\n" "VmExe:\t%8lu kB\n" "VmLib:\t%8lu kB\n" - "VmPTE:\t%8lu kB\n", - hiwater_vm << (PAGE_SHIFT-10), + "VmPTE:\t%8lu kB\n" + +#ifdef CONFIG_ARCH_TRACK_EXEC_LIMIT + "CsBase:\t%8lx\nCsLim:\t%8lx\n" +#endif + + ,hiwater_vm << (PAGE_SHIFT-10), (total_vm - mm->reserved_vm) << (PAGE_SHIFT-10), mm->locked_vm << (PAGE_SHIFT-10), hiwater_rss << (PAGE_SHIFT-10), total_rss << (PAGE_SHIFT-10), data << (PAGE_SHIFT-10), mm->stack_vm << (PAGE_SHIFT-10), text, lib, - (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10); + (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10 + +#ifdef CONFIG_ARCH_TRACK_EXEC_LIMIT + , mm->context.user_cs_base, mm->context.user_cs_limit +#endif + + ); + return buffer; } @@ -118,6 +130,12 @@ struct mem_size_stats unsigned long private_dirty; }; +#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP +#define PAX_RAND_FLAGS(_mm) (_mm != NULL && _mm != current->mm && \ + (_mm->pax_flags & MF_PAX_RANDMMAP || \ + _mm->pax_flags & MF_PAX_SEGMEXEC)) +#endif + static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss) { struct task_struct *task = m->private; @@ -136,13 +154,30 @@ static int show_map_internal(struct seq_ } seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", +#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP + PAX_RAND_FLAGS(mm) ? 0UL : vma->vm_start, + PAX_RAND_FLAGS(mm) ? 0UL : vma->vm_end, +#else vma->vm_start, vma->vm_end, +#endif + +#if 0 + flags & VM_MAYREAD ? flags & VM_READ ? 'R' : '+' : flags & VM_READ ? 'r' : '-', + flags & VM_MAYWRITE ? flags & VM_WRITE ? 'W' : '+' : flags & VM_WRITE ? 'w' : '-', + flags & VM_MAYEXEC ? flags & VM_EXEC ? 'X' : '+' : flags & VM_EXEC ? 'x' : '-', +#else flags & VM_READ ? 'r' : '-', flags & VM_WRITE ? 'w' : '-', flags & VM_EXEC ? 'x' : '-', +#endif + flags & VM_MAYSHARE ? 's' : 'p', +#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP + PAX_RAND_FLAGS(mm) ? 
0UL : vma->vm_pgoff << PAGE_SHIFT, +#else vma->vm_pgoff << PAGE_SHIFT, +#endif MAJOR(dev), MINOR(dev), ino, &len); /* @@ -154,13 +189,13 @@ static int show_map_internal(struct seq_ seq_path(m, file->f_vfsmnt, file->f_dentry, "\n"); } else { if (mm) { - if (vma->vm_start <= mm->start_brk && - vma->vm_end >= mm->brk) { + if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { pad_len_spaces(m, len); seq_puts(m, "[heap]"); } else { - if (vma->vm_start <= mm->start_stack && - vma->vm_end >= mm->start_stack) { + if ((vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP)) || + (vma->vm_start <= mm->start_stack && + vma->vm_end >= mm->start_stack)) { pad_len_spaces(m, len); seq_puts(m, "[stack]"); @@ -173,7 +208,25 @@ static int show_map_internal(struct seq_ } seq_putc(m, '\n'); - if (mss) + + if (mss) { +#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP + if (PAX_RAND_FLAGS(mm)) + seq_printf(m, + "Size: %8lu kB\n" + "Rss: %8lu kB\n" + "Shared_Clean: %8lu kB\n" + "Shared_Dirty: %8lu kB\n" + "Private_Clean: %8lu kB\n" + "Private_Dirty: %8lu kB\n", + 0UL, + 0UL, + 0UL, + 0UL, + 0UL, + 0UL); + else +#endif seq_printf(m, "Size: %8lu kB\n" "Rss: %8lu kB\n" @@ -187,6 +240,7 @@ static int show_map_internal(struct seq_ mss->shared_dirty >> 10, mss->private_clean >> 10, mss->private_dirty >> 10); + } if (m->count < m->size) /* vma is copied successfully */ m->version = (vma != get_gate_vma(task))? 
vma->vm_start: 0; diff -urNp linux-2.6.17.11/fs/readdir.c linux-2.6.17.11/fs/readdir.c --- linux-2.6.17.11/fs/readdir.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/fs/readdir.c 2006-09-01 16:20:28.000000000 -0400 @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include @@ -65,6 +67,7 @@ struct old_linux_dirent { struct readdir_callback { struct old_linux_dirent __user * dirent; + struct file * file; int result; }; @@ -76,6 +79,10 @@ static int fillonedir(void * __buf, cons if (buf->result) return -EINVAL; + + if (!gr_acl_handle_filldir(buf->file, name, namlen, ino)) + return 0; + buf->result++; dirent = buf->dirent; if (!access_ok(VERIFY_WRITE, dirent, @@ -107,6 +114,7 @@ asmlinkage long old_readdir(unsigned int buf.result = 0; buf.dirent = dirent; + buf.file = file; error = vfs_readdir(file, fillonedir, &buf); if (error >= 0) @@ -133,6 +141,7 @@ struct linux_dirent { struct getdents_callback { struct linux_dirent __user * current_dir; struct linux_dirent __user * previous; + struct file * file; int count; int error; }; @@ -147,6 +156,10 @@ static int filldir(void * __buf, const c buf->error = -EINVAL; /* only used if we fail.. */ if (reclen > buf->count) return -EINVAL; + + if (!gr_acl_handle_filldir(buf->file, name, namlen, ino)) + return 0; + dirent = buf->previous; if (dirent) { if (__put_user(offset, &dirent->d_off)) @@ -191,6 +204,7 @@ asmlinkage long sys_getdents(unsigned in buf.current_dir = dirent; buf.previous = NULL; + buf.file = file; buf.count = count; buf.error = 0; @@ -217,6 +231,7 @@ out: struct getdents_callback64 { struct linux_dirent64 __user * current_dir; struct linux_dirent64 __user * previous; + struct file * file; int count; int error; }; @@ -231,6 +246,10 @@ static int filldir64(void * __buf, const buf->error = -EINVAL; /* only used if we fail.. 
*/ if (reclen > buf->count) return -EINVAL; + + if (!gr_acl_handle_filldir(buf->file, name, namlen, ino)) + return 0; + dirent = buf->previous; if (dirent) { if (__put_user(offset, &dirent->d_off)) @@ -277,6 +296,7 @@ asmlinkage long sys_getdents64(unsigned buf.current_dir = dirent; buf.previous = NULL; + buf.file = file; buf.count = count; buf.error = 0; diff -urNp linux-2.6.17.11/fs/xfs/linux-2.6/xfs_file.c linux-2.6.17.11/fs/xfs/linux-2.6/xfs_file.c --- linux-2.6.17.11/fs/xfs/linux-2.6/xfs_file.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/fs/xfs/linux-2.6/xfs_file.c 2006-09-01 16:20:28.000000000 -0400 @@ -461,6 +461,11 @@ xfs_file_mmap( vattr_t vattr; int error; +#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_X86_32) + if ((vma->vm_mm->pax_flags & MF_PAX_PAGEEXEC) && !(vma->vm_flags & VM_EXEC)) + vma->vm_page_prot = __pgprot(pte_val(pte_exprotect(__pte(pgprot_val(vma->vm_page_prot))))); +#endif + vma->vm_ops = &xfs_file_vm_ops; #ifdef CONFIG_XFS_DMAPI diff -urNp linux-2.6.17.11/grsecurity/gracl_alloc.c linux-2.6.17.11/grsecurity/gracl_alloc.c --- linux-2.6.17.11/grsecurity/gracl_alloc.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/grsecurity/gracl_alloc.c 2006-09-01 16:20:28.000000000 -0400 @@ -0,0 +1,91 @@ +#include +#include +#include +#include +#include +#include + +static unsigned long alloc_stack_next = 1; +static unsigned long alloc_stack_size = 1; +static void **alloc_stack; + +static __inline__ int +alloc_pop(void) +{ + if (alloc_stack_next == 1) + return 0; + + kfree(alloc_stack[alloc_stack_next - 2]); + + alloc_stack_next--; + + return 1; +} + +static __inline__ void +alloc_push(void *buf) +{ + if (alloc_stack_next >= alloc_stack_size) + BUG(); + + alloc_stack[alloc_stack_next - 1] = buf; + + alloc_stack_next++; + + return; +} + +void * +acl_alloc(unsigned long len) +{ + void *ret; + + if (len > PAGE_SIZE) + BUG(); + + ret = kmalloc(len, GFP_KERNEL); + + if (ret) + alloc_push(ret); + + return ret; +} + +void 
+acl_free_all(void) +{ + if (gr_acl_is_enabled() || !alloc_stack) + return; + + while (alloc_pop()) ; + + if (alloc_stack) { + if ((alloc_stack_size * sizeof (void *)) <= PAGE_SIZE) + kfree(alloc_stack); + else + vfree(alloc_stack); + } + + alloc_stack = NULL; + alloc_stack_size = 1; + alloc_stack_next = 1; + + return; +} + +int +acl_alloc_stack_init(unsigned long size) +{ + if ((size * sizeof (void *)) <= PAGE_SIZE) + alloc_stack = + (void **) kmalloc(size * sizeof (void *), GFP_KERNEL); + else + alloc_stack = (void **) vmalloc(size * sizeof (void *)); + + alloc_stack_size = size; + + if (!alloc_stack) + return 0; + else + return 1; +} diff -urNp linux-2.6.17.11/grsecurity/gracl.c linux-2.6.17.11/grsecurity/gracl.c --- linux-2.6.17.11/grsecurity/gracl.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/grsecurity/gracl.c 2006-09-01 16:20:28.000000000 -0400 @@ -0,0 +1,3547 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static struct acl_role_db acl_role_set; +static struct name_db name_set; +static struct inodev_db inodev_set; + +/* for keeping track of userspace pointers used for subjects, so we + can share references in the kernel as well +*/ + +static struct dentry *real_root; +static struct vfsmount *real_root_mnt; + +static struct acl_subj_map_db subj_map_set; + +static struct acl_role_label *default_role; + +static u16 acl_sp_role_value; + +extern char *gr_shared_page[4]; +static DECLARE_MUTEX(gr_dev_sem); +rwlock_t gr_inode_lock = RW_LOCK_UNLOCKED; + +struct gr_arg *gr_usermode; + +static unsigned int gr_status = GR_STATUS_INIT; + +extern int chkpw(struct gr_arg *entry, unsigned char *salt, unsigned char *sum); +extern void gr_clear_learn_entries(void); + +#ifdef CONFIG_GRKERNSEC_RESLOG +extern void gr_log_resource(const struct 
task_struct *task, + const int res, const unsigned long wanted, const int gt); +#endif + +extern char * __d_path(struct dentry *dentry, struct vfsmount *vfsmnt, + struct dentry *root, struct vfsmount *rootmnt, + char *buffer, int buflen); + +unsigned char *gr_system_salt; +unsigned char *gr_system_sum; + +static struct sprole_pw **acl_special_roles = NULL; +static __u16 num_sprole_pws = 0; + +static struct acl_role_label *kernel_role = NULL; + +static unsigned int gr_auth_attempts = 0; +static unsigned long gr_auth_expires = 0UL; + +extern struct vfsmount *sock_mnt; +extern struct vfsmount *pipe_mnt; +extern struct vfsmount *shm_mnt; +static struct acl_object_label *fakefs_obj; + +extern int gr_init_uidset(void); +extern void gr_free_uidset(void); +extern void gr_remove_uid(uid_t uid); +extern int gr_find_uid(uid_t uid); + +__inline__ int +gr_acl_is_enabled(void) +{ + return (gr_status & GR_READY); +} + +char gr_roletype_to_char(void) +{ + switch (current->role->roletype & + (GR_ROLE_DEFAULT | GR_ROLE_USER | GR_ROLE_GROUP | + GR_ROLE_SPECIAL)) { + case GR_ROLE_DEFAULT: + return 'D'; + case GR_ROLE_USER: + return 'U'; + case GR_ROLE_GROUP: + return 'G'; + case GR_ROLE_SPECIAL: + return 'S'; + } + + return 'X'; +} + +__inline__ int +gr_acl_tpe_check(void) +{ + if (unlikely(!(gr_status & GR_READY))) + return 0; + if (current->role->roletype & GR_ROLE_TPE) + return 1; + else + return 0; +} + +int +gr_handle_rawio(const struct inode *inode) +{ +#ifdef CONFIG_GRKERNSEC_CHROOT_CAPS + if (inode && S_ISBLK(inode->i_mode) && + grsec_enable_chroot_caps && proc_is_chrooted(current) && + !capable(CAP_SYS_RAWIO)) + return 1; +#endif + return 0; +} + +static int +gr_streq(const char *a, const char *b, const unsigned int lena, const unsigned int lenb) +{ + int i; + unsigned long *l1; + unsigned long *l2; + unsigned char *c1; + unsigned char *c2; + int num_longs; + + if (likely(lena != lenb)) + return 0; + + l1 = (unsigned long *)a; + l2 = (unsigned long *)b; + + num_longs = lena / 
sizeof(unsigned long); + + for (i = num_longs; i--; l1++, l2++) { + if (unlikely(*l1 != *l2)) + return 0; + } + + c1 = (unsigned char *) l1; + c2 = (unsigned char *) l2; + + i = lena - (num_longs * sizeof(unsigned long)); + + for (; i--; c1++, c2++) { + if (unlikely(*c1 != *c2)) + return 0; + } + + return 1; +} + +static char * +gen_full_path(struct dentry *dentry, struct vfsmount *vfsmnt, + struct dentry *root, struct vfsmount *rootmnt, char *buf, int buflen) +{ + char *end = buf + buflen; + char *retval; + int namelen = 0; + + *--end = '\0'; + + retval = end - 1; + *retval = '/'; + + if (dentry == root && vfsmnt == rootmnt) + return retval; + if (dentry != vfsmnt->mnt_root && !IS_ROOT(dentry)) { + namelen = strlen(dentry->d_name.name); + buflen -= namelen; + if (buflen < 2) + goto err; + if (dentry->d_parent != root || vfsmnt != rootmnt) + buflen--; + } + + retval = __d_path(dentry->d_parent, vfsmnt, root, rootmnt, buf, buflen); + if (unlikely(IS_ERR(retval))) +err: + retval = strcpy(buf, ""); + else if (namelen != 0) { + end = buf + buflen - 1; // accounts for null termination + if (dentry->d_parent != root || vfsmnt != rootmnt) + *end++ = '/'; // accounted for above with buflen-- + memcpy(end, dentry->d_name.name, namelen); + } + + return retval; +} + +static char * +__d_real_path(const struct dentry *dentry, const struct vfsmount *vfsmnt, + char *buf, int buflen) +{ + char *res; + + /* we can use real_root, real_root_mnt, because this is only called + by the RBAC system */ + res = gen_full_path((struct dentry *)dentry, (struct vfsmount *)vfsmnt, real_root, real_root_mnt, buf, buflen); + + return res; +} + +static char * +d_real_path(const struct dentry *dentry, const struct vfsmount *vfsmnt, + char *buf, int buflen) +{ + char *res; + struct dentry *root; + struct vfsmount *rootmnt; + + /* we can't use real_root, real_root_mnt, because they belong only to the RBAC system */ + read_lock(&child_reaper->fs->lock); + root = dget(child_reaper->fs->root); + rootmnt = 
mntget(child_reaper->fs->rootmnt); + read_unlock(&child_reaper->fs->lock); + + spin_lock(&dcache_lock); + res = gen_full_path((struct dentry *)dentry, (struct vfsmount *)vfsmnt, root, rootmnt, buf, buflen); + spin_unlock(&dcache_lock); + + dput(root); + mntput(rootmnt); + return res; +} + +static char * +gr_to_filename_rbac(const struct dentry *dentry, const struct vfsmount *mnt) +{ + char *ret; + spin_lock(&dcache_lock); + ret = __d_real_path(dentry, mnt, per_cpu_ptr(gr_shared_page[0],smp_processor_id()), + PAGE_SIZE); + spin_unlock(&dcache_lock); + return ret; +} + +char * +gr_to_filename_nolock(const struct dentry *dentry, const struct vfsmount *mnt) +{ + return __d_real_path(dentry, mnt, per_cpu_ptr(gr_shared_page[0],smp_processor_id()), + PAGE_SIZE); +} + +char * +gr_to_filename(const struct dentry *dentry, const struct vfsmount *mnt) +{ + return d_real_path(dentry, mnt, per_cpu_ptr(gr_shared_page[0], smp_processor_id()), + PAGE_SIZE); +} + +char * +gr_to_filename1(const struct dentry *dentry, const struct vfsmount *mnt) +{ + return d_real_path(dentry, mnt, per_cpu_ptr(gr_shared_page[1], smp_processor_id()), + PAGE_SIZE); +} + +char * +gr_to_filename2(const struct dentry *dentry, const struct vfsmount *mnt) +{ + return d_real_path(dentry, mnt, per_cpu_ptr(gr_shared_page[2], smp_processor_id()), + PAGE_SIZE); +} + +char * +gr_to_filename3(const struct dentry *dentry, const struct vfsmount *mnt) +{ + return d_real_path(dentry, mnt, per_cpu_ptr(gr_shared_page[3], smp_processor_id()), + PAGE_SIZE); +} + +__inline__ __u32 +to_gr_audit(const __u32 reqmode) +{ + /* masks off auditable permission flags, then shifts them to create + auditing flags, and adds the special case of append auditing if + we're requesting write */ + return (((reqmode & GR_AUDIT_READ) << 10) | ((reqmode & GR_WRITE) ? 
GR_AUDIT_APPEND : 0)); +} + +struct acl_subject_label * +lookup_subject_map(const struct acl_subject_label *userp) +{ + unsigned int index = shash(userp, subj_map_set.s_size); + struct subject_map *match; + + match = subj_map_set.s_hash[index]; + + while (match && match->user != userp) + match = match->next; + + if (match != NULL) + return match->kernel; + else + return NULL; +} + +static void +insert_subj_map_entry(struct subject_map *subjmap) +{ + unsigned int index = shash(subjmap->user, subj_map_set.s_size); + struct subject_map **curr; + + subjmap->prev = NULL; + + curr = &subj_map_set.s_hash[index]; + if (*curr != NULL) + (*curr)->prev = subjmap; + + subjmap->next = *curr; + *curr = subjmap; + + return; +} + +static struct acl_role_label * +lookup_acl_role_label(const struct task_struct *task, const uid_t uid, + const gid_t gid) +{ + unsigned int index = rhash(uid, GR_ROLE_USER, acl_role_set.r_size); + struct acl_role_label *match; + struct role_allowed_ip *ipp; + unsigned int x; + + match = acl_role_set.r_hash[index]; + + while (match) { + if ((match->roletype & (GR_ROLE_DOMAIN | GR_ROLE_USER)) == (GR_ROLE_DOMAIN | GR_ROLE_USER)) { + for (x = 0; x < match->domain_child_num; x++) { + if (match->domain_children[x] == uid) + goto found; + } + } else if (match->uidgid == uid && match->roletype & GR_ROLE_USER) + break; + match = match->next; + } +found: + if (match == NULL) { + try_group: + index = rhash(gid, GR_ROLE_GROUP, acl_role_set.r_size); + match = acl_role_set.r_hash[index]; + + while (match) { + if ((match->roletype & (GR_ROLE_DOMAIN | GR_ROLE_GROUP)) == (GR_ROLE_DOMAIN | GR_ROLE_GROUP)) { + for (x = 0; x < match->domain_child_num; x++) { + if (match->domain_children[x] == gid) + goto found2; + } + } else if (match->uidgid == gid && match->roletype & GR_ROLE_GROUP) + break; + match = match->next; + } +found2: + if (match == NULL) + match = default_role; + if (match->allowed_ips == NULL) + return match; + else { + for (ipp = match->allowed_ips; ipp; ipp = 
ipp->next) { + if (likely + ((ntohl(task->signal->curr_ip) & ipp->netmask) == + (ntohl(ipp->addr) & ipp->netmask))) + return match; + } + match = default_role; + } + } else if (match->allowed_ips == NULL) { + return match; + } else { + for (ipp = match->allowed_ips; ipp; ipp = ipp->next) { + if (likely + ((ntohl(task->signal->curr_ip) & ipp->netmask) == + (ntohl(ipp->addr) & ipp->netmask))) + return match; + } + goto try_group; + } + + return match; +} + +struct acl_subject_label * +lookup_acl_subj_label(const ino_t ino, const dev_t dev, + const struct acl_role_label *role) +{ + unsigned int index = fhash(ino, dev, role->subj_hash_size); + struct acl_subject_label *match; + + match = role->subj_hash[index]; + + while (match && (match->inode != ino || match->device != dev || + (match->mode & GR_DELETED))) { + match = match->next; + } + + if (match && !(match->mode & GR_DELETED)) + return match; + else + return NULL; +} + +static struct acl_object_label * +lookup_acl_obj_label(const ino_t ino, const dev_t dev, + const struct acl_subject_label *subj) +{ + unsigned int index = fhash(ino, dev, subj->obj_hash_size); + struct acl_object_label *match; + + match = subj->obj_hash[index]; + + while (match && (match->inode != ino || match->device != dev || + (match->mode & GR_DELETED))) { + match = match->next; + } + + if (match && !(match->mode & GR_DELETED)) + return match; + else + return NULL; +} + +static struct acl_object_label * +lookup_acl_obj_label_create(const ino_t ino, const dev_t dev, + const struct acl_subject_label *subj) +{ + unsigned int index = fhash(ino, dev, subj->obj_hash_size); + struct acl_object_label *match; + + match = subj->obj_hash[index]; + + while (match && (match->inode != ino || match->device != dev || + !(match->mode & GR_DELETED))) { + match = match->next; + } + + if (match && (match->mode & GR_DELETED)) + return match; + + match = subj->obj_hash[index]; + + while (match && (match->inode != ino || match->device != dev || + (match->mode & 
GR_DELETED))) { + match = match->next; + } + + if (match && !(match->mode & GR_DELETED)) + return match; + else + return NULL; +} + +static struct name_entry * +lookup_name_entry(const char *name) +{ + unsigned int len = strlen(name); + unsigned int key = full_name_hash(name, len); + unsigned int index = key % name_set.n_size; + struct name_entry *match; + + match = name_set.n_hash[index]; + + while (match && (match->key != key || !gr_streq(match->name, name, match->len, len))) + match = match->next; + + return match; +} + +static struct inodev_entry * +lookup_inodev_entry(const ino_t ino, const dev_t dev) +{ + unsigned int index = fhash(ino, dev, inodev_set.i_size); + struct inodev_entry *match; + + match = inodev_set.i_hash[index]; + + while (match && (match->nentry->inode != ino || match->nentry->device != dev)) + match = match->next; + + return match; +} + +static void +insert_inodev_entry(struct inodev_entry *entry) +{ + unsigned int index = fhash(entry->nentry->inode, entry->nentry->device, + inodev_set.i_size); + struct inodev_entry **curr; + + entry->prev = NULL; + + curr = &inodev_set.i_hash[index]; + if (*curr != NULL) + (*curr)->prev = entry; + + entry->next = *curr; + *curr = entry; + + return; +} + +static void +__insert_acl_role_label(struct acl_role_label *role, uid_t uidgid) +{ + unsigned int index = + rhash(uidgid, role->roletype & (GR_ROLE_USER | GR_ROLE_GROUP), acl_role_set.r_size); + struct acl_role_label **curr; + + role->prev = NULL; + + curr = &acl_role_set.r_hash[index]; + if (*curr != NULL) + (*curr)->prev = role; + + role->next = *curr; + *curr = role; + + return; +} + +static void +insert_acl_role_label(struct acl_role_label *role) +{ + int i; + + if (role->roletype & GR_ROLE_DOMAIN) { + for (i = 0; i < role->domain_child_num; i++) + __insert_acl_role_label(role, role->domain_children[i]); + } else + __insert_acl_role_label(role, role->uidgid); +} + +static int +insert_name_entry(char *name, const ino_t inode, const dev_t device) +{ + 
struct name_entry **curr, *nentry; + struct inodev_entry *ientry; + unsigned int len = strlen(name); + unsigned int key = full_name_hash(name, len); + unsigned int index = key % name_set.n_size; + + curr = &name_set.n_hash[index]; + + while (*curr && ((*curr)->key != key || !gr_streq((*curr)->name, name, (*curr)->len, len))) + curr = &((*curr)->next); + + if (*curr != NULL) + return 1; + + nentry = acl_alloc(sizeof (struct name_entry)); + if (nentry == NULL) + return 0; + ientry = acl_alloc(sizeof (struct inodev_entry)); + if (ientry == NULL) + return 0; + ientry->nentry = nentry; + + nentry->key = key; + nentry->name = name; + nentry->inode = inode; + nentry->device = device; + nentry->len = len; + + nentry->prev = NULL; + curr = &name_set.n_hash[index]; + if (*curr != NULL) + (*curr)->prev = nentry; + nentry->next = *curr; + *curr = nentry; + + /* insert us into the table searchable by inode/dev */ + insert_inodev_entry(ientry); + + return 1; +} + +static void +insert_acl_obj_label(struct acl_object_label *obj, + struct acl_subject_label *subj) +{ + unsigned int index = + fhash(obj->inode, obj->device, subj->obj_hash_size); + struct acl_object_label **curr; + + + obj->prev = NULL; + + curr = &subj->obj_hash[index]; + if (*curr != NULL) + (*curr)->prev = obj; + + obj->next = *curr; + *curr = obj; + + return; +} + +static void +insert_acl_subj_label(struct acl_subject_label *obj, + struct acl_role_label *role) +{ + unsigned int index = fhash(obj->inode, obj->device, role->subj_hash_size); + struct acl_subject_label **curr; + + obj->prev = NULL; + + curr = &role->subj_hash[index]; + if (*curr != NULL) + (*curr)->prev = obj; + + obj->next = *curr; + *curr = obj; + + return; +} + +/* allocating chained hash tables, so optimal size is where lambda ~ 1 */ + +static void * +create_table(__u32 * len, int elementsize) +{ + unsigned int table_sizes[] = { + 7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, + 32749, 65521, 131071, 262139, 524287, 1048573, 2097143, 
+ 4194301, 8388593, 16777213, 33554393, 67108859, 134217689, + 268435399, 536870909, 1073741789, 2147483647 + }; + void *newtable = NULL; + unsigned int pwr = 0; + unsigned long bytes; + + while ((pwr < ((sizeof (table_sizes) / sizeof (table_sizes[0])) - 1)) && + table_sizes[pwr] <= *len) + pwr++; + + if (table_sizes[pwr] <= *len) + return newtable; + + /* size the table in unsigned long: the largest primes multiplied by the + element size wrap a 32-bit product, which would allocate a table + smaller than the *len reported back to the caller */ + if (elementsize <= 0 || + table_sizes[pwr] > (~0UL) / (unsigned long) elementsize) + return newtable; + + bytes = (unsigned long) table_sizes[pwr] * (unsigned long) elementsize; + + if (bytes <= PAGE_SIZE) + newtable = + kmalloc(bytes, GFP_KERNEL); + else + newtable = vmalloc(bytes); + + *len = table_sizes[pwr]; + + return newtable; +} + +static int +init_variables(const struct gr_arg *arg) +{ + unsigned int stacksize; + + subj_map_set.s_size = arg->role_db.num_subjects; + acl_role_set.r_size = arg->role_db.num_roles + arg->role_db.num_domain_children; + name_set.n_size = arg->role_db.num_objects; + inodev_set.i_size = arg->role_db.num_objects; + + if (!subj_map_set.s_size || !acl_role_set.r_size || + !name_set.n_size || !inodev_set.i_size) + return 1; + + if (!gr_init_uidset()) + return 1; + + /* set up the stack that holds allocation info */ + + stacksize = arg->role_db.num_pointers + 5; + + if (!acl_alloc_stack_init(stacksize)) + return 1; + + /* grab reference for the real root dentry and vfsmount */ + read_lock(&child_reaper->fs->lock); + real_root_mnt = mntget(child_reaper->fs->rootmnt); + real_root = dget(child_reaper->fs->root); + read_unlock(&child_reaper->fs->lock); + + fakefs_obj = acl_alloc(sizeof(struct acl_object_label)); + if (fakefs_obj == NULL) + return 1; + fakefs_obj->mode = GR_FIND | GR_READ | GR_WRITE | GR_EXEC; + + subj_map_set.s_hash = + (struct subject_map **) create_table(&subj_map_set.s_size, sizeof(void *)); + acl_role_set.r_hash = + (struct acl_role_label **) create_table(&acl_role_set.r_size, sizeof(void *)); + name_set.n_hash = (struct name_entry **) create_table(&name_set.n_size, sizeof(void *)); + inodev_set.i_hash = + (struct inodev_entry **) create_table(&inodev_set.i_size, sizeof(void *)); + + if 
(!subj_map_set.s_hash || !acl_role_set.r_hash || + !name_set.n_hash || !inodev_set.i_hash) + return 1; + + memset(subj_map_set.s_hash, 0, + sizeof(struct subject_map *) * subj_map_set.s_size); + memset(acl_role_set.r_hash, 0, + sizeof (struct acl_role_label *) * acl_role_set.r_size); + memset(name_set.n_hash, 0, + sizeof (struct name_entry *) * name_set.n_size); + memset(inodev_set.i_hash, 0, + sizeof (struct inodev_entry *) * inodev_set.i_size); + + return 0; +} + +/* free information not needed after startup + currently contains user->kernel pointer mappings for subjects + each bucket is a chain linked through ->next (see insert_subj_map_entry), + so every chain is walked to its end instead of freeing only its head +*/ + +static void +free_init_variables(void) +{ + struct subject_map *entry, *next; + __u32 i; + + if (subj_map_set.s_hash) { + for (i = 0; i < subj_map_set.s_size; i++) { + entry = subj_map_set.s_hash[i]; + while (entry) { + /* fetch the link before the node is released */ + next = entry->next; + kfree(entry); + entry = next; + } + subj_map_set.s_hash[i] = NULL; + } + + /* same kmalloc/vmalloc size split as the other hash tables */ + if ((subj_map_set.s_size * sizeof (struct subject_map *)) <= + PAGE_SIZE) + kfree(subj_map_set.s_hash); + else + vfree(subj_map_set.s_hash); + } + + return; +} + +static void +free_variables(void) +{ + struct acl_subject_label *s; + struct acl_role_label *r; + struct task_struct *task, *task2; + unsigned int i, x; + + gr_clear_learn_entries(); + + read_lock(&tasklist_lock); + do_each_thread(task2, task) { + task->acl_sp_role = 0; + task->acl_role_id = 0; + task->acl = NULL; + task->role = NULL; + } while_each_thread(task2, task); + read_unlock(&tasklist_lock); + + /* release the reference to the real root dentry and vfsmount */ + if (real_root) + dput(real_root); + real_root = NULL; + if (real_root_mnt) + mntput(real_root_mnt); + real_root_mnt = NULL; + + /* free all object hash tables */ + + FOR_EACH_ROLE_START(r, i) + if (r->subj_hash == NULL) + break; + FOR_EACH_SUBJECT_START(r, s, x) + if (s->obj_hash == NULL) + break; + if ((s->obj_hash_size * sizeof (struct acl_object_label *)) <= PAGE_SIZE) + kfree(s->obj_hash); + else + vfree(s->obj_hash); + FOR_EACH_SUBJECT_END(s, x) + FOR_EACH_NESTED_SUBJECT_START(r, s) + if (s->obj_hash == NULL) + break; + if 
((s->obj_hash_size * sizeof (struct acl_object_label *)) <= PAGE_SIZE) + kfree(s->obj_hash); + else + vfree(s->obj_hash); + FOR_EACH_NESTED_SUBJECT_END(s) + if ((r->subj_hash_size * sizeof (struct acl_subject_label *)) <= PAGE_SIZE) + kfree(r->subj_hash); + else + vfree(r->subj_hash); + r->subj_hash = NULL; + FOR_EACH_ROLE_END(r,i) + + acl_free_all(); + + if (acl_role_set.r_hash) { + if ((acl_role_set.r_size * sizeof (struct acl_role_label *)) <= + PAGE_SIZE) + kfree(acl_role_set.r_hash); + else + vfree(acl_role_set.r_hash); + } + if (name_set.n_hash) { + if ((name_set.n_size * sizeof (struct name_entry *)) <= + PAGE_SIZE) + kfree(name_set.n_hash); + else + vfree(name_set.n_hash); + } + + if (inodev_set.i_hash) { + if ((inodev_set.i_size * sizeof (struct inodev_entry *)) <= + PAGE_SIZE) + kfree(inodev_set.i_hash); + else + vfree(inodev_set.i_hash); + } + + gr_free_uidset(); + + memset(&name_set, 0, sizeof (struct name_db)); + memset(&inodev_set, 0, sizeof (struct inodev_db)); + memset(&acl_role_set, 0, sizeof (struct acl_role_db)); + memset(&subj_map_set, 0, sizeof (struct acl_subj_map_db)); + + default_role = NULL; + + return; +} + +static __u32 +count_user_objs(struct acl_object_label *userp) +{ + struct acl_object_label o_tmp; + __u32 num = 0; + + while (userp) { + if (copy_from_user(&o_tmp, userp, + sizeof (struct acl_object_label))) + break; + + userp = o_tmp.prev; + num++; + } + + return num; +} + +static struct acl_subject_label * +do_copy_user_subj(struct acl_subject_label *userp, struct acl_role_label *role); + +static int +copy_user_glob(struct acl_object_label *obj) +{ + struct acl_object_label *g_tmp, **guser; + unsigned int len; + char *tmp; + + if (obj->globbed == NULL) + return 0; + + guser = &obj->globbed; + while (*guser) { + g_tmp = (struct acl_object_label *) + acl_alloc(sizeof (struct acl_object_label)); + if (g_tmp == NULL) + return -ENOMEM; + + if (copy_from_user(g_tmp, *guser, + sizeof (struct acl_object_label))) + return -EFAULT; + + len = 
strnlen_user(g_tmp->filename, PATH_MAX); + + if (!len || len >= PATH_MAX) + return -EINVAL; + + if ((tmp = (char *) acl_alloc(len)) == NULL) + return -ENOMEM; + + if (copy_from_user(tmp, g_tmp->filename, len)) + return -EFAULT; + + g_tmp->filename = tmp; + + *guser = g_tmp; + guser = &(g_tmp->next); + } + + return 0; +} + +static int +copy_user_objs(struct acl_object_label *userp, struct acl_subject_label *subj, + struct acl_role_label *role) +{ + struct acl_object_label *o_tmp; + unsigned int len; + int ret; + char *tmp; + + while (userp) { + if ((o_tmp = (struct acl_object_label *) + acl_alloc(sizeof (struct acl_object_label))) == NULL) + return -ENOMEM; + + if (copy_from_user(o_tmp, userp, + sizeof (struct acl_object_label))) + return -EFAULT; + + userp = o_tmp->prev; + + len = strnlen_user(o_tmp->filename, PATH_MAX); + + if (!len || len >= PATH_MAX) + return -EINVAL; + + if ((tmp = (char *) acl_alloc(len)) == NULL) + return -ENOMEM; + + if (copy_from_user(tmp, o_tmp->filename, len)) + return -EFAULT; + + o_tmp->filename = tmp; + + insert_acl_obj_label(o_tmp, subj); + if (!insert_name_entry(o_tmp->filename, o_tmp->inode, + o_tmp->device)) + return -ENOMEM; + + ret = copy_user_glob(o_tmp); + if (ret) + return ret; + + if (o_tmp->nested) { + o_tmp->nested = do_copy_user_subj(o_tmp->nested, role); + if (IS_ERR(o_tmp->nested)) + return PTR_ERR(o_tmp->nested); + + /* insert into nested subject list */ + o_tmp->nested->next = role->hash->first; + role->hash->first = o_tmp->nested; + } + } + + return 0; +} + +static __u32 +count_user_subjs(struct acl_subject_label *userp) +{ + struct acl_subject_label s_tmp; + __u32 num = 0; + + while (userp) { + if (copy_from_user(&s_tmp, userp, + sizeof (struct acl_subject_label))) + break; + + userp = s_tmp.prev; + /* do not count nested subjects against this count, since + they are not included in the hash table, but are + attached to objects. 
We have already counted + the subjects in userspace for the allocation + stack + */ + if (!(s_tmp.mode & GR_NESTED)) + num++; + } + + return num; +} + +static int +copy_user_allowedips(struct acl_role_label *rolep) +{ + struct role_allowed_ip *ruserip, *rtmp = NULL, *rlast; + + ruserip = rolep->allowed_ips; + + while (ruserip) { + rlast = rtmp; + + if ((rtmp = (struct role_allowed_ip *) + acl_alloc(sizeof (struct role_allowed_ip))) == NULL) + return -ENOMEM; + + if (copy_from_user(rtmp, ruserip, + sizeof (struct role_allowed_ip))) + return -EFAULT; + + ruserip = rtmp->prev; + + if (!rlast) { + rtmp->prev = NULL; + rolep->allowed_ips = rtmp; + } else { + rlast->next = rtmp; + rtmp->prev = rlast; + } + + if (!ruserip) + rtmp->next = NULL; + } + + return 0; +} + +static int +copy_user_transitions(struct acl_role_label *rolep) +{ + struct role_transition *rusertp, *rtmp = NULL, *rlast; + + unsigned int len; + char *tmp; + + rusertp = rolep->transitions; + + while (rusertp) { + rlast = rtmp; + + if ((rtmp = (struct role_transition *) + acl_alloc(sizeof (struct role_transition))) == NULL) + return -ENOMEM; + + if (copy_from_user(rtmp, rusertp, + sizeof (struct role_transition))) + return -EFAULT; + + rusertp = rtmp->prev; + + len = strnlen_user(rtmp->rolename, GR_SPROLE_LEN); + + if (!len || len >= GR_SPROLE_LEN) + return -EINVAL; + + if ((tmp = (char *) acl_alloc(len)) == NULL) + return -ENOMEM; + + if (copy_from_user(tmp, rtmp->rolename, len)) + return -EFAULT; + + rtmp->rolename = tmp; + + if (!rlast) { + rtmp->prev = NULL; + rolep->transitions = rtmp; + } else { + rlast->next = rtmp; + rtmp->prev = rlast; + } + + if (!rusertp) + rtmp->next = NULL; + } + + return 0; +} + +static struct acl_subject_label * +do_copy_user_subj(struct acl_subject_label *userp, struct acl_role_label *role) +{ + struct acl_subject_label *s_tmp = NULL, *s_tmp2; + unsigned int len; + char *tmp; + __u32 num_objs; + struct acl_ip_label **i_tmp, *i_utmp2; + struct gr_hash_struct ghash; + struct 
subject_map *subjmap; + unsigned int i_num; + int err; + + s_tmp = lookup_subject_map(userp); + + /* we've already copied this subject into the kernel, just return + the reference to it, and don't copy it over again + */ + if (s_tmp) + return(s_tmp); + + if ((s_tmp = (struct acl_subject_label *) + acl_alloc(sizeof (struct acl_subject_label))) == NULL) + return ERR_PTR(-ENOMEM); + + subjmap = (struct subject_map *)kmalloc(sizeof (struct subject_map), GFP_KERNEL); + if (subjmap == NULL) + return ERR_PTR(-ENOMEM); + + subjmap->user = userp; + subjmap->kernel = s_tmp; + insert_subj_map_entry(subjmap); + + if (copy_from_user(s_tmp, userp, + sizeof (struct acl_subject_label))) + return ERR_PTR(-EFAULT); + + len = strnlen_user(s_tmp->filename, PATH_MAX); + + if (!len || len >= PATH_MAX) + return ERR_PTR(-EINVAL); + + if ((tmp = (char *) acl_alloc(len)) == NULL) + return ERR_PTR(-ENOMEM); + + if (copy_from_user(tmp, s_tmp->filename, len)) + return ERR_PTR(-EFAULT); + + s_tmp->filename = tmp; + + if (!strcmp(s_tmp->filename, "/")) + role->root_label = s_tmp; + + if (copy_from_user(&ghash, s_tmp->hash, sizeof(struct gr_hash_struct))) + return ERR_PTR(-EFAULT); + + /* copy user and group transition tables */ + + if (s_tmp->user_trans_num) { + uid_t *uidlist; + + uidlist = (uid_t *)acl_alloc(s_tmp->user_trans_num * sizeof(uid_t)); + if (uidlist == NULL) + return ERR_PTR(-ENOMEM); + if (copy_from_user(uidlist, s_tmp->user_transitions, s_tmp->user_trans_num * sizeof(uid_t))) + return ERR_PTR(-EFAULT); + + s_tmp->user_transitions = uidlist; + } + + if (s_tmp->group_trans_num) { + gid_t *gidlist; + + gidlist = (gid_t *)acl_alloc(s_tmp->group_trans_num * sizeof(gid_t)); + if (gidlist == NULL) + return ERR_PTR(-ENOMEM); + if (copy_from_user(gidlist, s_tmp->group_transitions, s_tmp->group_trans_num * sizeof(gid_t))) + return ERR_PTR(-EFAULT); + + s_tmp->group_transitions = gidlist; + } + + /* set up object hash table */ + num_objs = count_user_objs(ghash.first); + + 
s_tmp->obj_hash_size = num_objs; + s_tmp->obj_hash = + (struct acl_object_label **) + create_table(&(s_tmp->obj_hash_size), sizeof(void *)); + + if (!s_tmp->obj_hash) + return ERR_PTR(-ENOMEM); + + memset(s_tmp->obj_hash, 0, + s_tmp->obj_hash_size * + sizeof (struct acl_object_label *)); + + /* add in objects */ + err = copy_user_objs(ghash.first, s_tmp, role); + + if (err) + return ERR_PTR(err); + + /* set pointer for parent subject */ + if (s_tmp->parent_subject) { + s_tmp2 = do_copy_user_subj(s_tmp->parent_subject, role); + + if (IS_ERR(s_tmp2)) + return s_tmp2; + + s_tmp->parent_subject = s_tmp2; + } + + /* add in ip acls */ + + if (!s_tmp->ip_num) { + s_tmp->ips = NULL; + goto insert; + } + + i_tmp = + (struct acl_ip_label **) acl_alloc(s_tmp->ip_num * + sizeof (struct + acl_ip_label *)); + + if (!i_tmp) + return ERR_PTR(-ENOMEM); + + for (i_num = 0; i_num < s_tmp->ip_num; i_num++) { + *(i_tmp + i_num) = + (struct acl_ip_label *) + acl_alloc(sizeof (struct acl_ip_label)); + if (!*(i_tmp + i_num)) + return ERR_PTR(-ENOMEM); + + if (copy_from_user + (&i_utmp2, s_tmp->ips + i_num, + sizeof (struct acl_ip_label *))) + return ERR_PTR(-EFAULT); + + if (copy_from_user + (*(i_tmp + i_num), i_utmp2, + sizeof (struct acl_ip_label))) + return ERR_PTR(-EFAULT); + + if ((*(i_tmp + i_num))->iface == NULL) + continue; + + len = strnlen_user((*(i_tmp + i_num))->iface, IFNAMSIZ); + if (!len || len >= IFNAMSIZ) + return ERR_PTR(-EINVAL); + tmp = acl_alloc(len); + if (tmp == NULL) + return ERR_PTR(-ENOMEM); + if (copy_from_user(tmp, (*(i_tmp + i_num))->iface, len)) + return ERR_PTR(-EFAULT); + (*(i_tmp + i_num))->iface = tmp; + } + + s_tmp->ips = i_tmp; + +insert: + if (!insert_name_entry(s_tmp->filename, s_tmp->inode, + s_tmp->device)) + return ERR_PTR(-ENOMEM); + + return s_tmp; +} + +static int +copy_user_subjs(struct acl_subject_label *userp, struct acl_role_label *role) +{ + struct acl_subject_label s_pre; + struct acl_subject_label * ret; + int err; + + while (userp) { + 
if (copy_from_user(&s_pre, userp, + sizeof (struct acl_subject_label))) + return -EFAULT; + + /* do not add nested subjects here, add + while parsing objects + */ + + if (s_pre.mode & GR_NESTED) { + userp = s_pre.prev; + continue; + } + + ret = do_copy_user_subj(userp, role); + + err = PTR_ERR(ret); + if (IS_ERR(ret)) + return err; + + insert_acl_subj_label(ret, role); + + userp = s_pre.prev; + } + + return 0; +} + +static int +copy_user_acl(struct gr_arg *arg) +{ + struct acl_role_label *r_tmp = NULL, **r_utmp, *r_utmp2; + struct sprole_pw *sptmp; + struct gr_hash_struct *ghash; + uid_t *domainlist; + unsigned int r_num; + unsigned int len; + char *tmp; + int err = 0; + __u16 i; + __u32 num_subjs; + + /* we need a default and kernel role */ + if (arg->role_db.num_roles < 2) + return -EINVAL; + + /* copy special role authentication info from userspace */ + + num_sprole_pws = arg->num_sprole_pws; + acl_special_roles = (struct sprole_pw **) acl_alloc(num_sprole_pws * sizeof(struct sprole_pw *)); + + if (!acl_special_roles) { + err = -ENOMEM; + goto cleanup; + } + + for (i = 0; i < num_sprole_pws; i++) { + sptmp = (struct sprole_pw *) acl_alloc(sizeof(struct sprole_pw)); + if (!sptmp) { + err = -ENOMEM; + goto cleanup; + } + if (copy_from_user(sptmp, arg->sprole_pws + i, + sizeof (struct sprole_pw))) { + err = -EFAULT; + goto cleanup; + } + + len = + strnlen_user(sptmp->rolename, GR_SPROLE_LEN); + + if (!len || len >= GR_SPROLE_LEN) { + err = -EINVAL; + goto cleanup; + } + + if ((tmp = (char *) acl_alloc(len)) == NULL) { + err = -ENOMEM; + goto cleanup; + } + + if (copy_from_user(tmp, sptmp->rolename, len)) { + err = -EFAULT; + goto cleanup; + } + +#ifdef CONFIG_GRKERNSEC_ACL_DEBUG + printk(KERN_ALERT "Copying special role %s\n", tmp); +#endif + sptmp->rolename = tmp; + acl_special_roles[i] = sptmp; + } + + r_utmp = (struct acl_role_label **) arg->role_db.r_table; + + for (r_num = 0; r_num < arg->role_db.num_roles; r_num++) { + r_tmp = acl_alloc(sizeof (struct 
acl_role_label)); + + if (!r_tmp) { + err = -ENOMEM; + goto cleanup; + } + + if (copy_from_user(&r_utmp2, r_utmp + r_num, + sizeof (struct acl_role_label *))) { + err = -EFAULT; + goto cleanup; + } + + if (copy_from_user(r_tmp, r_utmp2, + sizeof (struct acl_role_label))) { + err = -EFAULT; + goto cleanup; + } + + len = strnlen_user(r_tmp->rolename, GR_SPROLE_LEN); + + /* reject names that reach the limit given to strnlen_user above, as the + special-role and transition name copies do; the copied name is + passed to strcmp below */ + if (!len || len >= GR_SPROLE_LEN) { + err = -EINVAL; + goto cleanup; + } + + if ((tmp = (char *) acl_alloc(len)) == NULL) { + err = -ENOMEM; + goto cleanup; + } + if (copy_from_user(tmp, r_tmp->rolename, len)) { + err = -EFAULT; + goto cleanup; + } + r_tmp->rolename = tmp; + + if (!strcmp(r_tmp->rolename, "default") + && (r_tmp->roletype & GR_ROLE_DEFAULT)) { + default_role = r_tmp; + } else if (!strcmp(r_tmp->rolename, ":::kernel:::")) { + kernel_role = r_tmp; + } + + if ((ghash = (struct gr_hash_struct *) acl_alloc(sizeof(struct gr_hash_struct))) == NULL) { + err = -ENOMEM; + goto cleanup; + } + if (copy_from_user(ghash, r_tmp->hash, sizeof(struct gr_hash_struct))) { + err = -EFAULT; + goto cleanup; + } + + r_tmp->hash = ghash; + + num_subjs = count_user_subjs(r_tmp->hash->first); + + r_tmp->subj_hash_size = num_subjs; + r_tmp->subj_hash = + (struct acl_subject_label **) + create_table(&(r_tmp->subj_hash_size), sizeof(void *)); + + if (!r_tmp->subj_hash) { + err = -ENOMEM; + goto cleanup; + } + + err = copy_user_allowedips(r_tmp); + if (err) + goto cleanup; + + /* copy domain info */ + if (r_tmp->domain_children != NULL) { + domainlist = acl_alloc(r_tmp->domain_child_num * sizeof(uid_t)); + if (domainlist == NULL) { + err = -ENOMEM; + goto cleanup; + } + if (copy_from_user(domainlist, r_tmp->domain_children, r_tmp->domain_child_num * sizeof(uid_t))) { + err = -EFAULT; + goto cleanup; + } + r_tmp->domain_children = domainlist; + } + + err = copy_user_transitions(r_tmp); + if (err) + goto cleanup; + + memset(r_tmp->subj_hash, 0, + r_tmp->subj_hash_size * + sizeof (struct acl_subject_label *)); + + err = 
copy_user_subjs(r_tmp->hash->first, r_tmp); + + if (err) + goto cleanup; + + /* set nested subject list to null */ + r_tmp->hash->first = NULL; + + insert_acl_role_label(r_tmp); + } + + goto return_err; + cleanup: + free_variables(); + return_err: + return err; + +} + +static int +gracl_init(struct gr_arg *args) +{ + int error = 0; + + memcpy(gr_system_salt, args->salt, GR_SALT_LEN); + memcpy(gr_system_sum, args->sum, GR_SHA_LEN); + + if (init_variables(args)) { + gr_log_str(GR_DONT_AUDIT_GOOD, GR_INITF_ACL_MSG, GR_VERSION); + error = -ENOMEM; + free_variables(); + goto out; + } + + error = copy_user_acl(args); + free_init_variables(); + if (error) { + free_variables(); + goto out; + } + + if ((error = gr_set_acls(0))) { + free_variables(); + goto out; + } + + gr_status |= GR_READY; + out: + return error; +} + +/* derived from glibc fnmatch() 0: match, 1: no match*/ + +static int +glob_match(const char *p, const char *n) +{ + char c; + + while ((c = *p++) != '\0') { + switch (c) { + case '?': + if (*n == '\0') + return 1; + else if (*n == '/') + return 1; + break; + case '\\': + if (*n != c) + return 1; + break; + case '*': + for (c = *p++; c == '?' || c == '*'; c = *p++) { + if (*n == '/') + return 1; + else if (c == '?') { + if (*n == '\0') + return 1; + else + ++n; + } + } + if (c == '\0') { + return 0; + } else { + const char *endp; + + if ((endp = strchr(n, '/')) == NULL) + endp = n + strlen(n); + + if (c == '[') { + for (--p; n < endp; ++n) + if (!glob_match(p, n)) + return 0; + } else if (c == '/') { + while (*n != '\0' && *n != '/') + ++n; + if (*n == '/' && !glob_match(p, n + 1)) + return 0; + } else { + for (--p; n < endp; ++n) + if (*n == c && !glob_match(p, n)) + return 0; + } + + return 1; + } + case '[': + { + int not; + char cold; + + if (*n == '\0' || *n == '/') + return 1; + + not = (*p == '!' 
|| *p == '^'); + if (not) + ++p; + + c = *p++; + for (;;) { + unsigned char fn = (unsigned char)*n; + + if (c == '\0') + return 1; + else { + if (c == fn) + goto matched; + cold = c; + c = *p++; + + if (c == '-' && *p != ']') { + unsigned char cend = *p++; + + if (cend == '\0') + return 1; + + if (cold <= fn && fn <= cend) + goto matched; + + c = *p++; + } + } + + if (c == ']') + break; + } + if (!not) + return 1; + break; + matched: + while (c != ']') { + if (c == '\0') + return 1; + + c = *p++; + } + if (not) + return 1; + } + break; + default: + if (c != *n) + return 1; + } + + ++n; + } + + if (*n == '\0') + return 0; + + if (*n == '/') + return 0; + + return 1; +} + +static struct acl_object_label * +chk_glob_label(struct acl_object_label *globbed, + struct dentry *dentry, struct vfsmount *mnt, char **path) +{ + struct acl_object_label *tmp; + + if (*path == NULL) + *path = gr_to_filename_nolock(dentry, mnt); + + tmp = globbed; + + while (tmp) { + if (!glob_match(tmp->filename, *path)) + return tmp; + tmp = tmp->next; + } + + return NULL; +} + +static struct acl_object_label * +__full_lookup(const struct dentry *orig_dentry, const struct vfsmount *orig_mnt, + const ino_t curr_ino, const dev_t curr_dev, + const struct acl_subject_label *subj, char **path) +{ + struct acl_subject_label *tmpsubj; + struct acl_object_label *retval; + struct acl_object_label *retval2; + + tmpsubj = (struct acl_subject_label *) subj; + read_lock(&gr_inode_lock); + do { + retval = lookup_acl_obj_label(curr_ino, curr_dev, tmpsubj); + if (retval) { + if (retval->globbed) { + retval2 = chk_glob_label(retval->globbed, (struct dentry *)orig_dentry, + (struct vfsmount *)orig_mnt, path); + if (retval2) + retval = retval2; + } + break; + } + } while ((tmpsubj = tmpsubj->parent_subject)); + read_unlock(&gr_inode_lock); + + return retval; +} + +static __inline__ struct acl_object_label * +full_lookup(const struct dentry *orig_dentry, const struct vfsmount *orig_mnt, + const struct dentry 
*curr_dentry, + const struct acl_subject_label *subj, char **path) +{ + return __full_lookup(orig_dentry, orig_mnt, + curr_dentry->d_inode->i_ino, + curr_dentry->d_inode->i_sb->s_dev, subj, path); +} + +static struct acl_object_label * +__chk_obj_label(const struct dentry *l_dentry, const struct vfsmount *l_mnt, + const struct acl_subject_label *subj, char *path) +{ + struct dentry *dentry = (struct dentry *) l_dentry; + struct vfsmount *mnt = (struct vfsmount *) l_mnt; + struct acl_object_label *retval; + + spin_lock(&dcache_lock); + + if (unlikely(mnt == shm_mnt || mnt == pipe_mnt || mnt == sock_mnt)) { + retval = fakefs_obj; + goto out; + } + + for (;;) { + if (dentry == real_root && mnt == real_root_mnt) + break; + + if (dentry == mnt->mnt_root || IS_ROOT(dentry)) { + if (mnt->mnt_parent == mnt) + break; + + retval = full_lookup(l_dentry, l_mnt, dentry, subj, &path); + if (retval != NULL) + goto out; + + dentry = mnt->mnt_mountpoint; + mnt = mnt->mnt_parent; + continue; + } + + retval = full_lookup(l_dentry, l_mnt, dentry, subj, &path); + if (retval != NULL) + goto out; + + dentry = dentry->d_parent; + } + + retval = full_lookup(l_dentry, l_mnt, dentry, subj, &path); + + if (retval == NULL) + retval = full_lookup(l_dentry, l_mnt, real_root, subj, &path); +out: + spin_unlock(&dcache_lock); + return retval; +} + +static __inline__ struct acl_object_label * +chk_obj_label(const struct dentry *l_dentry, const struct vfsmount *l_mnt, + const struct acl_subject_label *subj) +{ + char *path = NULL; + return __chk_obj_label(l_dentry, l_mnt, subj, path); +} + +static __inline__ struct acl_object_label * +chk_obj_create_label(const struct dentry *l_dentry, const struct vfsmount *l_mnt, + const struct acl_subject_label *subj, char *path) +{ + return __chk_obj_label(l_dentry, l_mnt, subj, path); +} + +static struct acl_subject_label * +chk_subj_label(const struct dentry *l_dentry, const struct vfsmount *l_mnt, + const struct acl_role_label *role) +{ + struct dentry *dentry 
= (struct dentry *) l_dentry; + struct vfsmount *mnt = (struct vfsmount *) l_mnt; + struct acl_subject_label *retval; + + spin_lock(&dcache_lock); + + for (;;) { + if (dentry == real_root && mnt == real_root_mnt) + break; + if (dentry == mnt->mnt_root || IS_ROOT(dentry)) { + if (mnt->mnt_parent == mnt) + break; + + read_lock(&gr_inode_lock); + retval = + lookup_acl_subj_label(dentry->d_inode->i_ino, + dentry->d_inode->i_sb->s_dev, role); + read_unlock(&gr_inode_lock); + if (retval != NULL) + goto out; + + dentry = mnt->mnt_mountpoint; + mnt = mnt->mnt_parent; + continue; + } + + read_lock(&gr_inode_lock); + retval = lookup_acl_subj_label(dentry->d_inode->i_ino, + dentry->d_inode->i_sb->s_dev, role); + read_unlock(&gr_inode_lock); + if (retval != NULL) + goto out; + + dentry = dentry->d_parent; + } + + read_lock(&gr_inode_lock); + retval = lookup_acl_subj_label(dentry->d_inode->i_ino, + dentry->d_inode->i_sb->s_dev, role); + read_unlock(&gr_inode_lock); + + if (unlikely(retval == NULL)) { + read_lock(&gr_inode_lock); + retval = lookup_acl_subj_label(real_root->d_inode->i_ino, + real_root->d_inode->i_sb->s_dev, role); + read_unlock(&gr_inode_lock); + } +out: + spin_unlock(&dcache_lock); + + return retval; +} + +static void +gr_log_learn(const struct task_struct *task, const struct dentry *dentry, const struct vfsmount *mnt, const __u32 mode) +{ + security_learn(GR_LEARN_AUDIT_MSG, task->role->rolename, task->role->roletype, + task->uid, task->gid, task->exec_file ? 
gr_to_filename1(task->exec_file->f_dentry, + task->exec_file->f_vfsmnt) : task->acl->filename, task->acl->filename, + 1, 1, gr_to_filename(dentry, mnt), (unsigned long) mode, NIPQUAD(task->signal->curr_ip)); + + return; +} + +static void +gr_log_learn_id_change(const struct task_struct *task, const char type, const unsigned int real, + const unsigned int effective, const unsigned int fs) +{ + security_learn(GR_ID_LEARN_MSG, task->role->rolename, task->role->roletype, + task->uid, task->gid, task->exec_file ? gr_to_filename1(task->exec_file->f_dentry, + task->exec_file->f_vfsmnt) : task->acl->filename, task->acl->filename, + type, real, effective, fs, NIPQUAD(task->signal->curr_ip)); + + return; +} + +__u32 +gr_check_link(const struct dentry * new_dentry, + const struct dentry * parent_dentry, + const struct vfsmount * parent_mnt, + const struct dentry * old_dentry, const struct vfsmount * old_mnt) +{ + struct acl_object_label *obj; + __u32 oldmode, newmode; + __u32 needmode; + + if (unlikely(!(gr_status & GR_READY))) + return (GR_CREATE | GR_LINK); + + obj = chk_obj_label(old_dentry, old_mnt, current->acl); + oldmode = obj->mode; + + if (current->acl->mode & (GR_LEARN | GR_INHERITLEARN)) + oldmode |= (GR_CREATE | GR_LINK); + + needmode = GR_CREATE | GR_AUDIT_CREATE | GR_SUPPRESS; + if (old_dentry->d_inode->i_mode & (S_ISUID | S_ISGID)) + needmode |= GR_SETID | GR_AUDIT_SETID; + + newmode = + gr_check_create(new_dentry, parent_dentry, parent_mnt, + oldmode | needmode); + + needmode = newmode & (GR_FIND | GR_APPEND | GR_WRITE | GR_EXEC | + GR_SETID | GR_READ | GR_FIND | GR_DELETE | + GR_INHERIT | GR_AUDIT_INHERIT); + + if (old_dentry->d_inode->i_mode & (S_ISUID | S_ISGID) && !(newmode & GR_SETID)) + goto bad; + + if ((oldmode & needmode) != needmode) + goto bad; + + needmode = oldmode & (GR_NOPTRACE | GR_PTRACERD | GR_INHERIT | GR_AUDITS); + if ((newmode & needmode) != needmode) + goto bad; + + if ((newmode & (GR_CREATE | GR_LINK)) == (GR_CREATE | GR_LINK)) + return 
newmode; +bad: + needmode = oldmode; + if (old_dentry->d_inode->i_mode & (S_ISUID | S_ISGID)) + needmode |= GR_SETID; + + if (current->acl->mode & (GR_LEARN | GR_INHERITLEARN)) { + gr_log_learn(current, old_dentry, old_mnt, needmode); + return (GR_CREATE | GR_LINK); + } else if (newmode & GR_SUPPRESS) + return GR_SUPPRESS; + else + return 0; +} + +__u32 +gr_search_file(const struct dentry * dentry, const __u32 mode, + const struct vfsmount * mnt) +{ + __u32 retval = mode; + struct acl_subject_label *curracl; + struct acl_object_label *currobj; + + if (unlikely(!(gr_status & GR_READY))) + return (mode & ~GR_AUDITS); + + curracl = current->acl; + + currobj = chk_obj_label(dentry, mnt, curracl); + retval = currobj->mode & mode; + + if (unlikely + ((curracl->mode & (GR_LEARN | GR_INHERITLEARN)) && !(mode & GR_NOPTRACE) + && (retval != (mode & ~(GR_AUDITS | GR_SUPPRESS))))) { + __u32 new_mode = mode; + + new_mode &= ~(GR_AUDITS | GR_SUPPRESS); + + retval = new_mode; + + if (new_mode & GR_EXEC && curracl->mode & GR_INHERITLEARN) + new_mode |= GR_INHERIT; + + if (!(mode & GR_NOLEARN)) + gr_log_learn(current, dentry, mnt, new_mode); + } + + return retval; +} + +__u32 +gr_check_create(const struct dentry * new_dentry, const struct dentry * parent, + const struct vfsmount * mnt, const __u32 mode) +{ + struct name_entry *match; + struct acl_object_label *matchpo; + struct acl_subject_label *curracl; + char *path; + __u32 retval; + + if (unlikely(!(gr_status & GR_READY))) + return (mode & ~GR_AUDITS); + + preempt_disable(); + path = gr_to_filename_rbac(new_dentry, mnt); + match = lookup_name_entry(path); + + if (!match) + goto check_parent; + + curracl = current->acl; + + read_lock(&gr_inode_lock); + matchpo = lookup_acl_obj_label_create(match->inode, match->device, curracl); + read_unlock(&gr_inode_lock); + + if (matchpo) { + if ((matchpo->mode & mode) != + (mode & ~(GR_AUDITS | GR_SUPPRESS)) + && curracl->mode & (GR_LEARN | GR_INHERITLEARN)) { + __u32 new_mode = mode; + + 
new_mode &= ~(GR_AUDITS | GR_SUPPRESS); + + gr_log_learn(current, new_dentry, mnt, new_mode); + + preempt_enable(); + return new_mode; + } + preempt_enable(); + return (matchpo->mode & mode); + } + + check_parent: + curracl = current->acl; + + matchpo = chk_obj_create_label(parent, mnt, curracl, path); + retval = matchpo->mode & mode; + + if ((retval != (mode & ~(GR_AUDITS | GR_SUPPRESS))) + && (curracl->mode & (GR_LEARN | GR_INHERITLEARN))) { + __u32 new_mode = mode; + + new_mode &= ~(GR_AUDITS | GR_SUPPRESS); + + gr_log_learn(current, new_dentry, mnt, new_mode); + preempt_enable(); + return new_mode; + } + + preempt_enable(); + return retval; +} + +int +gr_check_hidden_task(const struct task_struct *task) +{ + if (unlikely(!(gr_status & GR_READY))) + return 0; + + if (!(task->acl->mode & GR_PROCFIND) && !(current->acl->mode & GR_VIEW)) + return 1; + + return 0; +} + +int +gr_check_protected_task(const struct task_struct *task) +{ + if (unlikely(!(gr_status & GR_READY) || !task)) + return 0; + + if ((task->acl->mode & GR_PROTECTED) && !(current->acl->mode & GR_KILL) && + task->acl != current->acl) + return 1; + + return 0; +} + +void +gr_copy_label(struct task_struct *tsk) +{ + tsk->signal->used_accept = 0; + tsk->acl_sp_role = 0; + tsk->acl_role_id = current->acl_role_id; + tsk->acl = current->acl; + tsk->role = current->role; + tsk->signal->curr_ip = current->signal->curr_ip; + if (current->exec_file) + get_file(current->exec_file); + tsk->exec_file = current->exec_file; + tsk->is_writable = current->is_writable; + if (unlikely(current->signal->used_accept)) + current->signal->curr_ip = 0; + + return; +} + +static void +gr_set_proc_res(struct task_struct *task) +{ + struct acl_subject_label *proc; + unsigned short i; + + proc = task->acl; + + if (proc->mode & (GR_LEARN | GR_INHERITLEARN)) + return; + + for (i = 0; i < (GR_NLIMITS - 1); i++) { + if (!(proc->resmask & (1 << i))) + continue; + + task->signal->rlim[i].rlim_cur = proc->res[i].rlim_cur; + 
task->signal->rlim[i].rlim_max = proc->res[i].rlim_max; + } + + return; +} + +int +gr_check_user_change(int real, int effective, int fs) +{ + unsigned int i; + __u16 num; + uid_t *uidlist; + int curuid; + int realok = 0; + int effectiveok = 0; + int fsok = 0; + + if (unlikely(!(gr_status & GR_READY))) + return 0; + + if (current->acl->mode & (GR_LEARN | GR_INHERITLEARN)) + gr_log_learn_id_change(current, 'u', real, effective, fs); + + num = current->acl->user_trans_num; + uidlist = current->acl->user_transitions; + + if (uidlist == NULL) + return 0; + + if (real == -1) + realok = 1; + if (effective == -1) + effectiveok = 1; + if (fs == -1) + fsok = 1; + + if (current->acl->user_trans_type & GR_ID_ALLOW) { + for (i = 0; i < num; i++) { + curuid = (int)uidlist[i]; + if (real == curuid) + realok = 1; + if (effective == curuid) + effectiveok = 1; + if (fs == curuid) + fsok = 1; + } + } else if (current->acl->user_trans_type & GR_ID_DENY) { + for (i = 0; i < num; i++) { + curuid = (int)uidlist[i]; + if (real == curuid) + break; + if (effective == curuid) + break; + if (fs == curuid) + break; + } + /* not in deny list */ + if (i == num) { + realok = 1; + effectiveok = 1; + fsok = 1; + } + } + + if (realok && effectiveok && fsok) + return 0; + else { + gr_log_int(GR_DONT_AUDIT, GR_USRCHANGE_ACL_MSG, realok ? (effectiveok ? (fsok ? 
0 : fs) : effective) : real); + return 1; + } +} + +int +gr_check_group_change(int real, int effective, int fs) +{ + unsigned int i; + __u16 num; + gid_t *gidlist; + int curgid; + int realok = 0; + int effectiveok = 0; + int fsok = 0; + + if (unlikely(!(gr_status & GR_READY))) + return 0; + + if (current->acl->mode & (GR_LEARN | GR_INHERITLEARN)) + gr_log_learn_id_change(current, 'g', real, effective, fs); + + num = current->acl->group_trans_num; + gidlist = current->acl->group_transitions; + + if (gidlist == NULL) + return 0; + + if (real == -1) + realok = 1; + if (effective == -1) + effectiveok = 1; + if (fs == -1) + fsok = 1; + + if (current->acl->group_trans_type & GR_ID_ALLOW) { + for (i = 0; i < num; i++) { + curgid = (int)gidlist[i]; + if (real == curgid) + realok = 1; + if (effective == curgid) + effectiveok = 1; + if (fs == curgid) + fsok = 1; + } + } else if (current->acl->group_trans_type & GR_ID_DENY) { + for (i = 0; i < num; i++) { + curgid = (int)gidlist[i]; + if (real == curgid) + break; + if (effective == curgid) + break; + if (fs == curgid) + break; + } + /* not in deny list */ + if (i == num) { + realok = 1; + effectiveok = 1; + fsok = 1; + } + } + + if (realok && effectiveok && fsok) + return 0; + else { + gr_log_int(GR_DONT_AUDIT, GR_GRPCHANGE_ACL_MSG, realok ? (effectiveok ? (fsok ? 
0 : fs) : effective) : real); + return 1; + } +} + +void +gr_set_role_label(struct task_struct *task, const uid_t uid, const uid_t gid) +{ + struct acl_role_label *role = task->role; + struct acl_subject_label *subj = NULL; + struct acl_object_label *obj; + struct file *filp; + + if (unlikely(!(gr_status & GR_READY))) + return; + + filp = task->exec_file; + + /* kernel process, we'll give them the kernel role */ + if (unlikely(!filp)) { + task->role = kernel_role; + task->acl = kernel_role->root_label; + return; + } else if (!task->role || !(task->role->roletype & GR_ROLE_SPECIAL)) + role = lookup_acl_role_label(task, uid, gid); + + /* perform subject lookup in possibly new role + we can use this result below in the case where role == task->role + */ + subj = chk_subj_label(filp->f_dentry, filp->f_vfsmnt, role); + + /* if we changed uid/gid, but result in the same role + and are using inheritance, don't lose the inherited subject + if current subject is other than what normal lookup + would result in, we arrived via inheritance, don't + lose subject + */ + if (role != task->role || (!(task->acl->mode & GR_INHERITLEARN) && + (subj == task->acl))) + task->acl = subj; + + task->role = role; + + task->is_writable = 0; + + /* ignore additional mmap checks for processes that are writable + by the default ACL */ + obj = chk_obj_label(filp->f_dentry, filp->f_vfsmnt, default_role->root_label); + if (unlikely(obj->mode & GR_WRITE)) + task->is_writable = 1; + obj = chk_obj_label(filp->f_dentry, filp->f_vfsmnt, task->role->root_label); + if (unlikely(obj->mode & GR_WRITE)) + task->is_writable = 1; + +#ifdef CONFIG_GRKERNSEC_ACL_DEBUG + printk(KERN_ALERT "Set role label for (%s:%d): role:%s, subject:%s\n", task->comm, task->pid, task->role->rolename, task->acl->filename); +#endif + + gr_set_proc_res(task); + + return; +} + +int +gr_set_proc_label(const struct dentry *dentry, const struct vfsmount *mnt) +{ + struct task_struct *task = current; + struct acl_subject_label *newacl; 
+ struct acl_object_label *obj; + __u32 retmode; + + if (unlikely(!(gr_status & GR_READY))) + return 0; + + newacl = chk_subj_label(dentry, mnt, task->role); + + task_lock(task); + if (((task->ptrace & PT_PTRACED) && !(task->acl->mode & + GR_POVERRIDE) && (task->acl != newacl) && + !(task->role->roletype & GR_ROLE_GOD) && + !gr_search_file(dentry, GR_PTRACERD, mnt) && + !(task->acl->mode & (GR_LEARN | GR_INHERITLEARN))) || + (atomic_read(&task->fs->count) > 1 || + atomic_read(&task->files->count) > 1 || + atomic_read(&task->sighand->count) > 1)) { + task_unlock(task); + gr_log_fs_generic(GR_DONT_AUDIT, GR_PTRACE_EXEC_ACL_MSG, dentry, mnt); + return -EACCES; + } + task_unlock(task); + + obj = chk_obj_label(dentry, mnt, task->acl); + retmode = obj->mode & (GR_INHERIT | GR_AUDIT_INHERIT); + + if (!(task->acl->mode & GR_INHERITLEARN) && + ((newacl->mode & GR_LEARN) || !(retmode & GR_INHERIT))) { + if (obj->nested) + task->acl = obj->nested; + else + task->acl = newacl; + } else if (retmode & GR_INHERIT && retmode & GR_AUDIT_INHERIT) + gr_log_str_fs(GR_DO_AUDIT, GR_INHERIT_ACL_MSG, task->acl->filename, dentry, mnt); + + task->is_writable = 0; + + /* ignore additional mmap checks for processes that are writable + by the default ACL */ + obj = chk_obj_label(dentry, mnt, default_role->root_label); + if (unlikely(obj->mode & GR_WRITE)) + task->is_writable = 1; + obj = chk_obj_label(dentry, mnt, task->role->root_label); + if (unlikely(obj->mode & GR_WRITE)) + task->is_writable = 1; + + gr_set_proc_res(task); + +#ifdef CONFIG_GRKERNSEC_ACL_DEBUG + printk(KERN_ALERT "Set subject label for (%s:%d): role:%s, subject:%s\n", task->comm, task->pid, task->role->rolename, task->acl->filename); +#endif + return 0; +} + +static void +do_handle_delete(const ino_t ino, const dev_t dev) +{ + struct acl_object_label *matchpo; + struct acl_subject_label *matchps; + struct acl_subject_label *subj; + struct acl_role_label *role; + unsigned int i, x; + + FOR_EACH_ROLE_START(role, i) + 
FOR_EACH_SUBJECT_START(role, subj, x) + if ((matchpo = lookup_acl_obj_label(ino, dev, subj)) != NULL) + matchpo->mode |= GR_DELETED; + FOR_EACH_SUBJECT_END(subj,x) + FOR_EACH_NESTED_SUBJECT_START(role, subj) + if (subj->inode == ino && subj->device == dev) + subj->mode |= GR_DELETED; + FOR_EACH_NESTED_SUBJECT_END(subj) + if ((matchps = lookup_acl_subj_label(ino, dev, role)) != NULL) + matchps->mode |= GR_DELETED; + FOR_EACH_ROLE_END(role,i) + + return; +} + +void +gr_handle_delete(const ino_t ino, const dev_t dev) +{ + if (unlikely(!(gr_status & GR_READY))) + return; + + write_lock(&gr_inode_lock); + if (unlikely((unsigned long)lookup_inodev_entry(ino, dev))) + do_handle_delete(ino, dev); + write_unlock(&gr_inode_lock); + + return; +} + +static void +update_acl_obj_label(const ino_t oldinode, const dev_t olddevice, + const ino_t newinode, const dev_t newdevice, + struct acl_subject_label *subj) +{ + unsigned int index = fhash(oldinode, olddevice, subj->obj_hash_size); + struct acl_object_label *match; + + match = subj->obj_hash[index]; + + while (match && (match->inode != oldinode || + match->device != olddevice || + !(match->mode & GR_DELETED))) + match = match->next; + + if (match && (match->inode == oldinode) + && (match->device == olddevice) + && (match->mode & GR_DELETED)) { + if (match->prev == NULL) { + subj->obj_hash[index] = match->next; + if (match->next != NULL) + match->next->prev = NULL; + } else { + match->prev->next = match->next; + if (match->next != NULL) + match->next->prev = match->prev; + } + match->prev = NULL; + match->next = NULL; + match->inode = newinode; + match->device = newdevice; + match->mode &= ~GR_DELETED; + + insert_acl_obj_label(match, subj); + } + + return; +} + +static void +update_acl_subj_label(const ino_t oldinode, const dev_t olddevice, + const ino_t newinode, const dev_t newdevice, + struct acl_role_label *role) +{ + unsigned int index = fhash(oldinode, olddevice, role->subj_hash_size); + struct acl_subject_label *match; + + 
match = role->subj_hash[index]; + + while (match && (match->inode != oldinode || + match->device != olddevice || + !(match->mode & GR_DELETED))) + match = match->next; + + if (match && (match->inode == oldinode) + && (match->device == olddevice) + && (match->mode & GR_DELETED)) { + if (match->prev == NULL) { + role->subj_hash[index] = match->next; + if (match->next != NULL) + match->next->prev = NULL; + } else { + match->prev->next = match->next; + if (match->next != NULL) + match->next->prev = match->prev; + } + match->prev = NULL; + match->next = NULL; + match->inode = newinode; + match->device = newdevice; + match->mode &= ~GR_DELETED; + + insert_acl_subj_label(match, role); + } + + return; +} + +static void +update_inodev_entry(const ino_t oldinode, const dev_t olddevice, + const ino_t newinode, const dev_t newdevice) +{ + unsigned int index = fhash(oldinode, olddevice, inodev_set.i_size); + struct inodev_entry *match; + + match = inodev_set.i_hash[index]; + + while (match && (match->nentry->inode != oldinode || + match->nentry->device != olddevice)) + match = match->next; + + if (match && (match->nentry->inode == oldinode) + && (match->nentry->device == olddevice)) { + if (match->prev == NULL) { + inodev_set.i_hash[index] = match->next; + if (match->next != NULL) + match->next->prev = NULL; + } else { + match->prev->next = match->next; + if (match->next != NULL) + match->next->prev = match->prev; + } + match->prev = NULL; + match->next = NULL; + match->nentry->inode = newinode; + match->nentry->device = newdevice; + + insert_inodev_entry(match); + } + + return; +} + +static void +do_handle_create(const struct name_entry *matchn, const struct dentry *dentry, + const struct vfsmount *mnt) +{ + struct acl_subject_label *subj; + struct acl_role_label *role; + unsigned int i, x; + + FOR_EACH_ROLE_START(role, i) + update_acl_subj_label(matchn->inode, matchn->device, + dentry->d_inode->i_ino, + dentry->d_inode->i_sb->s_dev, role); + + 
FOR_EACH_NESTED_SUBJECT_START(role, subj) + if ((subj->inode == dentry->d_inode->i_ino) && + (subj->device == dentry->d_inode->i_sb->s_dev)) { + subj->inode = dentry->d_inode->i_ino; + subj->device = dentry->d_inode->i_sb->s_dev; + } + FOR_EACH_NESTED_SUBJECT_END(subj) + FOR_EACH_SUBJECT_START(role, subj, x) + update_acl_obj_label(matchn->inode, matchn->device, + dentry->d_inode->i_ino, + dentry->d_inode->i_sb->s_dev, subj); + FOR_EACH_SUBJECT_END(subj,x) + FOR_EACH_ROLE_END(role,i) + + update_inodev_entry(matchn->inode, matchn->device, + dentry->d_inode->i_ino, dentry->d_inode->i_sb->s_dev); + + return; +} + +void +gr_handle_create(const struct dentry *dentry, const struct vfsmount *mnt) +{ + struct name_entry *matchn; + + if (unlikely(!(gr_status & GR_READY))) + return; + + preempt_disable(); + matchn = lookup_name_entry(gr_to_filename_rbac(dentry, mnt)); + + if (unlikely((unsigned long)matchn)) { + write_lock(&gr_inode_lock); + do_handle_create(matchn, dentry, mnt); + write_unlock(&gr_inode_lock); + } + preempt_enable(); + + return; +} + +void +gr_handle_rename(struct inode *old_dir, struct inode *new_dir, + struct dentry *old_dentry, + struct dentry *new_dentry, + struct vfsmount *mnt, const __u8 replace) +{ + struct name_entry *matchn; + + if (unlikely(!(gr_status & GR_READY))) + return; + + preempt_disable(); + matchn = lookup_name_entry(gr_to_filename_rbac(new_dentry, mnt)); + + /* we wouldn't have to check d_inode if it weren't for + NFS silly-renaming + */ + + write_lock(&gr_inode_lock); + if (unlikely(replace && new_dentry->d_inode)) { + if (unlikely(lookup_inodev_entry(new_dentry->d_inode->i_ino, + new_dentry->d_inode->i_sb->s_dev) && + (old_dentry->d_inode->i_nlink <= 1))) + do_handle_delete(new_dentry->d_inode->i_ino, + new_dentry->d_inode->i_sb->s_dev); + } + + if (unlikely(lookup_inodev_entry(old_dentry->d_inode->i_ino, + old_dentry->d_inode->i_sb->s_dev) && + (old_dentry->d_inode->i_nlink <= 1))) + do_handle_delete(old_dentry->d_inode->i_ino, + 
old_dentry->d_inode->i_sb->s_dev); + + if (unlikely((unsigned long)matchn)) + do_handle_create(matchn, old_dentry, mnt); + + write_unlock(&gr_inode_lock); + preempt_enable(); + + return; +} + +static int +lookup_special_role_auth(__u16 mode, const char *rolename, unsigned char **salt, + unsigned char **sum) +{ + struct acl_role_label *r; + struct role_allowed_ip *ipp; + struct role_transition *trans; + unsigned int i; + int found = 0; + + /* check transition table */ + + for (trans = current->role->transitions; trans; trans = trans->next) { + if (!strcmp(rolename, trans->rolename)) { + found = 1; + break; + } + } + + if (!found) + return 0; + + /* handle special roles that do not require authentication + and check ip */ + + FOR_EACH_ROLE_START(r, i) + if (!strcmp(rolename, r->rolename) && + (r->roletype & GR_ROLE_SPECIAL)) { + found = 0; + if (r->allowed_ips != NULL) { + for (ipp = r->allowed_ips; ipp; ipp = ipp->next) { + if ((ntohl(current->signal->curr_ip) & ipp->netmask) == + (ntohl(ipp->addr) & ipp->netmask)) + found = 1; + } + } else + found = 2; + if (!found) + return 0; + + if (((mode == SPROLE) && (r->roletype & GR_ROLE_NOPW)) || + ((mode == SPROLEPAM) && (r->roletype & GR_ROLE_PAM))) { + *salt = NULL; + *sum = NULL; + return 1; + } + } + FOR_EACH_ROLE_END(r,i) + + for (i = 0; i < num_sprole_pws; i++) { + if (!strcmp(rolename, acl_special_roles[i]->rolename)) { + *salt = acl_special_roles[i]->salt; + *sum = acl_special_roles[i]->sum; + return 1; + } + } + + return 0; +} + +static void +assign_special_role(char *rolename) +{ + struct acl_object_label *obj; + struct acl_role_label *r; + struct acl_role_label *assigned = NULL; + struct task_struct *tsk; + struct file *filp; + unsigned int i; + + FOR_EACH_ROLE_START(r, i) + if (!strcmp(rolename, r->rolename) && + (r->roletype & GR_ROLE_SPECIAL)) + assigned = r; + FOR_EACH_ROLE_END(r,i) + + if (!assigned) + return; + + read_lock(&tasklist_lock); + read_lock(&grsec_exec_file_lock); + + tsk = current->parent; + 
if (tsk == NULL) + goto out_unlock; + + filp = tsk->exec_file; + if (filp == NULL) + goto out_unlock; + + tsk->is_writable = 0; + + tsk->acl_sp_role = 1; + tsk->acl_role_id = ++acl_sp_role_value; + tsk->role = assigned; + tsk->acl = chk_subj_label(filp->f_dentry, filp->f_vfsmnt, tsk->role); + + /* ignore additional mmap checks for processes that are writable + by the default ACL */ + obj = chk_obj_label(filp->f_dentry, filp->f_vfsmnt, default_role->root_label); + if (unlikely(obj->mode & GR_WRITE)) + tsk->is_writable = 1; + obj = chk_obj_label(filp->f_dentry, filp->f_vfsmnt, tsk->role->root_label); + if (unlikely(obj->mode & GR_WRITE)) + tsk->is_writable = 1; + +#ifdef CONFIG_GRKERNSEC_ACL_DEBUG + printk(KERN_ALERT "Assigning special role:%s subject:%s to process (%s:%d)\n", tsk->role->rolename, tsk->acl->filename, tsk->comm, tsk->pid); +#endif + +out_unlock: + read_unlock(&grsec_exec_file_lock); + read_unlock(&tasklist_lock); + return; +} + +int gr_check_secure_terminal(struct task_struct *task) +{ + struct task_struct *p, *p2, *p3; + struct files_struct *files; + struct fdtable *fdt; + struct file *our_file = NULL, *file; + int i; + + if (task->signal->tty == NULL) + return 1; + + files = get_files_struct(task); + if (files != NULL) { + rcu_read_lock(); + fdt = files_fdtable(files); + for (i=0; i < fdt->max_fds; i++) { + file = fcheck_files(files, i); + if (file && (our_file == NULL) && (file->private_data == task->signal->tty)) { + get_file(file); + our_file = file; + } + } + rcu_read_unlock(); + put_files_struct(files); + } + + if (our_file == NULL) + return 1; + + read_lock(&tasklist_lock); + do_each_thread(p2, p) { + files = get_files_struct(p); + if (files == NULL || + (p->signal && p->signal->tty == task->signal->tty)) { + if (files != NULL) + put_files_struct(files); + continue; + } + rcu_read_lock(); + fdt = files_fdtable(files); + for (i=0; i < fdt->max_fds; i++) { + file = fcheck_files(files, i); + if (file && S_ISCHR(file->f_dentry->d_inode->i_mode) && 
+ file->f_dentry->d_inode->i_rdev == our_file->f_dentry->d_inode->i_rdev) { + p3 = task; + while (p3->pid > 0) { + if (p3 == p) + break; + p3 = p3->parent; + } + if (p3 == p) + break; + gr_log_ttysniff(GR_DONT_AUDIT_GOOD, GR_TTYSNIFF_ACL_MSG, p); + gr_handle_alertkill(p); + rcu_read_unlock(); + put_files_struct(files); + read_unlock(&tasklist_lock); + fput(our_file); + return 0; + } + } + rcu_read_unlock(); + put_files_struct(files); + } while_each_thread(p2, p); + read_unlock(&tasklist_lock); + + fput(our_file); + return 1; +} + +ssize_t +write_grsec_handler(struct file *file, const char * buf, size_t count, loff_t *ppos) +{ + struct gr_arg_wrapper uwrap; + unsigned char *sprole_salt; + unsigned char *sprole_sum; + int error = sizeof (struct gr_arg_wrapper); + int error2 = 0; + + down(&gr_dev_sem); + + if ((gr_status & GR_READY) && !(current->acl->mode & GR_KERNELAUTH)) { + error = -EPERM; + goto out; + } + + if (count != sizeof (struct gr_arg_wrapper)) { + gr_log_int_int(GR_DONT_AUDIT_GOOD, GR_DEV_ACL_MSG, (int)count, (int)sizeof(struct gr_arg_wrapper)); + error = -EINVAL; + goto out; + } + + + if (gr_auth_expires && time_after_eq(get_seconds(), gr_auth_expires)) { + gr_auth_expires = 0; + gr_auth_attempts = 0; + } + + if (copy_from_user(&uwrap, buf, sizeof (struct gr_arg_wrapper))) { + error = -EFAULT; + goto out; + } + + if ((uwrap.version != GRSECURITY_VERSION) || (uwrap.size != sizeof(struct gr_arg))) { + error = -EINVAL; + goto out; + } + + if (copy_from_user(gr_usermode, uwrap.arg, sizeof (struct gr_arg))) { + error = -EFAULT; + goto out; + } + + if (gr_usermode->mode != SPROLE && gr_usermode->mode != SPROLEPAM && + gr_auth_attempts >= CONFIG_GRKERNSEC_ACL_MAXTRIES && + time_after(gr_auth_expires, get_seconds())) { + error = -EBUSY; + goto out; + } + + /* if non-root trying to do anything other than use a special role, + do not attempt authentication, do not count towards authentication + locking + */ + + if (gr_usermode->mode != SPROLE && gr_usermode->mode 
!= STATUS && + gr_usermode->mode != UNSPROLE && gr_usermode->mode != SPROLEPAM && + current->uid) { + error = -EPERM; + goto out; + } + + /* ensure pw and special role name are null terminated */ + + gr_usermode->pw[GR_PW_LEN - 1] = '\0'; + gr_usermode->sp_role[GR_SPROLE_LEN - 1] = '\0'; + + /* Okay. + * We have our enough of the argument structure..(we have yet + * to copy_from_user the tables themselves) . Copy the tables + * only if we need them, i.e. for loading operations. */ + + switch (gr_usermode->mode) { + case STATUS: + if (gr_status & GR_READY) { + error = 1; + if (!gr_check_secure_terminal(current)) + error = 3; + } else + error = 2; + goto out; + case SHUTDOWN: + if ((gr_status & GR_READY) + && !(chkpw(gr_usermode, gr_system_salt, gr_system_sum))) { + gr_status &= ~GR_READY; + gr_log_noargs(GR_DONT_AUDIT_GOOD, GR_SHUTS_ACL_MSG); + free_variables(); + memset(gr_usermode, 0, sizeof (struct gr_arg)); + memset(gr_system_salt, 0, GR_SALT_LEN); + memset(gr_system_sum, 0, GR_SHA_LEN); + } else if (gr_status & GR_READY) { + gr_log_noargs(GR_DONT_AUDIT, GR_SHUTF_ACL_MSG); + error = -EPERM; + } else { + gr_log_noargs(GR_DONT_AUDIT_GOOD, GR_SHUTI_ACL_MSG); + error = -EAGAIN; + } + break; + case ENABLE: + if (!(gr_status & GR_READY) && !(error2 = gracl_init(gr_usermode))) + gr_log_str(GR_DONT_AUDIT_GOOD, GR_ENABLE_ACL_MSG, GR_VERSION); + else { + if (gr_status & GR_READY) + error = -EAGAIN; + else + error = error2; + gr_log_str(GR_DONT_AUDIT, GR_ENABLEF_ACL_MSG, GR_VERSION); + } + break; + case RELOAD: + if (!(gr_status & GR_READY)) { + gr_log_str(GR_DONT_AUDIT_GOOD, GR_RELOADI_ACL_MSG, GR_VERSION); + error = -EAGAIN; + } else if (!(chkpw(gr_usermode, gr_system_salt, gr_system_sum))) { + lock_kernel(); + gr_status &= ~GR_READY; + free_variables(); + if (!(error2 = gracl_init(gr_usermode))) { + unlock_kernel(); + gr_log_str(GR_DONT_AUDIT_GOOD, GR_RELOAD_ACL_MSG, GR_VERSION); + } else { + unlock_kernel(); + error = error2; + gr_log_str(GR_DONT_AUDIT, 
GR_RELOADF_ACL_MSG, GR_VERSION); + } + } else { + gr_log_str(GR_DONT_AUDIT, GR_RELOADF_ACL_MSG, GR_VERSION); + error = -EPERM; + } + break; + case SEGVMOD: + if (unlikely(!(gr_status & GR_READY))) { + gr_log_noargs(GR_DONT_AUDIT_GOOD, GR_SEGVMODI_ACL_MSG); + error = -EAGAIN; + break; + } + + if (!(chkpw(gr_usermode, gr_system_salt, gr_system_sum))) { + gr_log_noargs(GR_DONT_AUDIT_GOOD, GR_SEGVMODS_ACL_MSG); + if (gr_usermode->segv_device && gr_usermode->segv_inode) { + struct acl_subject_label *segvacl; + segvacl = + lookup_acl_subj_label(gr_usermode->segv_inode, + gr_usermode->segv_device, + current->role); + if (segvacl) { + segvacl->crashes = 0; + segvacl->expires = 0; + } + } else if (gr_find_uid(gr_usermode->segv_uid) >= 0) { + gr_remove_uid(gr_usermode->segv_uid); + } + } else { + gr_log_noargs(GR_DONT_AUDIT, GR_SEGVMODF_ACL_MSG); + error = -EPERM; + } + break; + case SPROLE: + case SPROLEPAM: + if (unlikely(!(gr_status & GR_READY))) { + gr_log_noargs(GR_DONT_AUDIT_GOOD, GR_SPROLEI_ACL_MSG); + error = -EAGAIN; + break; + } + + if (current->role->expires && time_after_eq(get_seconds(), current->role->expires)) { + current->role->expires = 0; + current->role->auth_attempts = 0; + } + + if (current->role->auth_attempts >= CONFIG_GRKERNSEC_ACL_MAXTRIES && + time_after(current->role->expires, get_seconds())) { + error = -EBUSY; + goto out; + } + + if (lookup_special_role_auth + (gr_usermode->mode, gr_usermode->sp_role, &sprole_salt, &sprole_sum) + && ((!sprole_salt && !sprole_sum) + || !(chkpw(gr_usermode, sprole_salt, sprole_sum)))) { + char *p = ""; + assign_special_role(gr_usermode->sp_role); + read_lock(&tasklist_lock); + if (current->parent) + p = current->parent->role->rolename; + read_unlock(&tasklist_lock); + gr_log_str_int(GR_DONT_AUDIT_GOOD, GR_SPROLES_ACL_MSG, + p, acl_sp_role_value); + } else { + gr_log_str(GR_DONT_AUDIT, GR_SPROLEF_ACL_MSG, gr_usermode->sp_role); + error = -EPERM; + if(!(current->role->auth_attempts++)) + current->role->expires = 
get_seconds() + CONFIG_GRKERNSEC_ACL_TIMEOUT; + + goto out; + } + break; + case UNSPROLE: + if (unlikely(!(gr_status & GR_READY))) { + gr_log_noargs(GR_DONT_AUDIT_GOOD, GR_UNSPROLEI_ACL_MSG); + error = -EAGAIN; + break; + } + + if (current->role->roletype & GR_ROLE_SPECIAL) { + char *p = ""; + int i = 0; + + read_lock(&tasklist_lock); + if (current->parent) { + p = current->parent->role->rolename; + i = current->parent->acl_role_id; + } + read_unlock(&tasklist_lock); + + gr_log_str_int(GR_DONT_AUDIT_GOOD, GR_UNSPROLES_ACL_MSG, p, i); + gr_set_acls(1); + } else { + gr_log_str(GR_DONT_AUDIT, GR_UNSPROLEF_ACL_MSG, current->role->rolename); + error = -EPERM; + goto out; + } + break; + default: + gr_log_int(GR_DONT_AUDIT, GR_INVMODE_ACL_MSG, gr_usermode->mode); + error = -EINVAL; + break; + } + + if (error != -EPERM) + goto out; + + if(!(gr_auth_attempts++)) + gr_auth_expires = get_seconds() + CONFIG_GRKERNSEC_ACL_TIMEOUT; + + out: + up(&gr_dev_sem); + return error; +} + +int +gr_set_acls(const int type) +{ + struct acl_object_label *obj; + struct task_struct *task, *task2; + struct file *filp; + struct acl_role_label *role = current->role; + __u16 acl_role_id = current->acl_role_id; + + read_lock(&tasklist_lock); + read_lock(&grsec_exec_file_lock); + do_each_thread(task2, task) { + /* check to see if we're called from the exit handler, + if so, only replace ACLs that have inherited the admin + ACL */ + + if (type && (task->role != role || + task->acl_role_id != acl_role_id)) + continue; + + task->acl_role_id = 0; + task->acl_sp_role = 0; + + if ((filp = task->exec_file)) { + task->role = lookup_acl_role_label(task, task->uid, task->gid); + + task->acl = + chk_subj_label(filp->f_dentry, filp->f_vfsmnt, + task->role); + if (task->acl) { + struct acl_subject_label *curr; + curr = task->acl; + + task->is_writable = 0; + /* ignore additional mmap checks for processes that are writable + by the default ACL */ + obj = chk_obj_label(filp->f_dentry, filp->f_vfsmnt, 
default_role->root_label); + if (unlikely(obj->mode & GR_WRITE)) + task->is_writable = 1; + obj = chk_obj_label(filp->f_dentry, filp->f_vfsmnt, task->role->root_label); + if (unlikely(obj->mode & GR_WRITE)) + task->is_writable = 1; + + gr_set_proc_res(task); + +#ifdef CONFIG_GRKERNSEC_ACL_DEBUG + printk(KERN_ALERT "gr_set_acls for (%s:%d): role:%s, subject:%s\n", task->comm, task->pid, task->role->rolename, task->acl->filename); +#endif + } else { + read_unlock(&grsec_exec_file_lock); + read_unlock(&tasklist_lock); + gr_log_str_int(GR_DONT_AUDIT_GOOD, GR_DEFACL_MSG, task->comm, task->pid); + return 1; + } + } else { + // it's a kernel process + task->role = kernel_role; + task->acl = kernel_role->root_label; +#ifdef CONFIG_GRKERNSEC_ACL_HIDEKERN + task->acl->mode &= ~GR_PROCFIND; +#endif + } + } while_each_thread(task2, task); + read_unlock(&grsec_exec_file_lock); + read_unlock(&tasklist_lock); + return 0; +} + +void +gr_learn_resource(const struct task_struct *task, + const int res, const unsigned long wanted, const int gt) +{ + struct acl_subject_label *acl; + + if (unlikely((gr_status & GR_READY) && + task->acl && (task->acl->mode & (GR_LEARN | GR_INHERITLEARN)))) + goto skip_reslog; + +#ifdef CONFIG_GRKERNSEC_RESLOG + gr_log_resource(task, res, wanted, gt); +#endif + skip_reslog: + + if (unlikely(!(gr_status & GR_READY) || !wanted)) + return; + + acl = task->acl; + + if (likely(!acl || !(acl->mode & (GR_LEARN | GR_INHERITLEARN)) || + !(acl->resmask & (1 << (unsigned short) res)))) + return; + + if (wanted >= acl->res[res].rlim_cur) { + unsigned long res_add; + + res_add = wanted; + switch (res) { + case RLIMIT_CPU: + res_add += GR_RLIM_CPU_BUMP; + break; + case RLIMIT_FSIZE: + res_add += GR_RLIM_FSIZE_BUMP; + break; + case RLIMIT_DATA: + res_add += GR_RLIM_DATA_BUMP; + break; + case RLIMIT_STACK: + res_add += GR_RLIM_STACK_BUMP; + break; + case RLIMIT_CORE: + res_add += GR_RLIM_CORE_BUMP; + break; + case RLIMIT_RSS: + res_add += GR_RLIM_RSS_BUMP; + break; + 
case RLIMIT_NPROC: + res_add += GR_RLIM_NPROC_BUMP; + break; + case RLIMIT_NOFILE: + res_add += GR_RLIM_NOFILE_BUMP; + break; + case RLIMIT_MEMLOCK: + res_add += GR_RLIM_MEMLOCK_BUMP; + break; + case RLIMIT_AS: + res_add += GR_RLIM_AS_BUMP; + break; + case RLIMIT_LOCKS: + res_add += GR_RLIM_LOCKS_BUMP; + break; + } + + acl->res[res].rlim_cur = res_add; + + if (wanted > acl->res[res].rlim_max) + acl->res[res].rlim_max = res_add; + + security_learn(GR_LEARN_AUDIT_MSG, task->role->rolename, + task->role->roletype, acl->filename, + acl->res[res].rlim_cur, acl->res[res].rlim_max, + "", (unsigned long) res); + } + + return; +} + +#ifdef CONFIG_PAX_HAVE_ACL_FLAGS +void +pax_set_initial_flags(struct linux_binprm *bprm) +{ + struct task_struct *task = current; + struct acl_subject_label *proc; + unsigned long flags; + + if (unlikely(!(gr_status & GR_READY))) + return; + + flags = pax_get_flags(task); + + proc = task->acl; + + if (proc->pax_flags & GR_PAX_DISABLE_PAGEEXEC) + flags &= ~MF_PAX_PAGEEXEC; + if (proc->pax_flags & GR_PAX_DISABLE_SEGMEXEC) + flags &= ~MF_PAX_SEGMEXEC; + if (proc->pax_flags & GR_PAX_DISABLE_RANDMMAP) + flags &= ~MF_PAX_RANDMMAP; + if (proc->pax_flags & GR_PAX_DISABLE_EMUTRAMP) + flags &= ~MF_PAX_EMUTRAMP; + if (proc->pax_flags & GR_PAX_DISABLE_MPROTECT) + flags &= ~MF_PAX_MPROTECT; + + if (proc->pax_flags & GR_PAX_ENABLE_PAGEEXEC) + flags |= MF_PAX_PAGEEXEC; + if (proc->pax_flags & GR_PAX_ENABLE_SEGMEXEC) + flags |= MF_PAX_SEGMEXEC; + if (proc->pax_flags & GR_PAX_ENABLE_RANDMMAP) + flags |= MF_PAX_RANDMMAP; + if (proc->pax_flags & GR_PAX_ENABLE_EMUTRAMP) + flags |= MF_PAX_EMUTRAMP; + if (proc->pax_flags & GR_PAX_ENABLE_MPROTECT) + flags |= MF_PAX_MPROTECT; + + pax_set_flags(task, flags); + + return; +} +#endif + +#ifdef CONFIG_SYSCTL +extern struct proc_dir_entry *proc_sys_root; + +/* the following function is called under the BKL */ + +__u32 +gr_handle_sysctl(const struct ctl_table *table, const void *oldval, + const void *newval) +{ + struct 
proc_dir_entry *tmp; + struct nameidata nd; + const char *proc_sys = "/proc/sys"; + char *path; + struct acl_object_label *obj; + unsigned short len = 0, pos = 0, depth = 0, i; + __u32 err = 0; + __u32 mode = 0; + + if (unlikely(!(gr_status & GR_READY))) + return 1; + + path = per_cpu_ptr(gr_shared_page[0], smp_processor_id()); + + if (oldval) + mode |= GR_READ; + if (newval) + mode |= GR_WRITE; + + /* convert the requested sysctl entry into a pathname */ + + for (tmp = table->de; tmp != proc_sys_root; tmp = tmp->parent) { + len += strlen(tmp->name); + len++; + depth++; + } + + if ((len + depth + strlen(proc_sys) + 1) > PAGE_SIZE) + return 0; /* deny */ + + memset(path, 0, PAGE_SIZE); + + memcpy(path, proc_sys, strlen(proc_sys)); + + pos += strlen(proc_sys); + + for (; depth > 0; depth--) { + path[pos] = '/'; + pos++; + for (i = 1, tmp = table->de; tmp != proc_sys_root; + tmp = tmp->parent) { + if (depth == i) { + memcpy(path + pos, tmp->name, + strlen(tmp->name)); + pos += strlen(tmp->name); + } + i++; + } + } + + err = path_lookup(path, LOOKUP_FOLLOW, &nd); + + if (err) + goto out; + + obj = chk_obj_label(nd.dentry, nd.mnt, current->acl); + err = obj->mode & (mode | to_gr_audit(mode) | GR_SUPPRESS); + + if (unlikely((current->acl->mode & (GR_LEARN | GR_INHERITLEARN)) && + ((err & mode) != mode))) { + __u32 new_mode = mode; + + new_mode &= ~(GR_AUDITS | GR_SUPPRESS); + + err = new_mode; + gr_log_learn(current, nd.dentry, nd.mnt, new_mode); + } else if ((err & mode) != mode && !(err & GR_SUPPRESS)) { + gr_log_str4(GR_DONT_AUDIT, GR_SYSCTL_ACL_MSG, "denied", + path, (mode & GR_READ) ? " reading" : "", + (mode & GR_WRITE) ? " writing" : ""); + err = 0; + } else if ((err & mode) != mode) { + err = 0; + } else if (((err & mode) == mode) && (err & GR_AUDITS)) { + gr_log_str4(GR_DO_AUDIT, GR_SYSCTL_ACL_MSG, "successful", + path, (mode & GR_READ) ? " reading" : "", + (mode & GR_WRITE) ? 
" writing" : ""); + } + + path_release(&nd); + + out: + return err; +} +#endif + +int +gr_handle_proc_ptrace(struct task_struct *task) +{ + struct file *filp; + struct task_struct *tmp = task; + struct task_struct *curtemp = current; + __u32 retmode; + + if (unlikely(!(gr_status & GR_READY))) + return 0; + + read_lock(&tasklist_lock); + read_lock(&grsec_exec_file_lock); + filp = task->exec_file; + + while (tmp->pid > 0) { + if (tmp == curtemp) + break; + tmp = tmp->parent; + } + + if (!filp || (tmp->pid == 0 && !(current->acl->mode & GR_RELAXPTRACE))) { + read_unlock(&grsec_exec_file_lock); + read_unlock(&tasklist_lock); + return 1; + } + + retmode = gr_search_file(filp->f_dentry, GR_NOPTRACE, filp->f_vfsmnt); + read_unlock(&grsec_exec_file_lock); + read_unlock(&tasklist_lock); + + if (retmode & GR_NOPTRACE) + return 1; + + if (!(current->acl->mode & GR_POVERRIDE) && !(current->role->roletype & GR_ROLE_GOD) + && (current->acl != task->acl || (current->acl != current->role->root_label + && current->pid != task->pid))) + return 1; + + return 0; +} + +int +gr_handle_ptrace(struct task_struct *task, const long request) +{ + struct task_struct *tmp = task; + struct task_struct *curtemp = current; + __u32 retmode; + + if (unlikely(!(gr_status & GR_READY))) + return 0; + + read_lock(&tasklist_lock); + while (tmp->pid > 0) { + if (tmp == curtemp) + break; + tmp = tmp->parent; + } + + if (tmp->pid == 0 && !(current->acl->mode & GR_RELAXPTRACE)) { + read_unlock(&tasklist_lock); + gr_log_ptrace(GR_DONT_AUDIT, GR_PTRACE_ACL_MSG, task); + return 1; + } + read_unlock(&tasklist_lock); + + read_lock(&grsec_exec_file_lock); + if (unlikely(!task->exec_file)) { + read_unlock(&grsec_exec_file_lock); + return 0; + } + + retmode = gr_search_file(task->exec_file->f_dentry, GR_PTRACERD | GR_NOPTRACE, task->exec_file->f_vfsmnt); + read_unlock(&grsec_exec_file_lock); + + if (retmode & GR_NOPTRACE) { + gr_log_ptrace(GR_DONT_AUDIT, GR_PTRACE_ACL_MSG, task); + return 1; + } + + if (retmode & 
GR_PTRACERD) { + switch (request) { + case PTRACE_POKETEXT: + case PTRACE_POKEDATA: + case PTRACE_POKEUSR: +#if !defined(CONFIG_PPC32) && !defined(CONFIG_PPC64) && !defined(CONFIG_PARISC) && !defined(CONFIG_ALPHA) && !defined(CONFIG_IA64) + case PTRACE_SETREGS: + case PTRACE_SETFPREGS: +#endif +#ifdef CONFIG_X86 + case PTRACE_SETFPXREGS: +#endif +#ifdef CONFIG_ALTIVEC + case PTRACE_SETVRREGS: +#endif + return 1; + default: + return 0; + } + } else if (!(current->acl->mode & GR_POVERRIDE) && + !(current->role->roletype & GR_ROLE_GOD) && + (current->acl != task->acl)) { + gr_log_ptrace(GR_DONT_AUDIT, GR_PTRACE_ACL_MSG, task); + return 1; + } + + return 0; +} + +static int is_writable_mmap(const struct file *filp) +{ + struct task_struct *task = current; + struct acl_object_label *obj, *obj2; + + if (gr_status & GR_READY && !(task->acl->mode & GR_OVERRIDE) && + !task->is_writable && S_ISREG(filp->f_dentry->d_inode->i_mode)) { + obj = chk_obj_label(filp->f_dentry, filp->f_vfsmnt, default_role->root_label); + obj2 = chk_obj_label(filp->f_dentry, filp->f_vfsmnt, + task->role->root_label); + if (unlikely((obj->mode & GR_WRITE) || (obj2->mode & GR_WRITE))) { + gr_log_fs_generic(GR_DONT_AUDIT, GR_WRITLIB_ACL_MSG, filp->f_dentry, filp->f_vfsmnt); + return 1; + } + } + return 0; +} + +int +gr_acl_handle_mmap(const struct file *file, const unsigned long prot) +{ + __u32 mode; + + if (unlikely(!file || !(prot & PROT_EXEC))) + return 1; + + if (is_writable_mmap(file)) + return 0; + + mode = + gr_search_file(file->f_dentry, + GR_EXEC | GR_AUDIT_EXEC | GR_SUPPRESS, + file->f_vfsmnt); + + if (!gr_tpe_allow(file)) + return 0; + + if (unlikely(!(mode & GR_EXEC) && !(mode & GR_SUPPRESS))) { + gr_log_fs_rbac_generic(GR_DONT_AUDIT, GR_MMAP_ACL_MSG, file->f_dentry, file->f_vfsmnt); + return 0; + } else if (unlikely(!(mode & GR_EXEC))) { + return 0; + } else if (unlikely(mode & GR_EXEC && mode & GR_AUDIT_EXEC)) { + gr_log_fs_rbac_generic(GR_DO_AUDIT, GR_MMAP_ACL_MSG, file->f_dentry, 
file->f_vfsmnt); + return 1; + } + + return 1; +} + +int +gr_acl_handle_mprotect(const struct file *file, const unsigned long prot) +{ + __u32 mode; + + if (unlikely(!file || !(prot & PROT_EXEC))) + return 1; + + if (is_writable_mmap(file)) + return 0; + + mode = + gr_search_file(file->f_dentry, + GR_EXEC | GR_AUDIT_EXEC | GR_SUPPRESS, + file->f_vfsmnt); + + if (!gr_tpe_allow(file)) + return 0; + + if (unlikely(!(mode & GR_EXEC) && !(mode & GR_SUPPRESS))) { + gr_log_fs_rbac_generic(GR_DONT_AUDIT, GR_MPROTECT_ACL_MSG, file->f_dentry, file->f_vfsmnt); + return 0; + } else if (unlikely(!(mode & GR_EXEC))) { + return 0; + } else if (unlikely(mode & GR_EXEC && mode & GR_AUDIT_EXEC)) { + gr_log_fs_rbac_generic(GR_DO_AUDIT, GR_MPROTECT_ACL_MSG, file->f_dentry, file->f_vfsmnt); + return 1; + } + + return 1; +} + +void +gr_acl_handle_psacct(struct task_struct *task, const long code) +{ + unsigned long runtime; + unsigned long cputime; + unsigned int wday, cday; + __u8 whr, chr; + __u8 wmin, cmin; + __u8 wsec, csec; + + if (unlikely(!(gr_status & GR_READY) || !task->acl || + !(task->acl->mode & GR_PROCACCT))) + return; + + runtime = xtime.tv_sec - task->start_time.tv_sec; + wday = runtime / (3600 * 24); + runtime -= wday * (3600 * 24); + whr = runtime / 3600; + runtime -= whr * 3600; + wmin = runtime / 60; + runtime -= wmin * 60; + wsec = runtime; + + cputime = (task->utime + task->stime) / HZ; + cday = cputime / (3600 * 24); + cputime -= cday * (3600 * 24); + chr = cputime / 3600; + cputime -= chr * 3600; + cmin = cputime / 60; + cputime -= cmin * 60; + csec = cputime; + + gr_log_procacct(GR_DO_AUDIT, GR_ACL_PROCACCT_MSG, task, wday, whr, wmin, wsec, cday, chr, cmin, csec, code); + + return; +} + +void gr_set_kernel_label(struct task_struct *task) +{ + if (gr_status & GR_READY) { + task->role = kernel_role; + task->acl = kernel_role->root_label; + } + return; +} + +int gr_acl_handle_filldir(const struct file *file, const char *name, const unsigned int namelen, const ino_t 
ino) +{ + struct task_struct *task = current; + struct dentry *dentry = file->f_dentry; + struct vfsmount *mnt = file->f_vfsmnt; + struct acl_object_label *obj, *tmp; + struct acl_subject_label *subj; + unsigned int bufsize; + int is_not_root; + char *path; + + if (unlikely(!(gr_status & GR_READY))) + return 1; + + if (task->acl->mode & (GR_LEARN | GR_INHERITLEARN)) + return 1; + + subj = task->acl; + do { + obj = lookup_acl_obj_label(ino, dentry->d_inode->i_sb->s_dev, subj); + if (obj != NULL) + return (obj->mode & GR_FIND) ? 1 : 0; + } while ((subj = subj->parent_subject)); + + obj = chk_obj_label(dentry, mnt, task->acl); + if (obj->globbed == NULL) + return (obj->mode & GR_FIND) ? 1 : 0; + + is_not_root = ((obj->filename[0] == '/') && + (obj->filename[1] == '\0')) ? 0 : 1; + bufsize = PAGE_SIZE - namelen - is_not_root; + + /* check bufsize > PAGE_SIZE || bufsize == 0 */ + if (unlikely((bufsize - 1) > (PAGE_SIZE - 1))) + return 1; + + preempt_disable(); + path = d_real_path(dentry, mnt, per_cpu_ptr(gr_shared_page[0], smp_processor_id()), + bufsize); + + bufsize = strlen(path); + + /* if base is "/", don't append an additional slash */ + if (is_not_root) + *(path + bufsize) = '/'; + memcpy(path + bufsize + is_not_root, name, namelen); + *(path + bufsize + namelen + is_not_root) = '\0'; + + tmp = obj->globbed; + while (tmp) { + if (!glob_match(tmp->filename, path)) { + preempt_enable(); + return (tmp->mode & GR_FIND) ? 1 : 0; + } + tmp = tmp->next; + } + preempt_enable(); + return (obj->mode & GR_FIND) ? 
1 : 0; +} + +EXPORT_SYMBOL(gr_learn_resource); +EXPORT_SYMBOL(gr_set_kernel_label); +#ifdef CONFIG_SECURITY +EXPORT_SYMBOL(gr_check_user_change); +EXPORT_SYMBOL(gr_check_group_change); +#endif + diff -urNp linux-2.6.17.11/grsecurity/gracl_cap.c linux-2.6.17.11/grsecurity/gracl_cap.c --- linux-2.6.17.11/grsecurity/gracl_cap.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/grsecurity/gracl_cap.c 2006-09-01 16:20:28.000000000 -0400 @@ -0,0 +1,110 @@ +#include +#include +#include +#include +#include +#include +#include + +static const char *captab_log[] = { + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_DAC_READ_SEARCH", + "CAP_FOWNER", + "CAP_FSETID", + "CAP_KILL", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETPCAP", + "CAP_LINUX_IMMUTABLE", + "CAP_NET_BIND_SERVICE", + "CAP_NET_BROADCAST", + "CAP_NET_ADMIN", + "CAP_NET_RAW", + "CAP_IPC_LOCK", + "CAP_IPC_OWNER", + "CAP_SYS_MODULE", + "CAP_SYS_RAWIO", + "CAP_SYS_CHROOT", + "CAP_SYS_PTRACE", + "CAP_SYS_PACCT", + "CAP_SYS_ADMIN", + "CAP_SYS_BOOT", + "CAP_SYS_NICE", + "CAP_SYS_RESOURCE", + "CAP_SYS_TIME", + "CAP_SYS_TTY_CONFIG", + "CAP_MKNOD", + "CAP_LEASE" +}; + +EXPORT_SYMBOL(gr_task_is_capable); + +int +gr_task_is_capable(struct task_struct *task, const int cap) +{ + struct acl_subject_label *curracl; + __u32 cap_drop = 0, cap_mask = 0; + + if (!gr_acl_is_enabled()) + return 1; + + curracl = task->acl; + + cap_drop = curracl->cap_lower; + cap_mask = curracl->cap_mask; + + while ((curracl = curracl->parent_subject)) { + if (!(cap_mask & (1 << cap)) && (curracl->cap_mask & (1 << cap))) + cap_drop |= curracl->cap_lower & (1 << cap); + cap_mask |= curracl->cap_mask; + } + + if (!cap_raised(cap_drop, cap)) + return 1; + + curracl = task->acl; + + if ((curracl->mode & (GR_LEARN | GR_INHERITLEARN)) + && cap_raised(task->cap_effective, cap)) { + security_learn(GR_LEARN_AUDIT_MSG, task->role->rolename, + task->role->roletype, task->uid, + task->gid, task->exec_file ? 
+ gr_to_filename(task->exec_file->f_dentry, + task->exec_file->f_vfsmnt) : curracl->filename, + curracl->filename, 0UL, + 0UL, "", (unsigned long) cap, NIPQUAD(task->signal->curr_ip)); + return 1; + } + + if ((cap >= 0) && (cap < (sizeof(captab_log)/sizeof(captab_log[0]))) && cap_raised(task->cap_effective, cap)) + gr_log_cap(GR_DONT_AUDIT, GR_CAP_ACL_MSG, task, captab_log[cap]); + + return 0; +} + +int +gr_is_capable_nolog(const int cap) +{ + struct acl_subject_label *curracl; + __u32 cap_drop = 0, cap_mask = 0; + + if (!gr_acl_is_enabled()) + return 1; + + curracl = current->acl; + + cap_drop = curracl->cap_lower; + cap_mask = curracl->cap_mask; + + while ((curracl = curracl->parent_subject)) { + cap_drop |= curracl->cap_lower & curracl->cap_mask & ~cap_mask; /* inherit a parent's drop only for caps the parent specifies and no nearer subject did, matching gr_task_is_capable() */ + cap_mask |= curracl->cap_mask; + } + + if (!cap_raised(cap_drop, cap)) + return 1; + + return 0; +} + diff -urNp linux-2.6.17.11/grsecurity/gracl_fs.c linux-2.6.17.11/grsecurity/gracl_fs.c --- linux-2.6.17.11/grsecurity/gracl_fs.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/grsecurity/gracl_fs.c 2006-09-01 16:20:28.000000000 -0400 @@ -0,0 +1,423 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +__u32 +gr_acl_handle_hidden_file(const struct dentry * dentry, + const struct vfsmount * mnt) +{ + __u32 mode; + + if (unlikely(!dentry->d_inode)) + return GR_FIND; + + mode = + gr_search_file(dentry, GR_FIND | GR_AUDIT_FIND | GR_SUPPRESS, mnt); + + if (unlikely(mode & GR_FIND && mode & GR_AUDIT_FIND)) { + gr_log_fs_rbac_generic(GR_DO_AUDIT, GR_HIDDEN_ACL_MSG, dentry, mnt); + return mode; + } else if (unlikely(!(mode & GR_FIND) && !(mode & GR_SUPPRESS))) { + gr_log_fs_rbac_generic(GR_DONT_AUDIT, GR_HIDDEN_ACL_MSG, dentry, mnt); + return 0; + } else if (unlikely(!(mode & GR_FIND))) + return 0; + + return GR_FIND; +} + +__u32 +gr_acl_handle_open(const struct dentry * dentry, const struct vfsmount * mnt, + const int fmode) +{ + __u32 reqmode = GR_FIND; + __u32 
mode; + + if (unlikely(!dentry->d_inode)) + return reqmode; + + if (unlikely(fmode & O_APPEND)) + reqmode |= GR_APPEND; + else if (unlikely(fmode & FMODE_WRITE)) + reqmode |= GR_WRITE; + if (likely((fmode & FMODE_READ) && !(fmode & O_DIRECTORY))) + reqmode |= GR_READ; + + mode = + gr_search_file(dentry, reqmode | to_gr_audit(reqmode) | GR_SUPPRESS, + mnt); + + if (unlikely(((mode & reqmode) == reqmode) && mode & GR_AUDITS)) { + gr_log_fs_rbac_mode2(GR_DO_AUDIT, GR_OPEN_ACL_MSG, dentry, mnt, + reqmode & GR_READ ? " reading" : "", + reqmode & GR_WRITE ? " writing" : reqmode & + GR_APPEND ? " appending" : ""); + return reqmode; + } else + if (unlikely((mode & reqmode) != reqmode && !(mode & GR_SUPPRESS))) + { + gr_log_fs_rbac_mode2(GR_DONT_AUDIT, GR_OPEN_ACL_MSG, dentry, mnt, + reqmode & GR_READ ? " reading" : "", + reqmode & GR_WRITE ? " writing" : reqmode & + GR_APPEND ? " appending" : ""); + return 0; + } else if (unlikely((mode & reqmode) != reqmode)) + return 0; + + return reqmode; +} + +__u32 +gr_acl_handle_creat(const struct dentry * dentry, + const struct dentry * p_dentry, + const struct vfsmount * p_mnt, const int fmode, + const int imode) +{ + __u32 reqmode = GR_WRITE | GR_CREATE; + __u32 mode; + + if (unlikely(fmode & O_APPEND)) + reqmode |= GR_APPEND; + if (unlikely((fmode & FMODE_READ) && !(fmode & O_DIRECTORY))) + reqmode |= GR_READ; + if (unlikely((fmode & O_CREAT) && (imode & (S_ISUID | S_ISGID)))) + reqmode |= GR_SETID; + + mode = + gr_check_create(dentry, p_dentry, p_mnt, + reqmode | to_gr_audit(reqmode) | GR_SUPPRESS); + + if (unlikely(((mode & reqmode) == reqmode) && mode & GR_AUDITS)) { + gr_log_fs_rbac_mode2(GR_DO_AUDIT, GR_CREATE_ACL_MSG, dentry, p_mnt, + reqmode & GR_READ ? " reading" : "", + reqmode & GR_WRITE ? " writing" : reqmode & + GR_APPEND ? 
" appending" : ""); + return reqmode; + } else + if (unlikely((mode & reqmode) != reqmode && !(mode & GR_SUPPRESS))) + { + gr_log_fs_rbac_mode2(GR_DONT_AUDIT, GR_CREATE_ACL_MSG, dentry, p_mnt, + reqmode & GR_READ ? " reading" : "", + reqmode & GR_WRITE ? " writing" : reqmode & + GR_APPEND ? " appending" : ""); + return 0; + } else if (unlikely((mode & reqmode) != reqmode)) + return 0; + + return reqmode; +} + +__u32 +gr_acl_handle_access(const struct dentry * dentry, const struct vfsmount * mnt, + const int fmode) +{ + __u32 mode, reqmode = GR_FIND; + + if ((fmode & S_IXOTH) && !S_ISDIR(dentry->d_inode->i_mode)) + reqmode |= GR_EXEC; + if (fmode & S_IWOTH) + reqmode |= GR_WRITE; + if (fmode & S_IROTH) + reqmode |= GR_READ; + + mode = + gr_search_file(dentry, reqmode | to_gr_audit(reqmode) | GR_SUPPRESS, + mnt); + + if (unlikely(((mode & reqmode) == reqmode) && mode & GR_AUDITS)) { + gr_log_fs_rbac_mode3(GR_DO_AUDIT, GR_ACCESS_ACL_MSG, dentry, mnt, + reqmode & GR_READ ? " reading" : "", + reqmode & GR_WRITE ? " writing" : "", + reqmode & GR_EXEC ? " executing" : ""); + return reqmode; + } else + if (unlikely((mode & reqmode) != reqmode && !(mode & GR_SUPPRESS))) + { + gr_log_fs_rbac_mode3(GR_DONT_AUDIT, GR_ACCESS_ACL_MSG, dentry, mnt, + reqmode & GR_READ ? " reading" : "", + reqmode & GR_WRITE ? " writing" : "", + reqmode & GR_EXEC ? 
" executing" : ""); + return 0; + } else if (unlikely((mode & reqmode) != reqmode)) + return 0; + + return reqmode; +} + +static __u32 generic_fs_handler(const struct dentry *dentry, const struct vfsmount *mnt, __u32 reqmode, const char *fmt) +{ + __u32 mode; + + mode = gr_search_file(dentry, reqmode | to_gr_audit(reqmode) | GR_SUPPRESS, mnt); + + if (unlikely(((mode & (reqmode)) == (reqmode)) && mode & GR_AUDITS)) { + gr_log_fs_rbac_generic(GR_DO_AUDIT, fmt, dentry, mnt); + return mode; + } else if (unlikely((mode & (reqmode)) != (reqmode) && !(mode & GR_SUPPRESS))) { + gr_log_fs_rbac_generic(GR_DONT_AUDIT, fmt, dentry, mnt); + return 0; + } else if (unlikely((mode & (reqmode)) != (reqmode))) + return 0; + + return (reqmode); +} + +__u32 +gr_acl_handle_rmdir(const struct dentry * dentry, const struct vfsmount * mnt) +{ + return generic_fs_handler(dentry, mnt, GR_WRITE | GR_DELETE , GR_RMDIR_ACL_MSG); +} + +__u32 +gr_acl_handle_unlink(const struct dentry *dentry, const struct vfsmount *mnt) +{ + return generic_fs_handler(dentry, mnt, GR_WRITE | GR_DELETE , GR_UNLINK_ACL_MSG); +} + +__u32 +gr_acl_handle_truncate(const struct dentry *dentry, const struct vfsmount *mnt) +{ + return generic_fs_handler(dentry, mnt, GR_WRITE, GR_TRUNCATE_ACL_MSG); +} + +__u32 +gr_acl_handle_utime(const struct dentry *dentry, const struct vfsmount *mnt) +{ + return generic_fs_handler(dentry, mnt, GR_WRITE, GR_ATIME_ACL_MSG); +} + +__u32 +gr_acl_handle_fchmod(const struct dentry *dentry, const struct vfsmount *mnt, + mode_t mode) +{ + if (unlikely(dentry->d_inode && S_ISSOCK(dentry->d_inode->i_mode))) + return 1; + + if (unlikely((mode != (mode_t)-1) && (mode & (S_ISUID | S_ISGID)))) { + return generic_fs_handler(dentry, mnt, GR_WRITE | GR_SETID, + GR_FCHMOD_ACL_MSG); + } else { + return generic_fs_handler(dentry, mnt, GR_WRITE, GR_FCHMOD_ACL_MSG); + } +} + +__u32 +gr_acl_handle_chmod(const struct dentry *dentry, const struct vfsmount *mnt, + mode_t mode) +{ + if (unlikely((mode != 
(mode_t)-1) && (mode & (S_ISUID | S_ISGID)))) { + return generic_fs_handler(dentry, mnt, GR_WRITE | GR_SETID, + GR_CHMOD_ACL_MSG); + } else { + return generic_fs_handler(dentry, mnt, GR_WRITE, GR_CHMOD_ACL_MSG); + } +} + +__u32 +gr_acl_handle_chown(const struct dentry *dentry, const struct vfsmount *mnt) +{ + return generic_fs_handler(dentry, mnt, GR_WRITE, GR_CHOWN_ACL_MSG); +} + +__u32 +gr_acl_handle_execve(const struct dentry *dentry, const struct vfsmount *mnt) +{ + return generic_fs_handler(dentry, mnt, GR_EXEC, GR_EXEC_ACL_MSG); +} + +__u32 +gr_acl_handle_unix(const struct dentry *dentry, const struct vfsmount *mnt) +{ + return generic_fs_handler(dentry, mnt, GR_READ | GR_WRITE, + GR_UNIXCONNECT_ACL_MSG); +} + +/* hardlinks require at minimum create permission, + any additional privilege required is based on the + privilege of the file being linked to +*/ +__u32 +gr_acl_handle_link(const struct dentry * new_dentry, + const struct dentry * parent_dentry, + const struct vfsmount * parent_mnt, + const struct dentry * old_dentry, + const struct vfsmount * old_mnt, const char *to) +{ + __u32 mode; + __u32 needmode = GR_CREATE | GR_LINK; + __u32 needaudit = GR_AUDIT_CREATE | GR_AUDIT_LINK; + + mode = + gr_check_link(new_dentry, parent_dentry, parent_mnt, old_dentry, + old_mnt); + + if (unlikely(((mode & needmode) == needmode) && (mode & needaudit))) { + gr_log_fs_rbac_str(GR_DO_AUDIT, GR_LINK_ACL_MSG, old_dentry, old_mnt, to); + return mode; + } else if (unlikely(((mode & needmode) != needmode) && !(mode & GR_SUPPRESS))) { + gr_log_fs_rbac_str(GR_DONT_AUDIT, GR_LINK_ACL_MSG, old_dentry, old_mnt, to); + return 0; + } else if (unlikely((mode & needmode) != needmode)) + return 0; + + return 1; +} + +__u32 +gr_acl_handle_symlink(const struct dentry * new_dentry, + const struct dentry * parent_dentry, + const struct vfsmount * parent_mnt, const char *from) +{ + __u32 needmode = GR_WRITE | GR_CREATE; + __u32 mode; + + mode = + gr_check_create(new_dentry, parent_dentry, 
parent_mnt, + GR_CREATE | GR_AUDIT_CREATE | + GR_WRITE | GR_AUDIT_WRITE | GR_SUPPRESS); + + if (unlikely(mode & GR_WRITE && mode & GR_AUDITS)) { + gr_log_fs_str_rbac(GR_DO_AUDIT, GR_SYMLINK_ACL_MSG, from, new_dentry, parent_mnt); + return mode; + } else if (unlikely(((mode & needmode) != needmode) && !(mode & GR_SUPPRESS))) { + gr_log_fs_str_rbac(GR_DONT_AUDIT, GR_SYMLINK_ACL_MSG, from, new_dentry, parent_mnt); + return 0; + } else if (unlikely((mode & needmode) != needmode)) + return 0; + + return (GR_WRITE | GR_CREATE); +} + +static __u32 generic_fs_create_handler(const struct dentry *new_dentry, const struct dentry *parent_dentry, const struct vfsmount *parent_mnt, __u32 reqmode, const char *fmt) +{ + __u32 mode; + + mode = gr_check_create(new_dentry, parent_dentry, parent_mnt, reqmode | to_gr_audit(reqmode) | GR_SUPPRESS); + + if (unlikely(((mode & (reqmode)) == (reqmode)) && mode & GR_AUDITS)) { + gr_log_fs_rbac_generic(GR_DO_AUDIT, fmt, new_dentry, parent_mnt); + return mode; + } else if (unlikely((mode & (reqmode)) != (reqmode) && !(mode & GR_SUPPRESS))) { + gr_log_fs_rbac_generic(GR_DONT_AUDIT, fmt, new_dentry, parent_mnt); + return 0; + } else if (unlikely((mode & (reqmode)) != (reqmode))) + return 0; + + return (reqmode); +} + +__u32 +gr_acl_handle_mknod(const struct dentry * new_dentry, + const struct dentry * parent_dentry, + const struct vfsmount * parent_mnt, + const int mode) +{ + __u32 reqmode = GR_WRITE | GR_CREATE; + if (unlikely(mode & (S_ISUID | S_ISGID))) + reqmode |= GR_SETID; + + return generic_fs_create_handler(new_dentry, parent_dentry, parent_mnt, + reqmode, GR_MKNOD_ACL_MSG); +} + +__u32 +gr_acl_handle_mkdir(const struct dentry *new_dentry, + const struct dentry *parent_dentry, + const struct vfsmount *parent_mnt) +{ + return generic_fs_create_handler(new_dentry, parent_dentry, parent_mnt, + GR_WRITE | GR_CREATE, GR_MKDIR_ACL_MSG); +} + +#define RENAME_CHECK_SUCCESS(old, new) \ + (((old & (GR_WRITE | GR_READ)) == (GR_WRITE | GR_READ)) && 
\ + ((new & (GR_WRITE | GR_READ)) == (GR_WRITE | GR_READ))) + +int +gr_acl_handle_rename(struct dentry *new_dentry, + struct dentry *parent_dentry, + const struct vfsmount *parent_mnt, + struct dentry *old_dentry, + struct inode *old_parent_inode, + struct vfsmount *old_mnt, const char *newname) +{ + __u32 comp1, comp2; + int error = 0; + + if (unlikely(!gr_acl_is_enabled())) + return 0; + + if (!new_dentry->d_inode) { + comp1 = gr_check_create(new_dentry, parent_dentry, parent_mnt, + GR_READ | GR_WRITE | GR_CREATE | GR_AUDIT_READ | + GR_AUDIT_WRITE | GR_AUDIT_CREATE | GR_SUPPRESS); + comp2 = gr_search_file(old_dentry, GR_READ | GR_WRITE | + GR_DELETE | GR_AUDIT_DELETE | + GR_AUDIT_READ | GR_AUDIT_WRITE | + GR_SUPPRESS, old_mnt); + } else { + comp1 = gr_search_file(new_dentry, GR_READ | GR_WRITE | + GR_CREATE | GR_DELETE | + GR_AUDIT_CREATE | GR_AUDIT_DELETE | + GR_AUDIT_READ | GR_AUDIT_WRITE | + GR_SUPPRESS, parent_mnt); + comp2 = + gr_search_file(old_dentry, + GR_READ | GR_WRITE | GR_AUDIT_READ | + GR_DELETE | GR_AUDIT_DELETE | + GR_AUDIT_WRITE | GR_SUPPRESS, old_mnt); + } + + if (RENAME_CHECK_SUCCESS(comp1, comp2) && + ((comp1 & GR_AUDITS) || (comp2 & GR_AUDITS))) + gr_log_fs_rbac_str(GR_DO_AUDIT, GR_RENAME_ACL_MSG, old_dentry, old_mnt, newname); + else if (!RENAME_CHECK_SUCCESS(comp1, comp2) && !(comp1 & GR_SUPPRESS) + && !(comp2 & GR_SUPPRESS)) { + gr_log_fs_rbac_str(GR_DONT_AUDIT, GR_RENAME_ACL_MSG, old_dentry, old_mnt, newname); + error = -EACCES; + } else if (unlikely(!RENAME_CHECK_SUCCESS(comp1, comp2))) + error = -EACCES; + + return error; +} + +void +gr_acl_handle_exit(void) +{ + u16 id; + char *rolename; + struct file *exec_file; + + if (unlikely(current->acl_sp_role && gr_acl_is_enabled())) { + id = current->acl_role_id; + rolename = current->role->rolename; + gr_set_acls(1); + gr_log_str_int(GR_DONT_AUDIT_GOOD, GR_SPROLEL_ACL_MSG, rolename, id); + } + + write_lock(&grsec_exec_file_lock); + exec_file = current->exec_file; + current->exec_file = NULL; + 
write_unlock(&grsec_exec_file_lock); + + if (exec_file) + fput(exec_file); +} + +int +gr_acl_handle_procpidmem(const struct task_struct *task) +{ + if (unlikely(!gr_acl_is_enabled())) + return 0; + + if (task->acl->mode & GR_PROTPROCFD) + return -EACCES; + + return 0; +} diff -urNp linux-2.6.17.11/grsecurity/gracl_ip.c linux-2.6.17.11/grsecurity/gracl_ip.c --- linux-2.6.17.11/grsecurity/gracl_ip.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/grsecurity/gracl_ip.c 2006-09-01 16:20:28.000000000 -0400 @@ -0,0 +1,313 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define GR_BIND 0x01 +#define GR_CONNECT 0x02 +#define GR_INVERT 0x04 + +static const char * gr_protocols[256] = { /* names returned by gr_proto_to_name(), indexed by protocol number 0-255 */ + "ip", "icmp", "igmp", "ggp", "ipencap", "st", "tcp", "cbt", + "egp", "igp", "bbn-rcc", "nvp", "pup", "argus", "emcon", "xnet", + "chaos", "udp", "mux", "dcn", "hmp", "prm", "xns-idp", "trunk-1", + "trunk-2", "leaf-1", "leaf-2", "rdp", "irtp", "iso-tp4", "netblt", "mfe-nsp", + "merit-inp", "sep", "3pc", "idpr", "xtp", "ddp", "idpr-cmtp", "tp++", + "il", "ipv6", "sdrp", "ipv6-route", "ipv6-frag", "idrp", "rsvp", "gre", + "mhrp", "bna", "ipv6-crypt", "ipv6-auth", "i-nlsp", "swipe", "narp", "mobile", + "tlsp", "skip", "ipv6-icmp", "ipv6-nonxt", "ipv6-opts", "unknown:61", "cftp", "unknown:63", + "sat-expak", "kryptolan", "rvd", "ippc", "unknown:68", "sat-mon", "visa", "ipcv", + "cpnx", "cphb", "wsn", "pvp", "br-sat-mon", "sun-nd", "wb-mon", "wb-expak", + "iso-ip", "vmtp", "secure-vmtp", "vines", "ttp", "nfsnet-igp", "dgp", "tcf", + "eigrp", "ospf", "sprite-rpc", "larp", "mtp", "ax.25", "ipip", "micp", + "scc-sp", "etherip", "encap", "unknown:99", "gmtp", "ifmp", "pnni", "pim", + "aris", "scps", "qnx", "a/n", "ipcomp", "snp", "compaq-peer", "ipx-in-ip", + "vrrp", "pgm", "unknown:114", "l2tp", "ddx", "iatp", "stp", "srp", + "uti", "smp", "sm", 
"ptp", "isis", "fire", "crtp", "crdup", + "sscopmce", "iplt", "sps", "pipe", "sctp", "fc", "unknown:134", "unknown:135", + "unknown:136", "unknown:137", "unknown:138", "unknown:139", "unknown:140", "unknown:141", "unknown:142", "unknown:143", + "unknown:144", "unknown:145", "unknown:146", "unknown:147", "unknown:148", "unknown:149", "unknown:150", "unknown:151", + "unknown:152", "unknown:153", "unknown:154", "unknown:155", "unknown:156", "unknown:157", "unknown:158", "unknown:159", + "unknown:160", "unknown:161", "unknown:162", "unknown:163", "unknown:164", "unknown:165", "unknown:166", "unknown:167", + "unknown:168", "unknown:169", "unknown:170", "unknown:171", "unknown:172", "unknown:173", "unknown:174", "unknown:175", + "unknown:176", "unknown:177", "unknown:178", "unknown:179", "unknown:180", "unknown:181", "unknown:182", "unknown:183", + "unknown:184", "unknown:185", "unknown:186", "unknown:187", "unknown:188", "unknown:189", "unknown:190", "unknown:191", + "unknown:192", "unknown:193", "unknown:194", "unknown:195", "unknown:196", "unknown:197", "unknown:198", "unknown:199", + "unknown:200", "unknown:201", "unknown:202", "unknown:203", "unknown:204", "unknown:205", "unknown:206", "unknown:207", + "unknown:208", "unknown:209", "unknown:210", "unknown:211", "unknown:212", "unknown:213", "unknown:214", "unknown:215", + "unknown:216", "unknown:217", "unknown:218", "unknown:219", "unknown:220", "unknown:221", "unknown:222", "unknown:223", + "unknown:224", "unknown:225", "unknown:226", "unknown:227", "unknown:228", "unknown:229", "unknown:230", "unknown:231", + "unknown:232", "unknown:233", "unknown:234", "unknown:235", "unknown:236", "unknown:237", "unknown:238", "unknown:239", + "unknown:240", "unknown:241", "unknown:242", "unknown:243", "unknown:244", "unknown:245", "unknown:246", "unknown:247", + "unknown:248", "unknown:249", "unknown:250", "unknown:251", "unknown:252", "unknown:253", "unknown:254", "unknown:255", + }; + +static const char * gr_socktypes[11] = { 
+ "unknown:0", "stream", "dgram", "raw", "rdm", "seqpacket", "unknown:6", + "unknown:7", "unknown:8", "unknown:9", "packet" + }; + +const char * +gr_proto_to_name(unsigned char proto) +{ + return gr_protocols[proto]; +} + +const char * +gr_socktype_to_name(unsigned char type) +{ + return (type < sizeof(gr_socktypes) / sizeof(gr_socktypes[0])) ? gr_socktypes[type] : "unknown"; /* gr_socktypes has 11 entries but type can be 0-255; never index past the table */ +} + +int +gr_search_socket(const int domain, const int type, const int protocol) +{ + struct acl_subject_label *curr; + + if (unlikely(!gr_acl_is_enabled())) + goto exit; + + if ((domain < 0) || (type < 0) || (protocol < 0) || (domain != PF_INET) + || (domain >= NPROTO) || (type >= SOCK_MAX) || (protocol > 255)) + goto exit; // let the kernel handle it + + curr = current->acl; + + if (!curr->ips) + goto exit; + + if ((curr->ip_type & (1 << type)) && + (curr->ip_proto[protocol / 32] & (1 << (protocol % 32)))) + goto exit; + + if (curr->mode & (GR_LEARN | GR_INHERITLEARN)) { + /* we don't place acls on raw sockets , and sometimes + dgram/ip sockets are opened for ioctl and not + bind/connect, so we'll fake a bind learn log */ + if (type == SOCK_RAW || type == SOCK_PACKET) { + __u32 fakeip = 0; + security_learn(GR_IP_LEARN_MSG, current->role->rolename, + current->role->roletype, current->uid, + current->gid, current->exec_file ? + gr_to_filename(current->exec_file->f_dentry, + current->exec_file->f_vfsmnt) : + curr->filename, curr->filename, + NIPQUAD(fakeip), 0, type, + protocol, GR_CONNECT, +NIPQUAD(current->signal->curr_ip)); + } else if ((type == SOCK_DGRAM) && (protocol == IPPROTO_IP)) { + __u32 fakeip = 0; + security_learn(GR_IP_LEARN_MSG, current->role->rolename, + current->role->roletype, current->uid, + current->gid, current->exec_file ? 
+ gr_to_filename(current->exec_file->f_dentry, + current->exec_file->f_vfsmnt) : + curr->filename, curr->filename, + NIPQUAD(fakeip), 0, type, + protocol, GR_BIND, NIPQUAD(current->signal->curr_ip)); + } + /* we'll log when they use connect or bind */ + goto exit; + } + + gr_log_str3(GR_DONT_AUDIT, GR_SOCK_MSG, "inet", + gr_socktype_to_name(type), gr_proto_to_name(protocol)); + + return 0; + exit: + return 1; +} + +int check_ip_policy(struct acl_ip_label *ip, __u32 ip_addr, __u16 ip_port, __u8 protocol, const int mode, const int type, __u32 our_addr, __u32 our_netmask) +{ + if ((ip->mode & mode) && + (ip_port >= ip->low) && + (ip_port <= ip->high) && + ((ntohl(ip_addr) & our_netmask) == + (ntohl(our_addr) & our_netmask)) + && (ip->proto[protocol / 32] & (1 << (protocol % 32))) + && (ip->type & (1 << type))) { + if (ip->mode & GR_INVERT) + return 2; // specifically denied + else + return 1; // allowed + } + + return 0; // not specifically allowed, may continue parsing +} + +static int +gr_search_connectbind(const int mode, const struct sock *sk, + const struct sockaddr_in *addr, const int type) +{ + char iface[IFNAMSIZ] = {0}; + struct acl_subject_label *curr; + struct acl_ip_label *ip; + struct net_device *dev; + struct in_device *idev; + unsigned long i; + int ret; + __u32 ip_addr = 0; + __u32 our_addr; + __u32 our_netmask; + char *p; + __u16 ip_port = 0; + + if (unlikely(!gr_acl_is_enabled() || sk->sk_family != PF_INET)) + return 1; + + curr = current->acl; + + if (!curr->ips) + return 1; + + ip_addr = addr->sin_addr.s_addr; + ip_port = ntohs(addr->sin_port); + + if (curr->mode & (GR_LEARN | GR_INHERITLEARN)) { + security_learn(GR_IP_LEARN_MSG, current->role->rolename, + current->role->roletype, current->uid, + current->gid, current->exec_file ? 
+ gr_to_filename(current->exec_file->f_dentry, + current->exec_file->f_vfsmnt) : + curr->filename, curr->filename, + NIPQUAD(ip_addr), ip_port, type, + sk->sk_protocol, mode, NIPQUAD(current->signal->curr_ip)); + return 1; + } + + for (i = 0; i < curr->ip_num; i++) { + ip = *(curr->ips + i); + if (ip->iface != NULL) { + strncpy(iface, ip->iface, IFNAMSIZ - 1); + p = strchr(iface, ':'); + if (p != NULL) + *p = '\0'; + dev = dev_get_by_name(iface); + if (dev == NULL) + continue; + idev = in_dev_get(dev); + if (idev == NULL) { + dev_put(dev); + continue; + } + rcu_read_lock(); + for_ifa(idev) { + if (!strcmp(ip->iface, ifa->ifa_label)) { + our_addr = ifa->ifa_address; + our_netmask = 0xffffffff; + ret = check_ip_policy(ip, ip_addr, ip_port, sk->sk_protocol, mode, type, our_addr, our_netmask); + if (ret == 1) { + rcu_read_unlock(); + in_dev_put(idev); + dev_put(dev); + return 1; + } else if (ret == 2) { + rcu_read_unlock(); + in_dev_put(idev); + dev_put(dev); + goto denied; + } + } + } endfor_ifa(idev); + rcu_read_unlock(); + in_dev_put(idev); + dev_put(dev); + } else { + our_addr = ip->addr; + our_netmask = ip->netmask; + ret = check_ip_policy(ip, ip_addr, ip_port, sk->sk_protocol, mode, type, our_addr, our_netmask); + if (ret == 1) + return 1; + else if (ret == 2) + goto denied; + } + } + +denied: + if (mode == GR_BIND) + gr_log_int5_str2(GR_DONT_AUDIT, GR_BIND_ACL_MSG, NIPQUAD(ip_addr), ip_port, gr_socktype_to_name(type), gr_proto_to_name(sk->sk_protocol)); + else if (mode == GR_CONNECT) + gr_log_int5_str2(GR_DONT_AUDIT, GR_CONNECT_ACL_MSG, NIPQUAD(ip_addr), ip_port, gr_socktype_to_name(type), gr_proto_to_name(sk->sk_protocol)); + + return 0; +} + +int +gr_search_connect(const struct socket *sock, const struct sockaddr_in *addr) +{ + return gr_search_connectbind(GR_CONNECT, sock->sk, addr, sock->type); +} + +int +gr_search_bind(const struct socket *sock, const struct sockaddr_in *addr) +{ + return gr_search_connectbind(GR_BIND, sock->sk, addr, sock->type); +} + +int 
gr_search_listen(const struct socket *sock) +{ + struct sock *sk = sock->sk; + struct sockaddr_in addr; + + addr.sin_addr.s_addr = inet_sk(sk)->saddr; + addr.sin_port = inet_sk(sk)->sport; + + return gr_search_connectbind(GR_BIND, sock->sk, &addr, sock->type); +} + +int gr_search_accept(const struct socket *sock) +{ + struct sock *sk = sock->sk; + struct sockaddr_in addr; + + addr.sin_addr.s_addr = inet_sk(sk)->saddr; + addr.sin_port = inet_sk(sk)->sport; + + return gr_search_connectbind(GR_BIND, sock->sk, &addr, sock->type); +} + +int +gr_search_udp_sendmsg(const struct sock *sk, const struct sockaddr_in *addr) +{ + if (addr) + return gr_search_connectbind(GR_CONNECT, sk, addr, SOCK_DGRAM); + else { + struct sockaddr_in sin; + const struct inet_sock *inet = inet_sk(sk); + + sin.sin_addr.s_addr = inet->daddr; + sin.sin_port = inet->dport; + + return gr_search_connectbind(GR_CONNECT, sk, &sin, SOCK_DGRAM); + } +} + +int +gr_search_udp_recvmsg(const struct sock *sk, const struct sk_buff *skb) +{ + struct sockaddr_in sin; + + if (unlikely(skb->len < sizeof (struct udphdr))) + return 1; // skip this packet + + sin.sin_addr.s_addr = skb->nh.iph->saddr; + sin.sin_port = skb->h.uh->source; + + return gr_search_connectbind(GR_CONNECT, sk, &sin, SOCK_DGRAM); +} diff -urNp linux-2.6.17.11/grsecurity/gracl_learn.c linux-2.6.17.11/grsecurity/gracl_learn.c --- linux-2.6.17.11/grsecurity/gracl_learn.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/grsecurity/gracl_learn.c 2006-09-01 16:20:28.000000000 -0400 @@ -0,0 +1,204 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern ssize_t write_grsec_handler(struct file * file, const char __user * buf, + size_t count, loff_t *ppos); +extern int gr_acl_is_enabled(void); + +static DECLARE_WAIT_QUEUE_HEAD(learn_wait); +static int gr_learn_attached; + +/* use a 512k buffer */ +#define LEARN_BUFFER_SIZE (512 * 1024) + +static spinlock_t gr_learn_lock = SPIN_LOCK_UNLOCKED; 
+static DECLARE_MUTEX(gr_learn_user_sem); + +/* we need to maintain two buffers, so that the kernel context of grlearn + uses a semaphore around the userspace copying, and the other kernel contexts + use a spinlock when copying into the buffer, since they cannot sleep +*/ +static char *learn_buffer; +static char *learn_buffer_user; +static int learn_buffer_len; +static int learn_buffer_user_len; + +static ssize_t +read_learn(struct file *file, char __user * buf, size_t count, loff_t * ppos) +{ + DECLARE_WAITQUEUE(wait, current); + ssize_t retval = 0; + + add_wait_queue(&learn_wait, &wait); + set_current_state(TASK_INTERRUPTIBLE); + do { + down(&gr_learn_user_sem); + spin_lock(&gr_learn_lock); + if (learn_buffer_len) + break; + spin_unlock(&gr_learn_lock); + up(&gr_learn_user_sem); + if (file->f_flags & O_NONBLOCK) { + retval = -EAGAIN; + goto out; + } + if (signal_pending(current)) { + retval = -ERESTARTSYS; + goto out; + } + + schedule(); + } while (1); + + memcpy(learn_buffer_user, learn_buffer, learn_buffer_len); + learn_buffer_user_len = learn_buffer_len; + retval = learn_buffer_len; + learn_buffer_len = 0; + + spin_unlock(&gr_learn_lock); + + if (copy_to_user(buf, learn_buffer_user, learn_buffer_user_len)) /* NOTE(review): count is never consulted here; confirm every reader passes a LEARN_BUFFER_SIZE buffer */ + retval = -EFAULT; + + up(&gr_learn_user_sem); +out: + set_current_state(TASK_RUNNING); + remove_wait_queue(&learn_wait, &wait); + return retval; +} + +static unsigned int +poll_learn(struct file * file, poll_table * wait) +{ + poll_wait(file, &learn_wait, wait); + + if (learn_buffer_len) + return (POLLIN | POLLRDNORM); + + return 0; +} + +void +gr_clear_learn_entries(void) /* frees both learn buffers (if allocated) and zeroes learn_buffer_len */ +{ + char *tmp; /* keeps the detached kernel-side buffer so it can be freed once the spinlock is released */ + + down(&gr_learn_user_sem); + if (learn_buffer != NULL) { + spin_lock(&gr_learn_lock); + tmp = learn_buffer; + learn_buffer = NULL; + spin_unlock(&gr_learn_lock); + vfree(tmp); /* must free through tmp: learn_buffer itself was set to NULL just above */ + } + if (learn_buffer_user != NULL) { + vfree(learn_buffer_user); + learn_buffer_user = NULL; + } + learn_buffer_len = 0; + up(&gr_learn_user_sem); + + return; +} + +void
+gr_add_learn_entry(const char *fmt, ...) +{ + va_list args; + unsigned int len; + + if (!gr_learn_attached) + return; + + spin_lock(&gr_learn_lock); + + /* leave a gap at the end so we know when it's "full" but don't have to + compute the exact length of the string we're trying to append + */ + if (learn_buffer_len > LEARN_BUFFER_SIZE - 16384) { + spin_unlock(&gr_learn_lock); + wake_up_interruptible(&learn_wait); + return; + } + if (learn_buffer == NULL) { + spin_unlock(&gr_learn_lock); + return; + } + + va_start(args, fmt); + len = vsnprintf(learn_buffer + learn_buffer_len, LEARN_BUFFER_SIZE - learn_buffer_len, fmt, args); + va_end(args); + + learn_buffer_len += len + 1; + + spin_unlock(&gr_learn_lock); + wake_up_interruptible(&learn_wait); + + return; +} + +static int +open_learn(struct inode *inode, struct file *file) /* a read-mode open attaches the single learn reader and makes sure both buffers exist */ +{ + if (file->f_mode & FMODE_READ && gr_learn_attached) + return -EBUSY; + if (file->f_mode & FMODE_READ) { + down(&gr_learn_user_sem); + if (learn_buffer == NULL) + learn_buffer = vmalloc(LEARN_BUFFER_SIZE); + if (learn_buffer_user == NULL) + learn_buffer_user = vmalloc(LEARN_BUFFER_SIZE); + if (learn_buffer == NULL || learn_buffer_user == NULL) { + up(&gr_learn_user_sem); /* never return with the semaphore held; a buffer that did allocate is reused by the next open */ + return -ENOMEM; + } + learn_buffer_len = 0; + learn_buffer_user_len = 0; + gr_learn_attached = 1; + up(&gr_learn_user_sem); + } + return 0; +} + +static int +close_learn(struct inode *inode, struct file *file) +{ + char *tmp; + + if (file->f_mode & FMODE_READ) { + down(&gr_learn_user_sem); + if (learn_buffer != NULL) { + spin_lock(&gr_learn_lock); + tmp = learn_buffer; + learn_buffer = NULL; + spin_unlock(&gr_learn_lock); + vfree(tmp); + } + if (learn_buffer_user != NULL) { + vfree(learn_buffer_user); + learn_buffer_user = NULL; + } + learn_buffer_len = 0; + learn_buffer_user_len = 0; + gr_learn_attached = 0; + up(&gr_learn_user_sem); + } + + return 0; +} + +struct file_operations grsec_fops = { + .read = read_learn, + .write = write_grsec_handler, + .open = open_learn, + .release =
close_learn, + .poll = poll_learn, +}; diff -urNp linux-2.6.17.11/grsecurity/gracl_res.c linux-2.6.17.11/grsecurity/gracl_res.c --- linux-2.6.17.11/grsecurity/gracl_res.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/grsecurity/gracl_res.c 2006-09-01 16:20:28.000000000 -0400 @@ -0,0 +1,45 @@ +#include +#include +#include +#include + +static const char *restab_log[] = { + [RLIMIT_CPU] = "RLIMIT_CPU", + [RLIMIT_FSIZE] = "RLIMIT_FSIZE", + [RLIMIT_DATA] = "RLIMIT_DATA", + [RLIMIT_STACK] = "RLIMIT_STACK", + [RLIMIT_CORE] = "RLIMIT_CORE", + [RLIMIT_RSS] = "RLIMIT_RSS", + [RLIMIT_NPROC] = "RLIMIT_NPROC", + [RLIMIT_NOFILE] = "RLIMIT_NOFILE", + [RLIMIT_MEMLOCK] = "RLIMIT_MEMLOCK", + [RLIMIT_AS] = "RLIMIT_AS", + [RLIMIT_LOCKS] = "RLIMIT_LOCKS", + [RLIMIT_LOCKS + 1] = "RLIMIT_CRASH" +}; + +void +gr_log_resource(const struct task_struct *task, + const int res, const unsigned long wanted, const int gt) +{ + if (res == RLIMIT_NPROC && + (cap_raised(task->cap_effective, CAP_SYS_ADMIN) || + cap_raised(task->cap_effective, CAP_SYS_RESOURCE))) + return; + else if (res == RLIMIT_MEMLOCK && + cap_raised(task->cap_effective, CAP_IPC_LOCK)) + return; + + if (!gr_acl_is_enabled() && !grsec_resource_logging) + return; + + preempt_disable(); + + if (unlikely(((gt && wanted > task->signal->rlim[res].rlim_cur) || + (!gt && wanted >= task->signal->rlim[res].rlim_cur)) && + task->signal->rlim[res].rlim_cur != RLIM_INFINITY)) + gr_log_res_ulong2_str(GR_DONT_AUDIT, GR_RESOURCE_MSG, task, wanted, restab_log[res], task->signal->rlim[res].rlim_cur); + preempt_enable_no_resched(); + + return; +} diff -urNp linux-2.6.17.11/grsecurity/gracl_segv.c linux-2.6.17.11/grsecurity/gracl_segv.c --- linux-2.6.17.11/grsecurity/gracl_segv.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/grsecurity/gracl_segv.c 2006-09-01 16:20:28.000000000 -0400 @@ -0,0 +1,295 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include + +static struct crash_uid *uid_set; +static unsigned short uid_used; +static spinlock_t gr_uid_lock = SPIN_LOCK_UNLOCKED; +extern rwlock_t gr_inode_lock; +extern struct acl_subject_label * + lookup_acl_subj_label(const ino_t inode, const dev_t dev, + struct acl_role_label *role); +extern int specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t); + +int +gr_init_uidset(void) +{ + uid_set = + kmalloc(GR_UIDTABLE_MAX * sizeof (struct crash_uid), GFP_KERNEL); + uid_used = 0; + + return uid_set ? 1 : 0; +} + +void +gr_free_uidset(void) +{ + if (uid_set) + kfree(uid_set); + + return; +} + +int +gr_find_uid(const uid_t uid) +{ + struct crash_uid *tmp = uid_set; + uid_t buid; + int low = 0, high = uid_used - 1, mid; + + while (high >= low) { + mid = (low + high) >> 1; + buid = tmp[mid].uid; + if (buid == uid) + return mid; + if (buid > uid) + high = mid - 1; + if (buid < uid) + low = mid + 1; + } + + return -1; +} + +static __inline__ void +gr_insertsort(void) +{ + unsigned short i, j; + struct crash_uid index; + + for (i = 1; i < uid_used; i++) { + index = uid_set[i]; + j = i; + while ((j > 0) && uid_set[j - 1].uid > index.uid) { + uid_set[j] = uid_set[j - 1]; + j--; + } + uid_set[j] = index; + } + + return; +} + +static __inline__ void +gr_insert_uid(const uid_t uid, const unsigned long expires) +{ + int loc; + + if (uid_used == GR_UIDTABLE_MAX) + return; + + loc = gr_find_uid(uid); + + if (loc >= 0) { + uid_set[loc].expires = expires; + return; + } + + uid_set[uid_used].uid = uid; + uid_set[uid_used].expires = expires; + uid_used++; + + gr_insertsort(); + + return; +} + +void +gr_remove_uid(const unsigned short loc) +{ + unsigned short i; + + for (i = loc + 1; i < uid_used; i++) + uid_set[i - 1] = uid_set[i]; + + uid_used--; + + return; +} + +int +gr_check_crash_uid(const uid_t uid) +{ + int loc; + int ret = 0; + + if (unlikely(!gr_acl_is_enabled())) + return 0; + + spin_lock(&gr_uid_lock); + loc 
= gr_find_uid(uid); + + if (loc < 0) + goto out_unlock; + + if (time_before_eq(uid_set[loc].expires, get_seconds())) + gr_remove_uid(loc); + else + ret = 1; + +out_unlock: + spin_unlock(&gr_uid_lock); + return ret; +} + +static __inline__ int +proc_is_setxid(const struct task_struct *task) +{ + if (task->uid != task->euid || task->uid != task->suid || + task->uid != task->fsuid) + return 1; + if (task->gid != task->egid || task->gid != task->sgid || + task->gid != task->fsgid) + return 1; + + return 0; +} +static __inline__ int +gr_fake_force_sig(int sig, struct task_struct *t) +{ + unsigned long int flags; + int ret; + + spin_lock_irqsave(&t->sighand->siglock, flags); + if (sigismember(&t->blocked, sig) || t->sighand->action[sig-1].sa.sa_handler == SIG_IGN) { + t->sighand->action[sig-1].sa.sa_handler = SIG_DFL; + sigdelset(&t->blocked, sig); + recalc_sigpending_tsk(t); + } + ret = specific_send_sig_info(sig, (void*)1L, t); + spin_unlock_irqrestore(&t->sighand->siglock, flags); + + return ret; +} + +void +gr_handle_crash(struct task_struct *task, const int sig) +{ + struct acl_subject_label *curr; + struct acl_subject_label *curr2; + struct task_struct *tsk, *tsk2; + + if (sig != SIGSEGV && sig != SIGKILL && sig != SIGBUS && sig != SIGILL) + return; + + if (unlikely(!gr_acl_is_enabled())) + return; + + curr = task->acl; + + if (!(curr->resmask & (1 << GR_CRASH_RES))) + return; + + if (time_before_eq(curr->expires, get_seconds())) { + curr->expires = 0; + curr->crashes = 0; + } + + curr->crashes++; + + if (!curr->expires) + curr->expires = get_seconds() + curr->res[GR_CRASH_RES].rlim_max; + + if ((curr->crashes >= curr->res[GR_CRASH_RES].rlim_cur) && + time_after(curr->expires, get_seconds())) { + if (task->uid && proc_is_setxid(task)) { + gr_log_crash1(GR_DONT_AUDIT, GR_SEGVSTART_ACL_MSG, task, curr->res[GR_CRASH_RES].rlim_max); + spin_lock(&gr_uid_lock); + gr_insert_uid(task->uid, curr->expires); + spin_unlock(&gr_uid_lock); + curr->expires = 0; + curr->crashes = 
0; + read_lock(&tasklist_lock); + do_each_thread(tsk2, tsk) { + if (tsk != task && tsk->uid == task->uid) + gr_fake_force_sig(SIGKILL, tsk); + } while_each_thread(tsk2, tsk); + read_unlock(&tasklist_lock); + } else { + gr_log_crash2(GR_DONT_AUDIT, GR_SEGVNOSUID_ACL_MSG, task, curr->res[GR_CRASH_RES].rlim_max); + read_lock(&tasklist_lock); + do_each_thread(tsk2, tsk) { + if (likely(tsk != task)) { + curr2 = tsk->acl; + + if (curr2->device == curr->device && + curr2->inode == curr->inode) + gr_fake_force_sig(SIGKILL, tsk); + } + } while_each_thread(tsk2, tsk); + read_unlock(&tasklist_lock); + } + } + + return; +} + +int +gr_check_crash_exec(const struct file *filp) +{ + struct acl_subject_label *curr; + + if (unlikely(!gr_acl_is_enabled())) + return 0; + + read_lock(&gr_inode_lock); + curr = lookup_acl_subj_label(filp->f_dentry->d_inode->i_ino, + filp->f_dentry->d_inode->i_sb->s_dev, + current->role); + read_unlock(&gr_inode_lock); + + if (!curr || !(curr->resmask & (1 << GR_CRASH_RES)) || + (!curr->crashes && !curr->expires)) + return 0; + + if ((curr->crashes >= curr->res[GR_CRASH_RES].rlim_cur) && + time_after(curr->expires, get_seconds())) + return 1; + else if (time_before_eq(curr->expires, get_seconds())) { + curr->crashes = 0; + curr->expires = 0; + } + + return 0; +} + +void +gr_handle_alertkill(struct task_struct *task) +{ + struct acl_subject_label *curracl; + __u32 curr_ip; + struct task_struct *p, *p2; + + if (unlikely(!gr_acl_is_enabled())) + return; + + curracl = task->acl; + curr_ip = task->signal->curr_ip; + + if ((curracl->mode & GR_KILLIPPROC) && curr_ip) { + read_lock(&tasklist_lock); + do_each_thread(p2, p) { + if (p->signal->curr_ip == curr_ip) + gr_fake_force_sig(SIGKILL, p); + } while_each_thread(p2, p); + read_unlock(&tasklist_lock); + } else if (curracl->mode & GR_KILLPROC) + gr_fake_force_sig(SIGKILL, task); + + return; +} diff -urNp linux-2.6.17.11/grsecurity/gracl_shm.c linux-2.6.17.11/grsecurity/gracl_shm.c --- 
linux-2.6.17.11/grsecurity/gracl_shm.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/grsecurity/gracl_shm.c 2006-09-01 16:20:28.000000000 -0400 @@ -0,0 +1,34 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int +gr_handle_shmat(const pid_t shm_cprid, const pid_t shm_lapid, + const time_t shm_createtime, const uid_t cuid, const int shmid) +{ + struct task_struct *task; + + if (!gr_acl_is_enabled()) + return 1; + + task = find_task_by_pid(shm_cprid); + + if (unlikely(!task)) + task = find_task_by_pid(shm_lapid); + + if (unlikely(task && (time_before((unsigned long)task->start_time.tv_sec, (unsigned long)shm_createtime) || + (task->pid == shm_lapid)) && + (task->acl->mode & GR_PROTSHM) && + (task->acl != current->acl))) { + gr_log_int3(GR_DONT_AUDIT, GR_SHMAT_ACL_MSG, cuid, shm_cprid, shmid); + return 0; + } + + return 1; +} diff -urNp linux-2.6.17.11/grsecurity/grsec_chdir.c linux-2.6.17.11/grsecurity/grsec_chdir.c --- linux-2.6.17.11/grsecurity/grsec_chdir.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/grsecurity/grsec_chdir.c 2006-09-01 16:20:28.000000000 -0400 @@ -0,0 +1,19 @@ +#include +#include +#include +#include +#include +#include + +void +gr_log_chdir(const struct dentry *dentry, const struct vfsmount *mnt) +{ +#ifdef CONFIG_GRKERNSEC_AUDIT_CHDIR + if ((grsec_enable_chdir && grsec_enable_group && + in_group_p(grsec_audit_gid)) || (grsec_enable_chdir && + !grsec_enable_group)) { + gr_log_fs_generic(GR_DO_AUDIT, GR_CHDIR_AUDIT_MSG, dentry, mnt); + } +#endif + return; +} diff -urNp linux-2.6.17.11/grsecurity/grsec_chroot.c linux-2.6.17.11/grsecurity/grsec_chroot.c --- linux-2.6.17.11/grsecurity/grsec_chroot.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/grsecurity/grsec_chroot.c 2006-09-01 16:20:28.000000000 -0400 @@ -0,0 +1,332 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +int +gr_handle_chroot_unix(const pid_t pid) +{ +#ifdef 
CONFIG_GRKERNSEC_CHROOT_UNIX + struct pid *spid = NULL; + + if (unlikely(!grsec_enable_chroot_unix)) + return 1; + + if (likely(!proc_is_chrooted(current))) + return 1; + + read_lock(&tasklist_lock); + + spid = find_pid(pid); + if (spid) { + struct task_struct *p; + p = pid_task(spid, PIDTYPE_PID); + task_lock(p); + if (unlikely(!have_same_root(current, p))) { + task_unlock(p); + read_unlock(&tasklist_lock); + gr_log_noargs(GR_DONT_AUDIT, GR_UNIX_CHROOT_MSG); + return 0; + } + task_unlock(p); + } + read_unlock(&tasklist_lock); +#endif + return 1; +} + +int +gr_handle_chroot_nice(void) +{ +#ifdef CONFIG_GRKERNSEC_CHROOT_NICE + if (grsec_enable_chroot_nice && proc_is_chrooted(current)) { + gr_log_noargs(GR_DONT_AUDIT, GR_NICE_CHROOT_MSG); + return -EPERM; + } +#endif + return 0; +} + +int +gr_handle_chroot_setpriority(struct task_struct *p, const int niceval) +{ +#ifdef CONFIG_GRKERNSEC_CHROOT_NICE + if (grsec_enable_chroot_nice && (niceval < task_nice(p)) + && proc_is_chrooted(current)) { + gr_log_str_int(GR_DONT_AUDIT, GR_PRIORITY_CHROOT_MSG, p->comm, p->pid); + return -EACCES; + } +#endif + return 0; +} + +int +gr_handle_chroot_rawio(const struct inode *inode) +{ +#ifdef CONFIG_GRKERNSEC_CHROOT_CAPS + if (grsec_enable_chroot_caps && proc_is_chrooted(current) && + inode && S_ISBLK(inode->i_mode) && !capable(CAP_SYS_RAWIO)) + return 1; +#endif + return 0; +} + +int +gr_pid_is_chrooted(struct task_struct *p) +{ +#ifdef CONFIG_GRKERNSEC_CHROOT_FINDTASK + if (!grsec_enable_chroot_findtask || !proc_is_chrooted(current) || !p) + return 0; + + task_lock(p); + if ((p->exit_state & (EXIT_ZOMBIE | EXIT_DEAD)) || + !have_same_root(current, p)) { + task_unlock(p); + return 1; + } + task_unlock(p); +#endif + return 0; +} + +EXPORT_SYMBOL(gr_pid_is_chrooted); + +#if defined(CONFIG_GRKERNSEC_CHROOT_DOUBLE) || defined(CONFIG_GRKERNSEC_CHROOT_FCHDIR) +int gr_is_outside_chroot(const struct dentry *u_dentry, const struct vfsmount *u_mnt) +{ + struct dentry *dentry = (struct dentry 
*)u_dentry; + struct vfsmount *mnt = (struct vfsmount *)u_mnt; + struct dentry *realroot; + struct vfsmount *realrootmnt; + struct dentry *currentroot; + struct vfsmount *currentmnt; + int ret = 1; /* cleared to 0 only when the upward walk below ends at child_reaper's root */ + + read_lock(&child_reaper->fs->lock); + realrootmnt = mntget(child_reaper->fs->rootmnt); + realroot = dget(child_reaper->fs->root); + read_unlock(&child_reaper->fs->lock); + + read_lock(&current->fs->lock); + currentmnt = mntget(current->fs->rootmnt); + currentroot = dget(current->fs->root); + read_unlock(&current->fs->lock); + + spin_lock(&dcache_lock); + for (;;) { + if (unlikely((dentry == realroot && mnt == realrootmnt) + || (dentry == currentroot && mnt == currentmnt))) + break; + if (unlikely(dentry == mnt->mnt_root || IS_ROOT(dentry))) { + if (mnt->mnt_parent == mnt) + break; + dentry = mnt->mnt_mountpoint; + mnt = mnt->mnt_parent; + continue; + } + dentry = dentry->d_parent; + } + spin_unlock(&dcache_lock); + + dput(currentroot); + mntput(currentmnt); + + /* access is outside of chroot */ + if (dentry == realroot && mnt == realrootmnt) + ret = 0; + + dput(realroot); + mntput(realrootmnt); + return ret; +} +#endif + +int +gr_chroot_fchdir(struct dentry *u_dentry, struct vfsmount *u_mnt) +{ +#ifdef CONFIG_GRKERNSEC_CHROOT_FCHDIR + if (!grsec_enable_chroot_fchdir) + return 1; + + if (!proc_is_chrooted(current)) + return 1; + else if (!gr_is_outside_chroot(u_dentry, u_mnt)) { + gr_log_fs_generic(GR_DONT_AUDIT, GR_CHROOT_FCHDIR_MSG, u_dentry, u_mnt); + return 0; + } +#endif + return 1; +} + +int +gr_chroot_shmat(const pid_t shm_cprid, const pid_t shm_lapid, + const time_t shm_createtime) +{ +#ifdef CONFIG_GRKERNSEC_CHROOT_SHMAT + struct pid *pid = NULL; + time_t starttime; + + if (unlikely(!grsec_enable_chroot_shmat)) + return 1; + + if (likely(!proc_is_chrooted(current))) + return 1; + + read_lock(&tasklist_lock); + + pid = find_pid(shm_cprid); + if (pid) { + struct task_struct *p; + p = pid_task(pid, PIDTYPE_PID); + task_lock(p); + starttime = p->start_time.tv_sec; + if
(unlikely(!have_same_root(current, p) && + time_before((unsigned long)starttime, (unsigned long)shm_createtime))) { + task_unlock(p); + read_unlock(&tasklist_lock); + gr_log_noargs(GR_DONT_AUDIT, GR_SHMAT_CHROOT_MSG); + return 0; + } + task_unlock(p); + } else { + pid = find_pid(shm_lapid); + if (pid) { + struct task_struct *p; + p = pid_task(pid, PIDTYPE_PID); + task_lock(p); + if (unlikely(!have_same_root(current, p))) { + task_unlock(p); + read_unlock(&tasklist_lock); + gr_log_noargs(GR_DONT_AUDIT, GR_SHMAT_CHROOT_MSG); + return 0; + } + task_unlock(p); + } + } + + read_unlock(&tasklist_lock); +#endif + return 1; +} + +void +gr_log_chroot_exec(const struct dentry *dentry, const struct vfsmount *mnt) +{ +#ifdef CONFIG_GRKERNSEC_CHROOT_EXECLOG + if (grsec_enable_chroot_execlog && proc_is_chrooted(current)) + gr_log_fs_generic(GR_DO_AUDIT, GR_EXEC_CHROOT_MSG, dentry, mnt); +#endif + return; +} + +int +gr_handle_chroot_mknod(const struct dentry *dentry, + const struct vfsmount *mnt, const int mode) +{ +#ifdef CONFIG_GRKERNSEC_CHROOT_MKNOD + if (grsec_enable_chroot_mknod && !S_ISFIFO(mode) && !S_ISREG(mode) && + proc_is_chrooted(current)) { + gr_log_fs_generic(GR_DONT_AUDIT, GR_MKNOD_CHROOT_MSG, dentry, mnt); + return -EPERM; + } +#endif + return 0; +} + +int +gr_handle_chroot_mount(const struct dentry *dentry, + const struct vfsmount *mnt, const char *dev_name) +{ +#ifdef CONFIG_GRKERNSEC_CHROOT_MOUNT + if (grsec_enable_chroot_mount && proc_is_chrooted(current)) { + gr_log_str_fs(GR_DONT_AUDIT, GR_MOUNT_CHROOT_MSG, dev_name, dentry, mnt); + return -EPERM; + } +#endif + return 0; +} + +int +gr_handle_chroot_pivot(void) +{ +#ifdef CONFIG_GRKERNSEC_CHROOT_PIVOT + if (grsec_enable_chroot_pivot && proc_is_chrooted(current)) { + gr_log_noargs(GR_DONT_AUDIT, GR_PIVOT_CHROOT_MSG); + return -EPERM; + } +#endif + return 0; +} + +int +gr_handle_chroot_chroot(const struct dentry *dentry, const struct vfsmount *mnt) +{ +#ifdef CONFIG_GRKERNSEC_CHROOT_DOUBLE + if 
(grsec_enable_chroot_double && proc_is_chrooted(current) && + !gr_is_outside_chroot(dentry, mnt)) { + gr_log_fs_generic(GR_DONT_AUDIT, GR_CHROOT_CHROOT_MSG, dentry, mnt); + return -EPERM; + } +#endif + return 0; +} + +void +gr_handle_chroot_caps(struct task_struct *task) +{ +#ifdef CONFIG_GRKERNSEC_CHROOT_CAPS + if (grsec_enable_chroot_caps && proc_is_chrooted(task)) { + task->cap_permitted = + cap_drop(task->cap_permitted, GR_CHROOT_CAPS); + task->cap_inheritable = + cap_drop(task->cap_inheritable, GR_CHROOT_CAPS); + task->cap_effective = + cap_drop(task->cap_effective, GR_CHROOT_CAPS); + } +#endif + return; +} + +int +gr_handle_chroot_sysctl(const int op) +{ +#ifdef CONFIG_GRKERNSEC_CHROOT_SYSCTL + if (grsec_enable_chroot_sysctl && proc_is_chrooted(current) + && (op & 002)) + return -EACCES; +#endif + return 0; +} + +void +gr_handle_chroot_chdir(struct dentry *dentry, struct vfsmount *mnt) +{ +#ifdef CONFIG_GRKERNSEC_CHROOT_CHDIR + if (grsec_enable_chroot_chdir) + set_fs_pwd(current->fs, mnt, dentry); +#endif + return; +} + +int +gr_handle_chroot_chmod(const struct dentry *dentry, + const struct vfsmount *mnt, const int mode) +{ +#ifdef CONFIG_GRKERNSEC_CHROOT_CHMOD + if (grsec_enable_chroot_chmod && + ((mode & S_ISUID) || ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))) && + proc_is_chrooted(current)) { + gr_log_fs_generic(GR_DONT_AUDIT, GR_CHMOD_CHROOT_MSG, dentry, mnt); + return -EPERM; + } +#endif + return 0; +} + +#ifdef CONFIG_SECURITY +EXPORT_SYMBOL(gr_handle_chroot_caps); +#endif diff -urNp linux-2.6.17.11/grsecurity/grsec_disabled.c linux-2.6.17.11/grsecurity/grsec_disabled.c --- linux-2.6.17.11/grsecurity/grsec_disabled.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/grsecurity/grsec_disabled.c 2006-09-01 16:20:28.000000000 -0400 @@ -0,0 +1,418 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_PAX_HAVE_ACL_FLAGS +void +pax_set_initial_flags(struct 
linux_binprm *bprm) +{ + return; +} +#endif + +#ifdef CONFIG_SYSCTL +__u32 +gr_handle_sysctl(const struct ctl_table * table, __u32 mode) +{ + return mode; +} +#endif + +int +gr_acl_is_enabled(void) +{ + return 0; +} + +int +gr_handle_rawio(const struct inode *inode) +{ + return 0; +} + +void +gr_acl_handle_psacct(struct task_struct *task, const long code) +{ + return; +} + +int +gr_handle_ptrace(struct task_struct *task, const long request) +{ + return 0; +} + +int +gr_handle_proc_ptrace(struct task_struct *task) +{ + return 0; +} + +void +gr_learn_resource(const struct task_struct *task, + const int res, const unsigned long wanted, const int gt) +{ + return; +} + +int +gr_set_acls(const int type) +{ + return 0; +} + +int +gr_check_hidden_task(const struct task_struct *tsk) +{ + return 0; +} + +int +gr_check_protected_task(const struct task_struct *task) +{ + return 0; +} + +void +gr_copy_label(struct task_struct *tsk) +{ + return; +} + +void +gr_set_pax_flags(struct task_struct *task) +{ + return; +} + +int +gr_set_proc_label(const struct dentry *dentry, const struct vfsmount *mnt) +{ + return 0; +} + +void +gr_handle_delete(const ino_t ino, const dev_t dev) +{ + return; +} + +void +gr_handle_create(const struct dentry *dentry, const struct vfsmount *mnt) +{ + return; +} + +void +gr_handle_crash(struct task_struct *task, const int sig) +{ + return; +} + +int +gr_check_crash_exec(const struct file *filp) +{ + return 0; +} + +int +gr_check_crash_uid(const uid_t uid) +{ + return 0; +} + +void +gr_handle_rename(struct inode *old_dir, struct inode *new_dir, + struct dentry *old_dentry, + struct dentry *new_dentry, + struct vfsmount *mnt, const __u8 replace) +{ + return; +} + +int +gr_search_socket(const int family, const int type, const int protocol) +{ + return 1; +} + +int +gr_search_connectbind(const int mode, const struct socket *sock, + const struct sockaddr_in *addr) +{ + return 1; +} + +int +gr_task_is_capable(struct task_struct *task, const int cap) +{ + return 
1; +} + +int +gr_is_capable_nolog(const int cap) +{ + return 1; +} + +void +gr_handle_alertkill(struct task_struct *task) +{ + return; +} + +__u32 +gr_acl_handle_execve(const struct dentry * dentry, const struct vfsmount * mnt) +{ + return 1; +} + +__u32 +gr_acl_handle_hidden_file(const struct dentry * dentry, + const struct vfsmount * mnt) +{ + return 1; +} + +__u32 +gr_acl_handle_open(const struct dentry * dentry, const struct vfsmount * mnt, + const int fmode) +{ + return 1; +} + +__u32 +gr_acl_handle_rmdir(const struct dentry * dentry, const struct vfsmount * mnt) +{ + return 1; +} + +__u32 +gr_acl_handle_unlink(const struct dentry * dentry, const struct vfsmount * mnt) +{ + return 1; +} + +int +gr_acl_handle_mmap(const struct file *file, const unsigned long prot, + unsigned int *vm_flags) +{ + return 1; +} + +__u32 +gr_acl_handle_truncate(const struct dentry * dentry, + const struct vfsmount * mnt) +{ + return 1; +} + +__u32 +gr_acl_handle_utime(const struct dentry * dentry, const struct vfsmount * mnt) +{ + return 1; +} + +__u32 +gr_acl_handle_access(const struct dentry * dentry, + const struct vfsmount * mnt, const int fmode) +{ + return 1; +} + +__u32 +gr_acl_handle_fchmod(const struct dentry * dentry, const struct vfsmount * mnt, + mode_t mode) +{ + return 1; +} + +__u32 +gr_acl_handle_chmod(const struct dentry * dentry, const struct vfsmount * mnt, + mode_t mode) +{ + return 1; +} + +__u32 +gr_acl_handle_chown(const struct dentry * dentry, const struct vfsmount * mnt) +{ + return 1; +} + +void +grsecurity_init(void) +{ + return; +} + +__u32 +gr_acl_handle_mknod(const struct dentry * new_dentry, + const struct dentry * parent_dentry, + const struct vfsmount * parent_mnt, + const int mode) +{ + return 1; +} + +__u32 +gr_acl_handle_mkdir(const struct dentry * new_dentry, + const struct dentry * parent_dentry, + const struct vfsmount * parent_mnt) +{ + return 1; +} + +__u32 +gr_acl_handle_symlink(const struct dentry * new_dentry, + const struct dentry * 
parent_dentry, + const struct vfsmount * parent_mnt, const char *from) +{ + return 1; +} + +__u32 +gr_acl_handle_link(const struct dentry * new_dentry, + const struct dentry * parent_dentry, + const struct vfsmount * parent_mnt, + const struct dentry * old_dentry, + const struct vfsmount * old_mnt, const char *to) +{ + return 1; +} + +int +gr_acl_handle_rename(const struct dentry *new_dentry, + const struct dentry *parent_dentry, + const struct vfsmount *parent_mnt, + const struct dentry *old_dentry, + const struct inode *old_parent_inode, + const struct vfsmount *old_mnt, const char *newname) +{ + return 0; +} + +int +gr_acl_handle_filldir(const struct file *file, const char *name, + const int namelen, const ino_t ino) +{ + return 1; +} + +int +gr_handle_shmat(const pid_t shm_cprid, const pid_t shm_lapid, + const time_t shm_createtime, const uid_t cuid, const int shmid) +{ + return 1; +} + +int +gr_search_bind(const struct socket *sock, const struct sockaddr_in *addr) +{ + return 1; +} + +int +gr_search_accept(const struct socket *sock) +{ + return 1; +} + +int +gr_search_listen(const struct socket *sock) +{ + return 1; +} + +int +gr_search_connect(const struct socket *sock, const struct sockaddr_in *addr) +{ + return 1; +} + +__u32 +gr_acl_handle_unix(const struct dentry * dentry, const struct vfsmount * mnt) +{ + return 1; +} + +__u32 +gr_acl_handle_creat(const struct dentry * dentry, + const struct dentry * p_dentry, + const struct vfsmount * p_mnt, const int fmode, + const int imode) +{ + return 1; +} + +void +gr_acl_handle_exit(void) +{ + return; +} + +int +gr_acl_handle_mprotect(const struct file *file, const unsigned long prot) +{ + return 1; +} + +void +gr_set_role_label(const uid_t uid, const gid_t gid) +{ + return; +} + +int +gr_acl_handle_procpidmem(const struct task_struct *task) +{ + return 0; +} + +int +gr_search_udp_recvmsg(const struct sock *sk, const struct sk_buff *skb) +{ + return 1; +} + +int +gr_search_udp_sendmsg(const struct sock *sk, const 
struct sockaddr_in *addr) +{ + return 1; +} + +void +gr_set_kernel_label(struct task_struct *task) +{ + return; +} + +int +gr_check_user_change(int real, int effective, int fs) +{ + return 0; +} + +int +gr_check_group_change(int real, int effective, int fs) +{ + return 0; +} + + +EXPORT_SYMBOL(gr_task_is_capable); +EXPORT_SYMBOL(gr_learn_resource); +EXPORT_SYMBOL(gr_set_kernel_label); +#ifdef CONFIG_SECURITY +EXPORT_SYMBOL(gr_check_user_change); +EXPORT_SYMBOL(gr_check_group_change); +#endif diff -urNp linux-2.6.17.11/grsecurity/grsec_exec.c linux-2.6.17.11/grsecurity/grsec_exec.c --- linux-2.6.17.11/grsecurity/grsec_exec.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/grsecurity/grsec_exec.c 2006-09-01 16:20:28.000000000 -0400 @@ -0,0 +1,88 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifdef CONFIG_GRKERNSEC_EXECLOG +static char gr_exec_arg_buf[132]; +static DECLARE_MUTEX(gr_exec_arg_sem); +#endif + +int +gr_handle_nproc(void) /* -EAGAIN when the user's process count exceeds RLIMIT_NPROC and current has neither CAP_SYS_ADMIN nor CAP_SYS_RESOURCE; otherwise 0 */ +{ +#ifdef CONFIG_GRKERNSEC_EXECVE + if (grsec_enable_execve && current->user && + (atomic_read(&current->user->processes) > + current->signal->rlim[RLIMIT_NPROC].rlim_cur) && + !capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE)) { + gr_log_noargs(GR_DONT_AUDIT, GR_NPROC_MSG); + return -EAGAIN; + } +#endif + return 0; +} + +void +gr_handle_exec_args(struct linux_binprm *bprm, const char __user *__user *argv) +{ +#ifdef CONFIG_GRKERNSEC_EXECLOG + char *grarg = gr_exec_arg_buf; + unsigned int i, x, execlen = 0; + char c; + + if (!((grsec_enable_execlog && grsec_enable_group && + in_group_p(grsec_audit_gid)) + || (grsec_enable_execlog && !grsec_enable_group))) + return; + + down(&gr_exec_arg_sem); + memset(grarg, 0, sizeof(gr_exec_arg_buf)); + + if (unlikely(argv == NULL)) + goto log; + + for (i = 0; i < bprm->argc && execlen < 128; i++) { + const char __user *p; + unsigned int len; + + if (copy_from_user(&p, argv + i, sizeof(p))) + goto log; + if (!p) + goto log; +
len = strnlen_user(p, 128 - execlen); + if (len > 128 - execlen) + len = 128 - execlen; + else if (len > 0) + len--; + if (copy_from_user(grarg + execlen, p, len)) + goto log; + + /* rewrite unprintable characters */ + for (x = 0; x < len; x++) { + c = *(grarg + execlen + x); + if (c < 32 || c > 126) + *(grarg + execlen + x) = ' '; + } + + execlen += len; + *(grarg + execlen) = ' '; + *(grarg + execlen + 1) = '\0'; + execlen++; + } + + log: + gr_log_fs_str(GR_DO_AUDIT, GR_EXEC_AUDIT_MSG, bprm->file->f_dentry, + bprm->file->f_vfsmnt, grarg); + up(&gr_exec_arg_sem); +#endif + return; +} diff -urNp linux-2.6.17.11/grsecurity/grsec_fifo.c linux-2.6.17.11/grsecurity/grsec_fifo.c --- linux-2.6.17.11/grsecurity/grsec_fifo.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/grsecurity/grsec_fifo.c 2006-09-01 16:20:28.000000000 -0400 @@ -0,0 +1,22 @@ +#include +#include +#include +#include +#include + +int +gr_handle_fifo(const struct dentry *dentry, const struct vfsmount *mnt, + const struct dentry *dir, const int flag, const int acc_mode) +{ +#ifdef CONFIG_GRKERNSEC_FIFO + if (grsec_enable_fifo && S_ISFIFO(dentry->d_inode->i_mode) && + !(flag & O_EXCL) && (dir->d_inode->i_mode & S_ISVTX) && + (dentry->d_inode->i_uid != dir->d_inode->i_uid) && + (current->fsuid != dentry->d_inode->i_uid)) { + if (!generic_permission(dentry->d_inode, acc_mode, NULL)) + gr_log_fs_int2(GR_DONT_AUDIT, GR_FIFO_MSG, dentry, mnt, dentry->d_inode->i_uid, dentry->d_inode->i_gid); + return -EACCES; + } +#endif + return 0; +} diff -urNp linux-2.6.17.11/grsecurity/grsec_fork.c linux-2.6.17.11/grsecurity/grsec_fork.c --- linux-2.6.17.11/grsecurity/grsec_fork.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/grsecurity/grsec_fork.c 2006-09-01 16:20:28.000000000 -0400 @@ -0,0 +1,15 @@ +#include +#include +#include +#include +#include + +void +gr_log_forkfail(const int retval) +{ +#ifdef CONFIG_GRKERNSEC_FORKFAIL + if (grsec_enable_forkfail && retval != -ERESTARTNOINTR) + 
gr_log_int(GR_DONT_AUDIT, GR_FAILFORK_MSG, retval); +#endif + return; +} diff -urNp linux-2.6.17.11/grsecurity/grsec_init.c linux-2.6.17.11/grsecurity/grsec_init.c --- linux-2.6.17.11/grsecurity/grsec_init.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/grsecurity/grsec_init.c 2006-09-01 16:20:28.000000000 -0400 @@ -0,0 +1,236 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +int grsec_enable_shm; +int grsec_enable_link; +int grsec_enable_dmesg; +int grsec_enable_fifo; +int grsec_enable_execve; +int grsec_enable_execlog; +int grsec_enable_signal; +int grsec_enable_forkfail; +int grsec_enable_time; +int grsec_enable_audit_textrel; +int grsec_enable_group; +int grsec_audit_gid; +int grsec_enable_chdir; +int grsec_enable_audit_ipc; +int grsec_enable_mount; +int grsec_enable_chroot_findtask; +int grsec_enable_chroot_mount; +int grsec_enable_chroot_shmat; +int grsec_enable_chroot_fchdir; +int grsec_enable_chroot_double; +int grsec_enable_chroot_pivot; +int grsec_enable_chroot_chdir; +int grsec_enable_chroot_chmod; +int grsec_enable_chroot_mknod; +int grsec_enable_chroot_nice; +int grsec_enable_chroot_execlog; +int grsec_enable_chroot_caps; +int grsec_enable_chroot_sysctl; +int grsec_enable_chroot_unix; +int grsec_enable_tpe; +int grsec_tpe_gid; +int grsec_enable_tpe_all; +int grsec_enable_randpid; +int grsec_enable_socket_all; +int grsec_socket_all_gid; +int grsec_enable_socket_client; +int grsec_socket_client_gid; +int grsec_enable_socket_server; +int grsec_socket_server_gid; +int grsec_resource_logging; +int grsec_lock; + +spinlock_t grsec_alert_lock = SPIN_LOCK_UNLOCKED; +unsigned long grsec_alert_wtime = 0; +unsigned long grsec_alert_fyet = 0; + +spinlock_t grsec_audit_lock = SPIN_LOCK_UNLOCKED; + +rwlock_t grsec_exec_file_lock = RW_LOCK_UNLOCKED; + +char *gr_shared_page[4]; + +char *gr_alert_log_fmt; +char *gr_audit_log_fmt; +char *gr_alert_log_buf; +char *gr_audit_log_buf; + +extern struct gr_arg *gr_usermode; +extern 
unsigned char *gr_system_salt;
+extern unsigned char *gr_system_sum;
+
+/*
+ * grsecurity_init - allocate grsecurity's global buffers and apply the
+ * compile-time feature defaults.
+ *
+ * Allocates the gr_shared_page[] per-cpu pages, the alert/audit log format
+ * and message buffers, and the authentication structures.  Any allocation
+ * failure panics.  When sysctl support is not configured, or
+ * CONFIG_GRKERNSEC_SYSCTL_ON is set, the grsec_* flags of the features
+ * that are compiled in are then switched on.
+ */
+void
+grsecurity_init(void)
+{
+	int j;
+
+	/*
+	 * create the per-cpu shared pages
+	 *
+	 * Preemption is deliberately left enabled: nothing in this loop
+	 * touches per-cpu data, and __alloc_percpu() allocates with
+	 * GFP_KERNEL (like the allocations below), so it may sleep.
+	 */
+	for (j = 0; j < 4; j++) {
+		gr_shared_page[j] = (char *)__alloc_percpu(PAGE_SIZE);
+		if (gr_shared_page[j] == NULL) {
+			panic("Unable to allocate grsecurity shared page");
+			return;
+		}
+	}
+
+	/* allocate log buffers */
+	gr_alert_log_fmt = kmalloc(512, GFP_KERNEL);
+	if (!gr_alert_log_fmt) {
+		panic("Unable to allocate grsecurity alert log format buffer");
+		return;
+	}
+	gr_audit_log_fmt = kmalloc(512, GFP_KERNEL);
+	if (!gr_audit_log_fmt) {
+		panic("Unable to allocate grsecurity audit log format buffer");
+		return;
+	}
+	gr_alert_log_buf = (char *) get_zeroed_page(GFP_KERNEL);
+	if (!gr_alert_log_buf) {
+		panic("Unable to allocate grsecurity alert log buffer");
+		return;
+	}
+	gr_audit_log_buf = (char *) get_zeroed_page(GFP_KERNEL);
+	if (!gr_audit_log_buf) {
+		panic("Unable to allocate grsecurity audit log buffer");
+		return;
+	}
+
+	/* allocate memory for authentication structure */
+	gr_usermode = kmalloc(sizeof(struct gr_arg), GFP_KERNEL);
+	gr_system_salt = kmalloc(GR_SALT_LEN, GFP_KERNEL);
+	gr_system_sum = kmalloc(GR_SHA_LEN, GFP_KERNEL);
+
+	if (!gr_usermode || !gr_system_salt || !gr_system_sum) {
+		panic("Unable to allocate grsecurity authentication structure");
+		return;
+	}
+
+	/* feature defaults: only applied without sysctl, or with SYSCTL_ON */
+#if !defined(CONFIG_GRKERNSEC_SYSCTL) || defined(CONFIG_GRKERNSEC_SYSCTL_ON)
+#ifndef CONFIG_GRKERNSEC_SYSCTL
+	grsec_lock = 1;
+#endif
+#ifdef CONFIG_GRKERNSEC_SHM
+	grsec_enable_shm = 1;
+#endif
+#ifdef CONFIG_GRKERNSEC_AUDIT_TEXTREL
+	grsec_enable_audit_textrel = 1;
+#endif
+#ifdef CONFIG_GRKERNSEC_AUDIT_GROUP
+	grsec_enable_group = 1;
+	grsec_audit_gid = CONFIG_GRKERNSEC_AUDIT_GID;
+#endif
+#ifdef CONFIG_GRKERNSEC_AUDIT_CHDIR
+	grsec_enable_chdir = 1;
+#endif
+#ifdef CONFIG_GRKERNSEC_AUDIT_IPC
+	grsec_enable_audit_ipc = 1;
+#endif
+#ifdef CONFIG_GRKERNSEC_AUDIT_MOUNT
+
grsec_enable_mount = 1; +#endif +#ifdef CONFIG_GRKERNSEC_LINK + grsec_enable_link = 1; +#endif +#ifdef CONFIG_GRKERNSEC_DMESG + grsec_enable_dmesg = 1; +#endif +#ifdef CONFIG_GRKERNSEC_FIFO + grsec_enable_fifo = 1; +#endif +#ifdef CONFIG_GRKERNSEC_EXECVE + grsec_enable_execve = 1; +#endif +#ifdef CONFIG_GRKERNSEC_EXECLOG + grsec_enable_execlog = 1; +#endif +#ifdef CONFIG_GRKERNSEC_SIGNAL + grsec_enable_signal = 1; +#endif +#ifdef CONFIG_GRKERNSEC_FORKFAIL + grsec_enable_forkfail = 1; +#endif +#ifdef CONFIG_GRKERNSEC_TIME + grsec_enable_time = 1; +#endif +#ifdef CONFIG_GRKERNSEC_RESLOG + grsec_resource_logging = 1; +#endif +#ifdef CONFIG_GRKERNSEC_CHROOT_FINDTASK + grsec_enable_chroot_findtask = 1; +#endif +#ifdef CONFIG_GRKERNSEC_CHROOT_UNIX + grsec_enable_chroot_unix = 1; +#endif +#ifdef CONFIG_GRKERNSEC_CHROOT_MOUNT + grsec_enable_chroot_mount = 1; +#endif +#ifdef CONFIG_GRKERNSEC_CHROOT_FCHDIR + grsec_enable_chroot_fchdir = 1; +#endif +#ifdef CONFIG_GRKERNSEC_CHROOT_SHMAT + grsec_enable_chroot_shmat = 1; +#endif +#ifdef CONFIG_GRKERNSEC_CHROOT_DOUBLE + grsec_enable_chroot_double = 1; +#endif +#ifdef CONFIG_GRKERNSEC_CHROOT_PIVOT + grsec_enable_chroot_pivot = 1; +#endif +#ifdef CONFIG_GRKERNSEC_CHROOT_CHDIR + grsec_enable_chroot_chdir = 1; +#endif +#ifdef CONFIG_GRKERNSEC_CHROOT_CHMOD + grsec_enable_chroot_chmod = 1; +#endif +#ifdef CONFIG_GRKERNSEC_CHROOT_MKNOD + grsec_enable_chroot_mknod = 1; +#endif +#ifdef CONFIG_GRKERNSEC_CHROOT_NICE + grsec_enable_chroot_nice = 1; +#endif +#ifdef CONFIG_GRKERNSEC_CHROOT_EXECLOG + grsec_enable_chroot_execlog = 1; +#endif +#ifdef CONFIG_GRKERNSEC_CHROOT_CAPS + grsec_enable_chroot_caps = 1; +#endif +#ifdef CONFIG_GRKERNSEC_CHROOT_SYSCTL + grsec_enable_chroot_sysctl = 1; +#endif +#ifdef CONFIG_GRKERNSEC_TPE + grsec_enable_tpe = 1; + grsec_tpe_gid = CONFIG_GRKERNSEC_TPE_GID; +#ifdef CONFIG_GRKERNSEC_TPE_ALL + grsec_enable_tpe_all = 1; +#endif +#endif +#ifdef CONFIG_GRKERNSEC_RANDPID + grsec_enable_randpid = 1; +#endif +#ifdef 
CONFIG_GRKERNSEC_SOCKET_ALL + grsec_enable_socket_all = 1; + grsec_socket_all_gid = CONFIG_GRKERNSEC_SOCKET_ALL_GID; +#endif +#ifdef CONFIG_GRKERNSEC_SOCKET_CLIENT + grsec_enable_socket_client = 1; + grsec_socket_client_gid = CONFIG_GRKERNSEC_SOCKET_CLIENT_GID; +#endif +#ifdef CONFIG_GRKERNSEC_SOCKET_SERVER + grsec_enable_socket_server = 1; + grsec_socket_server_gid = CONFIG_GRKERNSEC_SOCKET_SERVER_GID; +#endif +#endif + + return; +} diff -urNp linux-2.6.17.11/grsecurity/grsec_ipc.c linux-2.6.17.11/grsecurity/grsec_ipc.c --- linux-2.6.17.11/grsecurity/grsec_ipc.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/grsecurity/grsec_ipc.c 2006-09-01 16:20:28.000000000 -0400 @@ -0,0 +1,81 @@ +#include +#include +#include +#include +#include +#include + +void +gr_log_msgget(const int ret, const int msgflg) +{ +#ifdef CONFIG_GRKERNSEC_AUDIT_IPC + if (((grsec_enable_group && in_group_p(grsec_audit_gid) && + grsec_enable_audit_ipc) || (grsec_enable_audit_ipc && + !grsec_enable_group)) && (ret >= 0) + && (msgflg & IPC_CREAT)) + gr_log_noargs(GR_DO_AUDIT, GR_MSGQ_AUDIT_MSG); +#endif + return; +} + +void +gr_log_msgrm(const uid_t uid, const uid_t cuid) +{ +#ifdef CONFIG_GRKERNSEC_AUDIT_IPC + if ((grsec_enable_group && in_group_p(grsec_audit_gid) && + grsec_enable_audit_ipc) || + (grsec_enable_audit_ipc && !grsec_enable_group)) + gr_log_int_int(GR_DO_AUDIT, GR_MSGQR_AUDIT_MSG, uid, cuid); +#endif + return; +} + +void +gr_log_semget(const int err, const int semflg) +{ +#ifdef CONFIG_GRKERNSEC_AUDIT_IPC + if (((grsec_enable_group && in_group_p(grsec_audit_gid) && + grsec_enable_audit_ipc) || (grsec_enable_audit_ipc && + !grsec_enable_group)) && (err >= 0) + && (semflg & IPC_CREAT)) + gr_log_noargs(GR_DO_AUDIT, GR_SEM_AUDIT_MSG); +#endif + return; +} + +void +gr_log_semrm(const uid_t uid, const uid_t cuid) +{ +#ifdef CONFIG_GRKERNSEC_AUDIT_IPC + if ((grsec_enable_group && in_group_p(grsec_audit_gid) && + grsec_enable_audit_ipc) || + (grsec_enable_audit_ipc && 
!grsec_enable_group)) + gr_log_int_int(GR_DO_AUDIT, GR_SEMR_AUDIT_MSG, uid, cuid); +#endif + return; +} + +void +gr_log_shmget(const int err, const int shmflg, const size_t size) +{ +#ifdef CONFIG_GRKERNSEC_AUDIT_IPC + if (((grsec_enable_group && in_group_p(grsec_audit_gid) && + grsec_enable_audit_ipc) || (grsec_enable_audit_ipc && + !grsec_enable_group)) && (err >= 0) + && (shmflg & IPC_CREAT)) + gr_log_int(GR_DO_AUDIT, GR_SHM_AUDIT_MSG, size); +#endif + return; +} + +void +gr_log_shmrm(const uid_t uid, const uid_t cuid) +{ +#ifdef CONFIG_GRKERNSEC_AUDIT_IPC + if ((grsec_enable_group && in_group_p(grsec_audit_gid) && + grsec_enable_audit_ipc) || + (grsec_enable_audit_ipc && !grsec_enable_group)) + gr_log_int_int(GR_DO_AUDIT, GR_SHMR_AUDIT_MSG, uid, cuid); +#endif + return; +} diff -urNp linux-2.6.17.11/grsecurity/grsec_link.c linux-2.6.17.11/grsecurity/grsec_link.c --- linux-2.6.17.11/grsecurity/grsec_link.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/grsecurity/grsec_link.c 2006-09-01 16:20:28.000000000 -0400 @@ -0,0 +1,39 @@ +#include +#include +#include +#include +#include + +int +gr_handle_follow_link(const struct inode *parent, + const struct inode *inode, + const struct dentry *dentry, const struct vfsmount *mnt) +{ +#ifdef CONFIG_GRKERNSEC_LINK + if (grsec_enable_link && S_ISLNK(inode->i_mode) && + (parent->i_mode & S_ISVTX) && (parent->i_uid != inode->i_uid) && + (parent->i_mode & S_IWOTH) && (current->fsuid != inode->i_uid)) { + gr_log_fs_int2(GR_DONT_AUDIT, GR_SYMLINK_MSG, dentry, mnt, inode->i_uid, inode->i_gid); + return -EACCES; + } +#endif + return 0; +} + +int +gr_handle_hardlink(const struct dentry *dentry, + const struct vfsmount *mnt, + struct inode *inode, const int mode, const char *to) +{ +#ifdef CONFIG_GRKERNSEC_LINK + if (grsec_enable_link && current->fsuid != inode->i_uid && + (!S_ISREG(mode) || (mode & S_ISUID) || + ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) || + (generic_permission(inode, MAY_READ | MAY_WRITE, 
NULL))) &&
+	    !capable(CAP_FOWNER) && current->uid) {
+		gr_log_fs_int2_str(GR_DONT_AUDIT, GR_HARDLINK_MSG, dentry, mnt, inode->i_uid, inode->i_gid, to);
+		return -EPERM;
+	}
+#endif
+	return 0;
+}
diff -urNp linux-2.6.17.11/grsecurity/grsec_log.c linux-2.6.17.11/grsecurity/grsec_log.c
--- linux-2.6.17.11/grsecurity/grsec_log.c 1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.17.11/grsecurity/grsec_log.c 2006-09-01 16:20:28.000000000 -0400
@@ -0,0 +1,265 @@
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * BEGIN_LOCKS(x) - take the locks held while a log record is assembled:
+ * tasklist_lock and grsec_exec_file_lock for reading, then the audit
+ * spinlock when x is GR_DO_AUDIT or the alert spinlock otherwise.
+ *
+ * Wrapped in do { } while (0) so that it expands to exactly one statement
+ * and stays correct under an unbraced if/else.  x is evaluated once.
+ */
+#define BEGIN_LOCKS(x) \
+	do { \
+		read_lock(&tasklist_lock); \
+		read_lock(&grsec_exec_file_lock); \
+		if ((x) != GR_DO_AUDIT) \
+			spin_lock(&grsec_alert_lock); \
+		else \
+			spin_lock(&grsec_audit_lock); \
+	} while (0)
+
+/*
+ * END_LOCKS(x) - release what BEGIN_LOCKS(x) took, in reverse order, and
+ * afterwards call gr_handle_alertkill(current) when x is GR_DONT_AUDIT.
+ *
+ * x is evaluated twice; pass a plain variable or constant.
+ */
+#define END_LOCKS(x) \
+	do { \
+		if ((x) != GR_DO_AUDIT) \
+			spin_unlock(&grsec_alert_lock); \
+		else \
+			spin_unlock(&grsec_audit_lock); \
+		read_unlock(&grsec_exec_file_lock); \
+		read_unlock(&tasklist_lock); \
+		if ((x) == GR_DONT_AUDIT) \
+			gr_handle_alertkill(current); \
+	} while (0)
+
+/* return values of gr_log_start() */
+enum {
+	FLOODING,
+	NO_FLOODING
+};
+
+extern char *gr_alert_log_fmt;
+extern char *gr_audit_log_fmt;
+extern char *gr_alert_log_buf;
+extern char *gr_audit_log_buf;
+
+static int gr_log_start(int audit)
+{
+	char *loglevel = (audit == GR_DO_AUDIT) ? KERN_INFO : KERN_ALERT;
+	char *fmt = (audit == GR_DO_AUDIT) ? gr_audit_log_fmt : gr_alert_log_fmt;
+	char *buf = (audit == GR_DO_AUDIT) ?
gr_audit_log_buf : gr_alert_log_buf; + + if (audit == GR_DO_AUDIT) + goto set_fmt; + + if (!grsec_alert_wtime || jiffies - grsec_alert_wtime > CONFIG_GRKERNSEC_FLOODTIME * HZ) { + grsec_alert_wtime = jiffies; + grsec_alert_fyet = 0; + } else if ((jiffies - grsec_alert_wtime < CONFIG_GRKERNSEC_FLOODTIME * HZ) && (grsec_alert_fyet < CONFIG_GRKERNSEC_FLOODBURST)) { + grsec_alert_fyet++; + } else if (grsec_alert_fyet == CONFIG_GRKERNSEC_FLOODBURST) { + grsec_alert_wtime = jiffies; + grsec_alert_fyet++; + printk(KERN_ALERT "grsec: more alerts, logging disabled for %d seconds\n", CONFIG_GRKERNSEC_FLOODTIME); + return FLOODING; + } else return FLOODING; + +set_fmt: + memset(buf, 0, PAGE_SIZE); + if (current->signal->curr_ip && gr_acl_is_enabled()) { + sprintf(fmt, "%s%s", loglevel, "grsec: From %u.%u.%u.%u: (%.64s:%c:%.950s) "); + snprintf(buf, PAGE_SIZE - 1, fmt, NIPQUAD(current->signal->curr_ip), current->role->rolename, gr_roletype_to_char(), current->acl->filename); + } else if (current->signal->curr_ip) { + sprintf(fmt, "%s%s", loglevel, "grsec: From %u.%u.%u.%u: "); + snprintf(buf, PAGE_SIZE - 1, fmt, NIPQUAD(current->signal->curr_ip)); + } else if (gr_acl_is_enabled()) { + sprintf(fmt, "%s%s", loglevel, "grsec: (%.64s:%c:%.950s) "); + snprintf(buf, PAGE_SIZE - 1, fmt, current->role->rolename, gr_roletype_to_char(), current->acl->filename); + } else { + sprintf(fmt, "%s%s", loglevel, "grsec: "); + strcpy(buf, fmt); + } + + return NO_FLOODING; +} + +static void gr_log_middle(int audit, const char *msg, va_list ap) +{ + char *buf = (audit == GR_DO_AUDIT) ? gr_audit_log_buf : gr_alert_log_buf; + unsigned int len = strlen(buf); + + vsnprintf(buf + len, PAGE_SIZE - len - 1, msg, ap); + + return; +} + +static void gr_log_middle_varargs(int audit, const char *msg, ...) +{ + char *buf = (audit == GR_DO_AUDIT) ? 
gr_audit_log_buf : gr_alert_log_buf; + unsigned int len = strlen(buf); + va_list ap; + + va_start(ap, msg); + vsnprintf(buf + len, PAGE_SIZE - len - 1, msg, ap); + va_end(ap); + + return; +} + +static void gr_log_end(int audit) +{ + char *buf = (audit == GR_DO_AUDIT) ? gr_audit_log_buf : gr_alert_log_buf; + unsigned int len = strlen(buf); + + snprintf(buf + len, PAGE_SIZE - len - 1, DEFAULTSECMSG, DEFAULTSECARGS(current)); + printk("%s\n", buf); + + return; +} + +void gr_log_varargs(int audit, const char *msg, int argtypes, ...) +{ + int logtype; + char *result = (audit == GR_DO_AUDIT) ? "successful" : "denied"; + char *str1, *str2, *str3; + int num1, num2; + unsigned long ulong1, ulong2; + struct dentry *dentry; + struct vfsmount *mnt; + struct file *file; + struct task_struct *task; + va_list ap; + + BEGIN_LOCKS(audit); + logtype = gr_log_start(audit); + if (logtype == FLOODING) { + END_LOCKS(audit); + return; + } + va_start(ap, argtypes); + switch (argtypes) { + case GR_TTYSNIFF: + task = va_arg(ap, struct task_struct *); + gr_log_middle_varargs(audit, msg, NIPQUAD(task->signal->curr_ip), gr_task_fullpath0(task), task->comm, task->pid, gr_parent_task_fullpath0(task), task->parent->comm, task->parent->pid); + break; + case GR_RBAC: + dentry = va_arg(ap, struct dentry *); + mnt = va_arg(ap, struct vfsmount *); + gr_log_middle_varargs(audit, msg, result, gr_to_filename(dentry, mnt)); + break; + case GR_RBAC_STR: + dentry = va_arg(ap, struct dentry *); + mnt = va_arg(ap, struct vfsmount *); + str1 = va_arg(ap, char *); + gr_log_middle_varargs(audit, msg, result, gr_to_filename(dentry, mnt), str1); + break; + case GR_STR_RBAC: + str1 = va_arg(ap, char *); + dentry = va_arg(ap, struct dentry *); + mnt = va_arg(ap, struct vfsmount *); + gr_log_middle_varargs(audit, msg, result, str1, gr_to_filename(dentry, mnt)); + break; + case GR_RBAC_MODE2: + dentry = va_arg(ap, struct dentry *); + mnt = va_arg(ap, struct vfsmount *); + str1 = va_arg(ap, char *); + str2 = va_arg(ap, 
char *); + gr_log_middle_varargs(audit, msg, result, gr_to_filename(dentry, mnt), str1, str2); + break; + case GR_RBAC_MODE3: + dentry = va_arg(ap, struct dentry *); + mnt = va_arg(ap, struct vfsmount *); + str1 = va_arg(ap, char *); + str2 = va_arg(ap, char *); + str3 = va_arg(ap, char *); + gr_log_middle_varargs(audit, msg, result, gr_to_filename(dentry, mnt), str1, str2, str3); + break; + case GR_FILENAME: + dentry = va_arg(ap, struct dentry *); + mnt = va_arg(ap, struct vfsmount *); + gr_log_middle_varargs(audit, msg, gr_to_filename(dentry, mnt)); + break; + case GR_STR_FILENAME: + str1 = va_arg(ap, char *); + dentry = va_arg(ap, struct dentry *); + mnt = va_arg(ap, struct vfsmount *); + gr_log_middle_varargs(audit, msg, str1, gr_to_filename(dentry, mnt)); + break; + case GR_FILENAME_STR: + dentry = va_arg(ap, struct dentry *); + mnt = va_arg(ap, struct vfsmount *); + str1 = va_arg(ap, char *); + gr_log_middle_varargs(audit, msg, gr_to_filename(dentry, mnt), str1); + break; + case GR_FILENAME_TWO_INT: + dentry = va_arg(ap, struct dentry *); + mnt = va_arg(ap, struct vfsmount *); + num1 = va_arg(ap, int); + num2 = va_arg(ap, int); + gr_log_middle_varargs(audit, msg, gr_to_filename(dentry, mnt), num1, num2); + break; + case GR_FILENAME_TWO_INT_STR: + dentry = va_arg(ap, struct dentry *); + mnt = va_arg(ap, struct vfsmount *); + num1 = va_arg(ap, int); + num2 = va_arg(ap, int); + str1 = va_arg(ap, char *); + gr_log_middle_varargs(audit, msg, gr_to_filename(dentry, mnt), num1, num2, str1); + break; + case GR_TEXTREL: + file = va_arg(ap, struct file *); + ulong1 = va_arg(ap, unsigned long); + ulong2 = va_arg(ap, unsigned long); + gr_log_middle_varargs(audit, msg, file ? gr_to_filename(file->f_dentry, file->f_vfsmnt) : "", ulong1, ulong2); + break; + case GR_PTRACE: + task = va_arg(ap, struct task_struct *); + gr_log_middle_varargs(audit, msg, task->exec_file ? 
gr_to_filename(task->exec_file->f_dentry, task->exec_file->f_vfsmnt) : "(none)", task->comm, task->pid); + break; + case GR_RESOURCE: + task = va_arg(ap, struct task_struct *); + ulong1 = va_arg(ap, unsigned long); + str1 = va_arg(ap, char *); + ulong2 = va_arg(ap, unsigned long); + gr_log_middle_varargs(audit, msg, ulong1, str1, ulong2, gr_task_fullpath(task), task->comm, task->pid, task->uid, task->euid, task->gid, task->egid, gr_parent_task_fullpath(task), task->parent->comm, task->parent->pid, task->parent->uid, task->parent->euid, task->parent->gid, task->parent->egid); + break; + case GR_CAP: + task = va_arg(ap, struct task_struct *); + str1 = va_arg(ap, char *); + gr_log_middle_varargs(audit, msg, str1, gr_task_fullpath(task), task->comm, task->pid, task->uid, task->euid, task->gid, task->egid, gr_parent_task_fullpath(task), task->parent->comm, task->parent->pid, task->parent->uid, task->parent->euid, task->parent->gid, task->parent->egid); + break; + case GR_SIG: + task = va_arg(ap, struct task_struct *); + num1 = va_arg(ap, int); + gr_log_middle_varargs(audit, msg, num1, gr_task_fullpath0(task), task->comm, task->pid, task->uid, task->euid, task->gid, task->egid, gr_parent_task_fullpath0(task), task->parent->comm, task->parent->pid, task->parent->uid, task->parent->euid, task->parent->gid, task->parent->egid); + break; + case GR_CRASH1: + task = va_arg(ap, struct task_struct *); + ulong1 = va_arg(ap, unsigned long); + gr_log_middle_varargs(audit, msg, gr_task_fullpath(task), task->comm, task->pid, task->uid, task->euid, task->gid, task->egid, gr_parent_task_fullpath(task), task->parent->comm, task->parent->pid, task->parent->uid, task->parent->euid, task->parent->gid, task->parent->egid, task->uid, ulong1); + break; + case GR_CRASH2: + task = va_arg(ap, struct task_struct *); + ulong1 = va_arg(ap, unsigned long); + gr_log_middle_varargs(audit, msg, gr_task_fullpath(task), task->comm, task->pid, task->uid, task->euid, task->gid, task->egid, 
gr_parent_task_fullpath(task), task->parent->comm, task->parent->pid, task->parent->uid, task->parent->euid, task->parent->gid, task->parent->egid, ulong1); + break; + case GR_PSACCT: + { + unsigned int wday, cday; + __u8 whr, chr; + __u8 wmin, cmin; + __u8 wsec, csec; + char cur_tty[64] = { 0 }; + char parent_tty[64] = { 0 }; + + task = va_arg(ap, struct task_struct *); + wday = va_arg(ap, unsigned int); + cday = va_arg(ap, unsigned int); + whr = va_arg(ap, int); + chr = va_arg(ap, int); + wmin = va_arg(ap, int); + cmin = va_arg(ap, int); + wsec = va_arg(ap, int); + csec = va_arg(ap, int); + ulong1 = va_arg(ap, unsigned long); + + gr_log_middle_varargs(audit, msg, gr_task_fullpath(task), task->comm, task->pid, NIPQUAD(task->signal->curr_ip), tty_name(task->signal->tty, cur_tty), task->uid, task->euid, task->gid, task->egid, wday, whr, wmin, wsec, cday, chr, cmin, csec, (task->flags & PF_SIGNALED) ? "killed by signal" : "exited", ulong1, gr_parent_task_fullpath(task), task->parent->comm, task->parent->pid, NIPQUAD(task->parent->signal->curr_ip), tty_name(task->parent->signal->tty, parent_tty), task->parent->uid, task->parent->euid, task->parent->gid, task->parent->egid); + } + break; + default: + gr_log_middle(audit, msg, ap); + } + va_end(ap); + gr_log_end(audit); + END_LOCKS(audit); +} diff -urNp linux-2.6.17.11/grsecurity/grsec_mem.c linux-2.6.17.11/grsecurity/grsec_mem.c --- linux-2.6.17.11/grsecurity/grsec_mem.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/grsecurity/grsec_mem.c 2006-09-01 16:20:28.000000000 -0400 @@ -0,0 +1,71 @@ +#include +#include +#include +#include +#include + +void +gr_handle_ioperm(void) +{ + gr_log_noargs(GR_DONT_AUDIT, GR_IOPERM_MSG); + return; +} + +void +gr_handle_iopl(void) +{ + gr_log_noargs(GR_DONT_AUDIT, GR_IOPL_MSG); + return; +} + +void +gr_handle_mem_write(void) +{ + gr_log_noargs(GR_DONT_AUDIT, GR_MEM_WRITE_MSG); + return; +} + +void +gr_handle_kmem_write(void) +{ + gr_log_noargs(GR_DONT_AUDIT, GR_KMEM_MSG); + 
return; +} + +void +gr_handle_open_port(void) +{ + gr_log_noargs(GR_DONT_AUDIT, GR_PORT_OPEN_MSG); + return; +} + +int +gr_handle_mem_mmap(const unsigned long offset, struct vm_area_struct *vma) +{ + unsigned long start, end; + + start = offset; + end = start + vma->vm_end - vma->vm_start; + + if (start > end) { + gr_log_noargs(GR_DONT_AUDIT, GR_MEM_MMAP_MSG); + return -EPERM; + } + + /* allowed ranges : ISA I/O BIOS */ + if ((start >= __pa(high_memory)) +#ifdef CONFIG_X86 + || (start >= 0x000a0000 && end <= 0x00100000) + || (start >= 0x00000000 && end <= 0x00001000) +#endif + ) + return 0; + + if (vma->vm_flags & VM_WRITE) { + gr_log_noargs(GR_DONT_AUDIT, GR_MEM_MMAP_MSG); + return -EPERM; + } else + vma->vm_flags &= ~VM_MAYWRITE; + + return 0; +} diff -urNp linux-2.6.17.11/grsecurity/grsec_mount.c linux-2.6.17.11/grsecurity/grsec_mount.c --- linux-2.6.17.11/grsecurity/grsec_mount.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/grsecurity/grsec_mount.c 2006-09-01 16:20:28.000000000 -0400 @@ -0,0 +1,34 @@ +#include +#include +#include +#include + +void +gr_log_remount(const char *devname, const int retval) +{ +#ifdef CONFIG_GRKERNSEC_AUDIT_MOUNT + if (grsec_enable_mount && (retval >= 0)) + gr_log_str(GR_DO_AUDIT, GR_REMOUNT_AUDIT_MSG, devname ? devname : "none"); +#endif + return; +} + +void +gr_log_unmount(const char *devname, const int retval) +{ +#ifdef CONFIG_GRKERNSEC_AUDIT_MOUNT + if (grsec_enable_mount && (retval >= 0)) + gr_log_str(GR_DO_AUDIT, GR_UNMOUNT_AUDIT_MSG, devname ? 
devname : "none");
+#endif
+	return;
+}
+
+void
+gr_log_mount(const char *from, const char *to, const int retval)
+{
+#ifdef CONFIG_GRKERNSEC_AUDIT_MOUNT
+	if (grsec_enable_mount && (retval >= 0))
+		gr_log_str_str(GR_DO_AUDIT, GR_MOUNT_AUDIT_MSG, from, to);
+#endif
+	return;
+}
diff -urNp linux-2.6.17.11/grsecurity/grsec_rand.c linux-2.6.17.11/grsecurity/grsec_rand.c
--- linux-2.6.17.11/grsecurity/grsec_rand.c 1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.17.11/grsecurity/grsec_rand.c 2006-09-01 16:20:28.000000000 -0400
@@ -0,0 +1,26 @@
+#include
+#include
+#include
+#include
+#include
+
+extern int pid_max;
+
+/*
+ * gr_random_pid - pick a random pid when pid randomization is active.
+ *
+ * Returns a value in the range 1 ... pid_max - 1 when
+ * CONFIG_GRKERNSEC_RANDPID is built in, grsec_enable_randpid is set and
+ * current->fs->root is non-NULL; returns 0 in every other case.
+ */
+int
+gr_random_pid(void)
+{
+#ifdef CONFIG_GRKERNSEC_RANDPID
+	/* read pid_max once so the modulus and the range always agree */
+	const int limit = pid_max;
+
+	if (grsec_enable_randpid && current->fs->root && limit > 1) {
+		/* return a pid in the range 1 ... pid_max - 1
+		   optimize this so we don't have to do a real division
+
+		   Reducing modulo (pid_max - 1) gives every pid in the
+		   range the same weight; reducing modulo pid_max and
+		   clamping made pid_max - 1 twice as likely as the rest.
+		*/
+		return 1 + (int)(get_random_long() % (unsigned long)(limit - 1));
+	}
+#endif
+	return 0;
+}
diff -urNp linux-2.6.17.11/grsecurity/grsec_sig.c linux-2.6.17.11/grsecurity/grsec_sig.c
--- linux-2.6.17.11/grsecurity/grsec_sig.c 1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.17.11/grsecurity/grsec_sig.c 2006-09-01 16:20:28.000000000 -0400
@@ -0,0 +1,59 @@
+#include
+#include
+#include
+#include
+
+void
+gr_log_signal(const int sig, const struct task_struct *t)
+{
+#ifdef CONFIG_GRKERNSEC_SIGNAL
+	if (grsec_enable_signal && ((sig == SIGSEGV) || (sig == SIGILL) ||
+				    (sig == SIGABRT) || (sig == SIGBUS))) {
+		if (t->pid == current->pid) {
+			gr_log_int(GR_DONT_AUDIT_GOOD, GR_UNISIGLOG_MSG, sig);
+		} else {
+			gr_log_sig(GR_DONT_AUDIT_GOOD, GR_DUALSIGLOG_MSG, t, sig);
+		}
+	}
+#endif
+	return;
+}
+
+int
+gr_handle_signal(const struct task_struct *p, const int sig)
+{
+#ifdef CONFIG_GRKERNSEC
+	if (current->pid > 1 && gr_check_protected_task(p)) {
+		gr_log_sig(GR_DONT_AUDIT, GR_SIG_ACL_MSG, p, sig);
+		return -EPERM;
+	} else if (gr_pid_is_chrooted((struct task_struct *)p)) {
+		return -EPERM;
+	}
+#endif + return 0; +} + +void gr_handle_brute_attach(struct task_struct *p) +{ +#ifdef CONFIG_GRKERNSEC_BRUTE + read_lock(&tasklist_lock); + read_lock(&grsec_exec_file_lock); + if (p->parent && p->parent->exec_file == p->exec_file) + p->parent->brute = 1; + read_unlock(&grsec_exec_file_lock); + read_unlock(&tasklist_lock); +#endif + return; +} + +void gr_handle_brute_check(void) +{ +#ifdef CONFIG_GRKERNSEC_BRUTE + if (current->brute) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(30 * HZ); + } +#endif + return; +} + diff -urNp linux-2.6.17.11/grsecurity/grsec_sock.c linux-2.6.17.11/grsecurity/grsec_sock.c --- linux-2.6.17.11/grsecurity/grsec_sock.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/grsecurity/grsec_sock.c 2006-09-01 16:20:28.000000000 -0400 @@ -0,0 +1,263 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(CONFIG_IP_NF_MATCH_STEALTH_MODULE) +extern struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif); +EXPORT_SYMBOL(udp_v4_lookup); +#endif + +EXPORT_SYMBOL(gr_cap_rtnetlink); + +extern int gr_search_udp_recvmsg(const struct sock *sk, const struct sk_buff *skb); +extern int gr_search_udp_sendmsg(const struct sock *sk, const struct sockaddr_in *addr); + +EXPORT_SYMBOL(gr_search_udp_recvmsg); +EXPORT_SYMBOL(gr_search_udp_sendmsg); + +#ifdef CONFIG_UNIX_MODULE +EXPORT_SYMBOL(gr_acl_handle_unix); +EXPORT_SYMBOL(gr_acl_handle_mknod); +EXPORT_SYMBOL(gr_handle_chroot_unix); +EXPORT_SYMBOL(gr_handle_create); +#endif + +#ifdef CONFIG_GRKERNSEC +#define gr_conn_table_size 32749 +struct conn_table_entry { + struct conn_table_entry *next; + struct signal_struct *sig; +}; + +struct conn_table_entry *gr_conn_table[gr_conn_table_size]; +spinlock_t gr_conn_table_lock = SPIN_LOCK_UNLOCKED; + +extern const char * gr_socktype_to_name(unsigned char type); +extern const char * gr_proto_to_name(unsigned char proto); + +static __inline__ 
int
+conn_hash(__u32 saddr, __u32 daddr, __u16 sport, __u16 dport, unsigned int size)
+{
+	/*
+	 * Mix the connection 4-tuple and reduce it to a bucket index below
+	 * size.  The ports are widened to __u32 before shifting: a __u16
+	 * promotes to int, and shifting a port >= 0x8000 left by 16 would
+	 * overflow it.  The resulting hash value is unchanged.
+	 */
+	return ((daddr + saddr + ((__u32)sport << 8) + ((__u32)dport << 16)) % size);
+}
+
+/* Return 1 when sig's recorded connection equals the given 4-tuple, else 0. */
+static __inline__ int
+conn_match(const struct signal_struct *sig, __u32 saddr, __u32 daddr,
+	   __u16 sport, __u16 dport)
+{
+	if (unlikely(sig->gr_saddr == saddr && sig->gr_daddr == daddr &&
+		     sig->gr_sport == sport && sig->gr_dport == dport))
+		return 1;
+	else
+		return 0;
+}
+
+/*
+ * Link newent, pointing at sig, at the head of the bucket selected by
+ * sig's connection 4-tuple.  Takes no lock itself.
+ */
+static void gr_add_to_task_ip_table_nolock(struct signal_struct *sig, struct conn_table_entry *newent)
+{
+	struct conn_table_entry **match;
+	unsigned int index;
+
+	index = conn_hash(sig->gr_saddr, sig->gr_daddr,
+			  sig->gr_sport, sig->gr_dport,
+			  gr_conn_table_size);
+
+	newent->sig = sig;
+
+	match = &gr_conn_table[index];
+	newent->next = *match;
+	*match = newent;
+
+	return;
+}
+
+/*
+ * Unlink and free the first entry in sig's bucket whose connection
+ * 4-tuple matches sig's; does nothing when there is none.  Takes no lock
+ * itself.
+ */
+static void gr_del_task_from_ip_table_nolock(struct signal_struct *sig)
+{
+	struct conn_table_entry *match, *last = NULL;
+	unsigned int index;
+
+	index = conn_hash(sig->gr_saddr, sig->gr_daddr,
+			  sig->gr_sport, sig->gr_dport,
+			  gr_conn_table_size);
+
+	match = gr_conn_table[index];
+	while (match && !conn_match(match->sig,
+		sig->gr_saddr, sig->gr_daddr, sig->gr_sport,
+		sig->gr_dport)) {
+		last = match;
+		match = match->next;
+	}
+
+	if (match) {
+		if (last)
+			last->next = match->next;
+		else
+			/*
+			 * Removing the bucket head: the bucket must continue
+			 * with the successor.  Storing NULL here would drop
+			 * (and leak) every other entry that hashed to this
+			 * bucket.
+			 */
+			gr_conn_table[index] = match->next;
+		kfree(match);
+	}
+
+	return;
+}
+
+/*
+ * Return the signal_struct recorded for the given connection 4-tuple, or
+ * NULL when the table holds no matching entry.
+ */
+static struct signal_struct * gr_lookup_task_ip_table(__u32 saddr, __u32 daddr,
+					     __u16 sport, __u16 dport)
+{
+	struct conn_table_entry *match;
+	unsigned int index;
+
+	index = conn_hash(saddr, daddr, sport, dport, gr_conn_table_size);
+
+	match = gr_conn_table[index];
+	while (match && !conn_match(match->sig, saddr, daddr, sport, dport))
+		match = match->next;
+
+	if (match)
+		return match->sig;
+	else
+		return NULL;
+}
+
+#endif
+
+void gr_update_task_in_ip_table(struct task_struct *task, const struct inet_sock *inet)
+{
+#ifdef CONFIG_GRKERNSEC
+	struct signal_struct *sig =
task->signal; + struct conn_table_entry *newent; + + newent = kmalloc(sizeof(struct conn_table_entry), GFP_ATOMIC); + if (newent == NULL) + return; + /* no bh lock needed since we are called with bh disabled */ + spin_lock(&gr_conn_table_lock); + gr_del_task_from_ip_table_nolock(sig); + sig->gr_saddr = inet->rcv_saddr; + sig->gr_daddr = inet->daddr; + sig->gr_sport = inet->sport; + sig->gr_dport = inet->dport; + gr_add_to_task_ip_table_nolock(sig, newent); + spin_unlock(&gr_conn_table_lock); +#endif + return; +} + +void gr_del_task_from_ip_table(struct task_struct *task) +{ +#ifdef CONFIG_GRKERNSEC + spin_lock(&gr_conn_table_lock); + gr_del_task_from_ip_table_nolock(task->signal); + spin_unlock(&gr_conn_table_lock); +#endif + return; +} + +void +gr_attach_curr_ip(const struct sock *sk) +{ +#ifdef CONFIG_GRKERNSEC + struct signal_struct *p, *set; + const struct inet_sock *inet = inet_sk(sk); + + if (unlikely(sk->sk_protocol != IPPROTO_TCP)) + return; + + set = current->signal; + + spin_lock_bh(&gr_conn_table_lock); + p = gr_lookup_task_ip_table(inet->daddr, inet->rcv_saddr, + inet->dport, inet->sport); + if (unlikely(p != NULL)) { + set->curr_ip = p->curr_ip; + set->used_accept = 1; + gr_del_task_from_ip_table_nolock(p); + spin_unlock_bh(&gr_conn_table_lock); + return; + } + spin_unlock_bh(&gr_conn_table_lock); + + set->curr_ip = inet->daddr; + set->used_accept = 1; +#endif + return; +} + +int +gr_handle_sock_all(const int family, const int type, const int protocol) +{ +#ifdef CONFIG_GRKERNSEC_SOCKET_ALL + if (grsec_enable_socket_all && in_group_p(grsec_socket_all_gid) && + (family != AF_UNIX) && (family != AF_LOCAL)) { + gr_log_int_str2(GR_DONT_AUDIT, GR_SOCK2_MSG, family, gr_socktype_to_name(type), gr_proto_to_name(protocol)); + return -EACCES; + } +#endif + return 0; +} + +int +gr_handle_sock_server(const struct sockaddr *sck) +{ +#ifdef CONFIG_GRKERNSEC_SOCKET_SERVER + if (grsec_enable_socket_server && + in_group_p(grsec_socket_server_gid) && + sck && 
(sck->sa_family != AF_UNIX) && + (sck->sa_family != AF_LOCAL)) { + gr_log_noargs(GR_DONT_AUDIT, GR_BIND_MSG); + return -EACCES; + } +#endif + return 0; +} + +int +gr_handle_sock_server_other(const struct sock *sck) +{ +#ifdef CONFIG_GRKERNSEC_SOCKET_SERVER + if (grsec_enable_socket_server && + in_group_p(grsec_socket_server_gid) && + sck && (sck->sk_family != AF_UNIX) && + (sck->sk_family != AF_LOCAL)) { + gr_log_noargs(GR_DONT_AUDIT, GR_BIND_MSG); + return -EACCES; + } +#endif + return 0; +} + +int +gr_handle_sock_client(const struct sockaddr *sck) +{ +#ifdef CONFIG_GRKERNSEC_SOCKET_CLIENT + if (grsec_enable_socket_client && in_group_p(grsec_socket_client_gid) && + sck && (sck->sa_family != AF_UNIX) && + (sck->sa_family != AF_LOCAL)) { + gr_log_noargs(GR_DONT_AUDIT, GR_CONNECT_MSG); + return -EACCES; + } +#endif + return 0; +} + +__u32 +gr_cap_rtnetlink(void) +{ +#ifdef CONFIG_GRKERNSEC + if (!gr_acl_is_enabled()) + return current->cap_effective; + else if (cap_raised(current->cap_effective, CAP_NET_ADMIN) && + gr_task_is_capable(current, CAP_NET_ADMIN)) + return current->cap_effective; + else + return 0; +#else + return current->cap_effective; +#endif +} diff -urNp linux-2.6.17.11/grsecurity/grsec_sysctl.c linux-2.6.17.11/grsecurity/grsec_sysctl.c --- linux-2.6.17.11/grsecurity/grsec_sysctl.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/grsecurity/grsec_sysctl.c 2006-09-01 16:20:28.000000000 -0400 @@ -0,0 +1,466 @@ +#include +#include +#include +#include +#include + +#ifdef CONFIG_GRKERNSEC_MODSTOP +int grsec_modstop; +#endif + +int +gr_handle_sysctl_mod(const char *dirname, const char *name, const int op) +{ +#ifdef CONFIG_GRKERNSEC_SYSCTL + if (!strcmp(dirname, "grsecurity") && grsec_lock && (op & 002)) { + gr_log_str(GR_DONT_AUDIT, GR_SYSCTL_MSG, name); + return -EACCES; + } +#endif +#ifdef CONFIG_GRKERNSEC_MODSTOP + if (!strcmp(dirname, "grsecurity") && !strcmp(name, "disable_modules") && + grsec_modstop && (op & 002)) { + gr_log_str(GR_DONT_AUDIT, 
GR_SYSCTL_MSG, name); + return -EACCES; + } +#endif + return 0; +} + +#if defined(CONFIG_GRKERNSEC_SYSCTL) || defined(CONFIG_GRKERNSEC_MODSTOP) +enum {GS_LINK=1, GS_FIFO, GS_EXECVE, GS_EXECLOG, GS_SIGNAL, +GS_FORKFAIL, GS_TIME, GS_CHROOT_SHMAT, GS_CHROOT_UNIX, GS_CHROOT_MNT, +GS_CHROOT_FCHDIR, GS_CHROOT_DBL, GS_CHROOT_PVT, GS_CHROOT_CD, GS_CHROOT_CM, +GS_CHROOT_MK, GS_CHROOT_NI, GS_CHROOT_EXECLOG, GS_CHROOT_CAPS, +GS_CHROOT_SYSCTL, GS_TPE, GS_TPE_GID, GS_TPE_ALL, GS_SIDCAPS, +GS_RANDPID, GS_SOCKET_ALL, GS_SOCKET_ALL_GID, GS_SOCKET_CLIENT, +GS_SOCKET_CLIENT_GID, GS_SOCKET_SERVER, GS_SOCKET_SERVER_GID, +GS_GROUP, GS_GID, GS_ACHDIR, GS_AMOUNT, GS_AIPC, GS_DMSG, +GS_TEXTREL, GS_FINDTASK, GS_SHM, GS_LOCK, GS_MODSTOP, GS_RESLOG}; + + +ctl_table grsecurity_table[] = { +#ifdef CONFIG_GRKERNSEC_SYSCTL +#ifdef CONFIG_GRKERNSEC_LINK + { + .ctl_name = GS_LINK, + .procname = "linking_restrictions", + .data = &grsec_enable_link, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_FIFO + { + .ctl_name = GS_FIFO, + .procname = "fifo_restrictions", + .data = &grsec_enable_fifo, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_EXECVE + { + .ctl_name = GS_EXECVE, + .procname = "execve_limiting", + .data = &grsec_enable_execve, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_EXECLOG + { + .ctl_name = GS_EXECLOG, + .procname = "exec_logging", + .data = &grsec_enable_execlog, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_SIGNAL + { + .ctl_name = GS_SIGNAL, + .procname = "signal_logging", + .data = &grsec_enable_signal, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_FORKFAIL + { + .ctl_name = GS_FORKFAIL, + .procname = "forkfail_logging", + .data = 
&grsec_enable_forkfail, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_TIME + { + .ctl_name = GS_TIME, + .procname = "timechange_logging", + .data = &grsec_enable_time, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_CHROOT_SHMAT + { + .ctl_name = GS_CHROOT_SHMAT, + .procname = "chroot_deny_shmat", + .data = &grsec_enable_chroot_shmat, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_CHROOT_UNIX + { + .ctl_name = GS_CHROOT_UNIX, + .procname = "chroot_deny_unix", + .data = &grsec_enable_chroot_unix, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_CHROOT_MOUNT + { + .ctl_name = GS_CHROOT_MNT, + .procname = "chroot_deny_mount", + .data = &grsec_enable_chroot_mount, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_CHROOT_FCHDIR + { + .ctl_name = GS_CHROOT_FCHDIR, + .procname = "chroot_deny_fchdir", + .data = &grsec_enable_chroot_fchdir, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_CHROOT_DOUBLE + { + .ctl_name = GS_CHROOT_DBL, + .procname = "chroot_deny_chroot", + .data = &grsec_enable_chroot_double, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_CHROOT_PIVOT + { + .ctl_name = GS_CHROOT_PVT, + .procname = "chroot_deny_pivot", + .data = &grsec_enable_chroot_pivot, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_CHROOT_CHDIR + { + .ctl_name = GS_CHROOT_CD, + .procname = "chroot_enforce_chdir", + .data = &grsec_enable_chroot_chdir, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_CHROOT_CHMOD + { 
+ .ctl_name = GS_CHROOT_CM, + .procname = "chroot_deny_chmod", + .data = &grsec_enable_chroot_chmod, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_CHROOT_MKNOD + { + .ctl_name = GS_CHROOT_MK, + .procname = "chroot_deny_mknod", + .data = &grsec_enable_chroot_mknod, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_CHROOT_NICE + { + .ctl_name = GS_CHROOT_NI, + .procname = "chroot_restrict_nice", + .data = &grsec_enable_chroot_nice, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_CHROOT_EXECLOG + { + .ctl_name = GS_CHROOT_EXECLOG, + .procname = "chroot_execlog", + .data = &grsec_enable_chroot_execlog, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_CHROOT_CAPS + { + .ctl_name = GS_CHROOT_CAPS, + .procname = "chroot_caps", + .data = &grsec_enable_chroot_caps, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_CHROOT_SYSCTL + { + .ctl_name = GS_CHROOT_SYSCTL, + .procname = "chroot_deny_sysctl", + .data = &grsec_enable_chroot_sysctl, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_TPE + { + .ctl_name = GS_TPE, + .procname = "tpe", + .data = &grsec_enable_tpe, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = GS_TPE_GID, + .procname = "tpe_gid", + .data = &grsec_tpe_gid, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_TPE_ALL + { + .ctl_name = GS_TPE_ALL, + .procname = "tpe_restrict_all", + .data = &grsec_enable_tpe_all, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_RANDPID + { + .ctl_name = GS_RANDPID, + .procname 
= "rand_pids", + .data = &grsec_enable_randpid, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_SOCKET_ALL + { + .ctl_name = GS_SOCKET_ALL, + .procname = "socket_all", + .data = &grsec_enable_socket_all, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = GS_SOCKET_ALL_GID, + .procname = "socket_all_gid", + .data = &grsec_socket_all_gid, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_SOCKET_CLIENT + { + .ctl_name = GS_SOCKET_CLIENT, + .procname = "socket_client", + .data = &grsec_enable_socket_client, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = GS_SOCKET_CLIENT_GID, + .procname = "socket_client_gid", + .data = &grsec_socket_client_gid, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_SOCKET_SERVER + { + .ctl_name = GS_SOCKET_SERVER, + .procname = "socket_server", + .data = &grsec_enable_socket_server, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = GS_SOCKET_SERVER_GID, + .procname = "socket_server_gid", + .data = &grsec_socket_server_gid, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_AUDIT_GROUP + { + .ctl_name = GS_GROUP, + .procname = "audit_group", + .data = &grsec_enable_group, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = GS_GID, + .procname = "audit_gid", + .data = &grsec_audit_gid, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_AUDIT_CHDIR + { + .ctl_name = GS_ACHDIR, + .procname = "audit_chdir", + .data = &grsec_enable_chdir, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef 
CONFIG_GRKERNSEC_AUDIT_MOUNT + { + .ctl_name = GS_AMOUNT, + .procname = "audit_mount", + .data = &grsec_enable_mount, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_AUDIT_IPC + { + .ctl_name = GS_AIPC, + .procname = "audit_ipc", + .data = &grsec_enable_audit_ipc, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_AUDIT_TEXTREL + { + .ctl_name = GS_TEXTREL, + .procname = "audit_textrel", + .data = &grsec_enable_audit_textrel, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_DMESG + { + .ctl_name = GS_DMSG, + .procname = "dmesg", + .data = &grsec_enable_dmesg, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_CHROOT_FINDTASK + { + .ctl_name = GS_FINDTASK, + .procname = "chroot_findtask", + .data = &grsec_enable_chroot_findtask, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_SHM + { + .ctl_name = GS_SHM, + .procname = "destroy_unused_shm", + .data = &grsec_enable_shm, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_RESLOG + { + .ctl_name = GS_RESLOG, + .procname = "resource_logging", + .data = &grsec_resource_logging, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif + { + .ctl_name = GS_LOCK, + .procname = "grsec_lock", + .data = &grsec_lock, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_GRKERNSEC_MODSTOP + { + .ctl_name = GS_MODSTOP, + .procname = "disable_modules", + .data = &grsec_modstop, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, +#endif + { .ctl_name = 0 } +}; +#endif + +int gr_check_modstop(void) +{ +#ifdef CONFIG_GRKERNSEC_MODSTOP + if (grsec_modstop 
== 1) { + gr_log_noargs(GR_DONT_AUDIT, GR_STOPMOD_MSG); + return 1; + } +#endif + return 0; +} diff -urNp linux-2.6.17.11/grsecurity/grsec_textrel.c linux-2.6.17.11/grsecurity/grsec_textrel.c --- linux-2.6.17.11/grsecurity/grsec_textrel.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/grsecurity/grsec_textrel.c 2006-09-01 16:20:28.000000000 -0400 @@ -0,0 +1,16 @@ +#include +#include +#include +#include +#include +#include + +void +gr_log_textrel(struct vm_area_struct * vma) +{ +#ifdef CONFIG_GRKERNSEC_AUDIT_TEXTREL + if (grsec_enable_audit_textrel) + gr_log_textrel_ulong_ulong(GR_DO_AUDIT, GR_TEXTREL_AUDIT_MSG, vma->vm_file, vma->vm_start, vma->vm_pgoff); +#endif + return; +} diff -urNp linux-2.6.17.11/grsecurity/grsec_time.c linux-2.6.17.11/grsecurity/grsec_time.c --- linux-2.6.17.11/grsecurity/grsec_time.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/grsecurity/grsec_time.c 2006-09-01 16:20:28.000000000 -0400 @@ -0,0 +1,13 @@ +#include +#include +#include + +void +gr_log_timechange(void) +{ +#ifdef CONFIG_GRKERNSEC_TIME + if (grsec_enable_time) + gr_log_noargs(GR_DONT_AUDIT_GOOD, GR_TIME_MSG); +#endif + return; +} diff -urNp linux-2.6.17.11/grsecurity/grsec_tpe.c linux-2.6.17.11/grsecurity/grsec_tpe.c --- linux-2.6.17.11/grsecurity/grsec_tpe.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/grsecurity/grsec_tpe.c 2006-09-01 16:20:28.000000000 -0400 @@ -0,0 +1,37 @@ +#include +#include +#include +#include +#include + +extern int gr_acl_tpe_check(void); + +int +gr_tpe_allow(const struct file *file) +{ +#ifdef CONFIG_GRKERNSEC + struct inode *inode = file->f_dentry->d_parent->d_inode; + + if (current->uid && ((grsec_enable_tpe && +#ifdef CONFIG_GRKERNSEC_TPE_INVERT + !in_group_p(grsec_tpe_gid) +#else + in_group_p(grsec_tpe_gid) +#endif + ) || gr_acl_tpe_check()) && + (inode->i_uid || (!inode->i_uid && ((inode->i_mode & S_IWGRP) || + (inode->i_mode & S_IWOTH))))) { + gr_log_fs_generic(GR_DONT_AUDIT, GR_EXEC_TPE_MSG, file->f_dentry, 
file->f_vfsmnt); + return 0; + } +#ifdef CONFIG_GRKERNSEC_TPE_ALL + if (current->uid && grsec_enable_tpe && grsec_enable_tpe_all && + ((inode->i_uid && (inode->i_uid != current->uid)) || + (inode->i_mode & S_IWGRP) || (inode->i_mode & S_IWOTH))) { + gr_log_fs_generic(GR_DONT_AUDIT, GR_EXEC_TPE_MSG, file->f_dentry, file->f_vfsmnt); + return 0; + } +#endif +#endif + return 1; +} diff -urNp linux-2.6.17.11/grsecurity/grsum.c linux-2.6.17.11/grsecurity/grsum.c --- linux-2.6.17.11/grsecurity/grsum.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/grsecurity/grsum.c 2006-09-01 16:20:28.000000000 -0400 @@ -0,0 +1,59 @@ +#include +#include +#include +#include +#include +#include + + +#if !defined(CONFIG_CRYPTO) || defined(CONFIG_CRYPTO_MODULE) || !defined(CONFIG_CRYPTO_SHA256) || defined(CONFIG_CRYPTO_SHA256_MODULE) +#error "crypto and sha256 must be built into the kernel" +#endif + +int +chkpw(struct gr_arg *entry, unsigned char *salt, unsigned char *sum) +{ + char *p; + struct crypto_tfm *tfm; + unsigned char temp_sum[GR_SHA_LEN]; + struct scatterlist sg[2]; + volatile int retval = 0; + volatile int dummy = 0; + unsigned int i; + + tfm = crypto_alloc_tfm("sha256", 0); + if (tfm == NULL) { + /* should never happen, since sha256 should be built in */ + return 1; + } + + crypto_digest_init(tfm); + + p = salt; + sg[0].page = virt_to_page(p); + sg[0].offset = ((long) p & ~PAGE_MASK); + sg[0].length = GR_SALT_LEN; + + crypto_digest_update(tfm, sg, 1); + + p = entry->pw; + sg[0].page = virt_to_page(p); + sg[0].offset = ((long) p & ~PAGE_MASK); + sg[0].length = strlen(entry->pw); + + crypto_digest_update(tfm, sg, 1); + + crypto_digest_final(tfm, temp_sum); + + memset(entry->pw, 0, GR_PW_LEN); + + for (i = 0; i < GR_SHA_LEN; i++) + if (sum[i] != temp_sum[i]) + retval = 1; + else + dummy = 1; // waste a cycle + + crypto_free_tfm(tfm); + + return retval; +} diff -urNp linux-2.6.17.11/grsecurity/Kconfig linux-2.6.17.11/grsecurity/Kconfig --- 
linux-2.6.17.11/grsecurity/Kconfig 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/grsecurity/Kconfig 2006-09-01 16:20:28.000000000 -0400 @@ -0,0 +1,890 @@ +# +# grsecurity configuration +# + +menu "Grsecurity" + +config GRKERNSEC + bool "Grsecurity" + select CRYPTO + select CRYPTO_SHA256 + help + If you say Y here, you will be able to configure many features + that will enhance the security of your system. It is highly + recommended that you say Y here and read through the help + for each option so that you fully understand the features and + can evaluate their usefulness for your machine. + +choice + prompt "Security Level" + depends GRKERNSEC + default GRKERNSEC_CUSTOM + +config GRKERNSEC_LOW + bool "Low" + select GRKERNSEC_LINK + select GRKERNSEC_FIFO + select GRKERNSEC_RANDPID + select GRKERNSEC_EXECVE + select GRKERNSEC_RANDNET + select GRKERNSEC_DMESG + select GRKERNSEC_CHROOT_CHDIR + select GRKERNSEC_MODSTOP if (MODULES) + + help + If you choose this option, several of the grsecurity options will + be enabled that will give you greater protection against a number + of attacks, while assuring that none of your software will have any + conflicts with the additional security measures. If you run a lot + of unusual software, or you are having problems with the higher + security levels, you should say Y here.
With this option, the + following features are enabled: + + - Linking restrictions + - FIFO restrictions + - Randomized PIDs + - Enforcing RLIMIT_NPROC on execve + - Restricted dmesg + - Enforced chdir("/") on chroot + - Runtime module disabling + +config GRKERNSEC_MEDIUM + bool "Medium" + select PAX + select PAX_EI_PAX + select PAX_PT_PAX_FLAGS + select PAX_HAVE_ACL_FLAGS + select GRKERNSEC_PROC_MEMMAP if (PAX_NOEXEC || PAX_ASLR) + select GRKERNSEC_CHROOT_SYSCTL + select GRKERNSEC_LINK + select GRKERNSEC_FIFO + select GRKERNSEC_RANDPID + select GRKERNSEC_EXECVE + select GRKERNSEC_DMESG + select GRKERNSEC_RANDNET + select GRKERNSEC_FORKFAIL + select GRKERNSEC_TIME + select GRKERNSEC_SIGNAL + select GRKERNSEC_CHROOT + select GRKERNSEC_CHROOT_UNIX + select GRKERNSEC_CHROOT_MOUNT + select GRKERNSEC_CHROOT_PIVOT + select GRKERNSEC_CHROOT_DOUBLE + select GRKERNSEC_CHROOT_CHDIR + select GRKERNSEC_CHROOT_MKNOD + select GRKERNSEC_PROC + select GRKERNSEC_PROC_USERGROUP + select GRKERNSEC_MODSTOP if (MODULES) + select PAX_RANDUSTACK + select PAX_ASLR + select PAX_RANDMMAP + select PAX_NOVSYSCALL if (X86 && !X86_64) + + help + If you say Y here, several features in addition to those included + in the low additional security level will be enabled. These + features provide even more security to your system, though in rare + cases they may be incompatible with very old or poorly written + software. If you enable this option, make sure that your auth + service (identd) is running as gid 1001. 
With this option, + the following features (in addition to those provided in the + low additional security level) will be enabled: + + - Randomized TCP source ports + - Failed fork logging + - Time change logging + - Signal logging + - Deny mounts in chroot + - Deny double chrooting + - Deny sysctl writes in chroot + - Deny mknod in chroot + - Deny access to abstract AF_UNIX sockets out of chroot + - Deny pivot_root in chroot + - Denied writes of /dev/kmem, /dev/mem, and /dev/port + - /proc restrictions with special GID set to 10 (usually wheel) + - Address Space Layout Randomization (ASLR) + +config GRKERNSEC_HIGH + bool "High" + select GRKERNSEC_LINK + select GRKERNSEC_FIFO + select GRKERNSEC_RANDPID + select GRKERNSEC_EXECVE + select GRKERNSEC_DMESG + select GRKERNSEC_FORKFAIL + select GRKERNSEC_TIME + select GRKERNSEC_SIGNAL + select GRKERNSEC_CHROOT_SHMAT + select GRKERNSEC_CHROOT_UNIX + select GRKERNSEC_CHROOT_MOUNT + select GRKERNSEC_CHROOT_FCHDIR + select GRKERNSEC_CHROOT_PIVOT + select GRKERNSEC_CHROOT_DOUBLE + select GRKERNSEC_CHROOT_CHDIR + select GRKERNSEC_CHROOT_MKNOD + select GRKERNSEC_CHROOT_CAPS + select GRKERNSEC_CHROOT_SYSCTL + select GRKERNSEC_CHROOT_FINDTASK + select GRKERNSEC_PROC + select GRKERNSEC_PROC_MEMMAP if (PAX_NOEXEC || PAX_ASLR) + select GRKERNSEC_HIDESYM + select GRKERNSEC_BRUTE + select GRKERNSEC_SHM if (SYSVIPC) + select GRKERNSEC_PROC_USERGROUP + select GRKERNSEC_KMEM + select GRKERNSEC_RESLOG + select GRKERNSEC_RANDNET + select GRKERNSEC_PROC_ADD + select GRKERNSEC_CHROOT_CHMOD + select GRKERNSEC_CHROOT_NICE + select GRKERNSEC_AUDIT_MOUNT + select GRKERNSEC_MODSTOP if (MODULES) + select PAX + select PAX_RANDUSTACK + select PAX_ASLR + select PAX_RANDMMAP + select PAX_NOEXEC + select PAX_MPROTECT + select PAX_EI_PAX + select PAX_PT_PAX_FLAGS + select PAX_HAVE_ACL_FLAGS + select PAX_KERNEXEC if (!X86_64 && !MODULES && !HOTPLUG_PCI_COMPAQ_NVRAM && !PCI_BIOS) + select PAX_RANDKSTACK if (X86_TSC && !X86_64) + select PAX_SEGMEXEC if 
(X86 && !X86_64) + select PAX_PAGEEXEC if (!X86) + select PAX_EMUPLT if (ALPHA || PARISC || PPC32 || SPARC32 || SPARC64) + select PAX_DLRESOLVE if (SPARC32 || SPARC64) + select PAX_SYSCALL if (PPC32) + select PAX_EMUTRAMP if (PARISC) + select PAX_EMUSIGRT if (PARISC) + select PAX_NOVSYSCALL if (X86 && !X86_64) + select PAX_ETEXECRELOCS if (ALPHA || IA64 || PARISC) + help + If you say Y here, many of the features of grsecurity will be + enabled, which will protect you against many kinds of attacks + against your system. The heightened security comes at a cost + of an increased chance of incompatibilities with rare software + on your machine. Since this security level enables PaX, you should + view <http://pax.grsecurity.net> and read about the PaX + project. While you are there, download chpax and run it on + binaries that cause problems with PaX. Also remember that + since the /proc restrictions are enabled, you must run your + identd as gid 1001. This security level enables the following + features in addition to those listed in the low and medium + security levels: + + - Additional /proc restrictions + - Chmod restrictions in chroot + - No signals, ptrace, or viewing of processes outside of chroot + - Capability restrictions in chroot + - Deny fchdir out of chroot + - Priority restrictions in chroot + - Segmentation-based implementation of PaX + - Mprotect restrictions + - Removal of addresses from /proc/<pid>/[smaps|maps|stat] + - Kernel stack randomization + - Mount/unmount/remount logging + - Kernel symbol hiding + - Destroy unused shared memory + - Prevention of memory exhaustion-based exploits +config GRKERNSEC_CUSTOM + bool "Custom" + help + If you say Y here, you will be able to configure every grsecurity + option, which allows you to enable many more features that aren't + covered in the basic security levels. These additional features + include TPE, socket restrictions, and the sysctl system for + grsecurity.
It is advised that you read through the help for + each option to determine its usefulness in your situation. + +endchoice + +menu "Address Space Protection" +depends on GRKERNSEC + +config GRKERNSEC_KMEM + bool "Deny writing to /dev/kmem, /dev/mem, and /dev/port" + help + If you say Y here, /dev/kmem and /dev/mem won't be allowed to + be written to via mmap or otherwise to modify the running kernel. + /dev/port will also not be allowed to be opened. If you have module + support disabled, enabling this will close up four ways that are + currently used to insert malicious code into the running kernel. + Even with all these features enabled, we still highly recommend that + you use the RBAC system, as it is still possible for an attacker to + modify the running kernel through privileged I/O granted by ioperm/iopl. + If you are not using XFree86, you may be able to stop this additional + case by enabling the 'Disable privileged I/O' option. Though nothing + legitimately writes to /dev/kmem, XFree86 does need to write to /dev/mem, + but only to video memory, which is the only writing we allow in this + case. If /dev/kmem or /dev/mem are mmaped without PROT_WRITE, they will + not be allowed to mprotect it with PROT_WRITE later. + It is highly recommended that you say Y here if you meet all the + conditions above. + +config GRKERNSEC_IO + bool "Disable privileged I/O" + depends on X86 + select RTC + help + If you say Y here, all ioperm and iopl calls will return an error. + Ioperm and iopl can be used to modify the running kernel. + Unfortunately, some programs need this access to operate properly, + the most notable of which are XFree86 and hwclock. hwclock can be + remedied by having RTC support in the kernel, so CONFIG_RTC is + enabled if this option is enabled, to ensure that hwclock operates + correctly. XFree86 still will not operate correctly with this option + enabled, so DO NOT CHOOSE Y IF YOU USE XFree86. 
If you use XFree86 + and you still want to protect your kernel against modification, + use the RBAC system. + +config GRKERNSEC_PROC_MEMMAP + bool "Remove addresses from /proc/<pid>/[smaps|maps|stat]" + depends on PAX_NOEXEC || PAX_ASLR + help + If you say Y here, the /proc/<pid>/maps and /proc/<pid>/stat files will + give no information about the addresses of its mappings if + PaX features that rely on random addresses are enabled on the task. + If you use PaX it is greatly recommended that you say Y here as it + closes up a hole that makes the full ASLR useless for suid + binaries. + +config GRKERNSEC_BRUTE + bool "Deter exploit bruteforcing" + help + If you say Y here, attempts to bruteforce exploits against forking + daemons such as apache or sshd will be deterred. When a child of a + forking daemon is killed by PaX or crashes due to an illegal + instruction, the parent process will be delayed 30 seconds upon every + subsequent fork until the administrator is able to assess the + situation and restart the daemon. It is recommended that you also + enable signal logging in the auditing section so that logs are + generated when a process performs an illegal instruction. + +config GRKERNSEC_MODSTOP + bool "Runtime module disabling" + depends on MODULES + help + If you say Y here, you will be able to disable the ability to (un)load + modules at runtime. This feature is useful if you need the ability + to load kernel modules at boot time, but do not want to allow an + attacker to load a rootkit kernel module into the system, or to remove + a loaded kernel module important to system functioning. You should + enable the /dev/mem protection feature as well, since rootkits can be + inserted into the kernel via other methods than kernel modules. Since + an untrusted module could still be loaded by modifying init scripts and + rebooting the system, it is also recommended that you enable the RBAC + system.
If you enable this option, a sysctl option with name + "disable_modules" will be created. Setting this option to "1" disables + module loading. After this option is set, no further writes to it are + allowed until the system is rebooted. + +config GRKERNSEC_HIDESYM + bool "Hide kernel symbols" + help + If you say Y here, getting information on loaded modules, and + displaying all kernel symbols through a syscall will be restricted + to users with CAP_SYS_MODULE. This option is only effective + provided the following conditions are met: + 1) The kernel using grsecurity is not precompiled by some distribution + 2) You are using the RBAC system and hiding other files such as your + kernel image and System.map + 3) You have the additional /proc restrictions enabled, which removes + /proc/kcore + If the above conditions are met, this option will aid to provide a + useful protection against local and remote kernel exploitation of + overflows and arbitrary read/write vulnerabilities. + +endmenu +menu "Role Based Access Control Options" +depends on GRKERNSEC + +config GRKERNSEC_ACL_HIDEKERN + bool "Hide kernel processes" + help + If you say Y here, all kernel threads will be hidden to all + processes but those whose subject has the "view hidden processes" + flag. + +config GRKERNSEC_ACL_MAXTRIES + int "Maximum tries before password lockout" + default 3 + help + This option enforces the maximum number of times a user can attempt + to authorize themselves with the grsecurity RBAC system before being + denied the ability to attempt authorization again for a specified time. + The lower the number, the harder it will be to brute-force a password. + +config GRKERNSEC_ACL_TIMEOUT + int "Time to wait after max password tries, in seconds" + default 30 + help + This option specifies the time the user must wait after attempting to + authorize to the RBAC system with the maximum number of invalid + passwords. The higher the number, the harder it will be to brute-force + a password. 
+ +endmenu +menu "Filesystem Protections" +depends on GRKERNSEC + +config GRKERNSEC_PROC + bool "Proc restrictions" + help + If you say Y here, the permissions of the /proc filesystem + will be altered to enhance system security and privacy. You MUST + choose either a user only restriction or a user and group restriction. + Depending upon the option you choose, you can either restrict users to + see only the processes they themselves run, or choose a group that can + view all processes and files normally restricted to root if you choose + the "restrict to user only" option. NOTE: If you're running identd as + a non-root user, you will have to run it as the group you specify here. + +config GRKERNSEC_PROC_USER + bool "Restrict /proc to user only" + depends on GRKERNSEC_PROC + help + If you say Y here, non-root users will only be able to view their own + processes, and restricts them from viewing network-related information, + and viewing kernel symbol and module information. + +config GRKERNSEC_PROC_USERGROUP + bool "Allow special group" + depends on GRKERNSEC_PROC && !GRKERNSEC_PROC_USER + help + If you say Y here, you will be able to select a group that will be + able to view all processes, network-related information, and + kernel and symbol information. This option is useful if you want + to run identd as a non-root user. + +config GRKERNSEC_PROC_GID + int "GID for special group" + depends on GRKERNSEC_PROC_USERGROUP + default 1001 + +config GRKERNSEC_PROC_ADD + bool "Additional restrictions" + depends on GRKERNSEC_PROC_USER || GRKERNSEC_PROC_USERGROUP + help + If you say Y here, additional restrictions will be placed on + /proc that keep normal users from viewing device information and + slabinfo information that could be useful for exploits. 
+ +config GRKERNSEC_LINK + bool "Linking restrictions" + help + If you say Y here, /tmp race exploits will be prevented, since users + will no longer be able to follow symlinks owned by other users in + world-writable +t directories (i.e. /tmp), unless the owner of the + symlink is the owner of the directory. Users will also not be + able to hardlink to files they do not own. If the sysctl option is + enabled, a sysctl option with name "linking_restrictions" is created. + +config GRKERNSEC_FIFO + bool "FIFO restrictions" + help + If you say Y here, users will not be able to write to FIFOs they don't + own in world-writable +t directories (i.e. /tmp), unless the owner of + the FIFO is the same owner of the directory it's held in. If the sysctl + option is enabled, a sysctl option with name "fifo_restrictions" is + created. + +config GRKERNSEC_CHROOT + bool "Chroot jail restrictions" + help + If you say Y here, you will be able to choose several options that will + make breaking out of a chrooted jail much more difficult. If you + encounter no software incompatibilities with the following options, it + is recommended that you enable each one. + +config GRKERNSEC_CHROOT_MOUNT + bool "Deny mounts" + depends on GRKERNSEC_CHROOT + help + If you say Y here, processes inside a chroot will not be able to + mount or remount filesystems. If the sysctl option is enabled, a + sysctl option with name "chroot_deny_mount" is created. + +config GRKERNSEC_CHROOT_DOUBLE + bool "Deny double-chroots" + depends on GRKERNSEC_CHROOT + help + If you say Y here, processes inside a chroot will not be able to chroot + again outside the chroot. This is a widely used method of breaking + out of a chroot jail and should not be allowed. If the sysctl + option is enabled, a sysctl option with name + "chroot_deny_chroot" is created.
+ +config GRKERNSEC_CHROOT_PIVOT + bool "Deny pivot_root in chroot" + depends on GRKERNSEC_CHROOT + help + If you say Y here, processes inside a chroot will not be able to use + a function called pivot_root() that was introduced in Linux 2.3.41. It + works similar to chroot in that it changes the root filesystem. This + function could be misused in a chrooted process to attempt to break out + of the chroot, and therefore should not be allowed. If the sysctl + option is enabled, a sysctl option with name "chroot_deny_pivot" is + created. + +config GRKERNSEC_CHROOT_CHDIR + bool "Enforce chdir(\"/\") on all chroots" + depends on GRKERNSEC_CHROOT + help + If you say Y here, the current working directory of all newly-chrooted + applications will be set to the root directory of the chroot. + The man page on chroot(2) states: + Note that this call does not change the current working + directory, so that `.' can be outside the tree rooted at + `/'. In particular, the super-user can escape from a + `chroot jail' by doing `mkdir foo; chroot foo; cd ..'. + + It is recommended that you say Y here, since it's not known to break + any software. If the sysctl option is enabled, a sysctl option with + name "chroot_enforce_chdir" is created. + +config GRKERNSEC_CHROOT_CHMOD + bool "Deny (f)chmod +s" + depends on GRKERNSEC_CHROOT + help + If you say Y here, processes inside a chroot will not be able to chmod + or fchmod files to make them have suid or sgid bits. This protects + against another published method of breaking a chroot. If the sysctl + option is enabled, a sysctl option with name "chroot_deny_chmod" is + created. + +config GRKERNSEC_CHROOT_FCHDIR + bool "Deny fchdir out of chroot" + depends on GRKERNSEC_CHROOT + help + If you say Y here, a well-known method of breaking chroots by fchdir'ing + to a file descriptor of the chrooting process that points to a directory + outside the filesystem will be stopped.
If the sysctl option + is enabled, a sysctl option with name "chroot_deny_fchdir" is created. + +config GRKERNSEC_CHROOT_MKNOD + bool "Deny mknod" + depends on GRKERNSEC_CHROOT + help + If you say Y here, processes inside a chroot will not be allowed to + mknod. The problem with using mknod inside a chroot is that it + would allow an attacker to create a device entry that is the same + as one on the physical root of your system, which could range from + anything from the console device to a device for your harddrive (which + they could then use to wipe the drive or steal data). It is recommended + that you say Y here, unless you run into software incompatibilities. + If the sysctl option is enabled, a sysctl option with name + "chroot_deny_mknod" is created. + +config GRKERNSEC_CHROOT_SHMAT + bool "Deny shmat() out of chroot" + depends on GRKERNSEC_CHROOT + help + If you say Y here, processes inside a chroot will not be able to attach + to shared memory segments that were created outside of the chroot jail. + It is recommended that you say Y here. If the sysctl option is enabled, + a sysctl option with name "chroot_deny_shmat" is created. + +config GRKERNSEC_CHROOT_UNIX + bool "Deny access to abstract AF_UNIX sockets out of chroot" + depends on GRKERNSEC_CHROOT + help + If you say Y here, processes inside a chroot will not be able to + connect to abstract (meaning not belonging to a filesystem) Unix + domain sockets that were bound outside of a chroot. It is recommended + that you say Y here. If the sysctl option is enabled, a sysctl option + with name "chroot_deny_unix" is created. + +config GRKERNSEC_CHROOT_FINDTASK + bool "Protect outside processes" + depends on GRKERNSEC_CHROOT + help + If you say Y here, processes inside a chroot will not be able to + kill, send signals with fcntl, ptrace, capget, setpgid, getpgid, + getsid, or view any process outside of the chroot. If the sysctl + option is enabled, a sysctl option with name "chroot_findtask" is + created. 
+ +config GRKERNSEC_CHROOT_NICE + bool "Restrict priority changes" + depends on GRKERNSEC_CHROOT + help + If you say Y here, processes inside a chroot will not be able to raise + the priority of processes in the chroot, or alter the priority of + processes outside the chroot. This provides more security than simply + removing CAP_SYS_NICE from the process' capability set. If the + sysctl option is enabled, a sysctl option with name "chroot_restrict_nice" + is created. + +config GRKERNSEC_CHROOT_SYSCTL + bool "Deny sysctl writes" + depends on GRKERNSEC_CHROOT + help + If you say Y here, an attacker in a chroot will not be able to + write to sysctl entries, either by sysctl(2) or through a /proc + interface. It is strongly recommended that you say Y here. If the + sysctl option is enabled, a sysctl option with name + "chroot_deny_sysctl" is created. + +config GRKERNSEC_CHROOT_CAPS + bool "Capability restrictions" + depends on GRKERNSEC_CHROOT + help + If you say Y here, the capabilities on all root processes within a + chroot jail will be lowered to stop module insertion, raw i/o, + system and net admin tasks, rebooting the system, modifying immutable + files, modifying IPC owned by another, and changing the system time. + This is left an option because it can break some apps. Disable this + if your chrooted apps are having problems performing those kinds of + tasks. If the sysctl option is enabled, a sysctl option with + name "chroot_caps" is created. + +endmenu +menu "Kernel Auditing" +depends on GRKERNSEC + +config GRKERNSEC_AUDIT_GROUP + bool "Single group for auditing" + help + If you say Y here, the exec, chdir, (un)mount, and ipc logging features + will only operate on a group you specify. This option is recommended + if you only want to watch certain users instead of having a large + amount of logs from the entire system. If the sysctl option is enabled, + a sysctl option with name "audit_group" is created. 
+ +config GRKERNSEC_AUDIT_GID + int "GID for auditing" + depends on GRKERNSEC_AUDIT_GROUP + default 1007 + +config GRKERNSEC_EXECLOG + bool "Exec logging" + help + If you say Y here, all execve() calls will be logged (since the + other exec*() calls are frontends to execve(), all execution + will be logged). Useful for shell-servers that like to keep track + of their users. If the sysctl option is enabled, a sysctl option with + name "exec_logging" is created. + WARNING: This option when enabled will produce a LOT of logs, especially + on an active system. + +config GRKERNSEC_RESLOG + bool "Resource logging" + help + If you say Y here, all attempts to overstep resource limits will + be logged with the resource name, the requested size, and the current + limit. It is highly recommended that you say Y here. If the sysctl + option is enabled, a sysctl option with name "resource_logging" is + created. If the RBAC system is enabled, the sysctl value is ignored. + +config GRKERNSEC_CHROOT_EXECLOG + bool "Log execs within chroot" + help + If you say Y here, all executions inside a chroot jail will be logged + to syslog. This can cause a large amount of logs if certain + applications (eg. djb's daemontools) are installed on the system, and + is therefore left as an option. If the sysctl option is enabled, a + sysctl option with name "chroot_execlog" is created. + +config GRKERNSEC_AUDIT_CHDIR + bool "Chdir logging" + help + If you say Y here, all chdir() calls will be logged. If the sysctl + option is enabled, a sysctl option with name "audit_chdir" is created. + +config GRKERNSEC_AUDIT_MOUNT + bool "(Un)Mount logging" + help + If you say Y here, all mounts and unmounts will be logged. If the + sysctl option is enabled, a sysctl option with name "audit_mount" is + created. + +config GRKERNSEC_AUDIT_IPC + bool "IPC logging" + help + If you say Y here, creation and removal of message queues, semaphores, + and shared memory will be logged. 
If the sysctl option is enabled, a + sysctl option with name "audit_ipc" is created. + +config GRKERNSEC_SIGNAL + bool "Signal logging" + help + If you say Y here, certain important signals will be logged, such as + SIGSEGV, which will as a result inform you of when an error in a program + occurred, which in some cases could mean a possible exploit attempt. + If the sysctl option is enabled, a sysctl option with name + "signal_logging" is created. + +config GRKERNSEC_FORKFAIL + bool "Fork failure logging" + help + If you say Y here, all failed fork() attempts will be logged. + This could suggest a fork bomb, or someone attempting to overstep + their process limit. If the sysctl option is enabled, a sysctl option + with name "forkfail_logging" is created. + +config GRKERNSEC_TIME + bool "Time change logging" + help + If you say Y here, any changes of the system clock will be logged. + If the sysctl option is enabled, a sysctl option with name + "timechange_logging" is created. + +config GRKERNSEC_PROC_IPADDR + bool "/proc/<pid>/ipaddr support" + help + If you say Y here, a new entry will be added to each /proc/<pid> + directory that contains the IP address of the person using the task. + The IP is carried across local TCP and AF_UNIX stream sockets. + This information can be useful for IDS/IPSes to perform remote response + to a local attack. The entry is readable by only the owner of the + process (and root if he has CAP_DAC_OVERRIDE, which can be removed via + the RBAC system), and thus does not create privacy concerns. + +config GRKERNSEC_AUDIT_TEXTREL + bool 'ELF text relocations logging (READ HELP)' + depends on PAX_MPROTECT + help + If you say Y here, text relocations will be logged with the filename + of the offending library or binary. The purpose of the feature is + to help Linux distribution developers get rid of libraries and + binaries that need text relocations which hinder the future progress + of PaX.
Only Linux distribution developers should say Y here, and + never on a production machine, as this option creates an information + leak that could aid an attacker in defeating the randomization of + a single memory region. If the sysctl option is enabled, a sysctl + option with name "audit_textrel" is created. + +endmenu + +menu "Executable Protections" +depends on GRKERNSEC + +config GRKERNSEC_EXECVE + bool "Enforce RLIMIT_NPROC on execs" + help + If you say Y here, users with a resource limit on processes will + have the value checked during execve() calls. The current system + only checks the system limit during fork() calls. If the sysctl option + is enabled, a sysctl option with name "execve_limiting" is created. + +config GRKERNSEC_SHM + bool "Destroy unused shared memory" + depends on SYSVIPC + help + If you say Y here, shared memory will be destroyed when no one is + attached to it. Otherwise, resources involved with the shared + memory can be used up and not be associated with any process (as the + shared memory still exists, and the creating process has exited). If + the sysctl option is enabled, a sysctl option with name + "destroy_unused_shm" is created. + +config GRKERNSEC_DMESG + bool "Dmesg(8) restriction" + help + If you say Y here, non-root users will not be able to use dmesg(8) + to view up to the last 4kb of messages in the kernel's log buffer. + If the sysctl option is enabled, a sysctl option with name "dmesg" is + created. + +config GRKERNSEC_RANDPID + bool "Randomized PIDs" + help + If you say Y here, all PIDs created on the system will be + pseudo-randomly generated. This is extremely effective along + with the /proc restrictions to disallow an attacker from guessing + pids of daemons, etc. PIDs are also used in some cases as part + of a naming system for temporary files, so this option would keep + those filenames from being predicted as well. We also use code + to make sure that PID numbers aren't reused too soon. 
If the sysctl + option is enabled, a sysctl option with name "rand_pids" is created. + +config GRKERNSEC_TPE + bool "Trusted Path Execution (TPE)" + help + If you say Y here, you will be able to choose a gid to add to the + supplementary groups of users you want to mark as "untrusted." + These users will not be able to execute any files that are not in + root-owned directories writable only by root. If the sysctl option + is enabled, a sysctl option with name "tpe" is created. + +config GRKERNSEC_TPE_ALL + bool "Partially restrict non-root users" + depends on GRKERNSEC_TPE + help + If you say Y here, all non-root users other than the ones in the + group specified in the main TPE option will only be allowed to + execute files in directories they own that are not group or + world-writable, or in directories owned by root and writable only by + root. If the sysctl option is enabled, a sysctl option with name + "tpe_restrict_all" is created. + +config GRKERNSEC_TPE_INVERT + bool "Invert GID option" + depends on GRKERNSEC_TPE + help + If you say Y here, the group you specify in the TPE configuration will + decide what group TPE restrictions will be *disabled* for. This + option is useful if you want TPE restrictions to be applied to most + users on the system. + +config GRKERNSEC_TPE_GID + int "GID for untrusted users" + depends on GRKERNSEC_TPE && !GRKERNSEC_TPE_INVERT + default 1005 + help + If you have selected the "Invert GID option" above, setting this + GID determines what group TPE restrictions will be *disabled* for. + If you have not selected the "Invert GID option" above, setting this + GID determines what group TPE restrictions will be *enabled* for. + If the sysctl option is enabled, a sysctl option with name "tpe_gid" + is created.
+ +config GRKERNSEC_TPE_GID + int "GID for trusted users" + depends on GRKERNSEC_TPE && GRKERNSEC_TPE_INVERT + default 1005 + help + If you have selected the "Invert GID option" above, setting this + GID determines what group TPE restrictions will be *disabled* for. + If you have not selected the "Invert GID option" above, setting this + GID determines what group TPE restrictions will be *enabled* for. + If the sysctl option is enabled, a sysctl option with name "tpe_gid" + is created. + +endmenu +menu "Network Protections" +depends on GRKERNSEC + +config GRKERNSEC_RANDNET + bool "Larger entropy pools" + help + If you say Y here, the entropy pools used for many features of Linux + and grsecurity will be doubled in size. Since several grsecurity + features use additional randomness, it is recommended that you say Y + here. Saying Y here has a similar effect as modifying + /proc/sys/kernel/random/poolsize. + +config GRKERNSEC_SOCKET + bool "Socket restrictions" + help + If you say Y here, you will be able to choose from several options. + If you assign a GID on your system and add it to the supplementary + groups of users you want to restrict socket access to, this patch + will perform up to three things, based on the option(s) you choose. + +config GRKERNSEC_SOCKET_ALL + bool "Deny any sockets to group" + depends on GRKERNSEC_SOCKET + help + If you say Y here, you will be able to choose a GID whose users will + be unable to connect to other hosts from your machine or run server + applications from your machine. If the sysctl option is enabled, a + sysctl option with name "socket_all" is created. + +config GRKERNSEC_SOCKET_ALL_GID + int "GID to deny all sockets for" + depends on GRKERNSEC_SOCKET_ALL + default 1004 + help + Here you can choose the GID to disable socket access for. Remember to + add the users you want socket access disabled for to the GID + specified here. If the sysctl option is enabled, a sysctl option + with name "socket_all_gid" is created.
+ +config GRKERNSEC_SOCKET_CLIENT + bool "Deny client sockets to group" + depends on GRKERNSEC_SOCKET + help + If you say Y here, you will be able to choose a GID whose users will + be unable to connect to other hosts from your machine, but will be + able to run servers. If this option is enabled, all users in the group + you specify will have to use passive mode when initiating ftp transfers + from the shell on your machine. If the sysctl option is enabled, a + sysctl option with name "socket_client" is created. + +config GRKERNSEC_SOCKET_CLIENT_GID + int "GID to deny client sockets for" + depends on GRKERNSEC_SOCKET_CLIENT + default 1003 + help + Here you can choose the GID to disable client socket access for. + Remember to add the users you want client socket access disabled for to + the GID specified here. If the sysctl option is enabled, a sysctl + option with name "socket_client_gid" is created. + +config GRKERNSEC_SOCKET_SERVER + bool "Deny server sockets to group" + depends on GRKERNSEC_SOCKET + help + If you say Y here, you will be able to choose a GID whose users will + be unable to run server applications from your machine. If the sysctl + option is enabled, a sysctl option with name "socket_server" is created. + +config GRKERNSEC_SOCKET_SERVER_GID + int "GID to deny server sockets for" + depends on GRKERNSEC_SOCKET_SERVER + default 1002 + help + Here you can choose the GID to disable server socket access for. + Remember to add the users you want server socket access disabled for to + the GID specified here. If the sysctl option is enabled, a sysctl + option with name "socket_server_gid" is created. + +endmenu +menu "Sysctl support" +depends on GRKERNSEC && SYSCTL + +config GRKERNSEC_SYSCTL + bool "Sysctl support" + help + If you say Y here, you will be able to change the options that + grsecurity runs with at bootup, without having to recompile your + kernel.
You can echo values to files in /proc/sys/kernel/grsecurity + to enable (1) or disable (0) various features. All the sysctl entries + are mutable until the "grsec_lock" entry is set to a non-zero value. + All features enabled in the kernel configuration are disabled at boot + if you do not say Y to the "Turn on features by default" option. + All options should be set at startup, and the grsec_lock entry should + be set to a non-zero value after all the options are set. + *THIS IS EXTREMELY IMPORTANT* + +config GRKERNSEC_SYSCTL_ON + bool "Turn on features by default" + depends on GRKERNSEC_SYSCTL + help + If you say Y here, instead of having all features enabled in the + kernel configuration disabled at boot time, the features will be + enabled at boot time. It is recommended you say Y here unless + there is some reason you would want all sysctl-tunable features to + be disabled by default. As mentioned elsewhere, it is important + to enable the grsec_lock entry once you have finished modifying + the sysctl entries. + +endmenu +menu "Logging Options" +depends on GRKERNSEC + +config GRKERNSEC_FLOODTIME + int "Seconds in between log messages (minimum)" + default 10 + help + This option allows you to enforce the number of seconds between + grsecurity log messages. The default should be suitable for most + people, however, if you choose to change it, choose a value small enough + to allow informative logs to be produced, but large enough to + prevent flooding. + +config GRKERNSEC_FLOODBURST + int "Number of messages in a burst (maximum)" + default 4 + help + This option allows you to choose the maximum number of messages allowed + within the flood time interval you chose in a separate option. The + default should be suitable for most people, however if you find that + many of your logs are being interpreted as flooding, you may want to + raise this value. 
+ +endmenu + +endmenu diff -urNp linux-2.6.17.11/grsecurity/Makefile linux-2.6.17.11/grsecurity/Makefile --- linux-2.6.17.11/grsecurity/Makefile 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/grsecurity/Makefile 2006-09-01 16:20:28.000000000 -0400 @@ -0,0 +1,20 @@ +# grsecurity's ACL system was originally written in 2001 by Michael Dalton +# during 2001-2005 it has been completely redesigned by Brad Spengler +# into an RBAC system +# +# All code in this directory and various hooks inserted throughout the kernel +# are copyright Brad Spengler, and released under the GPL v2 or higher + +obj-y = grsec_chdir.o grsec_chroot.o grsec_exec.o grsec_fifo.o grsec_fork.o \ + grsec_mount.o grsec_rand.o grsec_sig.o grsec_sock.o grsec_sysctl.o \ + grsec_time.o grsec_tpe.o grsec_ipc.o grsec_link.o grsec_textrel.o + +obj-$(CONFIG_GRKERNSEC) += grsec_init.o grsum.o gracl.o gracl_ip.o gracl_segv.o \ + gracl_cap.o gracl_alloc.o gracl_shm.o grsec_mem.o gracl_fs.o \ + gracl_learn.o grsec_log.o +obj-$(CONFIG_GRKERNSEC_RESLOG) += gracl_res.o + +ifndef CONFIG_GRKERNSEC +obj-y += grsec_disabled.o +endif + diff -urNp linux-2.6.17.11/include/asm-alpha/a.out.h linux-2.6.17.11/include/asm-alpha/a.out.h --- linux-2.6.17.11/include/asm-alpha/a.out.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-alpha/a.out.h 2006-09-01 16:20:29.000000000 -0400 @@ -98,7 +98,7 @@ struct exec set_personality (((BFPM->sh_bang || EX.ah.entry < 0x100000000L \ ? ADDR_LIMIT_32BIT : 0) | PER_OSF4)) -#define STACK_TOP \ +#define __STACK_TOP \ (current->personality & ADDR_LIMIT_32BIT ? 
0x80000000 : 0x00120000000UL) #endif diff -urNp linux-2.6.17.11/include/asm-alpha/elf.h linux-2.6.17.11/include/asm-alpha/elf.h --- linux-2.6.17.11/include/asm-alpha/elf.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-alpha/elf.h 2006-09-01 16:20:29.000000000 -0400 @@ -91,6 +91,17 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_N #define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE(tsk) ((tsk)->personality & ADDR_LIMIT_32BIT ? 0x10000 : 0x120000000UL) + +#define PAX_DELTA_MMAP_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_MMAP_LEN(tsk) ((tsk)->personality & ADDR_LIMIT_32BIT ? 14 : 28) +#define PAX_DELTA_EXEC_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_EXEC_LEN(tsk) ((tsk)->personality & ADDR_LIMIT_32BIT ? 14 : 28) +#define PAX_DELTA_STACK_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_STACK_LEN(tsk) ((tsk)->personality & ADDR_LIMIT_32BIT ? 14 : 19) +#endif + /* $0 is set by ld.so to a pointer to a function which might be registered using atexit. 
This provides a mean for the dynamic linker to call DT_FINI functions for shared libraries that have diff -urNp linux-2.6.17.11/include/asm-alpha/kmap_types.h linux-2.6.17.11/include/asm-alpha/kmap_types.h --- linux-2.6.17.11/include/asm-alpha/kmap_types.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-alpha/kmap_types.h 2006-09-01 16:20:29.000000000 -0400 @@ -25,7 +25,8 @@ D(9) KM_IRQ0, D(10) KM_IRQ1, D(11) KM_SOFTIRQ0, D(12) KM_SOFTIRQ1, -D(13) KM_TYPE_NR +D(13) KM_CLEARPAGE, +D(14) KM_TYPE_NR }; #undef D diff -urNp linux-2.6.17.11/include/asm-alpha/page.h linux-2.6.17.11/include/asm-alpha/page.h --- linux-2.6.17.11/include/asm-alpha/page.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-alpha/page.h 2006-09-01 16:20:29.000000000 -0400 @@ -95,6 +95,16 @@ typedef unsigned long pgprot_t; #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#ifdef CONFIG_PAX_PAGEEXEC +#ifdef CONFIG_PAX_MPROTECT +#define __VM_STACK_FLAGS (((current->mm->pax_flags & MF_PAX_MPROTECT)?0:VM_MAYEXEC) | \ + ((current->mm->pax_flags & MF_PAX_PAGEEXEC)?0:VM_EXEC)) +#else +#define __VM_STACK_FLAGS (VM_MAYEXEC | ((current->mm->pax_flags & MF_PAX_PAGEEXEC)?0:VM_EXEC)) +#endif +#endif + #endif /* __KERNEL__ */ #include diff -urNp linux-2.6.17.11/include/asm-alpha/pgtable.h linux-2.6.17.11/include/asm-alpha/pgtable.h --- linux-2.6.17.11/include/asm-alpha/pgtable.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-alpha/pgtable.h 2006-09-01 16:20:29.000000000 -0400 @@ -102,6 +102,17 @@ struct vm_area_struct; #define PAGE_SHARED __pgprot(_PAGE_VALID | __ACCESS_BITS) #define PAGE_COPY __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW) #define PAGE_READONLY __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW) + +#ifdef CONFIG_PAX_PAGEEXEC +# define PAGE_SHARED_NOEXEC __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOE) +# define PAGE_COPY_NOEXEC __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW | 
_PAGE_FOE) +# define PAGE_READONLY_NOEXEC __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW | _PAGE_FOE) +#else +# define PAGE_SHARED_NOEXEC PAGE_SHARED +# define PAGE_COPY_NOEXEC PAGE_COPY +# define PAGE_READONLY_NOEXEC PAGE_READONLY +#endif + #define PAGE_KERNEL __pgprot(_PAGE_VALID | _PAGE_ASM | _PAGE_KRE | _PAGE_KWE) #define _PAGE_NORMAL(x) __pgprot(_PAGE_VALID | __ACCESS_BITS | (x)) diff -urNp linux-2.6.17.11/include/asm-arm/a.out.h linux-2.6.17.11/include/asm-arm/a.out.h --- linux-2.6.17.11/include/asm-arm/a.out.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-arm/a.out.h 2006-09-01 16:20:29.000000000 -0400 @@ -28,7 +28,7 @@ struct exec #define M_ARM 103 #ifdef __KERNEL__ -#define STACK_TOP ((current->personality == PER_LINUX_32BIT) ? \ +#define __STACK_TOP ((current->personality == PER_LINUX_32BIT) ? \ TASK_SIZE : TASK_SIZE_26) #endif diff -urNp linux-2.6.17.11/include/asm-arm/elf.h linux-2.6.17.11/include/asm-arm/elf.h --- linux-2.6.17.11/include/asm-arm/elf.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-arm/elf.h 2006-09-01 16:20:29.000000000 -0400 @@ -56,6 +56,17 @@ typedef struct user_fp elf_fpregset_t; #define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE(tsk) 0x00008000UL + +#define PAX_DELTA_MMAP_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_MMAP_LEN(tsk) ((tsk->personality == PER_LINUX_32BIT) ? 16 : 10) +#define PAX_DELTA_EXEC_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_EXEC_LEN(tsk) ((tsk->personality == PER_LINUX_32BIT) ? 16 : 10) +#define PAX_DELTA_STACK_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_STACK_LEN(tsk) ((tsk->personality == PER_LINUX_32BIT) ? 16 : 10) +#endif + /* When the program starts, a1 contains a pointer to a function to be registered with atexit, as per the SVR4 ABI. A value of 0 means we have no such handler. 
*/ diff -urNp linux-2.6.17.11/include/asm-arm/kmap_types.h linux-2.6.17.11/include/asm-arm/kmap_types.h --- linux-2.6.17.11/include/asm-arm/kmap_types.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-arm/kmap_types.h 2006-09-01 16:20:29.000000000 -0400 @@ -18,6 +18,7 @@ enum km_type { KM_IRQ1, KM_SOFTIRQ0, KM_SOFTIRQ1, + KM_CLEARPAGE, KM_TYPE_NR }; diff -urNp linux-2.6.17.11/include/asm-arm26/kmap_types.h linux-2.6.17.11/include/asm-arm26/kmap_types.h --- linux-2.6.17.11/include/asm-arm26/kmap_types.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-arm26/kmap_types.h 2006-09-01 16:20:29.000000000 -0400 @@ -6,7 +6,8 @@ */ enum km_type { KM_IRQ0, - KM_USER1 + KM_USER1, + KM_CLEARPAGE }; #endif diff -urNp linux-2.6.17.11/include/asm-cris/kmap_types.h linux-2.6.17.11/include/asm-cris/kmap_types.h --- linux-2.6.17.11/include/asm-cris/kmap_types.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-cris/kmap_types.h 2006-09-01 16:20:29.000000000 -0400 @@ -19,6 +19,7 @@ enum km_type { KM_IRQ1, KM_SOFTIRQ0, KM_SOFTIRQ1, + KM_CLEARPAGE, KM_TYPE_NR }; diff -urNp linux-2.6.17.11/include/asm-frv/kmap_types.h linux-2.6.17.11/include/asm-frv/kmap_types.h --- linux-2.6.17.11/include/asm-frv/kmap_types.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-frv/kmap_types.h 2006-09-01 16:20:29.000000000 -0400 @@ -23,6 +23,7 @@ enum km_type { KM_IRQ1, KM_SOFTIRQ0, KM_SOFTIRQ1, + KM_CLEARPAGE, KM_TYPE_NR }; diff -urNp linux-2.6.17.11/include/asm-h8300/kmap_types.h linux-2.6.17.11/include/asm-h8300/kmap_types.h --- linux-2.6.17.11/include/asm-h8300/kmap_types.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-h8300/kmap_types.h 2006-09-01 16:20:29.000000000 -0400 @@ -15,6 +15,7 @@ enum km_type { KM_IRQ1, KM_SOFTIRQ0, KM_SOFTIRQ1, + KM_CLEARPAGE, KM_TYPE_NR }; diff -urNp linux-2.6.17.11/include/asm-i386/alternative.h linux-2.6.17.11/include/asm-i386/alternative.h --- 
linux-2.6.17.11/include/asm-i386/alternative.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-i386/alternative.h 2006-09-01 16:20:29.000000000 -0400 @@ -47,7 +47,7 @@ extern void alternatives_smp_switch(int " .byte 662b-661b\n" /* sourcelen */ \ " .byte 664f-663f\n" /* replacementlen */ \ ".previous\n" \ - ".section .altinstr_replacement,\"ax\"\n" \ + ".section .altinstr_replacement,\"a\"\n" \ "663:\n\t" newinstr "\n664:\n" /* replacement */\ ".previous" :: "i" (feature) : "memory") @@ -71,7 +71,7 @@ extern void alternatives_smp_switch(int " .byte 662b-661b\n" /* sourcelen */ \ " .byte 664f-663f\n" /* replacementlen */ \ ".previous\n" \ - ".section .altinstr_replacement,\"ax\"\n" \ + ".section .altinstr_replacement,\"a\"\n" \ "663:\n\t" newinstr "\n664:\n" /* replacement */\ ".previous" :: "i" (feature), ##input) @@ -110,7 +110,7 @@ extern void alternatives_smp_switch(int " .byte 662b-661b\n" /* sourcelen */ \ " .byte 664f-663f\n" /* replacementlen */ \ ".previous\n" \ - ".section .smp_altinstr_replacement,\"awx\"\n" \ + ".section .smp_altinstr_replacement,\"aw\"\n" \ "663:\n\t" upinstr "\n" /* replacement */ \ "664:\n\t.fill 662b-661b,1,0x42\n" /* space for original */ \ ".previous" : args) diff -urNp linux-2.6.17.11/include/asm-i386/a.out.h linux-2.6.17.11/include/asm-i386/a.out.h --- linux-2.6.17.11/include/asm-i386/a.out.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-i386/a.out.h 2006-09-01 16:20:29.000000000 -0400 @@ -19,7 +19,11 @@ struct exec #ifdef __KERNEL__ -#define STACK_TOP TASK_SIZE +#ifdef CONFIG_PAX_SEGMEXEC +#define __STACK_TOP ((current->mm->pax_flags & MF_PAX_SEGMEXEC)?TASK_SIZE/2:TASK_SIZE) +#else +#define __STACK_TOP TASK_SIZE +#endif #endif diff -urNp linux-2.6.17.11/include/asm-i386/auxvec.h linux-2.6.17.11/include/asm-i386/auxvec.h --- linux-2.6.17.11/include/asm-i386/auxvec.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-i386/auxvec.h 2006-09-01 16:20:29.000000000 -0400 @@ 
-5,7 +5,9 @@ * Architecture-neutral AT_ values in 0-17, leave some room * for more of them, start the x86-specific ones at 32. */ +#ifndef CONFIG_PAX_NOVSYSCALL #define AT_SYSINFO 32 #define AT_SYSINFO_EHDR 33 +#endif #endif diff -urNp linux-2.6.17.11/include/asm-i386/bug.h linux-2.6.17.11/include/asm-i386/bug.h --- linux-2.6.17.11/include/asm-i386/bug.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-i386/bug.h 2006-09-01 16:20:31.000000000 -0400 @@ -12,10 +12,9 @@ #ifdef CONFIG_BUG #define HAVE_ARCH_BUG #ifdef CONFIG_DEBUG_BUGVERBOSE -#define BUG() \ - __asm__ __volatile__( "ud2\n" \ - "\t.word %c0\n" \ - "\t.long %c1\n" \ +#define BUG() \ + __asm__ __volatile__( "ud2\n\t" \ + "ljmp %0, %1\n\t" \ : : "i" (__LINE__), "i" (__FILE__)) #else #define BUG() __asm__ __volatile__("ud2\n") diff -urNp linux-2.6.17.11/include/asm-i386/checksum.h linux-2.6.17.11/include/asm-i386/checksum.h --- linux-2.6.17.11/include/asm-i386/checksum.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-i386/checksum.h 2006-09-01 16:20:29.000000000 -0400 @@ -30,6 +30,12 @@ asmlinkage unsigned int csum_partial(con asmlinkage unsigned int csum_partial_copy_generic(const unsigned char *src, unsigned char *dst, int len, int sum, int *src_err_ptr, int *dst_err_ptr); +asmlinkage unsigned int csum_partial_copy_generic_to_user(const unsigned char *src, unsigned char *dst, + int len, int sum, int *src_err_ptr, int *dst_err_ptr); + +asmlinkage unsigned int csum_partial_copy_generic_from_user(const unsigned char *src, unsigned char *dst, + int len, int sum, int *src_err_ptr, int *dst_err_ptr); + /* * Note: when you get a NULL pointer exception here this means someone * passed in an incorrect kernel address to one of these functions. 
@@ -49,7 +55,7 @@ unsigned int csum_partial_copy_from_user int len, int sum, int *err_ptr) { might_sleep(); - return csum_partial_copy_generic((__force unsigned char *)src, dst, + return csum_partial_copy_generic_from_user((__force unsigned char *)src, dst, len, sum, err_ptr, NULL); } @@ -183,7 +189,7 @@ static __inline__ unsigned int csum_and_ { might_sleep(); if (access_ok(VERIFY_WRITE, dst, len)) - return csum_partial_copy_generic(src, (__force unsigned char *)dst, len, sum, NULL, err_ptr); + return csum_partial_copy_generic_to_user(src, (__force unsigned char *)dst, len, sum, NULL, err_ptr); if (len) *err_ptr = -EFAULT; diff -urNp linux-2.6.17.11/include/asm-i386/desc.h linux-2.6.17.11/include/asm-i386/desc.h --- linux-2.6.17.11/include/asm-i386/desc.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-i386/desc.h 2006-09-01 16:20:29.000000000 -0400 @@ -10,11 +10,13 @@ #include #include -#include +#include #include +#include +#include -extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; +extern struct desc_struct cpu_gdt_table[NR_CPUS][PAGE_SIZE / sizeof(struct desc_struct)]; DECLARE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); @@ -24,13 +26,53 @@ struct Xgt_desc_struct { unsigned short pad; } __attribute__ ((packed)); -extern struct Xgt_desc_struct idt_descr; -DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); - +extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS]; static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) { - return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address; + return cpu_gdt_table[cpu]; +} + +#define pax_open_kernel(cr0) \ +do { \ + typecheck(unsigned long,cr0); \ + preempt_disable(); \ + cr0 = read_cr0(); \ + write_cr0(cr0 & ~0x10000UL); \ +} while(0) + +#define pax_close_kernel(cr0) \ +do { \ + typecheck(unsigned long,cr0); \ + write_cr0(cr0); \ + preempt_enable_no_resched(); \ +} while(0) + +static inline void set_user_cs(struct mm_struct *mm, int cpu) +{ +#if 
defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) + unsigned long base = mm->context.user_cs_base; + unsigned long limit = mm->context.user_cs_limit; + +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif + + if (likely(limit)) { + limit -= 1UL; + limit >>= 12; + } + + get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS].a = (limit & 0xFFFFUL) | (base << 16); + get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS].b = (limit & 0xF0000UL) | 0xC0FB00UL | (base & 0xFF000000UL) | ((base >> 16) & 0xFFUL); + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + +#endif } #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8)) @@ -50,7 +92,7 @@ static inline struct desc_struct *get_cp * This is the ldt that every process will get unless we need * something other than this. */ -extern struct desc_struct default_ldt[]; +extern const struct desc_struct default_ldt[]; extern void set_intr_gate(unsigned int irq, void * addr); #define _set_tssldt_desc(n,addr,limit,type) \ @@ -64,7 +106,7 @@ __asm__ __volatile__ ("movw %w3,0(%2)\n\ "rorl $16,%1" \ : "=m"(*(n)) : "q" (addr), "r"(n), "ir"(limit), "i"(type)) -static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, void *addr) +static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr) { _set_tssldt_desc(&get_cpu_gdt_table(cpu)[entry], (int)addr, offsetof(struct tss_struct, __cacheline_filler) - 1, 0x89); @@ -72,11 +114,28 @@ static inline void __set_tss_desc(unsign #define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr) -static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size) +static inline void __set_ldt_desc(unsigned int cpu, const void *addr, unsigned int size) { _set_tssldt_desc(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82); } +static inline void set_ldt_desc(unsigned int cpu, const void *addr, unsigned int size) +{ + +#ifdef CONFIG_PAX_KERNEXEC + 
unsigned long cr0; + + pax_open_kernel(cr0); +#endif + + _set_tssldt_desc(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82); + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + +} + #define LDT_entry_a(info) \ ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) @@ -90,7 +149,7 @@ static inline void set_ldt_desc(unsigned ((info)->seg_32bit << 22) | \ ((info)->limit_in_pages << 23) | \ ((info)->useable << 20) | \ - 0x7000) + 0x7100) #define LDT_empty(info) (\ (info)->base_addr == 0 && \ @@ -134,7 +193,7 @@ static inline void clear_LDT(void) */ static inline void load_LDT_nolock(mm_context_t *pc, int cpu) { - void *segments = pc->ldt; + const void *segments = pc->ldt; int count = pc->size; if (likely(!count)) { @@ -162,6 +221,22 @@ static inline unsigned long get_desc_bas return base; } +static inline void _load_LDT(mm_context_t *pc) +{ + int cpu = get_cpu(); + const void *segments = pc->ldt; + int count = pc->size; + + if (likely(!count)) { + segments = &default_ldt[0]; + count = 5; + } + + __set_ldt_desc(cpu, segments, count); + load_LDT_desc(); + put_cpu(); +} + #endif /* !__ASSEMBLY__ */ #endif diff -urNp linux-2.6.17.11/include/asm-i386/elf.h linux-2.6.17.11/include/asm-i386/elf.h --- linux-2.6.17.11/include/asm-i386/elf.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-i386/elf.h 2006-09-01 16:20:29.000000000 -0400 @@ -73,6 +73,17 @@ typedef struct user_fxsr_struct elf_fpxr #define ELF_ET_DYN_BASE ((TASK_UNMAPPED_BASE) * 2) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE(tsk) 0x10000000UL + +#define PAX_DELTA_MMAP_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_MMAP_LEN(tsk) ((tsk)->mm->pax_flags & MF_PAX_SEGMEXEC ? 15 : 16) +#define PAX_DELTA_EXEC_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_EXEC_LEN(tsk) 15 +#define PAX_DELTA_STACK_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_STACK_LEN(tsk) ((tsk)->mm->pax_flags & MF_PAX_SEGMEXEC ? 
15 : 16) +#endif + /* regs is struct pt_regs, pr_reg is elf_gregset_t (which is now struct_user_regs, they are different) */ @@ -131,7 +146,14 @@ extern int dump_task_extended_fpu (struc #define VSYSCALL_BASE (__fix_to_virt(FIX_VSYSCALL)) #define VSYSCALL_EHDR ((const struct elfhdr *) VSYSCALL_BASE) + +#ifndef CONFIG_PAX_NOVSYSCALL +#ifdef CONFIG_PAX_SEGMEXEC +#define VSYSCALL_ENTRY ((current->mm->pax_flags & MF_PAX_SEGMEXEC) ? (unsigned long) &__kernel_vsyscall - SEGMEXEC_TASK_SIZE : (unsigned long) &__kernel_vsyscall) +#else #define VSYSCALL_ENTRY ((unsigned long) &__kernel_vsyscall) +#endif + extern void __kernel_vsyscall; #define ARCH_DLINFO \ @@ -187,3 +209,5 @@ do { \ #endif #endif + +#endif diff -urNp linux-2.6.17.11/include/asm-i386/i387.h linux-2.6.17.11/include/asm-i386/i387.h --- linux-2.6.17.11/include/asm-i386/i387.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-i386/i387.h 2006-09-01 16:20:29.000000000 -0400 @@ -40,13 +40,8 @@ extern void kernel_fpu_begin(void); #define kernel_fpu_end() do { stts(); preempt_enable(); } while(0) /* We need a safe address that is cheap to find and that is already - in L1 during context switch. The best choices are unfortunately - different for UP and SMP */ -#ifdef CONFIG_SMP -#define safe_address (__per_cpu_offset[0]) -#else -#define safe_address (kstat_cpu(0).cpustat.user) -#endif + in L1 during context switch. 
*/ +#define safe_address (init_tss[smp_processor_id()].esp0) /* * These must be called with preempt disabled diff -urNp linux-2.6.17.11/include/asm-i386/kmap_types.h linux-2.6.17.11/include/asm-i386/kmap_types.h --- linux-2.6.17.11/include/asm-i386/kmap_types.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-i386/kmap_types.h 2006-09-01 16:20:29.000000000 -0400 @@ -23,7 +23,8 @@ D(9) KM_IRQ0, D(10) KM_IRQ1, D(11) KM_SOFTIRQ0, D(12) KM_SOFTIRQ1, -D(13) KM_TYPE_NR +D(13) KM_CLEARPAGE, +D(14) KM_TYPE_NR }; #undef D diff -urNp linux-2.6.17.11/include/asm-i386/mach-default/apm.h linux-2.6.17.11/include/asm-i386/mach-default/apm.h --- linux-2.6.17.11/include/asm-i386/mach-default/apm.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-i386/mach-default/apm.h 2006-09-01 16:20:29.000000000 -0400 @@ -36,7 +36,7 @@ static inline void apm_bios_call_asm(u32 __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" - "lcall *%%cs:apm_bios_entry\n\t" + "lcall *%%ss:apm_bios_entry\n\t" "setc %%al\n\t" "popl %%ebp\n\t" "popl %%edi\n\t" @@ -60,7 +60,7 @@ static inline u8 apm_bios_call_simple_as __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" - "lcall *%%cs:apm_bios_entry\n\t" + "lcall *%%ss:apm_bios_entry\n\t" "setc %%bl\n\t" "popl %%ebp\n\t" "popl %%edi\n\t" diff -urNp linux-2.6.17.11/include/asm-i386/mach-default/do_timer.h linux-2.6.17.11/include/asm-i386/mach-default/do_timer.h --- linux-2.6.17.11/include/asm-i386/mach-default/do_timer.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-i386/mach-default/do_timer.h 2006-09-01 16:20:29.000000000 -0400 @@ -18,7 +18,7 @@ static inline void do_timer_interrupt_ho { do_timer(regs); #ifndef CONFIG_SMP - update_process_times(user_mode_vm(regs)); + update_process_times(user_mode(regs)); #endif /* * In the SMP case we use the local APIC timer interrupt to do the diff -urNp linux-2.6.17.11/include/asm-i386/mach-visws/do_timer.h 
linux-2.6.17.11/include/asm-i386/mach-visws/do_timer.h --- linux-2.6.17.11/include/asm-i386/mach-visws/do_timer.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-i386/mach-visws/do_timer.h 2006-09-01 16:20:29.000000000 -0400 @@ -11,7 +11,7 @@ static inline void do_timer_interrupt_ho do_timer(regs); #ifndef CONFIG_SMP - update_process_times(user_mode_vm(regs)); + update_process_times(user_mode(regs)); #endif /* * In the SMP case we use the local APIC timer interrupt to do the diff -urNp linux-2.6.17.11/include/asm-i386/mach-voyager/do_timer.h linux-2.6.17.11/include/asm-i386/mach-voyager/do_timer.h --- linux-2.6.17.11/include/asm-i386/mach-voyager/do_timer.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-i386/mach-voyager/do_timer.h 2006-09-01 16:20:29.000000000 -0400 @@ -5,7 +5,7 @@ static inline void do_timer_interrupt_ho { do_timer(regs); #ifndef CONFIG_SMP - update_process_times(user_mode_vm(regs)); + update_process_times(user_mode(regs)); #endif voyager_timer_interrupt(regs); diff -urNp linux-2.6.17.11/include/asm-i386/mman.h linux-2.6.17.11/include/asm-i386/mman.h --- linux-2.6.17.11/include/asm-i386/mman.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-i386/mman.h 2006-09-01 16:20:29.000000000 -0400 @@ -11,6 +11,10 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ +#ifdef CONFIG_PAX_SEGMEXEC +#define MAP_MIRROR 0x20000 +#endif + #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ diff -urNp linux-2.6.17.11/include/asm-i386/mmu_context.h linux-2.6.17.11/include/asm-i386/mmu_context.h --- linux-2.6.17.11/include/asm-i386/mmu_context.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-i386/mmu_context.h 2006-09-01 16:20:29.000000000 -0400 @@ -46,6 +46,18 @@ static inline void switch_mm(struct mm_s */ if (unlikely(prev->context.ldt != next->context.ldt)) 
load_LDT_nolock(&next->context, cpu); + +#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_SMP) + cpu_clear(cpu, prev->context.cpu_user_cs_mask); + cpu_set(cpu, next->context.cpu_user_cs_mask); +#endif + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) + if (prev->context.user_cs_base != next->context.user_cs_base || + prev->context.user_cs_limit != next->context.user_cs_limit) +#endif + + set_user_cs(next, cpu); } #ifdef CONFIG_SMP else { @@ -58,6 +70,12 @@ static inline void switch_mm(struct mm_s */ load_cr3(next->pgd); load_LDT_nolock(&next->context, cpu); + +#ifdef CONFIG_PAX_PAGEEXEC + cpu_set(cpu, next->context.cpu_user_cs_mask); +#endif + + set_user_cs(next, cpu); } } #endif diff -urNp linux-2.6.17.11/include/asm-i386/mmu.h linux-2.6.17.11/include/asm-i386/mmu.h --- linux-2.6.17.11/include/asm-i386/mmu.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-i386/mmu.h 2006-09-01 16:20:29.000000000 -0400 @@ -12,6 +12,17 @@ typedef struct { int size; struct semaphore sem; void *ldt; + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) + unsigned long user_cs_base; + unsigned long user_cs_limit; + +#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_SMP) + cpumask_t cpu_user_cs_mask; +#endif + +#endif + } mm_context_t; #endif diff -urNp linux-2.6.17.11/include/asm-i386/module.h linux-2.6.17.11/include/asm-i386/module.h --- linux-2.6.17.11/include/asm-i386/module.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-i386/module.h 2006-09-01 16:20:29.000000000 -0400 @@ -72,6 +72,12 @@ struct mod_arch_specific #define MODULE_STACKSIZE "" #endif -#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_REGPARM MODULE_STACKSIZE +#ifdef CONFIG_GRKERNSEC +#define MODULE_GRSEC "GRSECURITY " +#else +#define MODULE_GRSEC "" +#endif + +#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_REGPARM MODULE_STACKSIZE MODULE_GRSEC #endif /* _ASM_I386_MODULE_H */ diff -urNp linux-2.6.17.11/include/asm-i386/page.h 
linux-2.6.17.11/include/asm-i386/page.h --- linux-2.6.17.11/include/asm-i386/page.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-i386/page.h 2006-09-01 16:20:31.000000000 -0400 @@ -52,13 +52,14 @@ typedef struct { unsigned long long pgpr #define pmd_val(x) ((x).pmd) #define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32)) #define __pmd(x) ((pmd_t) { (x) } ) +#define __pte(x) ({ pte_t __pte = {(x), (x) >> 32}; __pte; }) #define HPAGE_SHIFT 21 #else typedef struct { unsigned long pte_low; } pte_t; typedef struct { unsigned long pgd; } pgd_t; typedef struct { unsigned long pgprot; } pgprot_t; -#define boot_pte_t pte_t /* or would you rather have a typedef */ #define pte_val(x) ((x).pte_low) +#define __pte(x) ((pte_t) { (x) } ) #define HPAGE_SHIFT 22 #endif #define PTE_MASK PAGE_MASK @@ -73,7 +74,6 @@ typedef struct { unsigned long pgprot; } #define pgd_val(x) ((x).pgd) #define pgprot_val(x) ((x).pgprot) -#define __pte(x) ((pte_t) { (x) } ) #define __pgd(x) ((pgd_t) { (x) } ) #define __pgprot(x) ((pgprot_t) { (x) } ) @@ -118,6 +118,15 @@ extern int page_is_ram(unsigned long pag #endif #define __KERNEL_START (__PAGE_OFFSET + __PHYSICAL_START) +#ifdef CONFIG_PAX_KERNEXEC +#define __KERNEL_TEXT_OFFSET (__PAGE_OFFSET + ((__PHYSICAL_START + ~(4*1024*1024)) & (4*1024*1024))) +#ifndef __ASSEMBLY__ +extern unsigned char MODULES_VADDR[]; +extern unsigned char MODULES_END[]; +#endif +#else +#define __KERNEL_TEXT_OFFSET (0) +#endif #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) #define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE) @@ -137,6 +146,19 @@ extern int page_is_ram(unsigned long pag ((current->personality & READ_IMPLIES_EXEC) ? 
VM_EXEC : 0 ) | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) +#ifdef CONFIG_PAX_MPROTECT +#define __VM_STACK_FLAGS (((current->mm->pax_flags & MF_PAX_MPROTECT)?0:VM_MAYEXEC) | \ + ((current->mm->pax_flags & (MF_PAX_PAGEEXEC|MF_PAX_SEGMEXEC))?0:VM_EXEC)) +#else +#define __VM_STACK_FLAGS (VM_MAYEXEC | ((current->mm->pax_flags & (MF_PAX_PAGEEXEC|MF_PAX_SEGMEXEC))?0:VM_EXEC)) +#endif +#endif + +#ifdef CONFIG_PAX_PAGEEXEC +#define CONFIG_ARCH_TRACK_EXEC_LIMIT 1 +#endif + #endif /* __KERNEL__ */ #include diff -urNp linux-2.6.17.11/include/asm-i386/pgalloc.h linux-2.6.17.11/include/asm-i386/pgalloc.h --- linux-2.6.17.11/include/asm-i386/pgalloc.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-i386/pgalloc.h 2006-09-01 16:20:29.000000000 -0400 @@ -3,11 +3,17 @@ #include #include +#include #include #include /* for struct page */ +#ifdef CONFIG_PAX_NOVSYSCALL +#define pmd_populate_kernel(mm, pmd, pte) \ + set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte))) +#else #define pmd_populate_kernel(mm, pmd, pte) \ set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))) +#endif #define pmd_populate(mm, pmd, pte) \ set_pmd(pmd, __pmd(_PAGE_TABLE + \ diff -urNp linux-2.6.17.11/include/asm-i386/pgtable.h linux-2.6.17.11/include/asm-i386/pgtable.h --- linux-2.6.17.11/include/asm-i386/pgtable.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-i386/pgtable.h 2006-09-01 16:20:29.000000000 -0400 @@ -34,7 +34,6 @@ struct vm_area_struct; */ #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) extern unsigned long empty_zero_page[1024]; -extern pgd_t swapper_pg_dir[1024]; extern kmem_cache_t *pgd_cache; extern kmem_cache_t *pmd_cache; extern spinlock_t pgd_lock; @@ -59,6 +58,11 @@ void paging_init(void); # include #endif +extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; +#ifdef CONFIG_X86_PAE +extern pmd_t swapper_pm_dir[PTRS_PER_PGD][PTRS_PER_PMD]; +#endif + #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define 
PGDIR_MASK (~(PGDIR_SIZE-1)) @@ -68,9 +72,11 @@ void paging_init(void); #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT) #define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS) +#ifndef CONFIG_X86_PAE #define TWOLEVEL_PGDIR_SHIFT 22 #define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT) #define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS) +#endif /* Just any arbitrary offset to the start of the vmalloc VM area: the * current 8MB value just means that there will be a 8MB "hole" after the @@ -141,17 +147,26 @@ void paging_init(void); #define PAGE_SHARED_EXEC \ __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) -#define PAGE_COPY_NOEXEC \ - __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) #define PAGE_COPY_EXEC \ __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) -#define PAGE_COPY \ - PAGE_COPY_NOEXEC #define PAGE_READONLY \ __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) #define PAGE_READONLY_EXEC \ __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) +#ifdef CONFIG_PAX_PAGEEXEC +# define PAGE_SHARED_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED) +# define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED) +# define PAGE_READONLY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED) +#else +# define PAGE_SHARED_NOEXEC PAGE_SHARED +# define PAGE_COPY_NOEXEC \ + __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) +# define PAGE_READONLY_NOEXEC PAGE_READONLY +#endif + +#define PAGE_COPY \ + PAGE_COPY_NOEXEC #define _PAGE_KERNEL \ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX) #define _PAGE_KERNEL_EXEC \ @@ -176,18 +191,18 @@ extern unsigned long long __PAGE_KERNEL, * This is the closest we can get.. 
*/ #define __P000 PAGE_NONE -#define __P001 PAGE_READONLY -#define __P010 PAGE_COPY -#define __P011 PAGE_COPY +#define __P001 PAGE_READONLY_NOEXEC +#define __P010 PAGE_COPY_NOEXEC +#define __P011 PAGE_COPY_NOEXEC #define __P100 PAGE_READONLY_EXEC #define __P101 PAGE_READONLY_EXEC #define __P110 PAGE_COPY_EXEC #define __P111 PAGE_COPY_EXEC #define __S000 PAGE_NONE -#define __S001 PAGE_READONLY -#define __S010 PAGE_SHARED -#define __S011 PAGE_SHARED +#define __S001 PAGE_READONLY_NOEXEC +#define __S010 PAGE_SHARED_NOEXEC +#define __S011 PAGE_SHARED_NOEXEC #define __S100 PAGE_READONLY_EXEC #define __S101 PAGE_READONLY_EXEC #define __S110 PAGE_SHARED_EXEC @@ -431,6 +446,9 @@ extern void noexec_setup(const char *str #endif /* !__ASSEMBLY__ */ +#define HAVE_ARCH_UNMAPPED_AREA +#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN + #ifdef CONFIG_FLATMEM #define kern_addr_valid(addr) (1) #endif /* CONFIG_FLATMEM */ diff -urNp linux-2.6.17.11/include/asm-i386/processor.h linux-2.6.17.11/include/asm-i386/processor.h --- linux-2.6.17.11/include/asm-i386/processor.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-i386/processor.h 2006-09-01 16:20:29.000000000 -0400 @@ -19,7 +19,6 @@ #include #include #include -#include #include /* flag for disabling the tsc */ @@ -94,8 +93,6 @@ struct cpuinfo_x86 { extern struct cpuinfo_x86 boot_cpu_data; extern struct cpuinfo_x86 new_cpu_data; -extern struct tss_struct doublefault_tss; -DECLARE_PER_CPU(struct tss_struct, init_tss); #ifdef CONFIG_SMP extern struct cpuinfo_x86 cpu_data[]; @@ -325,10 +322,19 @@ extern int bootloader_type; */ #define TASK_SIZE (PAGE_OFFSET) +#ifdef CONFIG_PAX_SEGMEXEC +#define SEGMEXEC_TASK_SIZE ((PAGE_OFFSET) / 2) +#endif + /* This decides where the kernel will search for a free chunk of vm * space during mmap's. */ + +#ifdef CONFIG_PAX_SEGMEXEC +#define TASK_UNMAPPED_BASE (PAGE_ALIGN((current->mm->pax_flags & MF_PAX_SEGMEXEC) ? 
SEGMEXEC_TASK_SIZE/3 : TASK_SIZE/3)) +#else #define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3)) +#endif #define HAVE_ARCH_PICK_MMAP_LAYOUT @@ -444,6 +450,9 @@ struct tss_struct { #define ARCH_MIN_TASKALIGN 16 +extern struct tss_struct doublefault_tss; +extern struct tss_struct init_tss[NR_CPUS]; + struct thread_struct { /* cached TLS descriptors. */ struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; @@ -472,6 +481,7 @@ struct thread_struct { }; #define INIT_THREAD { \ + .esp0 = sizeof(init_stack) + (long)&init_stack - 8, \ .vm86_info = NULL, \ .sysenter_cs = __KERNEL_CS, \ .io_bitmap_ptr = NULL, \ @@ -484,7 +494,7 @@ struct thread_struct { * be within the limit. */ #define INIT_TSS { \ - .esp0 = sizeof(init_stack) + (long)&init_stack, \ + .esp0 = sizeof(init_stack) + (long)&init_stack - 8, \ .ss0 = __KERNEL_DS, \ .ss1 = __KERNEL_CS, \ .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \ @@ -560,11 +570,7 @@ void show_trace(struct task_struct *task unsigned long get_wchan(struct task_struct *p); #define THREAD_SIZE_LONGS (THREAD_SIZE/sizeof(unsigned long)) -#define KSTK_TOP(info) \ -({ \ - unsigned long *__ptr = (unsigned long *)(info); \ - (unsigned long)(&__ptr[THREAD_SIZE_LONGS]); \ -}) +#define KSTK_TOP(info) ((info)->task.thread.esp0) /* * The below -8 is to reserve 8 bytes on top of the ring0 stack. 
@@ -579,7 +585,7 @@ unsigned long get_wchan(struct task_stru #define task_pt_regs(task) \ ({ \ struct pt_regs *__regs__; \ - __regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \ + __regs__ = (struct pt_regs *)((task)->thread.esp0); \ __regs__ - 1; \ }) diff -urNp linux-2.6.17.11/include/asm-i386/ptrace.h linux-2.6.17.11/include/asm-i386/ptrace.h --- linux-2.6.17.11/include/asm-i386/ptrace.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-i386/ptrace.h 2006-09-01 16:20:29.000000000 -0400 @@ -65,17 +65,18 @@ struct task_struct; extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code); /* - * user_mode_vm(regs) determines whether a register set came from user mode. + * user_mode(regs) determines whether a register set came from user mode. * This is true if V8086 mode was enabled OR if the register set was from * protected mode with RPL-3 CS value. This tricky test checks that with * one comparison. Many places in the kernel can bypass this full check - * if they have already ruled out V8086 mode, so user_mode(regs) can be used. + * if they have already ruled out V8086 mode, so user_mode_novm(regs) can + * be used. 
*/ -static inline int user_mode(struct pt_regs *regs) +static inline int user_mode_novm(struct pt_regs *regs) { return (regs->xcs & 3) != 0; } -static inline int user_mode_vm(struct pt_regs *regs) +static inline int user_mode(struct pt_regs *regs) { return ((regs->xcs & 3) | (regs->eflags & VM_MASK)) != 0; } diff -urNp linux-2.6.17.11/include/asm-i386/system.h linux-2.6.17.11/include/asm-i386/system.h --- linux-2.6.17.11/include/asm-i386/system.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-i386/system.h 2006-09-01 16:20:29.000000000 -0400 @@ -5,6 +5,7 @@ #include #include #include +#include #include /* for LOCK_PREFIX */ #ifdef __KERNEL__ @@ -151,7 +152,7 @@ static inline unsigned long get_limit(un unsigned long __limit; __asm__("lsll %1,%0" :"=r" (__limit):"r" (segment)); - return __limit+1; + return __limit; } #define nop() __asm__ __volatile__ ("nop") @@ -496,7 +497,7 @@ static inline void sched_cacheflush(void wbinvd(); } -extern unsigned long arch_align_stack(unsigned long sp); +#define arch_align_stack(x) (x) extern void free_init_pages(char *what, unsigned long begin, unsigned long end); void default_idle(void); diff -urNp linux-2.6.17.11/include/asm-i386/uaccess.h linux-2.6.17.11/include/asm-i386/uaccess.h --- linux-2.6.17.11/include/asm-i386/uaccess.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-i386/uaccess.h 2006-09-01 16:20:29.000000000 -0400 @@ -10,6 +10,8 @@ #include #include #include +#include +#include #define VERIFY_READ 0 #define VERIFY_WRITE 1 @@ -30,7 +32,8 @@ #define get_ds() (KERNEL_DS) #define get_fs() (current_thread_info()->addr_limit) -#define set_fs(x) (current_thread_info()->addr_limit = (x)) +void __set_fs(mm_segment_t x, int cpu); +void set_fs(mm_segment_t x); #define segment_eq(a,b) ((a).seg == (b).seg) @@ -281,9 +284,12 @@ extern void __put_user_8(void); #define __put_user_u64(x, addr, err) \ __asm__ __volatile__( \ - "1: movl %%eax,0(%2)\n" \ - "2: movl %%edx,4(%2)\n" \ + " movw 
%w5,%%ds\n" \ + "1: movl %%eax,%%ds:0(%2)\n" \ + "2: movl %%edx,%%ds:4(%2)\n" \ "3:\n" \ + " pushl %%ss\n" \ + " popl %%ds\n" \ ".section .fixup,\"ax\"\n" \ "4: movl %3,%0\n" \ " jmp 3b\n" \ @@ -294,7 +300,8 @@ extern void __put_user_8(void); " .long 2b,4b\n" \ ".previous" \ : "=r"(err) \ - : "A" (x), "r" (addr), "i"(-EFAULT), "0"(err)) + : "A" (x), "r" (addr), "i"(-EFAULT), "0"(err), \ + "r"(__USER_DS)) #ifdef CONFIG_X86_WP_WORKS_OK @@ -333,8 +340,11 @@ struct __large_struct { unsigned long bu */ #define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \ __asm__ __volatile__( \ - "1: mov"itype" %"rtype"1,%2\n" \ + " movw %w5,%%ds\n" \ + "1: mov"itype" %"rtype"1,%%ds:%2\n" \ "2:\n" \ + " pushl %%ss\n" \ + " popl %%ds\n" \ ".section .fixup,\"ax\"\n" \ "3: movl %3,%0\n" \ " jmp 2b\n" \ @@ -344,7 +354,8 @@ struct __large_struct { unsigned long bu " .long 1b,3b\n" \ ".previous" \ : "=r"(err) \ - : ltype (x), "m"(__m(addr)), "i"(errret), "0"(err)) + : ltype (x), "m"(__m(addr)), "i"(errret), "0"(err), \ + "r"(__USER_DS)) #define __get_user_nocheck(x,ptr,size) \ @@ -372,8 +383,11 @@ do { \ #define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \ __asm__ __volatile__( \ - "1: mov"itype" %2,%"rtype"1\n" \ + " movw %w5,%%ds\n" \ + "1: mov"itype" %%ds:%2,%"rtype"1\n" \ "2:\n" \ + " pushl %%ss\n" \ + " popl %%ds\n" \ ".section .fixup,\"ax\"\n" \ "3: movl %3,%0\n" \ " xor"itype" %"rtype"1,%"rtype"1\n" \ @@ -384,7 +398,7 @@ do { \ " .long 1b,3b\n" \ ".previous" \ : "=r"(err), ltype (x) \ - : "m"(__m(addr)), "i"(errret), "0"(err)) + : "m"(__m(addr)), "i"(errret), "0"(err), "r"(__USER_DS)) unsigned long __must_check __copy_to_user_ll(void __user *to, diff -urNp linux-2.6.17.11/include/asm-ia64/elf.h linux-2.6.17.11/include/asm-ia64/elf.h --- linux-2.6.17.11/include/asm-ia64/elf.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-ia64/elf.h 2006-09-01 16:20:29.000000000 -0400 @@ -163,6 +163,16 @@ typedef elf_greg_t elf_gregset_t[ELF_NGR typedef 
struct ia64_fpreg elf_fpreg_t; typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE(tsk) ((tsk)->personality == PER_LINUX32 ? 0x08048000UL : 0x4000000000000000UL) + +#define PAX_DELTA_MMAP_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_MMAP_LEN(tsk) ((tsk)->personality == PER_LINUX32 ? 16 : 3*PAGE_SHIFT - 13) +#define PAX_DELTA_EXEC_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_EXEC_LEN(tsk) ((tsk)->personality == PER_LINUX32 ? 16 : 3*PAGE_SHIFT - 13) +#define PAX_DELTA_STACK_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_STACK_LEN(tsk) ((tsk)->personality == PER_LINUX32 ? 16 : 3*PAGE_SHIFT - 13) +#endif struct pt_regs; /* forward declaration... */ diff -urNp linux-2.6.17.11/include/asm-ia64/kmap_types.h linux-2.6.17.11/include/asm-ia64/kmap_types.h --- linux-2.6.17.11/include/asm-ia64/kmap_types.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-ia64/kmap_types.h 2006-09-01 16:20:29.000000000 -0400 @@ -23,7 +23,8 @@ D(9) KM_IRQ0, D(10) KM_IRQ1, D(11) KM_SOFTIRQ0, D(12) KM_SOFTIRQ1, -D(13) KM_TYPE_NR +D(13) KM_CLEARPAGE, +D(14) KM_TYPE_NR }; #undef D diff -urNp linux-2.6.17.11/include/asm-ia64/page.h linux-2.6.17.11/include/asm-ia64/page.h --- linux-2.6.17.11/include/asm-ia64/page.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-ia64/page.h 2006-09-01 16:20:29.000000000 -0400 @@ -229,4 +229,13 @@ get_order (unsigned long size) (((current->personality & READ_IMPLIES_EXEC) != 0) \ ? 
VM_EXEC : 0)) +#ifdef CONFIG_PAX_PAGEEXEC +#ifdef CONFIG_PAX_MPROTECT +#define __VM_STACK_FLAGS (((current->mm->pax_flags & MF_PAX_MPROTECT)?0:VM_MAYEXEC) | \ + ((current->mm->pax_flags & MF_PAX_PAGEEXEC)?0:VM_EXEC)) +#else +#define __VM_STACK_FLAGS (VM_MAYEXEC | ((current->mm->pax_flags & MF_PAX_PAGEEXEC)?0:VM_EXEC)) +#endif +#endif + #endif /* _ASM_IA64_PAGE_H */ diff -urNp linux-2.6.17.11/include/asm-ia64/pgtable.h linux-2.6.17.11/include/asm-ia64/pgtable.h --- linux-2.6.17.11/include/asm-ia64/pgtable.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-ia64/pgtable.h 2006-09-01 16:20:29.000000000 -0400 @@ -144,6 +144,17 @@ #define PAGE_READONLY __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) #define PAGE_COPY __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) #define PAGE_COPY_EXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX) + +#ifdef CONFIG_PAX_PAGEEXEC +# define PAGE_SHARED_NOEXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RW) +# define PAGE_READONLY_NOEXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) +# define PAGE_COPY_NOEXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) +#else +# define PAGE_SHARED_NOEXEC PAGE_SHARED +# define PAGE_READONLY_NOEXEC PAGE_READONLY +# define PAGE_COPY_NOEXEC PAGE_COPY +#endif + #define PAGE_GATE __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_X_RX) #define PAGE_KERNEL __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX) #define PAGE_KERNELRX __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_RX) diff -urNp linux-2.6.17.11/include/asm-ia64/processor.h linux-2.6.17.11/include/asm-ia64/processor.h --- linux-2.6.17.11/include/asm-ia64/processor.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-ia64/processor.h 2006-09-01 16:20:29.000000000 -0400 @@ -284,7 +284,7 @@ struct thread_struct { .on_ustack = 0, \ .ksp = 0, \ .map_base = DEFAULT_MAP_BASE, \ - .rbs_bot = STACK_TOP - DEFAULT_USER_STACK_SIZE, \ + .rbs_bot = __STACK_TOP - DEFAULT_USER_STACK_SIZE, \ .task_size = 
DEFAULT_TASK_SIZE, \ .last_fph_cpu = -1, \ INIT_THREAD_IA32 \ diff -urNp linux-2.6.17.11/include/asm-ia64/ustack.h linux-2.6.17.11/include/asm-ia64/ustack.h --- linux-2.6.17.11/include/asm-ia64/ustack.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-ia64/ustack.h 2006-09-01 16:20:29.000000000 -0400 @@ -11,6 +11,6 @@ #define MAX_USER_STACK_SIZE (RGN_MAP_LIMIT/2) /* Make a default stack size of 2GB */ #define DEFAULT_USER_STACK_SIZE (1UL << 31) -#define STACK_TOP (0x6000000000000000UL + RGN_MAP_LIMIT) +#define __STACK_TOP (0x6000000000000000UL + RGN_MAP_LIMIT) #endif /* _ASM_IA64_USTACK_H */ diff -urNp linux-2.6.17.11/include/asm-m32r/kmap_types.h linux-2.6.17.11/include/asm-m32r/kmap_types.h --- linux-2.6.17.11/include/asm-m32r/kmap_types.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-m32r/kmap_types.h 2006-09-01 16:20:29.000000000 -0400 @@ -25,7 +25,8 @@ D(9) KM_IRQ0, D(10) KM_IRQ1, D(11) KM_SOFTIRQ0, D(12) KM_SOFTIRQ1, -D(13) KM_TYPE_NR +D(13) KM_CLEARPAGE, +D(14) KM_TYPE_NR }; #undef D diff -urNp linux-2.6.17.11/include/asm-m68k/kmap_types.h linux-2.6.17.11/include/asm-m68k/kmap_types.h --- linux-2.6.17.11/include/asm-m68k/kmap_types.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-m68k/kmap_types.h 2006-09-01 16:20:29.000000000 -0400 @@ -15,6 +15,7 @@ enum km_type { KM_IRQ1, KM_SOFTIRQ0, KM_SOFTIRQ1, + KM_CLEARPAGE, KM_TYPE_NR }; diff -urNp linux-2.6.17.11/include/asm-m68knommu/kmap_types.h linux-2.6.17.11/include/asm-m68knommu/kmap_types.h --- linux-2.6.17.11/include/asm-m68knommu/kmap_types.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-m68knommu/kmap_types.h 2006-09-01 16:20:29.000000000 -0400 @@ -15,6 +15,7 @@ enum km_type { KM_IRQ1, KM_SOFTIRQ0, KM_SOFTIRQ1, + KM_CLEARPAGE, KM_TYPE_NR }; diff -urNp linux-2.6.17.11/include/asm-mips/a.out.h linux-2.6.17.11/include/asm-mips/a.out.h --- linux-2.6.17.11/include/asm-mips/a.out.h 2006-08-07 00:18:54.000000000 -0400 +++ 
linux-2.6.17.11/include/asm-mips/a.out.h 2006-09-01 16:20:29.000000000 -0400 @@ -36,10 +36,10 @@ struct exec #ifdef __KERNEL__ #ifdef CONFIG_32BIT -#define STACK_TOP TASK_SIZE +#define __STACK_TOP TASK_SIZE #endif #ifdef CONFIG_64BIT -#define STACK_TOP (current->thread.mflags & MF_32BIT_ADDR ? TASK_SIZE32 : TASK_SIZE) +#define __STACK_TOP (current->thread.mflags & MF_32BIT_ADDR ? TASK_SIZE32 : TASK_SIZE) #endif #endif diff -urNp linux-2.6.17.11/include/asm-mips/elf.h linux-2.6.17.11/include/asm-mips/elf.h --- linux-2.6.17.11/include/asm-mips/elf.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-mips/elf.h 2006-09-01 16:20:29.000000000 -0400 @@ -372,4 +372,15 @@ extern int dump_task_fpu(struct task_str #define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) #endif +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE(tsk) (((tsk)->thread.mflags & MF_32BIT_ADDR) ? 0x00400000UL : 0x00400000UL) + +#define PAX_DELTA_MMAP_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_MMAP_LEN(tsk) (((tsk)->thread.mflags & MF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) +#define PAX_DELTA_EXEC_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_EXEC_LEN(tsk) (((tsk)->thread.mflags & MF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) +#define PAX_DELTA_STACK_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_STACK_LEN(tsk) (((tsk)->thread.mflags & MF_32BIT_ADDR) ? 
27-PAGE_SHIFT : 36-PAGE_SHIFT) +#endif + #endif /* _ASM_ELF_H */ diff -urNp linux-2.6.17.11/include/asm-mips/kmap_types.h linux-2.6.17.11/include/asm-mips/kmap_types.h --- linux-2.6.17.11/include/asm-mips/kmap_types.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-mips/kmap_types.h 2006-09-01 16:20:29.000000000 -0400 @@ -23,7 +23,8 @@ D(9) KM_IRQ0, D(10) KM_IRQ1, D(11) KM_SOFTIRQ0, D(12) KM_SOFTIRQ1, -D(13) KM_TYPE_NR +D(13) KM_CLEARPAGE, +D(14) KM_TYPE_NR }; #undef D diff -urNp linux-2.6.17.11/include/asm-mips/page.h linux-2.6.17.11/include/asm-mips/page.h --- linux-2.6.17.11/include/asm-mips/page.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-mips/page.h 2006-09-01 16:20:31.000000000 -0400 @@ -79,15 +79,17 @@ static inline void copy_user_page(void * #ifdef CONFIG_CPU_MIPS32 typedef struct { unsigned long pte_low, pte_high; } pte_t; #define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32)) + #define __pte(x) ({ pte_t __pte = {(x), (x) >> 32}; __pte; }) #else typedef struct { unsigned long long pte; } pte_t; #define pte_val(x) ((x).pte) + #define __pte(x) ((pte_t) { (x) } ) #endif #else typedef struct { unsigned long pte; } pte_t; #define pte_val(x) ((x).pte) -#endif #define __pte(x) ((pte_t) { (x) } ) +#endif /* * For 3-level pagetables we defines these ourselves, for 2-level the @@ -151,6 +153,15 @@ typedef struct { unsigned long pgprot; } #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#ifdef CONFIG_PAX_PAGEEXEC +#ifdef CONFIG_PAX_MPROTECT +#define __VM_STACK_FLAGS (((current->mm->pax_flags & MF_PAX_MPROTECT)?0:VM_MAYEXEC) | \ + ((current->mm->pax_flags & MF_PAX_PAGEEXEC)?0:VM_EXEC)) +#else +#define __VM_STACK_FLAGS (VM_MAYEXEC | ((current->mm->pax_flags & MF_PAX_PAGEEXEC)?0:VM_EXEC)) +#endif +#endif + #define UNCAC_ADDR(addr) ((addr) - PAGE_OFFSET + UNCAC_BASE) #define CAC_ADDR(addr) ((addr) - UNCAC_BASE + PAGE_OFFSET) diff -urNp 
linux-2.6.17.11/include/asm-parisc/a.out.h linux-2.6.17.11/include/asm-parisc/a.out.h --- linux-2.6.17.11/include/asm-parisc/a.out.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-parisc/a.out.h 2006-09-01 16:20:29.000000000 -0400 @@ -22,7 +22,7 @@ struct exec /* XXX: STACK_TOP actually should be STACK_BOTTOM for parisc. * prumpf */ -#define STACK_TOP TASK_SIZE +#define __STACK_TOP TASK_SIZE #endif diff -urNp linux-2.6.17.11/include/asm-parisc/elf.h linux-2.6.17.11/include/asm-parisc/elf.h --- linux-2.6.17.11/include/asm-parisc/elf.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-parisc/elf.h 2006-09-01 16:20:29.000000000 -0400 @@ -337,6 +337,17 @@ struct pt_regs; /* forward declaration.. #define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x01000000) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE(tsk) 0x10000UL + +#define PAX_DELTA_MMAP_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_MMAP_LEN(tsk) 16 +#define PAX_DELTA_EXEC_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_EXEC_LEN(tsk) 16 +#define PAX_DELTA_STACK_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_STACK_LEN(tsk) 16 +#endif + /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. This could be done in user space, but it's not easy, and we've already done it here. 
*/ diff -urNp linux-2.6.17.11/include/asm-parisc/kmap_types.h linux-2.6.17.11/include/asm-parisc/kmap_types.h --- linux-2.6.17.11/include/asm-parisc/kmap_types.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-parisc/kmap_types.h 2006-09-01 16:20:29.000000000 -0400 @@ -23,7 +23,8 @@ D(9) KM_IRQ0, D(10) KM_IRQ1, D(11) KM_SOFTIRQ0, D(12) KM_SOFTIRQ1, -D(13) KM_TYPE_NR +D(13) KM_CLEARPAGE, +D(14) KM_TYPE_NR }; #undef D diff -urNp linux-2.6.17.11/include/asm-parisc/page.h linux-2.6.17.11/include/asm-parisc/page.h --- linux-2.6.17.11/include/asm-parisc/page.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-parisc/page.h 2006-09-01 16:20:29.000000000 -0400 @@ -189,6 +189,15 @@ extern int npmem_ranges; #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#ifdef CONFIG_PAX_PAGEEXEC +#ifdef CONFIG_PAX_MPROTECT +#define __VM_STACK_FLAGS (((current->mm->pax_flags & MF_PAX_MPROTECT)?0:VM_MAYEXEC) | \ + ((current->mm->pax_flags & MF_PAX_PAGEEXEC)?0:VM_EXEC)) +#else +#define __VM_STACK_FLAGS (VM_MAYEXEC | ((current->mm->pax_flags & MF_PAX_PAGEEXEC)?0:VM_EXEC)) +#endif +#endif + #endif /* __KERNEL__ */ #include diff -urNp linux-2.6.17.11/include/asm-parisc/pgtable.h linux-2.6.17.11/include/asm-parisc/pgtable.h --- linux-2.6.17.11/include/asm-parisc/pgtable.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-parisc/pgtable.h 2006-09-01 16:20:29.000000000 -0400 @@ -220,6 +220,17 @@ extern void *vmalloc_start; #define PAGE_EXECREAD __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_EXEC |_PAGE_ACCESSED) #define PAGE_COPY PAGE_EXECREAD #define PAGE_RWX __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_EXEC |_PAGE_ACCESSED) + +#ifdef CONFIG_PAX_PAGEEXEC +# define PAGE_SHARED_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_ACCESSED) +# define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | 
_PAGE_ACCESSED) +# define PAGE_READONLY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_ACCESSED) +#else +# define PAGE_SHARED_NOEXEC PAGE_SHARED +# define PAGE_COPY_NOEXEC PAGE_COPY +# define PAGE_READONLY_NOEXEC PAGE_READONLY +#endif + #define PAGE_KERNEL __pgprot(_PAGE_KERNEL) #define PAGE_KERNEL_RO __pgprot(_PAGE_KERNEL & ~_PAGE_WRITE) #define PAGE_KERNEL_UNC __pgprot(_PAGE_KERNEL | _PAGE_NO_CACHE) diff -urNp linux-2.6.17.11/include/asm-powerpc/a.out.h linux-2.6.17.11/include/asm-powerpc/a.out.h --- linux-2.6.17.11/include/asm-powerpc/a.out.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-powerpc/a.out.h 2006-09-01 16:20:29.000000000 -0400 @@ -23,12 +23,12 @@ struct exec #define STACK_TOP_USER64 TASK_SIZE_USER64 #define STACK_TOP_USER32 TASK_SIZE_USER32 -#define STACK_TOP (test_thread_flag(TIF_32BIT) ? \ +#define __STACK_TOP (test_thread_flag(TIF_32BIT) ? \ STACK_TOP_USER32 : STACK_TOP_USER64) #else /* __powerpc64__ */ -#define STACK_TOP TASK_SIZE +#define __STACK_TOP TASK_SIZE #endif /* __powerpc64__ */ #endif /* __KERNEL__ */ diff -urNp linux-2.6.17.11/include/asm-powerpc/elf.h linux-2.6.17.11/include/asm-powerpc/elf.h --- linux-2.6.17.11/include/asm-powerpc/elf.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-powerpc/elf.h 2006-09-01 16:20:29.000000000 -0400 @@ -176,6 +176,26 @@ typedef elf_vrreg_t elf_vrregset_t32[ELF #define ELF_ET_DYN_BASE (0x08000000) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE(tsk) (0x10000000UL) + +#ifdef __powerpc64__ +#define PAX_DELTA_MMAP_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_MMAP_LEN(tsk) (test_thread_flag(TIF_32BIT) ? 16 : 28) +#define PAX_DELTA_EXEC_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_EXEC_LEN(tsk) (test_thread_flag(TIF_32BIT) ? 16 : 28) +#define PAX_DELTA_STACK_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_STACK_LEN(tsk) (test_thread_flag(TIF_32BIT) ? 
16 : 28) +#else +#define PAX_DELTA_MMAP_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_MMAP_LEN(tsk) 15 +#define PAX_DELTA_EXEC_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_EXEC_LEN(tsk) 15 +#define PAX_DELTA_STACK_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_STACK_LEN(tsk) 15 +#endif +#endif + #ifdef __KERNEL__ /* Common routine for both 32-bit and 64-bit processes */ diff -urNp linux-2.6.17.11/include/asm-powerpc/kmap_types.h linux-2.6.17.11/include/asm-powerpc/kmap_types.h --- linux-2.6.17.11/include/asm-powerpc/kmap_types.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-powerpc/kmap_types.h 2006-09-01 16:20:29.000000000 -0400 @@ -26,6 +26,7 @@ enum km_type { KM_SOFTIRQ1, KM_PPC_SYNC_PAGE, KM_PPC_SYNC_ICACHE, + KM_CLEARPAGE, KM_TYPE_NR }; diff -urNp linux-2.6.17.11/include/asm-powerpc/page_64.h linux-2.6.17.11/include/asm-powerpc/page_64.h --- linux-2.6.17.11/include/asm-powerpc/page_64.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-powerpc/page_64.h 2006-09-01 16:20:29.000000000 -0400 @@ -170,6 +170,15 @@ extern unsigned int HPAGE_SHIFT; (test_thread_flag(TIF_32BIT) ? 
\ VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64) +#ifdef CONFIG_PAX_PAGEEXEC +#ifdef CONFIG_PAX_MPROTECT +#define __VM_STACK_FLAGS (((current->mm->pax_flags & MF_PAX_MPROTECT)?0:VM_MAYEXEC) | \ + ((current->mm->pax_flags & MF_PAX_PAGEEXEC)?0:VM_EXEC)) +#else +#define __VM_STACK_FLAGS (VM_MAYEXEC | ((current->mm->pax_flags & MF_PAX_PAGEEXEC)?0:VM_EXEC)) +#endif +#endif + #include #endif /* __KERNEL__ */ diff -urNp linux-2.6.17.11/include/asm-ppc/page.h linux-2.6.17.11/include/asm-ppc/page.h --- linux-2.6.17.11/include/asm-ppc/page.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-ppc/page.h 2006-09-01 16:20:29.000000000 -0400 @@ -175,6 +175,15 @@ extern __inline__ int get_order(unsigned /* We do define AT_SYSINFO_EHDR but don't use the gate mecanism */ #define __HAVE_ARCH_GATE_AREA 1 +#ifdef CONFIG_PAX_PAGEEXEC +#ifdef CONFIG_PAX_MPROTECT +#define __VM_STACK_FLAGS (((current->mm->pax_flags & MF_PAX_MPROTECT)?0:VM_MAYEXEC) | \ + ((current->mm->pax_flags & MF_PAX_PAGEEXEC)?0:VM_EXEC)) +#else +#define __VM_STACK_FLAGS (VM_MAYEXEC | ((current->mm->pax_flags & MF_PAX_PAGEEXEC)?0:VM_EXEC)) +#endif +#endif + #include #endif /* __KERNEL__ */ #endif /* _PPC_PAGE_H */ diff -urNp linux-2.6.17.11/include/asm-ppc/pgtable.h linux-2.6.17.11/include/asm-ppc/pgtable.h --- linux-2.6.17.11/include/asm-ppc/pgtable.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-ppc/pgtable.h 2006-09-01 16:20:29.000000000 -0400 @@ -441,11 +441,21 @@ extern unsigned long ioremap_bot, iorema #define PAGE_NONE __pgprot(_PAGE_BASE) #define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER) -#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) +#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC | _PAGE_HWEXEC) #define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW) -#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC) +#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | 
_PAGE_EXEC | _PAGE_HWEXEC) #define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER) -#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) +#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC | _PAGE_HWEXEC) + +#if defined(CONFIG_PAX_PAGEEXEC) && !defined(CONFIG_40x) && !defined(CONFIG_44x) +# define PAGE_SHARED_NOEXEC __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_GUARDED) +# define PAGE_COPY_NOEXEC __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_GUARDED) +# define PAGE_READONLY_NOEXEC __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_GUARDED) +#else +# define PAGE_SHARED_NOEXEC PAGE_SHARED +# define PAGE_COPY_NOEXEC PAGE_COPY +# define PAGE_READONLY_NOEXEC PAGE_READONLY +#endif #define PAGE_KERNEL __pgprot(_PAGE_RAM) #define PAGE_KERNEL_NOCACHE __pgprot(_PAGE_IO) @@ -457,21 +467,21 @@ extern unsigned long ioremap_bot, iorema * This is the closest we can get.. */ #define __P000 PAGE_NONE -#define __P001 PAGE_READONLY_X -#define __P010 PAGE_COPY -#define __P011 PAGE_COPY_X -#define __P100 PAGE_READONLY +#define __P001 PAGE_READONLY_NOEXEC +#define __P010 PAGE_COPY_NOEXEC +#define __P011 PAGE_COPY_NOEXEC +#define __P100 PAGE_READONLY_X #define __P101 PAGE_READONLY_X -#define __P110 PAGE_COPY +#define __P110 PAGE_COPY_X #define __P111 PAGE_COPY_X #define __S000 PAGE_NONE -#define __S001 PAGE_READONLY_X -#define __S010 PAGE_SHARED -#define __S011 PAGE_SHARED_X -#define __S100 PAGE_READONLY +#define __S001 PAGE_READONLY_NOEXEC +#define __S010 PAGE_SHARED_NOEXEC +#define __S011 PAGE_SHARED_NOEXEC +#define __S100 PAGE_READONLY_X #define __S101 PAGE_READONLY_X -#define __S110 PAGE_SHARED +#define __S110 PAGE_SHARED_X #define __S111 PAGE_SHARED_X #ifndef __ASSEMBLY__ diff -urNp linux-2.6.17.11/include/asm-s390/kmap_types.h linux-2.6.17.11/include/asm-s390/kmap_types.h --- linux-2.6.17.11/include/asm-s390/kmap_types.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-s390/kmap_types.h 2006-09-01 16:20:29.000000000 -0400 @@ -16,6 +16,7 @@ enum 
km_type { KM_IRQ1, KM_SOFTIRQ0, KM_SOFTIRQ1, + KM_CLEARPAGE, KM_TYPE_NR }; diff -urNp linux-2.6.17.11/include/asm-sh/kmap_types.h linux-2.6.17.11/include/asm-sh/kmap_types.h --- linux-2.6.17.11/include/asm-sh/kmap_types.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-sh/kmap_types.h 2006-09-01 16:20:29.000000000 -0400 @@ -25,7 +25,8 @@ D(9) KM_IRQ0, D(10) KM_IRQ1, D(11) KM_SOFTIRQ0, D(12) KM_SOFTIRQ1, -D(13) KM_TYPE_NR +D(13) KM_CLEARPAGE, +D(14) KM_TYPE_NR }; #undef D diff -urNp linux-2.6.17.11/include/asm-sparc/a.out.h linux-2.6.17.11/include/asm-sparc/a.out.h --- linux-2.6.17.11/include/asm-sparc/a.out.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-sparc/a.out.h 2006-09-01 16:20:29.000000000 -0400 @@ -91,7 +91,7 @@ struct relocation_info /* used when head #include -#define STACK_TOP (PAGE_OFFSET - PAGE_SIZE) +#define __STACK_TOP (PAGE_OFFSET - PAGE_SIZE) #endif /* __KERNEL__ */ diff -urNp linux-2.6.17.11/include/asm-sparc/elf.h linux-2.6.17.11/include/asm-sparc/elf.h --- linux-2.6.17.11/include/asm-sparc/elf.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-sparc/elf.h 2006-09-01 16:20:29.000000000 -0400 @@ -145,6 +145,17 @@ typedef struct { #define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE(tsk) 0x10000UL + +#define PAX_DELTA_MMAP_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_MMAP_LEN(tsk) 16 +#define PAX_DELTA_EXEC_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_EXEC_LEN(tsk) 16 +#define PAX_DELTA_STACK_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_STACK_LEN(tsk) 16 +#endif + /* This yields a mask that user programs can use to figure out what instruction set this cpu supports. This can NOT be done in userspace on Sparc. 
*/ diff -urNp linux-2.6.17.11/include/asm-sparc/kmap_types.h linux-2.6.17.11/include/asm-sparc/kmap_types.h --- linux-2.6.17.11/include/asm-sparc/kmap_types.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-sparc/kmap_types.h 2006-09-01 16:20:29.000000000 -0400 @@ -15,6 +15,7 @@ enum km_type { KM_IRQ1, KM_SOFTIRQ0, KM_SOFTIRQ1, + KM_CLEARPAGE, KM_TYPE_NR }; diff -urNp linux-2.6.17.11/include/asm-sparc/page.h linux-2.6.17.11/include/asm-sparc/page.h --- linux-2.6.17.11/include/asm-sparc/page.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-sparc/page.h 2006-09-01 16:20:29.000000000 -0400 @@ -163,6 +163,15 @@ extern unsigned long pfn_base; #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#ifdef CONFIG_PAX_PAGEEXEC +#ifdef CONFIG_PAX_MPROTECT +#define __VM_STACK_FLAGS (((current->mm->pax_flags & MF_PAX_MPROTECT)?0:VM_MAYEXEC) | \ + ((current->mm->pax_flags & MF_PAX_PAGEEXEC)?0:VM_EXEC)) +#else +#define __VM_STACK_FLAGS (VM_MAYEXEC | ((current->mm->pax_flags & MF_PAX_PAGEEXEC)?0:VM_EXEC)) +#endif +#endif + #endif /* __KERNEL__ */ #include diff -urNp linux-2.6.17.11/include/asm-sparc/pgtable.h linux-2.6.17.11/include/asm-sparc/pgtable.h --- linux-2.6.17.11/include/asm-sparc/pgtable.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-sparc/pgtable.h 2006-09-01 16:20:29.000000000 -0400 @@ -50,6 +50,13 @@ BTFIXUPDEF_INT(page_none) BTFIXUPDEF_INT(page_shared) BTFIXUPDEF_INT(page_copy) BTFIXUPDEF_INT(page_readonly) + +#ifdef CONFIG_PAX_PAGEEXEC +BTFIXUPDEF_INT(page_shared_noexec) +BTFIXUPDEF_INT(page_copy_noexec) +BTFIXUPDEF_INT(page_readonly_noexec) +#endif + BTFIXUPDEF_INT(page_kernel) #define PMD_SHIFT SUN4C_PMD_SHIFT @@ -71,6 +78,16 @@ BTFIXUPDEF_INT(page_kernel) #define PAGE_COPY __pgprot(BTFIXUP_INT(page_copy)) #define PAGE_READONLY __pgprot(BTFIXUP_INT(page_readonly)) +#ifdef CONFIG_PAX_PAGEEXEC +# define PAGE_SHARED_NOEXEC 
__pgprot(BTFIXUP_INT(page_shared_noexec)) +# define PAGE_COPY_NOEXEC __pgprot(BTFIXUP_INT(page_copy_noexec)) +# define PAGE_READONLY_NOEXEC __pgprot(BTFIXUP_INT(page_readonly_noexec)) +#else +# define PAGE_SHARED_NOEXEC PAGE_SHARED +# define PAGE_COPY_NOEXEC PAGE_COPY +# define PAGE_READONLY_NOEXEC PAGE_READONLY +#endif + extern unsigned long page_kernel; #ifdef MODULE diff -urNp linux-2.6.17.11/include/asm-sparc/pgtsrmmu.h linux-2.6.17.11/include/asm-sparc/pgtsrmmu.h --- linux-2.6.17.11/include/asm-sparc/pgtsrmmu.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-sparc/pgtsrmmu.h 2006-09-01 16:20:29.000000000 -0400 @@ -115,6 +115,16 @@ SRMMU_EXEC | SRMMU_REF) #define SRMMU_PAGE_RDONLY __pgprot(SRMMU_VALID | SRMMU_CACHE | \ SRMMU_EXEC | SRMMU_REF) + +#ifdef CONFIG_PAX_PAGEEXEC +#define SRMMU_PAGE_SHARED_NOEXEC __pgprot(SRMMU_VALID | SRMMU_CACHE | \ + SRMMU_WRITE | SRMMU_REF) +#define SRMMU_PAGE_COPY_NOEXEC __pgprot(SRMMU_VALID | SRMMU_CACHE | \ + SRMMU_REF) +#define SRMMU_PAGE_RDONLY_NOEXEC __pgprot(SRMMU_VALID | SRMMU_CACHE | \ + SRMMU_REF) +#endif + #define SRMMU_PAGE_KERNEL __pgprot(SRMMU_VALID | SRMMU_CACHE | SRMMU_PRIV | \ SRMMU_DIRTY | SRMMU_REF) diff -urNp linux-2.6.17.11/include/asm-sparc/uaccess.h linux-2.6.17.11/include/asm-sparc/uaccess.h --- linux-2.6.17.11/include/asm-sparc/uaccess.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-sparc/uaccess.h 2006-09-01 16:20:29.000000000 -0400 @@ -41,7 +41,7 @@ * No one can read/write anything from userland in the kernel space by setting * large size and address near to PAGE_OFFSET - a fault will break his intentions. 
*/ -#define __user_ok(addr, size) ({ (void)(size); (addr) < STACK_TOP; }) +#define __user_ok(addr, size) ({ (void)(size); (addr) < __STACK_TOP; }) #define __kernel_ok (segment_eq(get_fs(), KERNEL_DS)) #define __access_ok(addr,size) (__user_ok((addr) & get_fs().seg,(size))) #define access_ok(type, addr, size) \ diff -urNp linux-2.6.17.11/include/asm-sparc64/a.out.h linux-2.6.17.11/include/asm-sparc64/a.out.h --- linux-2.6.17.11/include/asm-sparc64/a.out.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-sparc64/a.out.h 2006-09-01 16:20:29.000000000 -0400 @@ -98,7 +98,7 @@ struct relocation_info /* used when head #define STACK_TOP32 ((1UL << 32UL) - PAGE_SIZE) #define STACK_TOP64 (0x0000080000000000UL - (1UL << 32UL)) -#define STACK_TOP (test_thread_flag(TIF_32BIT) ? \ +#define __STACK_TOP (test_thread_flag(TIF_32BIT) ? \ STACK_TOP32 : STACK_TOP64) #endif diff -urNp linux-2.6.17.11/include/asm-sparc64/elf.h linux-2.6.17.11/include/asm-sparc64/elf.h --- linux-2.6.17.11/include/asm-sparc64/elf.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-sparc64/elf.h 2006-09-01 16:20:29.000000000 -0400 @@ -142,6 +142,16 @@ typedef struct { #define ELF_ET_DYN_BASE 0x0000010000000000UL #endif +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE(tsk) (test_thread_flag(TIF_32BIT) ? 0x10000UL : 0x100000UL) + +#define PAX_DELTA_MMAP_LSB(tsk) (PAGE_SHIFT + 1) +#define PAX_DELTA_MMAP_LEN(tsk) (test_thread_flag(TIF_32BIT) ? 14 : 28 ) +#define PAX_DELTA_EXEC_LSB(tsk) (PAGE_SHIFT + 1) +#define PAX_DELTA_EXEC_LEN(tsk) (test_thread_flag(TIF_32BIT) ? 14 : 28 ) +#define PAX_DELTA_STACK_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_STACK_LEN(tsk) (test_thread_flag(TIF_32BIT) ? 15 : 29 ) +#endif /* This yields a mask that user programs can use to figure out what instruction set this cpu supports. 
*/ diff -urNp linux-2.6.17.11/include/asm-sparc64/kmap_types.h linux-2.6.17.11/include/asm-sparc64/kmap_types.h --- linux-2.6.17.11/include/asm-sparc64/kmap_types.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-sparc64/kmap_types.h 2006-09-01 16:20:29.000000000 -0400 @@ -19,6 +19,7 @@ enum km_type { KM_IRQ1, KM_SOFTIRQ0, KM_SOFTIRQ1, + KM_CLEARPAGE, KM_TYPE_NR }; diff -urNp linux-2.6.17.11/include/asm-sparc64/page.h linux-2.6.17.11/include/asm-sparc64/page.h --- linux-2.6.17.11/include/asm-sparc64/page.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-sparc64/page.h 2006-09-01 16:20:29.000000000 -0400 @@ -144,6 +144,15 @@ typedef unsigned long pgprot_t; #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#ifdef CONFIG_PAX_PAGEEXEC +#ifdef CONFIG_PAX_MPROTECT +#define __VM_STACK_FLAGS (((current->mm->pax_flags & MF_PAX_MPROTECT)?0:VM_MAYEXEC) | \ + ((current->mm->pax_flags & MF_PAX_PAGEEXEC)?0:VM_EXEC)) +#else +#define __VM_STACK_FLAGS (VM_MAYEXEC | ((current->mm->pax_flags & MF_PAX_PAGEEXEC)?0:VM_EXEC)) +#endif +#endif + #endif /* !(__KERNEL__) */ #include diff -urNp linux-2.6.17.11/include/asm-v850/kmap_types.h linux-2.6.17.11/include/asm-v850/kmap_types.h --- linux-2.6.17.11/include/asm-v850/kmap_types.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-v850/kmap_types.h 2006-09-01 16:20:29.000000000 -0400 @@ -13,6 +13,7 @@ enum km_type { KM_PTE1, KM_IRQ0, KM_IRQ1, + KM_CLEARPAGE, KM_TYPE_NR }; diff -urNp linux-2.6.17.11/include/asm-x86_64/a.out.h linux-2.6.17.11/include/asm-x86_64/a.out.h --- linux-2.6.17.11/include/asm-x86_64/a.out.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-x86_64/a.out.h 2006-09-01 16:20:29.000000000 -0400 @@ -21,7 +21,7 @@ struct exec #ifdef __KERNEL__ #include -#define STACK_TOP TASK_SIZE +#define __STACK_TOP TASK_SIZE #endif #endif /* __A_OUT_GNU_H__ */ diff -urNp linux-2.6.17.11/include/asm-x86_64/elf.h 
linux-2.6.17.11/include/asm-x86_64/elf.h --- linux-2.6.17.11/include/asm-x86_64/elf.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-x86_64/elf.h 2006-09-01 16:20:29.000000000 -0400 @@ -90,6 +90,17 @@ typedef struct user_i387_struct elf_fpre #define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE(tsk) (test_thread_flag(TIF_IA32) ? 0x08048000UL : 0x400000UL) + +#define PAX_DELTA_MMAP_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_MMAP_LEN(tsk) (test_thread_flag(TIF_IA32) ? 16 : 32) +#define PAX_DELTA_EXEC_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_EXEC_LEN(tsk) (test_thread_flag(TIF_IA32) ? 16 : 32) +#define PAX_DELTA_STACK_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_STACK_LEN(tsk) (test_thread_flag(TIF_IA32) ? 16 : 32) +#endif + /* regs is struct pt_regs, pr_reg is elf_gregset_t (which is now struct_user_regs, they are different). Assumes current is the process getting dumped. */ diff -urNp linux-2.6.17.11/include/asm-x86_64/ia32.h linux-2.6.17.11/include/asm-x86_64/ia32.h --- linux-2.6.17.11/include/asm-x86_64/ia32.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-x86_64/ia32.h 2006-09-01 16:20:29.000000000 -0400 @@ -157,7 +157,13 @@ struct ustat32 { char f_fpack[6]; }; -#define IA32_STACK_TOP IA32_PAGE_OFFSET +#ifdef CONFIG_PAX_RANDUSTACK +#define IA32_DELTA_STACK (current->mm->delta_stack) +#else +#define IA32_DELTA_STACK 0UL +#endif + +#define IA32_STACK_TOP (IA32_PAGE_OFFSET - IA32_DELTA_STACK) #ifdef __KERNEL__ struct user_desc; diff -urNp linux-2.6.17.11/include/asm-x86_64/kmap_types.h linux-2.6.17.11/include/asm-x86_64/kmap_types.h --- linux-2.6.17.11/include/asm-x86_64/kmap_types.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-x86_64/kmap_types.h 2006-09-01 16:20:29.000000000 -0400 @@ -13,6 +13,7 @@ enum km_type { KM_IRQ1, KM_SOFTIRQ0, KM_SOFTIRQ1, + KM_CLEARPAGE, KM_TYPE_NR }; diff -urNp linux-2.6.17.11/include/asm-x86_64/page.h linux-2.6.17.11/include/asm-x86_64/page.h 
--- linux-2.6.17.11/include/asm-x86_64/page.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-x86_64/page.h 2006-09-01 16:20:29.000000000 -0400 @@ -134,6 +134,15 @@ typedef struct { unsigned long pgprot; } (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \ VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#ifdef CONFIG_PAX_PAGEEXEC +#ifdef CONFIG_PAX_MPROTECT +#define __VM_STACK_FLAGS (((current->mm->pax_flags & MF_PAX_MPROTECT)?0:VM_MAYEXEC) | \ + ((current->mm->pax_flags & MF_PAX_PAGEEXEC)?0:VM_EXEC)) +#else +#define __VM_STACK_FLAGS (VM_MAYEXEC | ((current->mm->pax_flags & MF_PAX_PAGEEXEC)?0:VM_EXEC)) +#endif +#endif + #define __HAVE_ARCH_GATE_AREA 1 #endif /* __KERNEL__ */ diff -urNp linux-2.6.17.11/include/asm-x86_64/pgalloc.h linux-2.6.17.11/include/asm-x86_64/pgalloc.h --- linux-2.6.17.11/include/asm-x86_64/pgalloc.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-x86_64/pgalloc.h 2006-09-01 16:20:29.000000000 -0400 @@ -7,7 +7,7 @@ #include #define pmd_populate_kernel(mm, pmd, pte) \ - set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte))) + set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(pte))) #define pud_populate(mm, pud, pmd) \ set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd))) #define pgd_populate(mm, pgd, pud) \ diff -urNp linux-2.6.17.11/include/asm-x86_64/pgtable.h linux-2.6.17.11/include/asm-x86_64/pgtable.h --- linux-2.6.17.11/include/asm-x86_64/pgtable.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-x86_64/pgtable.h 2006-09-01 16:20:29.000000000 -0400 @@ -180,6 +180,10 @@ static inline pte_t ptep_get_and_clear_f #define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) #define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) #define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) + +#define PAGE_READONLY_NOEXEC PAGE_READONLY +#define PAGE_SHARED_NOEXEC PAGE_SHARED + #define __PAGE_KERNEL \ (_PAGE_PRESENT | 
_PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX) #define __PAGE_KERNEL_EXEC \ @@ -268,7 +272,13 @@ static inline pte_t pfn_pte(unsigned lon #define __LARGE_PTE (_PAGE_PSE|_PAGE_PRESENT) static inline int pte_user(pte_t pte) { return pte_val(pte) & _PAGE_USER; } static inline int pte_read(pte_t pte) { return pte_val(pte) & _PAGE_USER; } -static inline int pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_USER; } +extern inline int pte_exec(pte_t pte) +{ + if (__supported_pte_mask & _PAGE_NX) + return !(pte_val(pte) & _PAGE_NX); /* NX set means NOT executable; '!' also yields a clean 0/1 int */ + else + return pte_val(pte) & _PAGE_USER; /* NX not in __supported_pte_mask: keep the pre-patch _PAGE_USER test */ +} static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; } @@ -276,12 +286,26 @@ static inline int pte_file(pte_t pte) { static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_PSE; } static inline pte_t pte_rdprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; } -static inline pte_t pte_exprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; } +extern inline pte_t pte_exprotect(pte_t pte) +{ + if (__supported_pte_mask & _PAGE_NX) + set_pte(&pte, __pte(pte_val(pte) | _PAGE_NX)); /* revoke execute permission: set NX */ + else + set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); /* NX not in __supported_pte_mask: keep the pre-patch behaviour */ + return pte; +} static inline pte_t pte_mkclean(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_DIRTY)); return pte; } static inline pte_t pte_mkold(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_ACCESSED)); return pte; } static inline pte_t pte_wrprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_RW)); return pte; } static inline pte_t pte_mkread(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_USER)); return pte; } -static inline pte_t pte_mkexec(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_USER)); return pte; } +extern inline pte_t pte_mkexec(pte_t pte) +{ + if
(__supported_pte_mask & _PAGE_NX) + set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_NX)); /* grant execute permission: clear NX */ + else + set_pte(&pte, __pte(pte_val(pte) | _PAGE_USER)); /* NX not in __supported_pte_mask: keep the pre-patch behaviour */ + return pte; +} static inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; } static inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; } static inline pte_t pte_mkwrite(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); return pte; } diff -urNp linux-2.6.17.11/include/asm-x86_64/system.h linux-2.6.17.11/include/asm-x86_64/system.h --- linux-2.6.17.11/include/asm-x86_64/system.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-x86_64/system.h 2006-09-01 16:20:29.000000000 -0400 @@ -366,6 +366,6 @@ static inline unsigned long __cmpxchg(vo void cpu_idle_wait(void); -extern unsigned long arch_align_stack(unsigned long sp); +#define arch_align_stack(x) (x) #endif diff -urNp linux-2.6.17.11/include/asm-xtensa/kmap_types.h linux-2.6.17.11/include/asm-xtensa/kmap_types.h --- linux-2.6.17.11/include/asm-xtensa/kmap_types.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/asm-xtensa/kmap_types.h 2006-09-01 16:20:29.000000000 -0400 @@ -25,6 +25,7 @@ enum km_type { KM_IRQ1, KM_SOFTIRQ0, KM_SOFTIRQ1, + KM_CLEARPAGE, KM_TYPE_NR }; diff -urNp linux-2.6.17.11/include/linux/a.out.h linux-2.6.17.11/include/linux/a.out.h --- linux-2.6.17.11/include/linux/a.out.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/linux/a.out.h 2006-09-01 16:20:29.000000000 -0400 @@ -7,6 +7,16 @@ #include +#ifdef CONFIG_PAX_RANDUSTACK +#define __DELTA_STACK (current->mm->delta_stack) +#else +#define __DELTA_STACK 0UL +#endif + +#ifndef STACK_TOP +#define STACK_TOP (__STACK_TOP - __DELTA_STACK) +#endif + #endif /* __STRUCT_EXEC_OVERRIDE__ */ /* these go in the N_MACHTYPE field */ @@ -37,6 +47,14 @@ enum machine_type { M_MIPS2 = 152 /* MIPS R6000/R4000 binary */ }; +/* Constants for the N_FLAGS field */ +#define
F_PAX_PAGEEXEC 1 /* Paging based non-executable pages */ +#define F_PAX_EMUTRAMP 2 /* Emulate trampolines */ +#define F_PAX_MPROTECT 4 /* Restrict mprotect() */ +#define F_PAX_RANDMMAP 8 /* Randomize mmap() base */ +/*#define F_PAX_RANDEXEC 16*/ /* Randomize ET_EXEC base */ +#define F_PAX_SEGMEXEC 32 /* Segmentation based non-executable pages */ + #if !defined (N_MAGIC) #define N_MAGIC(exec) ((exec).a_info & 0xffff) #endif diff -urNp linux-2.6.17.11/include/linux/binfmts.h linux-2.6.17.11/include/linux/binfmts.h --- linux-2.6.17.11/include/linux/binfmts.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/linux/binfmts.h 2006-09-01 16:20:29.000000000 -0400 @@ -7,10 +7,10 @@ struct pt_regs; /* * MAX_ARG_PAGES defines the number of pages allocated for arguments - * and envelope for the new program. 32 should suffice, this gives - * a maximum env+arg of 128kB w/4KB pages! + * and envelope for the new program. 33 should suffice, this gives + * a maximum env+arg of 132kB w/4KB pages! 
*/ -#define MAX_ARG_PAGES 32 +#define MAX_ARG_PAGES 33 /* sizeof(linux_binprm->buf) */ #define BINPRM_BUF_SIZE 128 @@ -38,6 +38,7 @@ struct linux_binprm{ unsigned interp_flags; unsigned interp_data; unsigned long loader, exec; + int misc; }; #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0 @@ -87,5 +88,8 @@ extern void compute_creds(struct linux_b extern int do_coredump(long signr, int exit_code, struct pt_regs * regs); extern int set_binfmt(struct linux_binfmt *new); +void pax_report_fault(struct pt_regs *regs, void *pc, void *sp); +void pax_report_insns(void *pc, void *sp); + #endif /* __KERNEL__ */ #endif /* _LINUX_BINFMTS_H */ diff -urNp linux-2.6.17.11/include/linux/capability.h linux-2.6.17.11/include/linux/capability.h --- linux-2.6.17.11/include/linux/capability.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/linux/capability.h 2006-09-01 16:20:29.000000000 -0400 @@ -364,6 +364,7 @@ static inline kernel_cap_t cap_invert(ke #define cap_is_fs_cap(c) (CAP_TO_MASK(c) & CAP_FS_MASK) int capable(int cap); +int capable_nolog(int cap); int __capable(struct task_struct *t, int cap); #endif /* __KERNEL__ */ diff -urNp linux-2.6.17.11/include/linux/elf.h linux-2.6.17.11/include/linux/elf.h --- linux-2.6.17.11/include/linux/elf.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/linux/elf.h 2006-09-01 16:20:29.000000000 -0400 @@ -5,6 +5,10 @@ #include #include +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) +#undef elf_read_implies_exec +#endif + #ifndef elf_read_implies_exec /* Executables for which elf_read_implies_exec() returns TRUE will have the READ_IMPLIES_EXEC personality flag set automatically. 
@@ -46,6 +50,16 @@ typedef __s64 Elf64_Sxword; #define PT_GNU_STACK (PT_LOOS + 0x474e551) +#define PT_PAX_FLAGS (PT_LOOS + 0x5041580) + +/* Constants for the e_flags field */ +#define EF_PAX_PAGEEXEC 1 /* Paging based non-executable pages */ +#define EF_PAX_EMUTRAMP 2 /* Emulate trampolines */ +#define EF_PAX_MPROTECT 4 /* Restrict mprotect() */ +#define EF_PAX_RANDMMAP 8 /* Randomize mmap() base */ +/*#define EF_PAX_RANDEXEC 16*/ /* Randomize ET_EXEC base */ +#define EF_PAX_SEGMEXEC 32 /* Segmentation based non-executable pages */ + /* These constants define the different elf file types */ #define ET_NONE 0 #define ET_REL 1 @@ -138,6 +152,8 @@ typedef __s64 Elf64_Sxword; #define DT_DEBUG 21 #define DT_TEXTREL 22 #define DT_JMPREL 23 +#define DT_FLAGS 30 + #define DF_TEXTREL 0x00000004 #define DT_LOPROC 0x70000000 #define DT_HIPROC 0x7fffffff @@ -267,6 +283,19 @@ typedef struct elf64_hdr { #define PF_W 0x2 #define PF_X 0x1 +#define PF_PAGEEXEC (1U << 4) /* Enable PAGEEXEC */ +#define PF_NOPAGEEXEC (1U << 5) /* Disable PAGEEXEC */ +#define PF_SEGMEXEC (1U << 6) /* Enable SEGMEXEC */ +#define PF_NOSEGMEXEC (1U << 7) /* Disable SEGMEXEC */ +#define PF_MPROTECT (1U << 8) /* Enable MPROTECT */ +#define PF_NOMPROTECT (1U << 9) /* Disable MPROTECT */ +/*#define PF_RANDEXEC (1U << 10)*/ /* Enable RANDEXEC */ +/*#define PF_NORANDEXEC (1U << 11)*/ /* Disable RANDEXEC */ +#define PF_EMUTRAMP (1U << 12) /* Enable EMUTRAMP */ +#define PF_NOEMUTRAMP (1U << 13) /* Disable EMUTRAMP */ +#define PF_RANDMMAP (1U << 14) /* Enable RANDMMAP */ +#define PF_NORANDMMAP (1U << 15) /* Disable RANDMMAP */ + typedef struct elf32_phdr{ Elf32_Word p_type; Elf32_Off p_offset; @@ -359,6 +388,8 @@ typedef struct elf64_shdr { #define EI_OSABI 7 #define EI_PAD 8 +#define EI_PAX 14 + #define ELFMAG0 0x7f /* EI_MAG */ #define ELFMAG1 'E' #define ELFMAG2 'L' @@ -415,6 +446,7 @@ extern Elf32_Dyn _DYNAMIC []; #define elfhdr elf32_hdr #define elf_phdr elf32_phdr #define elf_note elf32_note +#define elf_dyn 
Elf32_Dyn #else @@ -422,6 +454,7 @@ extern Elf64_Dyn _DYNAMIC []; #define elfhdr elf64_hdr #define elf_phdr elf64_phdr #define elf_note elf64_note +#define elf_dyn Elf64_Dyn #endif diff -urNp linux-2.6.17.11/include/linux/gracl.h linux-2.6.17.11/include/linux/gracl.h --- linux-2.6.17.11/include/linux/gracl.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/include/linux/gracl.h 2006-09-01 16:20:29.000000000 -0400 @@ -0,0 +1,316 @@ +#ifndef GR_ACL_H +#define GR_ACL_H + +#include +#include +#include +#include + +/* Major status information */ + +#define GR_VERSION "grsecurity 2.1.9" +#define GRSECURITY_VERSION 0x219 + +enum { + + SHUTDOWN = 0, + ENABLE = 1, + SPROLE = 2, + RELOAD = 3, + SEGVMOD = 4, + STATUS = 5, + UNSPROLE = 6, + PASSSET = 7, + SPROLEPAM = 8 +}; + +/* Password setup definitions + * kernel/grhash.c */ +enum { + GR_PW_LEN = 128, + GR_SALT_LEN = 16, + GR_SHA_LEN = 32, +}; + +enum { + GR_SPROLE_LEN = 64, +}; + +#define GR_NLIMITS (RLIMIT_LOCKS + 2) + +/* Begin Data Structures */ + +struct sprole_pw { + unsigned char *rolename; + unsigned char salt[GR_SALT_LEN]; + unsigned char sum[GR_SHA_LEN]; /* 256-bit SHA hash of the password */ +}; + +struct name_entry { + __u32 key; + ino_t inode; + dev_t device; + char *name; + __u16 len; + struct name_entry *prev; + struct name_entry *next; +}; + +struct inodev_entry { + struct name_entry *nentry; + struct inodev_entry *prev; + struct inodev_entry *next; +}; + +struct acl_role_db { + struct acl_role_label **r_hash; + __u32 r_size; +}; + +struct inodev_db { + struct inodev_entry **i_hash; + __u32 i_size; +}; + +struct name_db { + struct name_entry **n_hash; + __u32 n_size; +}; + +struct crash_uid { + uid_t uid; + unsigned long expires; +}; + +struct gr_hash_struct { + void **table; + void **nametable; + void *first; + __u32 table_size; + __u32 used_size; + int type; +}; + +/* Userspace Grsecurity ACL data structures */ + +struct acl_subject_label { + char *filename; + ino_t inode; + dev_t device; + __u32 
mode; /* presumably the subject mode flags (GR_KILL, GR_VIEW, ... from grdefs.h) - TODO confirm */
+	__u32 cap_mask;
+	__u32 cap_lower;
+
+	struct rlimit res[GR_NLIMITS]; /* GR_NLIMITS is RLIMIT_LOCKS + 2, see its #define earlier in this header */
+	__u16 resmask; /* presumably flags which res[] slots are in use - TODO confirm */
+
+	__u8 user_trans_type; /* presumably GR_ID_ALLOW or GR_ID_DENY (grdefs.h) - TODO confirm */
+	__u8 group_trans_type;
+	uid_t *user_transitions; /* assumed to hold user_trans_num entries - verify against the loader */
+	gid_t *group_transitions; /* assumed to hold group_trans_num entries - verify against the loader */
+	__u16 user_trans_num;
+	__u16 group_trans_num;
+
+	__u32 ip_proto[8];
+	__u32 ip_type;
+	struct acl_ip_label **ips; /* assumed to hold ip_num entries - verify against the loader */
+	__u32 ip_num;
+
+	__u32 crashes;
+	unsigned long expires;
+
+	struct acl_subject_label *parent_subject;
+	struct gr_hash_struct *hash;
+	struct acl_subject_label *prev;
+	struct acl_subject_label *next; /* link followed by FOR_EACH_SUBJECT_END and FOR_EACH_NESTED_SUBJECT_END */
+
+	struct acl_object_label **obj_hash;
+	__u32 obj_hash_size;
+	__u16 pax_flags; /* 16 bits: wide enough for every GR_PAX_* value in grdefs.h (largest is 0x8010) */
+};
+/* Address/netmask entry of the list that hangs off acl_role_label.allowed_ips; doubly linked through prev/next. */
+struct role_allowed_ip {
+	__u32 addr;
+	__u32 netmask;
+
+	struct role_allowed_ip *prev;
+	struct role_allowed_ip *next;
+};
+/* Role-name entry of the list that hangs off acl_role_label.transitions; doubly linked through prev/next. */
+struct role_transition {
+	char *rolename;
+
+	struct role_transition *prev;
+	struct role_transition *next;
+};
+/* A role: its name, id and type, its per-role lists, and the subject tables walked by the FOR_EACH_*SUBJECT* macros later in this header. */
+struct acl_role_label {
+	char *rolename;
+	uid_t uidgid;
+	__u16 roletype; /* presumably GR_ROLE_* flags from grdefs.h - TODO confirm */
+
+	__u16 auth_attempts;
+	unsigned long expires;
+
+	struct acl_subject_label *root_label;
+	struct gr_hash_struct *hash; /* hash->first is the list head read by FOR_EACH_NESTED_SUBJECT_START */
+
+	struct acl_role_label *prev;
+	struct acl_role_label *next; /* link followed by FOR_EACH_ROLE_END */
+
+	struct role_transition *transitions;
+	struct role_allowed_ip *allowed_ips;
+	uid_t *domain_children; /* assumed to hold domain_child_num entries - verify against the loader */
+	__u16 domain_child_num;
+
+	struct acl_subject_label **subj_hash; /* buckets indexed by FOR_EACH_SUBJECT_START */
+	__u32 subj_hash_size; /* loop bound used by FOR_EACH_SUBJECT_START */
+};
+/* Role table pointer plus the element counts listed below. */
+struct user_acl_role_db {
+	struct acl_role_label **r_table;
+	__u32 num_pointers; /* Number of allocations to track */
+	__u32 num_roles; /* Number of roles */
+	__u32 num_domain_children; /* Number of domain children */
+	__u32 num_subjects; /* Number of subjects */
+	__u32 num_objects; /* Number of objects */
+};
+/* An object entry identified by filename and inode/device; fhash() later in this header hashes an (ino_t, dev_t) pair, presumably these two fields - TODO confirm. */
+struct acl_object_label {
+	char *filename;
+	ino_t inode;
+	dev_t device;
+	__u32 mode; /* presumably the object mode flags (GR_READ, GR_WRITE, ... from grdefs.h) - TODO confirm */
+
+	struct acl_subject_label *nested;
+	struct acl_object_label *globbed;
+
+	/* next two structures not used */
+
+	struct acl_object_label *prev;
+	struct acl_object_label *next;
+};
+/* An IP entry; acl_subject_label.ips holds pointers to these. */
+struct acl_ip_label {
+	char *iface;
+	__u32 addr;
+	__u32 netmask;
+	__u16
low, high; /* presumably an inclusive port range - TODO confirm */
+	__u8 mode;
+	__u32 type;
+	__u32 proto[8]; /* 8 x 32 = 256 bits; presumably one bit per IP protocol number - TODO confirm */
+
+	/* next two structures not used */
+
+	struct acl_ip_label *prev;
+	struct acl_ip_label *next;
+};
+/* Argument block: a role database description, password material, a special-role name and a segv target (device, inode, uid). */
+struct gr_arg {
+	struct user_acl_role_db role_db;
+	unsigned char pw[GR_PW_LEN]; /* GR_PW_LEN is 128 (enum earlier in this header) */
+	unsigned char salt[GR_SALT_LEN]; /* GR_SALT_LEN is 16 */
+	unsigned char sum[GR_SHA_LEN]; /* GR_SHA_LEN is 32 */
+	unsigned char sp_role[GR_SPROLE_LEN]; /* GR_SPROLE_LEN is 64 */
+	struct sprole_pw *sprole_pws; /* assumed to hold num_sprole_pws entries - verify against the loader */
+	dev_t segv_device;
+	ino_t segv_inode;
+	uid_t segv_uid;
+	__u16 num_sprole_pws;
+	__u16 mode; /* presumably one of SHUTDOWN ... SPROLEPAM from the enum earlier in this header - TODO confirm */
+};
+/* Pointer to a gr_arg together with a version and a size; presumably checked against GRSECURITY_VERSION and sizeof(struct gr_arg) by the consumer - TODO confirm. */
+struct gr_arg_wrapper {
+	struct gr_arg *arg;
+	__u32 version;
+	__u32 size;
+};
+/* Pairs two subject-label pointers (user, kernel) in a doubly linked chain; shash() later in this header hashes a pointer named userp, presumably the user field - TODO confirm. */
+struct subject_map {
+	struct acl_subject_label *user;
+	struct acl_subject_label *kernel;
+	struct subject_map *prev;
+	struct subject_map *next;
+};
+/* Table of subject_map pointers with its size; presumably the bucket array indexed via shash() - TODO confirm. */
+struct acl_subj_map_db {
+	struct subject_map **s_hash;
+	__u32 s_size;
+};
+
+/* End Data Structures Section */
+
+/* Hash functions generated by empirical testing by Brad Spengler
+ Makes good use of the low bits of the inode. Generally 0-1 times
+ in loop for successful match. 0-3 for unsuccessful match.
+ Shift/add algorithm with modulus of table size and an XOR*/ + +static __inline__ unsigned int +rhash(const uid_t uid, const __u16 type, const unsigned int sz) +{ + return (((uid << type) + (uid ^ type)) % sz); +} + + static __inline__ unsigned int +shash(const struct acl_subject_label *userp, const unsigned int sz) +{ + return ((const unsigned long)userp % sz); +} + +static __inline__ unsigned int +fhash(const ino_t ino, const dev_t dev, const unsigned int sz) +{ + return (((ino + dev) ^ ((ino << 13) + (ino << 23) + (dev << 9))) % sz); +} + +static __inline__ unsigned int +nhash(const char *name, const __u16 len, const unsigned int sz) +{ + return full_name_hash(name, len) % sz; +} + +#define FOR_EACH_ROLE_START(role,iter) \ + role = NULL; \ + iter = 0; \ + while (iter < acl_role_set.r_size) { \ + if (role == NULL) \ + role = acl_role_set.r_hash[iter]; \ + if (role == NULL) { \ + iter++; \ + continue; \ + } + +#define FOR_EACH_ROLE_END(role,iter) \ + role = role->next; \ + if (role == NULL) \ + iter++; \ + } + +#define FOR_EACH_SUBJECT_START(role,subj,iter) \ + subj = NULL; \ + iter = 0; \ + while (iter < role->subj_hash_size) { \ + if (subj == NULL) \ + subj = role->subj_hash[iter]; \ + if (subj == NULL) { \ + iter++; \ + continue; \ + } + +#define FOR_EACH_SUBJECT_END(subj,iter) \ + subj = subj->next; \ + if (subj == NULL) \ + iter++; \ + } + + +#define FOR_EACH_NESTED_SUBJECT_START(role,subj) \ + subj = role->hash->first; \ + while (subj != NULL) { + +#define FOR_EACH_NESTED_SUBJECT_END(subj) \ + subj = subj->next; \ + } + +#endif + diff -urNp linux-2.6.17.11/include/linux/gralloc.h linux-2.6.17.11/include/linux/gralloc.h --- linux-2.6.17.11/include/linux/gralloc.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/include/linux/gralloc.h 2006-09-01 16:20:29.000000000 -0400 @@ -0,0 +1,8 @@ +#ifndef __GRALLOC_H +#define __GRALLOC_H + +void acl_free_all(void); +int acl_alloc_stack_init(unsigned long size); +void *acl_alloc(unsigned long len); + +#endif diff 
-urNp linux-2.6.17.11/include/linux/grdefs.h linux-2.6.17.11/include/linux/grdefs.h --- linux-2.6.17.11/include/linux/grdefs.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/include/linux/grdefs.h 2006-09-01 16:20:29.000000000 -0400 @@ -0,0 +1,131 @@ +#ifndef GRDEFS_H +#define GRDEFS_H + +/* Begin grsecurity status declarations */ + +enum { + GR_READY = 0x01, + GR_STATUS_INIT = 0x00 // disabled state +}; + +/* Begin ACL declarations */ + +/* Role flags */ + +enum { + GR_ROLE_USER = 0x0001, + GR_ROLE_GROUP = 0x0002, + GR_ROLE_DEFAULT = 0x0004, + GR_ROLE_SPECIAL = 0x0008, + GR_ROLE_AUTH = 0x0010, + GR_ROLE_NOPW = 0x0020, + GR_ROLE_GOD = 0x0040, + GR_ROLE_LEARN = 0x0080, + GR_ROLE_TPE = 0x0100, + GR_ROLE_DOMAIN = 0x0200, + GR_ROLE_PAM = 0x0400 +}; + +/* ACL Subject and Object mode flags */ +enum { + GR_DELETED = 0x80000000 +}; + +/* ACL Object-only mode flags */ +enum { + GR_READ = 0x00000001, + GR_APPEND = 0x00000002, + GR_WRITE = 0x00000004, + GR_EXEC = 0x00000008, + GR_FIND = 0x00000010, + GR_INHERIT = 0x00000020, + GR_SETID = 0x00000040, + GR_CREATE = 0x00000080, + GR_DELETE = 0x00000100, + GR_LINK = 0x00000200, + GR_AUDIT_READ = 0x00000400, + GR_AUDIT_APPEND = 0x00000800, + GR_AUDIT_WRITE = 0x00001000, + GR_AUDIT_EXEC = 0x00002000, + GR_AUDIT_FIND = 0x00004000, + GR_AUDIT_INHERIT= 0x00008000, + GR_AUDIT_SETID = 0x00010000, + GR_AUDIT_CREATE = 0x00020000, + GR_AUDIT_DELETE = 0x00040000, + GR_AUDIT_LINK = 0x00080000, + GR_PTRACERD = 0x00100000, + GR_NOPTRACE = 0x00200000, + GR_SUPPRESS = 0x00400000, + GR_NOLEARN = 0x00800000 +}; + +#define GR_AUDITS (GR_AUDIT_READ | GR_AUDIT_WRITE | GR_AUDIT_APPEND | GR_AUDIT_EXEC | \ + GR_AUDIT_FIND | GR_AUDIT_INHERIT | GR_AUDIT_SETID | \ + GR_AUDIT_CREATE | GR_AUDIT_DELETE | GR_AUDIT_LINK) + +/* ACL subject-only mode flags */ +enum { + GR_KILL = 0x00000001, + GR_VIEW = 0x00000002, + GR_PROTECTED = 0x00000004, + GR_LEARN = 0x00000008, + GR_OVERRIDE = 0x00000010, + /* just a placeholder, this mode is only used in userspace 
*/
+	GR_DUMMY = 0x00000020,
+	GR_PROTSHM = 0x00000040,
+	GR_KILLPROC = 0x00000080,
+	GR_KILLIPPROC = 0x00000100,
+	/* just a placeholder, this mode is only used in userspace */
+	GR_NOTROJAN = 0x00000200,
+	GR_PROTPROCFD = 0x00000400,
+	GR_PROCACCT = 0x00000800,
+	GR_RELAXPTRACE = 0x00001000,
+	GR_NESTED = 0x00002000,
+	GR_INHERITLEARN = 0x00004000,
+	GR_PROCFIND = 0x00008000,
+	GR_POVERRIDE = 0x00010000,
+	GR_KERNELAUTH = 0x00020000,
+};
+/* PaX override values: every GR_PAX_DISABLE_x is the matching GR_PAX_ENABLE_x with bit 0x8000 set, so all values fit in 16 bits. */
+enum {
+	GR_PAX_ENABLE_SEGMEXEC = 0x0001,
+	GR_PAX_ENABLE_PAGEEXEC = 0x0002,
+	GR_PAX_ENABLE_MPROTECT = 0x0004,
+	GR_PAX_ENABLE_RANDMMAP = 0x0008,
+	GR_PAX_ENABLE_EMUTRAMP = 0x0010,
+	GR_PAX_DISABLE_SEGMEXEC = 0x8001,
+	GR_PAX_DISABLE_PAGEEXEC = 0x8002,
+	GR_PAX_DISABLE_MPROTECT = 0x8004,
+	GR_PAX_DISABLE_RANDMMAP = 0x8008,
+	GR_PAX_DISABLE_EMUTRAMP = 0x8010,
+};
+/* Two distinct bits selecting a user id or a group id; which field stores them is not visible here. */
+enum {
+	GR_ID_USER = 0x01,
+	GR_ID_GROUP = 0x02,
+};
+/* Allow/deny selector; presumably the value of user_trans_type / group_trans_type in struct acl_subject_label - TODO confirm. */
+enum {
+	GR_ID_ALLOW = 0x01,
+	GR_ID_DENY = 0x02,
+};
+
+#define GR_CRASH_RES 11 /* presumably an index into acl_subject_label.res[] - TODO confirm */
+#define GR_UIDTABLE_MAX 500
+
+/* begin resource learning section */
+enum { /* one *_BUMP increment per resource; units are not stated here - verify in the learning code */
+	GR_RLIM_CPU_BUMP = 60,
+	GR_RLIM_FSIZE_BUMP = 50000,
+	GR_RLIM_DATA_BUMP = 10000,
+	GR_RLIM_STACK_BUMP = 1000,
+	GR_RLIM_CORE_BUMP = 10000,
+	GR_RLIM_RSS_BUMP = 500000,
+	GR_RLIM_NPROC_BUMP = 1,
+	GR_RLIM_NOFILE_BUMP = 5,
+	GR_RLIM_MEMLOCK_BUMP = 50000,
+	GR_RLIM_AS_BUMP = 500000,
+	GR_RLIM_LOCKS_BUMP = 2
+};
+
+#endif
diff -urNp linux-2.6.17.11/include/linux/grinternal.h linux-2.6.17.11/include/linux/grinternal.h
--- linux-2.6.17.11/include/linux/grinternal.h 1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.17.11/include/linux/grinternal.h 2006-09-01 16:20:29.000000000 -0400
@@ -0,0 +1,211 @@
+#ifndef __GRINTERNAL_H
+#define __GRINTERNAL_H
+
+#ifdef CONFIG_GRKERNSEC
+/* NOTE(review): the four #include lines below carry no header name - apparently stripped during extraction; restore them from the upstream patch before use. */
+#include
+#include
+#include
+#include
+/* Everything from here to the matching #endif is only declared when CONFIG_GRKERNSEC is defined. */
+extern void gr_add_learn_entry(const char *fmt, ...);
+extern __u32 gr_search_file(const struct dentry *dentry, const __u32 mode,
+			    const struct vfsmount *mnt);
+extern __u32 gr_check_create(const struct dentry *new_dentry,
+			     const struct
dentry *parent, + const struct vfsmount *mnt, const __u32 mode); +extern int gr_check_protected_task(const struct task_struct *task); +extern __u32 to_gr_audit(const __u32 reqmode); +extern int gr_set_acls(const int type); + +extern int gr_acl_is_enabled(void); +extern char gr_roletype_to_char(void); + +extern void gr_handle_alertkill(struct task_struct *task); +extern char *gr_to_filename(const struct dentry *dentry, + const struct vfsmount *mnt); +extern char *gr_to_filename1(const struct dentry *dentry, + const struct vfsmount *mnt); +extern char *gr_to_filename2(const struct dentry *dentry, + const struct vfsmount *mnt); +extern char *gr_to_filename3(const struct dentry *dentry, + const struct vfsmount *mnt); + +extern int grsec_enable_link; +extern int grsec_enable_fifo; +extern int grsec_enable_execve; +extern int grsec_enable_shm; +extern int grsec_enable_execlog; +extern int grsec_enable_signal; +extern int grsec_enable_forkfail; +extern int grsec_enable_time; +extern int grsec_enable_chroot_shmat; +extern int grsec_enable_chroot_findtask; +extern int grsec_enable_chroot_mount; +extern int grsec_enable_chroot_double; +extern int grsec_enable_chroot_pivot; +extern int grsec_enable_chroot_chdir; +extern int grsec_enable_chroot_chmod; +extern int grsec_enable_chroot_mknod; +extern int grsec_enable_chroot_fchdir; +extern int grsec_enable_chroot_nice; +extern int grsec_enable_chroot_execlog; +extern int grsec_enable_chroot_caps; +extern int grsec_enable_chroot_sysctl; +extern int grsec_enable_chroot_unix; +extern int grsec_enable_tpe; +extern int grsec_tpe_gid; +extern int grsec_enable_tpe_all; +extern int grsec_enable_sidcaps; +extern int grsec_enable_randpid; +extern int grsec_enable_socket_all; +extern int grsec_socket_all_gid; +extern int grsec_enable_socket_client; +extern int grsec_socket_client_gid; +extern int grsec_enable_socket_server; +extern int grsec_socket_server_gid; +extern int grsec_audit_gid; +extern int grsec_enable_group; +extern int 
grsec_enable_audit_ipc; +extern int grsec_enable_audit_textrel; +extern int grsec_enable_mount; +extern int grsec_enable_chdir; +extern int grsec_resource_logging; +extern int grsec_lock; + +extern struct task_struct *child_reaper; + +extern spinlock_t grsec_alert_lock; +extern unsigned long grsec_alert_wtime; +extern unsigned long grsec_alert_fyet; + +extern spinlock_t grsec_audit_lock; + +extern rwlock_t grsec_exec_file_lock; + +#define gr_task_fullpath(tsk) (tsk->exec_file ? \ + gr_to_filename2(tsk->exec_file->f_dentry, \ + tsk->exec_file->f_vfsmnt) : "/") + +#define gr_parent_task_fullpath(tsk) (tsk->parent->exec_file ? \ + gr_to_filename3(tsk->parent->exec_file->f_dentry, \ + tsk->parent->exec_file->f_vfsmnt) : "/") + +#define gr_task_fullpath0(tsk) (tsk->exec_file ? \ + gr_to_filename(tsk->exec_file->f_dentry, \ + tsk->exec_file->f_vfsmnt) : "/") + +#define gr_parent_task_fullpath0(tsk) (tsk->parent->exec_file ? \ + gr_to_filename1(tsk->parent->exec_file->f_dentry, \ + tsk->parent->exec_file->f_vfsmnt) : "/") + +#define proc_is_chrooted(tsk_a) ((tsk_a->pid > 1) && (tsk_a->fs != NULL) && \ + ((tsk_a->fs->root->d_inode->i_sb->s_dev != \ + child_reaper->fs->root->d_inode->i_sb->s_dev) || \ + (tsk_a->fs->root->d_inode->i_ino != \ + child_reaper->fs->root->d_inode->i_ino))) + +#define have_same_root(tsk_a,tsk_b) ((tsk_a->fs != NULL) && (tsk_b->fs != NULL) && \ + (tsk_a->fs->root->d_inode->i_sb->s_dev == \ + tsk_b->fs->root->d_inode->i_sb->s_dev) && \ + (tsk_a->fs->root->d_inode->i_ino == \ + tsk_b->fs->root->d_inode->i_ino)) + +#define DEFAULTSECARGS(task) gr_task_fullpath(task), task->comm, \ + task->pid, task->uid, \ + task->euid, task->gid, task->egid, \ + gr_parent_task_fullpath(task), \ + task->parent->comm, task->parent->pid, \ + task->parent->uid, task->parent->euid, \ + task->parent->gid, task->parent->egid + +#define GR_CHROOT_CAPS ( \ + CAP_TO_MASK(CAP_LINUX_IMMUTABLE) | CAP_TO_MASK(CAP_NET_ADMIN) | \ + CAP_TO_MASK(CAP_SYS_MODULE) | 
CAP_TO_MASK(CAP_SYS_RAWIO) | \ + CAP_TO_MASK(CAP_SYS_PACCT) | CAP_TO_MASK(CAP_SYS_ADMIN) | \ + CAP_TO_MASK(CAP_SYS_BOOT) | CAP_TO_MASK(CAP_SYS_TIME) | \ + CAP_TO_MASK(CAP_NET_RAW) | CAP_TO_MASK(CAP_SYS_TTY_CONFIG) | \ + CAP_TO_MASK(CAP_IPC_OWNER)) + +#define security_learn(normal_msg,args...) \ +({ \ + read_lock(&grsec_exec_file_lock); \ + gr_add_learn_entry(normal_msg "\n", ## args); \ + read_unlock(&grsec_exec_file_lock); \ +}) + +enum { + GR_DO_AUDIT, + GR_DONT_AUDIT, + GR_DONT_AUDIT_GOOD +}; + +enum { + GR_TTYSNIFF, + GR_RBAC, + GR_RBAC_STR, + GR_STR_RBAC, + GR_RBAC_MODE2, + GR_RBAC_MODE3, + GR_FILENAME, + GR_NOARGS, + GR_ONE_INT, + GR_ONE_INT_TWO_STR, + GR_ONE_STR, + GR_STR_INT, + GR_TWO_INT, + GR_THREE_INT, + GR_FIVE_INT_TWO_STR, + GR_TWO_STR, + GR_THREE_STR, + GR_FOUR_STR, + GR_STR_FILENAME, + GR_FILENAME_STR, + GR_FILENAME_TWO_INT, + GR_FILENAME_TWO_INT_STR, + GR_TEXTREL, + GR_PTRACE, + GR_RESOURCE, + GR_CAP, + GR_SIG, + GR_CRASH1, + GR_CRASH2, + GR_PSACCT +}; + +#define gr_log_ttysniff(audit, msg, task) gr_log_varargs(audit, msg, GR_TTYSNIFF, task) +#define gr_log_fs_rbac_generic(audit, msg, dentry, mnt) gr_log_varargs(audit, msg, GR_RBAC, dentry, mnt) +#define gr_log_fs_rbac_str(audit, msg, dentry, mnt, str) gr_log_varargs(audit, msg, GR_RBAC_STR, dentry, mnt, str) +#define gr_log_fs_str_rbac(audit, msg, str, dentry, mnt) gr_log_varargs(audit, msg, GR_STR_RBAC, str, dentry, mnt) +#define gr_log_fs_rbac_mode2(audit, msg, dentry, mnt, str1, str2) gr_log_varargs(audit, msg, GR_RBAC_MODE2, dentry, mnt, str1, str2) +#define gr_log_fs_rbac_mode3(audit, msg, dentry, mnt, str1, str2, str3) gr_log_varargs(audit, msg, GR_RBAC_MODE3, dentry, mnt, str1, str2, str3) +#define gr_log_fs_generic(audit, msg, dentry, mnt) gr_log_varargs(audit, msg, GR_FILENAME, dentry, mnt) +#define gr_log_noargs(audit, msg) gr_log_varargs(audit, msg, GR_NOARGS) +#define gr_log_int(audit, msg, num) gr_log_varargs(audit, msg, GR_ONE_INT, num) +#define gr_log_int_str2(audit, msg, num, str1, 
str2) gr_log_varargs(audit, msg, GR_ONE_INT_TWO_STR, num, str1, str2) +#define gr_log_str(audit, msg, str) gr_log_varargs(audit, msg, GR_ONE_STR, str) +#define gr_log_str_int(audit, msg, str, num) gr_log_varargs(audit, msg, GR_STR_INT, str, num) +#define gr_log_int_int(audit, msg, num1, num2) gr_log_varargs(audit, msg, GR_TWO_INT, num1, num2) +#define gr_log_int3(audit, msg, num1, num2, num3) gr_log_varargs(audit, msg, GR_THREE_INT, num1, num2, num3) +#define gr_log_int5_str2(audit, msg, num1, num2, str1, str2) gr_log_varargs(audit, msg, GR_FIVE_INT_TWO_STR, num1, num2, str1, str2) +#define gr_log_str_str(audit, msg, str1, str2) gr_log_varargs(audit, msg, GR_TWO_STR, str1, str2) +#define gr_log_str3(audit, msg, str1, str2, str3) gr_log_varargs(audit, msg, GR_THREE_STR, str1, str2, str3) +#define gr_log_str4(audit, msg, str1, str2, str3, str4) gr_log_varargs(audit, msg, GR_FOUR_STR, str1, str2, str3, str4) +#define gr_log_str_fs(audit, msg, str, dentry, mnt) gr_log_varargs(audit, msg, GR_STR_FILENAME, str, dentry, mnt) +#define gr_log_fs_str(audit, msg, dentry, mnt, str) gr_log_varargs(audit, msg, GR_FILENAME_STR, dentry, mnt, str) +#define gr_log_fs_int2(audit, msg, dentry, mnt, num1, num2) gr_log_varargs(audit, msg, GR_FILENAME_TWO_INT, dentry, mnt, num1, num2) +#define gr_log_fs_int2_str(audit, msg, dentry, mnt, num1, num2, str) gr_log_varargs(audit, msg, GR_FILENAME_TWO_INT_STR, dentry, mnt, num1, num2, str) +#define gr_log_textrel_ulong_ulong(audit, msg, file, ulong1, ulong2) gr_log_varargs(audit, msg, GR_TEXTREL, file, ulong1, ulong2) +#define gr_log_ptrace(audit, msg, task) gr_log_varargs(audit, msg, GR_PTRACE, task) +#define gr_log_res_ulong2_str(audit, msg, task, ulong1, str, ulong2) gr_log_varargs(audit, msg, GR_RESOURCE, task, ulong1, str, ulong2) +#define gr_log_cap(audit, msg, task, str) gr_log_varargs(audit, msg, GR_CAP, task, str) +#define gr_log_sig(audit, msg, task, num) gr_log_varargs(audit, msg, GR_SIG, task, num) +#define gr_log_crash1(audit, 
msg, task, ulong) gr_log_varargs(audit, msg, GR_CRASH1, task, ulong) +#define gr_log_crash2(audit, msg, task, ulong1) gr_log_varargs(audit, msg, GR_CRASH2, task, ulong1) +#define gr_log_procacct(audit, msg, task, num1, num2, num3, num4, num5, num6, num7, num8, num9) gr_log_varargs(audit, msg, GR_PSACCT, task, num1, num2, num3, num4, num5, num6, num7, num8, num9) + +extern void gr_log_varargs(int audit, const char *msg, int argtypes, ...); + +#endif + +#endif diff -urNp linux-2.6.17.11/include/linux/grmsg.h linux-2.6.17.11/include/linux/grmsg.h --- linux-2.6.17.11/include/linux/grmsg.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/include/linux/grmsg.h 2006-09-01 16:20:29.000000000 -0400 @@ -0,0 +1,108 @@ +#define DEFAULTSECMSG "%.256s[%.16s:%d] uid/euid:%u/%u gid/egid:%u/%u, parent %.256s[%.16s:%d] uid/euid:%u/%u gid/egid:%u/%u" +#define GR_ACL_PROCACCT_MSG "%.256s[%.16s:%d] IP:%u.%u.%u.%u TTY:%.64s uid/euid:%u/%u gid/egid:%u/%u run time:[%ud %uh %um %us] cpu time:[%ud %uh %um %us] %s with exit code %ld, parent %.256s[%.16s:%d] IP:%u.%u.%u.%u TTY:%.64s uid/euid:%u/%u gid/egid:%u/%u" +#define GR_PTRACE_ACL_MSG "denied ptrace of %.950s(%.16s:%d) by " +#define GR_STOPMOD_MSG "denied modification of module state by " +#define GR_IOPERM_MSG "denied use of ioperm() by " +#define GR_IOPL_MSG "denied use of iopl() by " +#define GR_SHMAT_ACL_MSG "denied attach of shared memory of UID %u, PID %d, ID %u by " +#define GR_UNIX_CHROOT_MSG "denied connect() to abstract AF_UNIX socket outside of chroot by " +#define GR_SHMAT_CHROOT_MSG "denied attach of shared memory outside of chroot by " +#define GR_KMEM_MSG "denied write of /dev/kmem by " +#define GR_PORT_OPEN_MSG "denied open of /dev/port by " +#define GR_MEM_WRITE_MSG "denied write of /dev/mem by " +#define GR_MEM_MMAP_MSG "denied mmap write of /dev/[k]mem by " +#define GR_SYMLINK_MSG "not following symlink %.950s owned by %d.%d by " +#define GR_LEARN_AUDIT_MSG 
"%s\t%u\t%u\t%u\t%.4095s\t%.4095s\t%lu\t%lu\t%.4095s\t%lu\t%u.%u.%u.%u" +#define GR_ID_LEARN_MSG "%s\t%u\t%u\t%u\t%.4095s\t%.4095s\t%c\t%d\t%d\t%d\t%u.%u.%u.%u" +#define GR_HIDDEN_ACL_MSG "%s access to hidden file %.950s by " +#define GR_OPEN_ACL_MSG "%s open of %.950s for%s%s by " +#define GR_CREATE_ACL_MSG "%s create of %.950s for%s%s by " +#define GR_FIFO_MSG "denied writing FIFO %.950s of %d.%d by " +#define GR_MKNOD_CHROOT_MSG "denied mknod of %.950s from chroot by " +#define GR_MKNOD_ACL_MSG "%s mknod of %.950s by " +#define GR_UNIXCONNECT_ACL_MSG "%s connect() to the unix domain socket %.950s by " +#define GR_TTYSNIFF_ACL_MSG "terminal being sniffed by IP:%u.%u.%u.%u %.480s[%.16s:%d], parent %.480s[%.16s:%d] against " +#define GR_MKDIR_ACL_MSG "%s mkdir of %.950s by " +#define GR_RMDIR_ACL_MSG "%s rmdir of %.950s by " +#define GR_UNLINK_ACL_MSG "%s unlink of %.950s by " +#define GR_SYMLINK_ACL_MSG "%s symlink from %.480s to %.480s by " +#define GR_HARDLINK_MSG "denied hardlink of %.930s (owned by %d.%d) to %.30s for " +#define GR_LINK_ACL_MSG "%s link of %.480s to %.480s by " +#define GR_INHERIT_ACL_MSG "successful inherit of %.480s's ACL for %.480s by " +#define GR_RENAME_ACL_MSG "%s rename of %.480s to %.480s by " +#define GR_PTRACE_EXEC_ACL_MSG "denied ptrace of %.950s by " +#define GR_NPROC_MSG "denied overstep of process limit by " +#define GR_EXEC_ACL_MSG "%s execution of %.950s by " +#define GR_EXEC_TPE_MSG "denied untrusted exec of %.950s by " +#define GR_SEGVSTART_ACL_MSG "possible exploit bruteforcing on " DEFAULTSECMSG " banning uid %u from login for %lu seconds" +#define GR_SEGVNOSUID_ACL_MSG "possible exploit bruteforcing on " DEFAULTSECMSG " banning execution for %lu seconds" +#define GR_MOUNT_CHROOT_MSG "denied mount of %.30s as %.930s from chroot by " +#define GR_PIVOT_CHROOT_MSG "denied pivot_root from chroot by " +#define GR_TRUNCATE_ACL_MSG "%s truncate of %.950s by " +#define GR_ATIME_ACL_MSG "%s access time change of %.950s by " +#define 
GR_ACCESS_ACL_MSG "%s access of %.950s for%s%s%s by " +#define GR_CHROOT_CHROOT_MSG "denied double chroot to %.950s by " +#define GR_FCHMOD_ACL_MSG "%s fchmod of %.950s by " +#define GR_CHMOD_CHROOT_MSG "denied chmod +s of %.950s by " +#define GR_CHMOD_ACL_MSG "%s chmod of %.950s by " +#define GR_CHROOT_FCHDIR_MSG "denied fchdir outside of chroot to %.950s by " +#define GR_CHOWN_ACL_MSG "%s chown of %.950s by " +#define GR_WRITLIB_ACL_MSG "denied load of writable library %.950s by " +#define GR_INITF_ACL_MSG "init_variables() failed %s by " +#define GR_DISABLED_ACL_MSG "Error loading %s, trying to run kernel with acls disabled. To disable acls at startup use gracl=off from your boot loader" +#define GR_DEV_ACL_MSG "/dev/grsec: %d bytes sent %d required, being fed garbaged by " +#define GR_SHUTS_ACL_MSG "shutdown auth success for " +#define GR_SHUTF_ACL_MSG "shutdown auth failure for " +#define GR_SHUTI_ACL_MSG "ignoring shutdown for disabled RBAC system for " +#define GR_SEGVMODS_ACL_MSG "segvmod auth success for " +#define GR_SEGVMODF_ACL_MSG "segvmod auth failure for " +#define GR_SEGVMODI_ACL_MSG "ignoring segvmod for disabled RBAC system for " +#define GR_ENABLE_ACL_MSG "%s RBAC system loaded by " +#define GR_ENABLEF_ACL_MSG "unable to load %s for " +#define GR_RELOADI_ACL_MSG "ignoring reload request for disabled RBAC system" +#define GR_RELOAD_ACL_MSG "%s RBAC system reloaded by " +#define GR_RELOADF_ACL_MSG "failed reload of %s for " +#define GR_SPROLEI_ACL_MSG "ignoring change to special role for disabled RBAC system for " +#define GR_SPROLES_ACL_MSG "successful change to special role %s (id %d) by " +#define GR_SPROLEL_ACL_MSG "special role %s (id %d) exited by " +#define GR_SPROLEF_ACL_MSG "special role %s failure for " +#define GR_UNSPROLEI_ACL_MSG "ignoring unauth of special role for disabled RBAC system for " +#define GR_UNSPROLES_ACL_MSG "successful unauth of special role %s (id %d) by " +#define GR_UNSPROLEF_ACL_MSG "special role unauth of %s failure 
for " +#define GR_INVMODE_ACL_MSG "invalid mode %d by " +#define GR_PRIORITY_CHROOT_MSG "denied priority change of process (%.16s:%d) by " +#define GR_FAILFORK_MSG "failed fork with errno %d by " +#define GR_NICE_CHROOT_MSG "denied priority change by " +#define GR_UNISIGLOG_MSG "signal %d sent to " +#define GR_DUALSIGLOG_MSG "signal %d sent to " DEFAULTSECMSG " by " +#define GR_SIG_ACL_MSG "denied send of signal %d to protected task " DEFAULTSECMSG " by " +#define GR_SYSCTL_MSG "denied modification of grsecurity sysctl value : %.32s by " +#define GR_SYSCTL_ACL_MSG "%s sysctl of %.950s for%s%s by " +#define GR_TIME_MSG "time set by " +#define GR_DEFACL_MSG "fatal: unable to find subject for (%.16s:%d), loaded by " +#define GR_MMAP_ACL_MSG "%s executable mmap of %.950s by " +#define GR_MPROTECT_ACL_MSG "%s executable mprotect of %.950s by " +#define GR_SOCK_MSG "denied socket(%.16s,%.16s,%.16s) by " +#define GR_SOCK2_MSG "denied socket(%d,%.16s,%.16s) by " +#define GR_BIND_MSG "denied bind() by " +#define GR_CONNECT_MSG "denied connect() by " +#define GR_BIND_ACL_MSG "denied bind() to %u.%u.%u.%u port %u sock type %.16s protocol %.16s by " +#define GR_CONNECT_ACL_MSG "denied connect() to %u.%u.%u.%u port %u sock type %.16s protocol %.16s by " +#define GR_IP_LEARN_MSG "%s\t%u\t%u\t%u\t%.4095s\t%.4095s\t%u.%u.%u.%u\t%u\t%u\t%u\t%u\t%u.%u.%u.%u" +#define GR_EXEC_CHROOT_MSG "exec of %.980s within chroot by process " +#define GR_CAP_ACL_MSG "use of %s denied for " +#define GR_USRCHANGE_ACL_MSG "change to uid %u denied for " +#define GR_GRPCHANGE_ACL_MSG "change to gid %u denied for " +#define GR_REMOUNT_AUDIT_MSG "remount of %.30s by " +#define GR_UNMOUNT_AUDIT_MSG "unmount of %.30s by " +#define GR_MOUNT_AUDIT_MSG "mount of %.30s to %.64s by " +#define GR_CHDIR_AUDIT_MSG "chdir to %.980s by " +#define GR_EXEC_AUDIT_MSG "exec of %.930s (%.128s) by " +#define GR_MSGQ_AUDIT_MSG "message queue created by " +#define GR_MSGQR_AUDIT_MSG "message queue of uid:%u euid:%u removed 
by " +#define GR_SEM_AUDIT_MSG "semaphore created by " +#define GR_SEMR_AUDIT_MSG "semaphore of uid:%u euid:%u removed by " +#define GR_SHM_AUDIT_MSG "shared memory of size %d created by " +#define GR_SHMR_AUDIT_MSG "shared memory of uid:%u euid:%u removed by " +#define GR_RESOURCE_MSG "denied resource overstep by requesting %lu for %.16s against limit %lu for " +#define GR_TEXTREL_AUDIT_MSG "text relocation in %s, VMA:0x%08lx 0x%08lx by " diff -urNp linux-2.6.17.11/include/linux/grsecurity.h linux-2.6.17.11/include/linux/grsecurity.h --- linux-2.6.17.11/include/linux/grsecurity.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/include/linux/grsecurity.h 2006-09-01 16:20:29.000000000 -0400 @@ -0,0 +1,196 @@ +#ifndef GR_SECURITY_H +#define GR_SECURITY_H +#include +#include +#include + +extern void gr_handle_brute_attach(struct task_struct *p); +extern void gr_handle_brute_check(void); + +extern char gr_roletype_to_char(void); + +extern int gr_check_user_change(int real, int effective, int fs); +extern int gr_check_group_change(int real, int effective, int fs); + +extern void gr_del_task_from_ip_table(struct task_struct *p); + +extern int gr_pid_is_chrooted(struct task_struct *p); +extern int gr_handle_chroot_nice(void); +extern int gr_handle_chroot_sysctl(const int op); +extern int gr_handle_chroot_setpriority(struct task_struct *p, + const int niceval); +extern int gr_chroot_fchdir(struct dentry *u_dentry, struct vfsmount *u_mnt); +extern int gr_handle_chroot_chroot(const struct dentry *dentry, + const struct vfsmount *mnt); +extern void gr_handle_chroot_caps(struct task_struct *task); +extern void gr_handle_chroot_chdir(struct dentry *dentry, struct vfsmount *mnt); +extern int gr_handle_chroot_chmod(const struct dentry *dentry, + const struct vfsmount *mnt, const int mode); +extern int gr_handle_chroot_mknod(const struct dentry *dentry, + const struct vfsmount *mnt, const int mode); +extern int gr_handle_chroot_mount(const struct dentry *dentry, + const 
struct vfsmount *mnt, + const char *dev_name); +extern int gr_handle_chroot_pivot(void); +extern int gr_handle_chroot_unix(const pid_t pid); + +extern int gr_handle_rawio(const struct inode *inode); +extern int gr_handle_nproc(void); + +extern void gr_handle_ioperm(void); +extern void gr_handle_iopl(void); + +extern int gr_tpe_allow(const struct file *file); + +extern int gr_random_pid(void); + +extern void gr_log_forkfail(const int retval); +extern void gr_log_timechange(void); +extern void gr_log_signal(const int sig, const struct task_struct *t); +extern void gr_log_chdir(const struct dentry *dentry, + const struct vfsmount *mnt); +extern void gr_log_chroot_exec(const struct dentry *dentry, + const struct vfsmount *mnt); +extern void gr_handle_exec_args(struct linux_binprm *bprm, char **argv); +extern void gr_log_remount(const char *devname, const int retval); +extern void gr_log_unmount(const char *devname, const int retval); +extern void gr_log_mount(const char *from, const char *to, const int retval); +extern void gr_log_msgget(const int ret, const int msgflg); +extern void gr_log_msgrm(const uid_t uid, const uid_t cuid); +extern void gr_log_semget(const int err, const int semflg); +extern void gr_log_semrm(const uid_t uid, const uid_t cuid); +extern void gr_log_shmget(const int err, const int shmflg, const size_t size); +extern void gr_log_shmrm(const uid_t uid, const uid_t cuid); +extern void gr_log_textrel(struct vm_area_struct *vma); + +extern int gr_handle_follow_link(const struct inode *parent, + const struct inode *inode, + const struct dentry *dentry, + const struct vfsmount *mnt); +extern int gr_handle_fifo(const struct dentry *dentry, + const struct vfsmount *mnt, + const struct dentry *dir, const int flag, + const int acc_mode); +extern int gr_handle_hardlink(const struct dentry *dentry, + const struct vfsmount *mnt, + struct inode *inode, + const int mode, const char *to); + +extern int gr_task_is_capable(struct task_struct *task, const int cap); 
+extern int gr_is_capable_nolog(const int cap); +extern void gr_learn_resource(const struct task_struct *task, const int limit, + const unsigned long wanted, const int gt); +extern void gr_copy_label(struct task_struct *tsk); +extern void gr_handle_crash(struct task_struct *task, const int sig); +extern int gr_handle_signal(const struct task_struct *p, const int sig); +extern int gr_check_crash_uid(const uid_t uid); +extern int gr_check_protected_task(const struct task_struct *task); +extern int gr_acl_handle_mmap(const struct file *file, + const unsigned long prot); +extern int gr_acl_handle_mprotect(const struct file *file, + const unsigned long prot); +extern int gr_check_hidden_task(const struct task_struct *tsk); +extern __u32 gr_acl_handle_truncate(const struct dentry *dentry, + const struct vfsmount *mnt); +extern __u32 gr_acl_handle_utime(const struct dentry *dentry, + const struct vfsmount *mnt); +extern __u32 gr_acl_handle_access(const struct dentry *dentry, + const struct vfsmount *mnt, const int fmode); +extern __u32 gr_acl_handle_fchmod(const struct dentry *dentry, + const struct vfsmount *mnt, mode_t mode); +extern __u32 gr_acl_handle_chmod(const struct dentry *dentry, + const struct vfsmount *mnt, mode_t mode); +extern __u32 gr_acl_handle_chown(const struct dentry *dentry, + const struct vfsmount *mnt); +extern int gr_handle_ptrace(struct task_struct *task, const long request); +extern int gr_handle_proc_ptrace(struct task_struct *task); +extern __u32 gr_acl_handle_execve(const struct dentry *dentry, + const struct vfsmount *mnt); +extern int gr_check_crash_exec(const struct file *filp); +extern int gr_acl_is_enabled(void); +extern void gr_set_kernel_label(struct task_struct *task); +extern void gr_set_role_label(struct task_struct *task, const uid_t uid, + const gid_t gid); +extern int gr_set_proc_label(const struct dentry *dentry, + const struct vfsmount *mnt); +extern __u32 gr_acl_handle_hidden_file(const struct dentry *dentry, + const struct 
vfsmount *mnt); +extern __u32 gr_acl_handle_open(const struct dentry *dentry, + const struct vfsmount *mnt, const int fmode); +extern __u32 gr_acl_handle_creat(const struct dentry *dentry, + const struct dentry *p_dentry, + const struct vfsmount *p_mnt, const int fmode, + const int imode); +extern void gr_handle_create(const struct dentry *dentry, + const struct vfsmount *mnt); +extern __u32 gr_acl_handle_mknod(const struct dentry *new_dentry, + const struct dentry *parent_dentry, + const struct vfsmount *parent_mnt, + const int mode); +extern __u32 gr_acl_handle_mkdir(const struct dentry *new_dentry, + const struct dentry *parent_dentry, + const struct vfsmount *parent_mnt); +extern __u32 gr_acl_handle_rmdir(const struct dentry *dentry, + const struct vfsmount *mnt); +extern void gr_handle_delete(const ino_t ino, const dev_t dev); +extern __u32 gr_acl_handle_unlink(const struct dentry *dentry, + const struct vfsmount *mnt); +extern __u32 gr_acl_handle_symlink(const struct dentry *new_dentry, + const struct dentry *parent_dentry, + const struct vfsmount *parent_mnt, + const char *from); +extern __u32 gr_acl_handle_link(const struct dentry *new_dentry, + const struct dentry *parent_dentry, + const struct vfsmount *parent_mnt, + const struct dentry *old_dentry, + const struct vfsmount *old_mnt, const char *to); +extern int gr_acl_handle_rename(struct dentry *new_dentry, + struct dentry *parent_dentry, + const struct vfsmount *parent_mnt, + struct dentry *old_dentry, + struct inode *old_parent_inode, + struct vfsmount *old_mnt, const char *newname); +extern void gr_handle_rename(struct inode *old_dir, struct inode *new_dir, + struct dentry *old_dentry, + struct dentry *new_dentry, + struct vfsmount *mnt, const __u8 replace); +extern __u32 gr_check_link(const struct dentry *new_dentry, + const struct dentry *parent_dentry, + const struct vfsmount *parent_mnt, + const struct dentry *old_dentry, + const struct vfsmount *old_mnt); +extern int gr_acl_handle_filldir(const 
struct file *file, const char *name, + const unsigned int namelen, const ino_t ino); + +extern __u32 gr_acl_handle_unix(const struct dentry *dentry, + const struct vfsmount *mnt); +extern void gr_acl_handle_exit(void); +extern void gr_acl_handle_psacct(struct task_struct *task, const long code); +extern int gr_acl_handle_procpidmem(const struct task_struct *task); +extern __u32 gr_cap_rtnetlink(void); + +#ifdef CONFIG_SYSVIPC +extern void gr_shm_exit(struct task_struct *task); +#else +static inline void gr_shm_exit(struct task_struct *task) +{ + return; +} +#endif + +#ifdef CONFIG_GRKERNSEC +extern void gr_handle_mem_write(void); +extern void gr_handle_kmem_write(void); +extern void gr_handle_open_port(void); +extern int gr_handle_mem_mmap(const unsigned long offset, + struct vm_area_struct *vma); + +extern unsigned long pax_get_random_long(void); +#define get_random_long() pax_get_random_long() + +extern int grsec_enable_dmesg; +extern int grsec_enable_randsrc; +extern int grsec_enable_shm; +#endif + +#endif diff -urNp linux-2.6.17.11/include/linux/highmem.h linux-2.6.17.11/include/linux/highmem.h --- linux-2.6.17.11/include/linux/highmem.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/linux/highmem.h 2006-09-01 16:20:29.000000000 -0400 @@ -70,9 +70,9 @@ alloc_zeroed_user_highpage(struct vm_are static inline void clear_highpage(struct page *page) { - void *kaddr = kmap_atomic(page, KM_USER0); + void *kaddr = kmap_atomic(page, KM_CLEARPAGE); clear_page(kaddr); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr, KM_CLEARPAGE); } /* diff -urNp linux-2.6.17.11/include/linux/mman.h linux-2.6.17.11/include/linux/mman.h --- linux-2.6.17.11/include/linux/mman.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/linux/mman.h 2006-09-01 16:20:29.000000000 -0400 @@ -59,6 +59,11 @@ static inline unsigned long calc_vm_flag_bits(unsigned long flags) { return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | + +#ifdef CONFIG_PAX_SEGMEXEC + 
_calc_vm_trans(flags, MAP_MIRROR, VM_MIRROR) | +#endif + _calc_vm_trans(flags, MAP_DENYWRITE, VM_DENYWRITE ) | _calc_vm_trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE) | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ); diff -urNp linux-2.6.17.11/include/linux/mm.h linux-2.6.17.11/include/linux/mm.h --- linux-2.6.17.11/include/linux/mm.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/linux/mm.h 2006-09-01 16:20:29.000000000 -0400 @@ -38,6 +38,7 @@ extern int sysctl_legacy_va_layout; #include #include #include +#include #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) @@ -111,8 +112,43 @@ struct vm_area_struct { #ifdef CONFIG_NUMA struct mempolicy *vm_policy; /* NUMA policy for the VMA */ #endif + + unsigned long vm_mirror; /* PaX: mirror distance */ }; +#ifdef CONFIG_PAX_SOFTMODE +extern unsigned int pax_softmode; +#endif + +extern int pax_check_flags(unsigned long *); + +/* if tsk != current then task_lock must be held on it */ +#if defined(CONFIG_PAX_NOEXEC) || defined(CONFIG_PAX_ASLR) +static inline unsigned long pax_get_flags(struct task_struct *tsk) +{ + if (likely(tsk->mm)) + return tsk->mm->pax_flags; + else + return 0UL; +} + +/* if tsk != current then task_lock must be held on it */ +static inline long pax_set_flags(struct task_struct *tsk, unsigned long flags) +{ + if (likely(tsk->mm)) { + tsk->mm->pax_flags = flags; + return 0; + } + return -EINVAL; +} +#endif + +#ifdef CONFIG_PAX_HAVE_ACL_FLAGS +extern void pax_set_initial_flags(struct linux_binprm * bprm); +#elif defined(CONFIG_PAX_HOOK_ACL_FLAGS) +extern void (*pax_set_initial_flags_func)(struct linux_binprm * bprm); +#endif + /* * This struct defines the per-mm list of VMAs for uClinux. 
If CONFIG_MMU is * disabled, then there's a single shared list of VMAs maintained by the @@ -167,6 +203,18 @@ extern unsigned int kobjsize(const void #define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */ #define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ +#ifdef CONFIG_PAX_SEGMEXEC +#define VM_MIRROR 0x04000000 /* vma is mirroring another */ +#endif + +#ifdef CONFIG_PAX_MPROTECT +#define VM_MAYNOTWRITE 0x08000000 /* vma cannot be granted VM_WRITE any more */ +#endif + +#ifdef __VM_STACK_FLAGS +#define VM_STACK_DEFAULT_FLAGS (0x00000033 | __VM_STACK_FLAGS) +#endif + #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS #endif @@ -1058,5 +1106,11 @@ void drop_slab(void); extern int randomize_va_space; #endif +#ifdef CONFIG_ARCH_TRACK_EXEC_LIMIT +extern void track_exec_limit(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long prot); +#else +static inline void track_exec_limit(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long prot) {} +#endif + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff -urNp linux-2.6.17.11/include/linux/module.h linux-2.6.17.11/include/linux/module.h --- linux-2.6.17.11/include/linux/module.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/linux/module.h 2006-09-01 16:20:29.000000000 -0400 @@ -273,16 +273,16 @@ struct module int (*init)(void); /* If this is non-NULL, vfree after init() returns */ - void *module_init; + void *module_init_rx, *module_init_rw; /* Here is the actual code + data, vfree'd on unload. */ - void *module_core; + void *module_core_rx, *module_core_rw; /* Here are the sizes of the init and core sections */ - unsigned long init_size, core_size; + unsigned long init_size_rw, core_size_rw; /* The size of the executable code in each section. 
*/ - unsigned long init_text_size, core_text_size; + unsigned long init_size_rx, core_size_rx; /* Arch-specific module values */ struct mod_arch_specific arch; diff -urNp linux-2.6.17.11/include/linux/moduleloader.h linux-2.6.17.11/include/linux/moduleloader.h --- linux-2.6.17.11/include/linux/moduleloader.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/linux/moduleloader.h 2006-09-01 16:20:29.000000000 -0400 @@ -17,9 +17,21 @@ int module_frob_arch_sections(Elf_Ehdr * sections. Returns NULL on failure. */ void *module_alloc(unsigned long size); +#ifdef CONFIG_PAX_KERNEXEC +void *module_alloc_exec(unsigned long size); +#else +#define module_alloc_exec(x) module_alloc(x) +#endif + /* Free memory returned from module_alloc. */ void module_free(struct module *mod, void *module_region); +#ifdef CONFIG_PAX_KERNEXEC +void module_free_exec(struct module *mod, void *module_region); +#else +#define module_free_exec(x, y) module_free(x, y) +#endif + /* Apply the given relocation to the (simplified) ELF. Return -error or 0. 
*/ int apply_relocate(Elf_Shdr *sechdrs, diff -urNp linux-2.6.17.11/include/linux/random.h linux-2.6.17.11/include/linux/random.h --- linux-2.6.17.11/include/linux/random.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/linux/random.h 2006-09-01 16:20:29.000000000 -0400 @@ -62,6 +62,8 @@ extern __u32 secure_tcpv6_sequence_numbe extern u64 secure_dccp_sequence_number(__u32 saddr, __u32 daddr, __u16 sport, __u16 dport); +extern unsigned long pax_get_random_long(void); + #ifndef MODULE extern struct file_operations random_fops, urandom_fops; #endif diff -urNp linux-2.6.17.11/include/linux/sched.h linux-2.6.17.11/include/linux/sched.h --- linux-2.6.17.11/include/linux/sched.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/linux/sched.h 2006-09-01 16:20:29.000000000 -0400 @@ -41,6 +41,7 @@ #include /* For AT_VECTOR_SIZE */ struct exec_domain; +struct linux_binprm; /* * cloning flags: @@ -357,8 +358,34 @@ struct mm_struct { /* aio bits */ rwlock_t ioctx_list_lock; struct kioctx *ioctx_list; + +#if defined(CONFIG_PAX_NOEXEC) || defined(CONFIG_PAX_ASLR) + unsigned long pax_flags; +#endif + +#ifdef CONFIG_PAX_DLRESOLVE + unsigned long call_dl_resolve; +#endif + +#if defined(CONFIG_PPC32) && defined(CONFIG_PAX_EMUSIGRT) + unsigned long call_syscall; +#endif + +#ifdef CONFIG_PAX_ASLR + unsigned long delta_mmap; /* randomized offset */ + unsigned long delta_exec; /* randomized offset */ + unsigned long delta_stack; /* randomized offset */ +#endif + }; +#define MF_PAX_PAGEEXEC 0x01000000 /* Paging based non-executable pages */ +#define MF_PAX_EMUTRAMP 0x02000000 /* Emulate trampolines */ +#define MF_PAX_MPROTECT 0x04000000 /* Restrict mprotect() */ +#define MF_PAX_RANDMMAP 0x08000000 /* Randomize mmap() base */ +/*#define MF_PAX_RANDEXEC 0x10000000*/ /* Randomize ET_EXEC base */ +#define MF_PAX_SEGMEXEC 0x20000000 /* Segmentation based non-executable pages */ + struct sighand_struct { atomic_t count; struct k_sigaction action[_NSIG]; @@ -456,6 
+483,15 @@ struct signal_struct { struct key *session_keyring; /* keyring inherited over fork */ struct key *process_keyring; /* keyring private to this process */ #endif + +#ifdef CONFIG_GRKERNSEC + u32 curr_ip; + u32 gr_saddr; + u32 gr_daddr; + u16 gr_sport; + u16 gr_dport; + u8 used_accept:1; +#endif }; /* Context switch must be unlocked if interrupts are to be enabled */ @@ -898,6 +934,17 @@ struct task_struct { struct compat_robust_list_head __user *compat_robust_list; #endif +#ifdef CONFIG_GRKERNSEC + /* grsecurity */ + struct acl_subject_label *acl; + struct acl_role_label *role; + struct file *exec_file; + u16 acl_role_id; + u8 acl_sp_role:1; + u8 is_writable:1; + u8 brute:1; +#endif + atomic_t fs_excl; /* holding fs exclusive resources */ struct rcu_head rcu; @@ -1416,6 +1463,12 @@ extern void arch_pick_mmap_layout(struct static inline void arch_pick_mmap_layout(struct mm_struct *mm) { mm->mmap_base = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + mm->get_unmapped_area = arch_get_unmapped_area; mm->unmap_area = arch_unmap_area; } diff -urNp linux-2.6.17.11/include/linux/shm.h linux-2.6.17.11/include/linux/shm.h --- linux-2.6.17.11/include/linux/shm.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/linux/shm.h 2006-09-01 16:20:29.000000000 -0400 @@ -86,6 +86,10 @@ struct shmid_kernel /* private to the ke pid_t shm_cprid; pid_t shm_lprid; struct user_struct *mlock_user; +#ifdef CONFIG_GRKERNSEC + time_t shm_createtime; + pid_t shm_lapid; +#endif }; /* shm_mode upper byte flags */ diff -urNp linux-2.6.17.11/include/linux/sysctl.h linux-2.6.17.11/include/linux/sysctl.h --- linux-2.6.17.11/include/linux/sysctl.h 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/include/linux/sysctl.h 2006-09-01 16:20:29.000000000 -0400 @@ -78,9 +78,22 @@ INOTIFY_MAX_QUEUED_EVENTS=3 /* max queued events per instance */ }; +#ifdef CONFIG_PAX_SOFTMODE +enum { + 
PAX_SOFTMODE=1 /* PaX: disable/enable soft mode */ +}; +#endif + /* CTL_KERN names: */ enum { +#ifdef CONFIG_GRKERNSEC + KERN_GRSECURITY=98, /* grsecurity */ +#endif + +#ifdef CONFIG_PAX_SOFTMODE + KERN_PAX=99, /* PaX control */ +#endif KERN_OSTYPE=1, /* string: system version */ KERN_OSRELEASE=2, /* string: system release */ KERN_OSREV=3, /* int: system revision */ diff -urNp linux-2.6.17.11/init/Kconfig linux-2.6.17.11/init/Kconfig --- linux-2.6.17.11/init/Kconfig 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/init/Kconfig 2006-09-01 16:20:29.000000000 -0400 @@ -268,6 +268,7 @@ menuconfig EMBEDDED config KALLSYMS bool "Load all symbols for debugging/kksymoops" if EMBEDDED default y + depends on !GRKERNSEC_HIDESYM help Say Y here to let the kernel print out symbolic crash information and symbolic stack backtraces. This increases the size of the kernel diff -urNp linux-2.6.17.11/init/main.c linux-2.6.17.11/init/main.c --- linux-2.6.17.11/init/main.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/init/main.c 2006-09-01 16:20:29.000000000 -0400 @@ -100,6 +100,7 @@ static inline void mark_rodata_ro(void) #ifdef CONFIG_TC extern void tc_init(void); #endif +extern void grsecurity_init(void); enum system_states system_state; EXPORT_SYMBOL(system_state); @@ -150,6 +151,15 @@ static int __init maxcpus(char *str) __setup("maxcpus=", maxcpus); +#ifdef CONFIG_PAX_SOFTMODE +static int __init setup_pax_softmode(char *str) +{ + get_option(&str, &pax_softmode); + return 1; +} +__setup("pax_softmode=", setup_pax_softmode); +#endif + static char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; static const char *panic_later, *panic_param; @@ -694,6 +704,8 @@ static int init(void * unused) prepare_namespace(); } + grsecurity_init(); + /* * Ok, we have completed the initial bootup, and * we're essentially up and running. 
Get rid of the diff -urNp linux-2.6.17.11/ipc/msg.c linux-2.6.17.11/ipc/msg.c --- linux-2.6.17.11/ipc/msg.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/ipc/msg.c 2006-09-01 16:20:29.000000000 -0400 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -238,6 +239,9 @@ asmlinkage long sys_msgget (key_t key, i msg_unlock(msq); } mutex_unlock(&msg_ids.mutex); + + gr_log_msgget(ret, msgflg); + return ret; } @@ -496,6 +500,8 @@ asmlinkage long sys_msgctl (int msqid, i break; } case IPC_RMID: + gr_log_msgrm(ipcp->uid, ipcp->cuid); + freeque (msq, msqid); break; } diff -urNp linux-2.6.17.11/ipc/sem.c linux-2.6.17.11/ipc/sem.c --- linux-2.6.17.11/ipc/sem.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/ipc/sem.c 2006-09-01 16:20:29.000000000 -0400 @@ -80,6 +80,7 @@ #include #include #include +#include #include #include "util.h" @@ -247,6 +248,9 @@ asmlinkage long sys_semget (key_t key, i } mutex_unlock(&sem_ids.mutex); + + gr_log_semget(err, semflg); + return err; } @@ -840,6 +844,8 @@ static int semctl_down(int semid, int se switch(cmd){ case IPC_RMID: + gr_log_semrm(ipcp->uid, ipcp->cuid); + freeary(sma, semid); err = 0; break; diff -urNp linux-2.6.17.11/ipc/shm.c linux-2.6.17.11/ipc/shm.c --- linux-2.6.17.11/ipc/shm.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/ipc/shm.c 2006-09-01 16:20:29.000000000 -0400 @@ -33,6 +33,7 @@ #include #include #include +#include #include @@ -56,6 +57,14 @@ static void shm_close (struct vm_area_st static int sysvipc_shm_proc_show(struct seq_file *s, void *it); #endif +#ifdef CONFIG_GRKERNSEC +extern int gr_handle_shmat(const pid_t shm_cprid, const pid_t shm_lapid, + const time_t shm_createtime, const uid_t cuid, + const int shmid); +extern int gr_chroot_shmat(const pid_t shm_cprid, const pid_t shm_lapid, + const time_t shm_createtime); +#endif + size_t shm_ctlmax = SHMMAX; size_t shm_ctlall = SHMALL; int shm_ctlmni = SHMMNI; @@ -149,6 +158,17 @@ static void shm_close (struct vm_area_st 
shp->shm_lprid = current->tgid; shp->shm_dtim = get_seconds(); shp->shm_nattch--; +#ifdef CONFIG_GRKERNSEC_SHM + if (grsec_enable_shm) { + if (shp->shm_nattch == 0) { + shp->shm_perm.mode |= SHM_DEST; + shm_destroy(shp); + } else + shm_unlock(shp); + mutex_unlock(&shm_ids.mutex); + return; + } +#endif if(shp->shm_nattch == 0 && shp->shm_perm.mode & SHM_DEST) shm_destroy (shp); @@ -248,6 +268,9 @@ static int newseg (key_t key, int shmflg shp->shm_lprid = 0; shp->shm_atim = shp->shm_dtim = 0; shp->shm_ctim = get_seconds(); +#ifdef CONFIG_GRKERNSEC + shp->shm_createtime = get_seconds(); +#endif shp->shm_segsz = size; shp->shm_nattch = 0; shp->id = shm_buildid(id,shp->shm_perm.seq); @@ -302,6 +325,8 @@ asmlinkage long sys_shmget (key_t key, s } mutex_unlock(&shm_ids.mutex); + gr_log_shmget(err, shmflg, size); + return err; } @@ -615,6 +640,8 @@ asmlinkage long sys_shmctl (int shmid, i if (err) goto out_unlock_up; + gr_log_shmrm(shp->shm_perm.uid, shp->shm_perm.cuid); + if (shp->shm_nattch){ shp->shm_perm.mode |= SHM_DEST; /* Do not find it any more */ @@ -763,9 +790,27 @@ long do_shmat(int shmid, char __user *sh return err; } +#ifdef CONFIG_GRKERNSEC + if (!gr_handle_shmat(shp->shm_cprid, shp->shm_lapid, shp->shm_createtime, + shp->shm_perm.cuid, shmid)) { + shm_unlock(shp); + return -EACCES; + } + + if (!gr_chroot_shmat(shp->shm_cprid, shp->shm_lapid, shp->shm_createtime)) { + shm_unlock(shp); + return -EACCES; + } +#endif + file = shp->shm_file; size = i_size_read(file->f_dentry->d_inode); shp->shm_nattch++; + +#ifdef CONFIG_GRKERNSEC + shp->shm_lapid = current->pid; +#endif + shm_unlock(shp); down_write(¤t->mm->mmap_sem); @@ -935,3 +980,24 @@ static int sysvipc_shm_proc_show(struct shp->shm_ctim); } #endif + +void gr_shm_exit(struct task_struct *task) +{ +#ifdef CONFIG_GRKERNSEC_SHM + int i; + struct shmid_kernel *shp; + + if (!grsec_enable_shm) + return; + + for (i = 0; i <= shm_ids.max_id; i++) { + shp = shm_get(i); + if (shp && (shp->shm_cprid == task->pid) && + 
(shp->shm_nattch <= 0)) { + shp->shm_perm.mode |= SHM_DEST; + shm_destroy(shp); + } + } +#endif + return; +} diff -urNp linux-2.6.17.11/kernel/capability.c linux-2.6.17.11/kernel/capability.c --- linux-2.6.17.11/kernel/capability.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/kernel/capability.c 2006-09-01 16:20:29.000000000 -0400 @@ -13,6 +13,7 @@ #include #include #include +#include #include unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */ @@ -235,14 +236,25 @@ out: return ret; } +extern int gr_task_is_capable(struct task_struct *task, const int cap); +extern int gr_is_capable_nolog(const int cap); + int __capable(struct task_struct *t, int cap) { - if (security_capable(t, cap) == 0) { + if ((security_capable(t, cap) == 0) && gr_task_is_capable(t, cap)) { t->flags |= PF_SUPERPRIV; return 1; } return 0; } +int capable_nolog(int cap) +{ + if ((security_capable(current, cap) == 0) && gr_is_capable_nolog(cap)) { + current->flags |= PF_SUPERPRIV; + return 1; + } + return 0; +} EXPORT_SYMBOL(__capable); int capable(int cap) @@ -253,3 +265,4 @@ int capable(int cap) return __capable(current, cap); } EXPORT_SYMBOL(capable); +EXPORT_SYMBOL(capable_nolog); diff -urNp linux-2.6.17.11/kernel/configs.c linux-2.6.17.11/kernel/configs.c --- linux-2.6.17.11/kernel/configs.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/kernel/configs.c 2006-09-01 16:20:29.000000000 -0400 @@ -89,8 +89,16 @@ static int __init ikconfig_init(void) struct proc_dir_entry *entry; /* create the current config file */ +#ifdef CONFIG_GRKERNSEC_PROC_ADD +#ifdef CONFIG_GRKERNSEC_PROC_USER + entry = create_proc_entry("config.gz", S_IFREG | S_IRUSR, &proc_root); +#elif CONFIG_GRKERNSEC_PROC_USERGROUP + entry = create_proc_entry("config.gz", S_IFREG | S_IRUSR | S_IRGRP, &proc_root); +#endif +#else entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO, &proc_root); +#endif if (!entry) return -ENOMEM; diff -urNp linux-2.6.17.11/kernel/exit.c 
linux-2.6.17.11/kernel/exit.c --- linux-2.6.17.11/kernel/exit.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/kernel/exit.c 2006-09-01 16:20:29.000000000 -0400 @@ -40,6 +40,11 @@ #include #include #include +#include + +#ifdef CONFIG_GRKERNSEC +extern rwlock_t grsec_exec_file_lock; +#endif #include #include @@ -119,6 +124,7 @@ static void __exit_signal(struct task_st __unhash_process(tsk); + gr_del_task_from_ip_table(tsk); tsk->signal = NULL; tsk->sighand = NULL; spin_unlock(&sighand->siglock); @@ -291,6 +297,15 @@ static void reparent_to_init(void) { write_lock_irq(&tasklist_lock); +#ifdef CONFIG_GRKERNSEC + write_lock(&grsec_exec_file_lock); + if (current->exec_file) { + fput(current->exec_file); + current->exec_file = NULL; + } + write_unlock(&grsec_exec_file_lock); +#endif + ptrace_unlink(current); /* Reparent to init */ remove_parent(current); @@ -298,6 +313,8 @@ static void reparent_to_init(void) current->real_parent = child_reaper; add_parent(current); + gr_set_kernel_label(current); + /* Set the exit signal to SIGCHLD so we signal init on exit */ current->exit_signal = SIGCHLD; @@ -394,6 +411,17 @@ void daemonize(const char *name, ...) vsnprintf(current->comm, sizeof(current->comm), name, args); va_end(args); +#ifdef CONFIG_GRKERNSEC + write_lock(&grsec_exec_file_lock); + if (current->exec_file) { + fput(current->exec_file); + current->exec_file = NULL; + } + write_unlock(&grsec_exec_file_lock); +#endif + + gr_set_kernel_label(current); + /* * If we were started as result of loading a module, close all of the * user space pages. 
We don't need them, and if we didn't close them @@ -915,9 +943,14 @@ fastcall NORET_TYPE void do_exit(long co #endif if (unlikely(tsk->audit_context)) audit_free(tsk); + + gr_acl_handle_psacct(tsk, code); + gr_acl_handle_exit(); + exit_mm(tsk); exit_sem(tsk); + gr_shm_exit(tsk); __exit_files(tsk); __exit_fs(tsk); exit_namespace(tsk); diff -urNp linux-2.6.17.11/kernel/fork.c linux-2.6.17.11/kernel/fork.c --- linux-2.6.17.11/kernel/fork.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/kernel/fork.c 2006-09-01 16:20:29.000000000 -0400 @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -204,8 +205,8 @@ static inline int dup_mmap(struct mm_str mm->locked_vm = 0; mm->mmap = NULL; mm->mmap_cache = NULL; - mm->free_area_cache = oldmm->mmap_base; - mm->cached_hole_size = ~0UL; + mm->free_area_cache = oldmm->free_area_cache; + mm->cached_hole_size = oldmm->cached_hole_size; mm->map_count = 0; __set_mm_counter(mm, file_rss, 0); __set_mm_counter(mm, anon_rss, 0); @@ -330,7 +331,7 @@ static struct mm_struct * mm_init(struct spin_lock_init(&mm->page_table_lock); rwlock_init(&mm->ioctx_list_lock); mm->ioctx_list = NULL; - mm->free_area_cache = TASK_UNMAPPED_BASE; + mm->free_area_cache = ~0UL; mm->cached_hole_size = ~0UL; if (likely(!mm_alloc_pgd(mm))) { @@ -988,6 +989,8 @@ static task_t *copy_process(unsigned lon if (!vx_nproc_avail(1)) goto bad_fork_cleanup_vm; + gr_learn_resource(p, RLIMIT_NPROC, atomic_read(&p->user->processes), 0); + if (atomic_read(&p->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && @@ -1098,6 +1101,8 @@ static task_t *copy_process(unsigned lon if (retval) goto bad_fork_cleanup_namespace; + gr_copy_label(p); + p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* * Clear TID on mm_release()? 
@@ -1302,6 +1307,8 @@ bad_fork_cleanup_count: bad_fork_free: free_task(p); fork_out: + gr_log_forkfail(retval); + return ERR_PTR(retval); } @@ -1384,6 +1391,8 @@ long do_fork(unsigned long clone_flags, if (!IS_ERR(p)) { struct completion vfork; + gr_handle_brute_check(); + if (clone_flags & CLONE_VFORK) { p->vfork_done = &vfork; init_completion(&vfork); diff -urNp linux-2.6.17.11/kernel/futex.c linux-2.6.17.11/kernel/futex.c --- linux-2.6.17.11/kernel/futex.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/kernel/futex.c 2006-09-01 16:20:29.000000000 -0400 @@ -152,6 +152,11 @@ static int get_futex_key(unsigned long u struct page *page; int err; +#ifdef CONFIG_PAX_SEGMEXEC + if ((mm->pax_flags & MF_PAX_SEGMEXEC) && (uaddr >= SEGMEXEC_TASK_SIZE)) + return -EFAULT; +#endif + /* * The futex address must be "naturally" aligned. */ diff -urNp linux-2.6.17.11/kernel/kallsyms.c linux-2.6.17.11/kernel/kallsyms.c --- linux-2.6.17.11/kernel/kallsyms.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/kernel/kallsyms.c 2006-09-01 16:20:29.000000000 -0400 @@ -301,7 +301,6 @@ static unsigned long get_ksymbol_core(st static void reset_iter(struct kallsym_iter *iter, loff_t new_pos) { - iter->name[0] = '\0'; iter->nameoff = get_symbol_offset(new_pos); iter->pos = new_pos; } @@ -380,7 +379,7 @@ static int kallsyms_open(struct inode *i struct kallsym_iter *iter; int ret; - iter = kmalloc(sizeof(*iter), GFP_KERNEL); + iter = kzalloc(sizeof(*iter), GFP_KERNEL); if (!iter) return -ENOMEM; reset_iter(iter, 0); @@ -411,7 +410,15 @@ static int __init kallsyms_init(void) { struct proc_dir_entry *entry; +#ifdef CONFIG_GRKERNSEC_PROC_ADD +#ifdef CONFIG_GRKERNSEC_PROC_USER + entry = create_proc_entry("kallsyms", S_IFREG | S_IRUSR, NULL); +#elif CONFIG_GRKERNSEC_PROC_USERGROUP + entry = create_proc_entry("kallsyms", S_IFREG | S_IRUSR | S_IRGRP, NULL); +#endif +#else entry = create_proc_entry("kallsyms", 0444, NULL); +#endif if (entry) entry->proc_fops = &kallsyms_operations; 
return 0; diff -urNp linux-2.6.17.11/kernel/kprobes.c linux-2.6.17.11/kernel/kprobes.c --- linux-2.6.17.11/kernel/kprobes.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/kernel/kprobes.c 2006-09-01 16:20:29.000000000 -0400 @@ -106,7 +106,7 @@ kprobe_opcode_t __kprobes *get_insn_slot * kernel image and loaded module images reside. This is required * so x86_64 can correctly handle the %rip-relative fixups. */ - kip->insns = module_alloc(PAGE_SIZE); + kip->insns = module_alloc_exec(PAGE_SIZE); if (!kip->insns) { kfree(kip); return NULL; diff -urNp linux-2.6.17.11/kernel/module.c linux-2.6.17.11/kernel/module.c --- linux-2.6.17.11/kernel/module.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/kernel/module.c 2006-09-01 16:20:29.000000000 -0400 @@ -40,10 +40,15 @@ #include #include #include +#include #include #include #include +#ifdef CONFIG_PAX_KERNEXEC +#include +#endif + #if 0 #define DEBUGP printk #else @@ -66,6 +71,8 @@ static LIST_HEAD(modules); static BLOCKING_NOTIFIER_HEAD(module_notify_list); +extern int gr_check_modstop(void); + int register_module_notifier(struct notifier_block * nb) { return blocking_notifier_chain_register(&module_notify_list, nb); @@ -594,6 +601,9 @@ sys_delete_module(const char __user *nam char name[MODULE_NAME_LEN]; int ret, forced = 0; + if (gr_check_modstop()) + return -EPERM; + if (!capable(CAP_SYS_MODULE)) return -EPERM; @@ -1058,13 +1068,15 @@ static void free_module(struct module *m module_unload_free(mod); /* This may be NULL, but that's OK */ - module_free(mod, mod->module_init); + module_free(mod, mod->module_init_rw); + module_free_exec(mod, mod->module_init_rx); kfree(mod->args); if (mod->percpu) percpu_modfree(mod->percpu); /* Finally, free the core (containing the module structure) */ - module_free(mod, mod->module_core); + module_free_exec(mod, mod->module_core_rx); + module_free(mod, mod->module_core_rw); } void *__symbol_get(const char *symbol) @@ -1221,11 +1233,14 @@ static void layout_sections(struct 
modul || strncmp(secstrings + s->sh_name, ".init", 5) == 0) continue; - s->sh_entsize = get_offset(&mod->core_size, s); + if ((s->sh_flags & SHF_WRITE) || !(s->sh_flags & SHF_ALLOC)) + s->sh_entsize = get_offset(&mod->core_size_rw, s); + else + s->sh_entsize = get_offset(&mod->core_size_rx, s); DEBUGP("\t%s\n", secstrings + s->sh_name); } if (m == 0) - mod->core_text_size = mod->core_size; + mod->core_size_rx = mod->core_size_rx; } DEBUGP("Init section allocation order:\n"); @@ -1239,12 +1254,15 @@ static void layout_sections(struct modul || strncmp(secstrings + s->sh_name, ".init", 5) != 0) continue; - s->sh_entsize = (get_offset(&mod->init_size, s) - | INIT_OFFSET_MASK); + if ((s->sh_flags & SHF_WRITE) || !(s->sh_flags & SHF_ALLOC)) + s->sh_entsize = get_offset(&mod->init_size_rw, s); + else + s->sh_entsize = get_offset(&mod->init_size_rx, s); + s->sh_entsize |= INIT_OFFSET_MASK; DEBUGP("\t%s\n", secstrings + s->sh_name); } if (m == 0) - mod->init_text_size = mod->init_size; + mod->init_size_rx = mod->init_size_rx; } } @@ -1419,6 +1437,10 @@ static struct module *load_module(void _ struct exception_table_entry *extable; mm_segment_t old_fs; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; +#endif + DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", umod, len, uargs); if (len < sizeof(*hdr)) @@ -1568,21 +1590,57 @@ static struct module *load_module(void _ layout_sections(mod, hdr, sechdrs, secstrings); /* Do the allocs. 
*/ - ptr = module_alloc(mod->core_size); + ptr = module_alloc(mod->core_size_rw); if (!ptr) { err = -ENOMEM; goto free_percpu; } - memset(ptr, 0, mod->core_size); - mod->module_core = ptr; + memset(ptr, 0, mod->core_size_rw); + mod->module_core_rw = ptr; - ptr = module_alloc(mod->init_size); - if (!ptr && mod->init_size) { + ptr = module_alloc(mod->init_size_rw); + if (!ptr && mod->init_size_rw) { err = -ENOMEM; - goto free_core; + goto free_core_rw; } - memset(ptr, 0, mod->init_size); - mod->module_init = ptr; + memset(ptr, 0, mod->init_size_rw); + mod->module_init_rw = ptr; + + ptr = module_alloc_exec(mod->core_size_rx); + if (!ptr) { + err = -ENOMEM; + goto free_init_rw; + } + +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(cr0); +#endif + + memset(ptr, 0, mod->core_size_rx); + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + + mod->module_core_rx = ptr; + + ptr = module_alloc_exec(mod->init_size_rx); + if (!ptr && mod->init_size_rx) { + err = -ENOMEM; + goto free_core_rx; + } + +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(cr0); +#endif + + memset(ptr, 0, mod->init_size_rx); + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + + mod->module_init_rx = ptr; /* Transfer each section which specifies SHF_ALLOC */ DEBUGP("final section addresses:\n"); @@ -1592,17 +1650,44 @@ static struct module *load_module(void _ if (!(sechdrs[i].sh_flags & SHF_ALLOC)) continue; - if (sechdrs[i].sh_entsize & INIT_OFFSET_MASK) - dest = mod->module_init - + (sechdrs[i].sh_entsize & ~INIT_OFFSET_MASK); - else - dest = mod->module_core + sechdrs[i].sh_entsize; + if (sechdrs[i].sh_entsize & INIT_OFFSET_MASK) { + if ((sechdrs[i].sh_flags & SHF_WRITE) || !(sechdrs[i].sh_flags & SHF_ALLOC)) + dest = mod->module_init_rw + + (sechdrs[i].sh_entsize & ~INIT_OFFSET_MASK); + else + dest = mod->module_init_rx + + (sechdrs[i].sh_entsize & ~INIT_OFFSET_MASK); + } else { + if ((sechdrs[i].sh_flags & SHF_WRITE) || !(sechdrs[i].sh_flags & SHF_ALLOC)) + dest = 
mod->module_core_rw + sechdrs[i].sh_entsize; + else + dest = mod->module_core_rx + sechdrs[i].sh_entsize; + } + + if (sechdrs[i].sh_type != SHT_NOBITS) { - if (sechdrs[i].sh_type != SHT_NOBITS) - memcpy(dest, (void *)sechdrs[i].sh_addr, - sechdrs[i].sh_size); +#ifdef CONFIG_PAX_KERNEXEC + if (!(sechdrs[i].sh_flags & SHF_WRITE) && (sechdrs[i].sh_flags & SHF_ALLOC)) + pax_open_kernel(cr0); +#endif + + memcpy(dest, (void *)sechdrs[i].sh_addr, sechdrs[i].sh_size); + +#ifdef CONFIG_PAX_KERNEXEC + if (!(sechdrs[i].sh_flags & SHF_WRITE) && (sechdrs[i].sh_flags & SHF_ALLOC)) + pax_close_kernel(cr0); +#endif + + } /* Update sh_addr to point to copy in image. */ - sechdrs[i].sh_addr = (unsigned long)dest; + +#ifdef CONFIG_PAX_KERNEXEC + if (sechdrs[i].sh_flags & SHF_EXECINSTR) + sechdrs[i].sh_addr = (unsigned long)dest - __KERNEL_TEXT_OFFSET; + else +#endif + + sechdrs[i].sh_addr = (unsigned long)dest; DEBUGP("\t0x%lx %s\n", sechdrs[i].sh_addr, secstrings + sechdrs[i].sh_name); } /* Module has been moved. */ @@ -1623,8 +1708,18 @@ static struct module *load_module(void _ setup_modinfo(mod, sechdrs, infoindex); /* Fix up syms, so that st_value is a pointer to location. 
*/ + +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(cr0); +#endif + err = simplify_symbols(sechdrs, symindex, strtab, versindex, pcpuindex, mod); + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + if (err < 0) goto cleanup; @@ -1666,11 +1761,20 @@ static struct module *load_module(void _ if (!(sechdrs[info].sh_flags & SHF_ALLOC)) continue; +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(cr0); +#endif + if (sechdrs[i].sh_type == SHT_REL) err = apply_relocate(sechdrs, strtab, symindex, i,mod); else if (sechdrs[i].sh_type == SHT_RELA) err = apply_relocate_add(sechdrs, strtab, symindex, i, mod); + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + if (err < 0) goto cleanup; } @@ -1684,14 +1788,31 @@ static struct module *load_module(void _ /* Set up and sort exception table */ mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable); mod->extable = extable = (void *)sechdrs[exindex].sh_addr; + +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(cr0); +#endif + sort_extable(extable, extable + mod->num_exentries); +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + /* Finally, copy percpu area over. */ percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, sechdrs[pcpuindex].sh_size); +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(cr0); +#endif + add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + err = module_finalize(hdr, sechdrs, mod); if (err < 0) goto cleanup; @@ -1705,12 +1826,12 @@ static struct module *load_module(void _ * Do it before processing of module parameters, so the module * can provide parameter accessor functions of its own. 
*/ - if (mod->module_init) - flush_icache_range((unsigned long)mod->module_init, - (unsigned long)mod->module_init - + mod->init_size); - flush_icache_range((unsigned long)mod->module_core, - (unsigned long)mod->module_core + mod->core_size); + if (mod->module_init_rx) + flush_icache_range((unsigned long)mod->module_init_rx, + (unsigned long)mod->module_init_rx + + mod->init_size_rx); + flush_icache_range((unsigned long)mod->module_core_rx, + (unsigned long)mod->module_core_rx + mod->core_size_rx); set_fs(old_fs); @@ -1748,9 +1869,13 @@ static struct module *load_module(void _ module_arch_cleanup(mod); cleanup: module_unload_free(mod); - module_free(mod, mod->module_init); - free_core: - module_free(mod, mod->module_core); + module_free_exec(mod, mod->module_init_rx); + free_core_rx: + module_free_exec(mod, mod->module_core_rx); + free_init_rw: + module_free(mod, mod->module_init_rw); + free_core_rw: + module_free(mod, mod->module_core_rw); free_percpu: if (percpu) percpu_modfree(percpu); @@ -1786,6 +1911,9 @@ sys_init_module(void __user *umod, struct module *mod; int ret = 0; + if (gr_check_modstop()) + return -EPERM; + /* Must have permission */ if (!capable(CAP_SYS_MODULE)) return -EPERM; @@ -1836,10 +1964,12 @@ sys_init_module(void __user *umod, mod->state = MODULE_STATE_LIVE; /* Drop initial reference. 
*/ module_put(mod); - module_free(mod, mod->module_init); - mod->module_init = NULL; - mod->init_size = 0; - mod->init_text_size = 0; + module_free(mod, mod->module_init_rw); + module_free_exec(mod, mod->module_init_rx); + mod->module_init_rw = NULL; + mod->module_init_rx = NULL; + mod->init_size_rw = 0; + mod->init_size_rx = 0; mutex_unlock(&module_mutex); return 0; @@ -1870,10 +2000,14 @@ static const char *get_ksymbol(struct mo unsigned long nextval; /* At worse, next value is at end of module */ - if (within(addr, mod->module_init, mod->init_size)) - nextval = (unsigned long)mod->module_init+mod->init_text_size; - else - nextval = (unsigned long)mod->module_core+mod->core_text_size; + if (within(addr, mod->module_init_rx, mod->init_size_rx)) + nextval = (unsigned long)mod->module_init_rw; + else if (within(addr, mod->module_init_rw, mod->init_size_rw)) + nextval = (unsigned long)mod->module_core_rx; + else if (within(addr, mod->module_core_rx, mod->core_size_rx)) + nextval = (unsigned long)mod->module_core_rw; + else + nextval = (unsigned long)mod->module_core_rw+mod->core_size_rw; /* Scan for closest preceeding symbol, and next symbol. (ELF starts real symbols at 1). 
*/ @@ -1914,8 +2048,10 @@ const char *module_address_lookup(unsign struct module *mod; list_for_each_entry(mod, &modules, list) { - if (within(addr, mod->module_init, mod->init_size) - || within(addr, mod->module_core, mod->core_size)) { + if (within(addr, mod->module_init_rx, mod->init_size_rx) + || within(addr, mod->module_init_rw, mod->init_size_rw) + || within(addr, mod->module_core_rx, mod->core_size_rx) + || within(addr, mod->module_core_rw, mod->core_size_rw)) { *modname = mod->name; return get_ksymbol(mod, addr, size, offset); } @@ -1926,7 +2062,7 @@ const char *module_address_lookup(unsign struct module *module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, - char namebuf[128]) + char namebuf[KSYM_NAME_LEN+1]) { struct module *mod; @@ -1937,7 +2073,7 @@ struct module *module_get_kallsym(unsign *type = mod->symtab[symnum].st_info; strncpy(namebuf, mod->strtab + mod->symtab[symnum].st_name, - 127); + KSYM_NAME_LEN); mutex_unlock(&module_mutex); return mod; } @@ -2014,7 +2150,7 @@ static int m_show(struct seq_file *m, vo { struct module *mod = list_entry(p, struct module, list); seq_printf(m, "%s %lu", - mod->name, mod->init_size + mod->core_size); + mod->name, mod->init_size_rx + mod->init_size_rw + mod->core_size_rx + mod->core_size_rw); print_unload_info(m, mod); /* Informative for users. */ @@ -2023,7 +2159,7 @@ static int m_show(struct seq_file *m, vo mod->state == MODULE_STATE_COMING ? "Loading": "Live"); /* Used by oprofile and other similar tools. 
*/ - seq_printf(m, " 0x%p", mod->module_core); + seq_printf(m, " 0x%p 0x%p", mod->module_core_rx, mod->module_core_rw); seq_printf(m, "\n"); return 0; @@ -2072,8 +2208,8 @@ struct module *__module_text_address(uns struct module *mod; list_for_each_entry(mod, &modules, list) - if (within(addr, mod->module_init, mod->init_text_size) - || within(addr, mod->module_core, mod->core_text_size)) + if (within(addr, mod->module_init_rx, mod->init_size_rx) + || within(addr, mod->module_core_rx, mod->core_size_rx)) return mod; return NULL; } diff -urNp linux-2.6.17.11/kernel/pid.c linux-2.6.17.11/kernel/pid.c --- linux-2.6.17.11/kernel/pid.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/kernel/pid.c 2006-09-01 16:20:29.000000000 -0400 @@ -26,6 +26,7 @@ #include #include #include +#include #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift) static struct hlist_head *pid_hash; @@ -90,7 +91,9 @@ static int alloc_pidmap(void) int i, offset, max_scan, pid, last = last_pid; pidmap_t *map; - pid = last + 1; + pid = gr_random_pid(); + if (!pid) + pid = last_pid + 1; if (pid >= pid_max) pid = RESERVED_PIDS; offset = pid & BITS_PER_PAGE_MASK; @@ -269,7 +272,14 @@ struct task_struct * fastcall pid_task(s */ task_t *find_task_by_pid_type(int type, int nr) { - return pid_task(find_pid(nr), type); + task_t *task; + + task = pid_task(find_pid(nr), type); + + if (gr_pid_is_chrooted(task)) + return NULL; + + return task; } EXPORT_SYMBOL(find_task_by_pid_type); diff -urNp linux-2.6.17.11/kernel/posix-cpu-timers.c linux-2.6.17.11/kernel/posix-cpu-timers.c --- linux-2.6.17.11/kernel/posix-cpu-timers.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/kernel/posix-cpu-timers.c 2006-09-01 16:20:29.000000000 -0400 @@ -7,6 +7,7 @@ #include #include #include +#include static int check_clock(const clockid_t which_clock) { @@ -1126,6 +1127,7 @@ static void check_process_timers(struct __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk); return; } + gr_learn_resource(tsk, 
RLIMIT_CPU, psecs, 1); if (psecs >= sig->rlim[RLIMIT_CPU].rlim_cur) { /* * At the soft limit, send a SIGXCPU every second. diff -urNp linux-2.6.17.11/kernel/printk.c linux-2.6.17.11/kernel/printk.c --- linux-2.6.17.11/kernel/printk.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/kernel/printk.c 2006-09-01 16:20:29.000000000 -0400 @@ -33,6 +33,7 @@ #include #include #include +#include #include @@ -187,6 +188,11 @@ int do_syslog(int type, char __user *buf char c; int error; +#ifdef CONFIG_GRKERNSEC_DMESG + if (grsec_enable_dmesg && !capable(CAP_SYS_ADMIN)) + return -EPERM; +#endif + error = security_syslog(type); if (error) return error; diff -urNp linux-2.6.17.11/kernel/ptrace.c linux-2.6.17.11/kernel/ptrace.c --- linux-2.6.17.11/kernel/ptrace.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/kernel/ptrace.c 2006-09-01 16:20:29.000000000 -0400 @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -128,10 +129,10 @@ static int may_attach(struct task_struct (current->uid != task->uid) || (current->gid != task->egid) || (current->gid != task->sgid) || - (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) + (current->gid != task->gid)) && !capable_nolog(CAP_SYS_PTRACE)) return -EPERM; smp_rmb(); - if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) + if (!task->mm->dumpable && !capable_nolog(CAP_SYS_PTRACE)) return -EPERM; if (!vx_check(task->xid, VX_ADMIN_P|VX_IDENT)) return -EPERM; @@ -525,6 +526,11 @@ asmlinkage long sys_ptrace(long request, if (ret < 0) goto out_put_task_struct; + if (gr_handle_ptrace(child, request)) { + ret = -EPERM; + goto out_put_task_struct; + } + ret = arch_ptrace(child, request, addr, data); if (ret < 0) goto out_put_task_struct; diff -urNp linux-2.6.17.11/kernel/resource.c linux-2.6.17.11/kernel/resource.c --- linux-2.6.17.11/kernel/resource.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/kernel/resource.c 2006-09-01 16:20:29.000000000 -0400 @@ -136,10 +136,27 @@ static int __init 
ioresources_init(void) { struct proc_dir_entry *entry; +#ifdef CONFIG_GRKERNSEC_PROC_ADD +#ifdef CONFIG_GRKERNSEC_PROC_USER + entry = create_proc_entry("ioports", S_IRUSR, NULL); +#elif CONFIG_GRKERNSEC_PROC_USERGROUP + entry = create_proc_entry("ioports", S_IRUSR | S_IRGRP, NULL); +#endif +#else entry = create_proc_entry("ioports", 0, NULL); +#endif if (entry) entry->proc_fops = &proc_ioports_operations; + +#ifdef CONFIG_GRKERNSEC_PROC_ADD +#ifdef CONFIG_GRKERNSEC_PROC_USER + entry = create_proc_entry("iomem", S_IRUSR, NULL); +#elif CONFIG_GRKERNSEC_PROC_USERGROUP + entry = create_proc_entry("iomem", S_IRUSR | S_IRGRP, NULL); +#endif +#else entry = create_proc_entry("iomem", 0, NULL); +#endif if (entry) entry->proc_fops = &proc_iomem_operations; return 0; diff -urNp linux-2.6.17.11/kernel/sched.c linux-2.6.17.11/kernel/sched.c --- linux-2.6.17.11/kernel/sched.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/kernel/sched.c 2006-09-01 16:20:29.000000000 -0400 @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -3636,7 +3637,8 @@ asmlinkage long sys_nice(int increment) if (nice > 19) nice = 19; - if (increment < 0 && !can_nice(current, nice)) + if (increment < 0 && (!can_nice(current, nice) || + gr_handle_chroot_nice())) return vx_flags(VXF_IGNEG_NICE, 0) ? 
0 : -EPERM; retval = security_task_setnice(current, nice); diff -urNp linux-2.6.17.11/kernel/signal.c linux-2.6.17.11/kernel/signal.c --- linux-2.6.17.11/kernel/signal.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/kernel/signal.c 2006-09-01 16:20:29.000000000 -0400 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -583,11 +583,11 @@ static int check_kill_permission(int sig (!is_si_special(info) && SI_FROMUSER(info))); error = -EPERM; - if (user && ((sig != SIGCONT) || + if (user && ((((sig != SIGCONT) || (current->signal->session != t->signal->session)) && (current->euid ^ t->suid) && (current->euid ^ t->uid) && (current->uid ^ t->suid) && (current->uid ^ t->uid) - && !capable(CAP_KILL)) + && !capable(CAP_KILL)) || gr_handle_signal(t, sig))) return error; error = -ESRCH; @@ -595,8 +595,10 @@ static int check_kill_permission(int sig return error; error = security_task_kill(t, info, sig); - if (!error) + if (!error) { audit_signal_info(sig, t); /* Let audit system see the signal */ + gr_log_signal(sig, t); + } return error; } @@ -774,7 +777,7 @@ out_set: (((sig) < SIGRTMIN) && sigismember(&(sigptr)->signal, (sig))) -static int +int specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t) { int ret = 0; @@ -819,6 +822,10 @@ force_sig_info(int sig, struct siginfo * } recalc_sigpending_tsk(t); ret = specific_send_sig_info(sig, info, t); + + gr_log_signal(sig, t); + gr_handle_crash(t, sig); + spin_unlock_irqrestore(&t->sighand->siglock, flags); return ret; diff -urNp linux-2.6.17.11/kernel/sys.c linux-2.6.17.11/kernel/sys.c --- linux-2.6.17.11/kernel/sys.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/kernel/sys.c 2006-09-01 16:20:29.000000000 -0400 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -448,6 +449,12 @@ static int set_one_prio(struct task_stru error = -EACCES; goto out; } + + if (gr_handle_chroot_setpriority(p, niceval)) { + error = -EACCES; + goto out; + } 
+ no_nice = security_task_setnice(p, niceval); if (no_nice) { error = no_nice; @@ -845,6 +852,9 @@ asmlinkage long sys_setregid(gid_t rgid, if (rgid != (gid_t) -1 || (egid != (gid_t) -1 && egid != old_rgid)) current->sgid = new_egid; + + gr_set_role_label(current, current->uid, new_rgid); + current->fsgid = new_egid; current->egid = new_egid; current->gid = new_rgid; @@ -874,6 +884,9 @@ asmlinkage long sys_setgid(gid_t gid) current->mm->dumpable = suid_dumpable; smp_wmb(); } + + gr_set_role_label(current, current->uid, gid); + current->gid = current->egid = current->sgid = current->fsgid = gid; } else if ((gid == current->gid) || (gid == current->sgid)) @@ -915,6 +928,9 @@ static int set_user(uid_t new_ruid, int current->mm->dumpable = suid_dumpable; smp_wmb(); } + + gr_set_role_label(current, new_ruid, current->gid); + current->uid = new_ruid; return 0; } @@ -1018,6 +1034,9 @@ asmlinkage long sys_setuid(uid_t uid) } else if ((uid != current->uid) && (uid != new_suid)) return -EPERM; + if (gr_check_crash_uid(uid)) + return -EPERM; + if (old_euid != uid) { current->mm->dumpable = suid_dumpable; @@ -1123,8 +1142,10 @@ asmlinkage long sys_setresgid(gid_t rgid current->egid = egid; } current->fsgid = current->egid; - if (rgid != (gid_t) -1) + if (rgid != (gid_t) -1) { + gr_set_role_label(current, current->uid, rgid); current->gid = rgid; + } if (sgid != (gid_t) -1) current->sgid = sgid; diff -urNp linux-2.6.17.11/kernel/sysctl.c linux-2.6.17.11/kernel/sysctl.c --- linux-2.6.17.11/kernel/sysctl.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/kernel/sysctl.c 2006-09-01 16:20:29.000000000 -0400 @@ -54,6 +54,14 @@ extern int proc_nr_files(ctl_table *tabl void __user *buffer, size_t *lenp, loff_t *ppos); #if defined(CONFIG_SYSCTL) +#include +#include + +extern __u32 gr_handle_sysctl(const ctl_table *table, const void *oldval, + const void *newval); +extern int gr_handle_sysctl_mod(const char *dirname, const char *name, + const int op); +extern int 
gr_handle_chroot_sysctl(const int op); /* External variables not in a header file. */ extern int C_A_D; @@ -157,6 +165,22 @@ extern ctl_table inotify_table[]; #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT int sysctl_legacy_va_layout; #endif +extern ctl_table grsecurity_table[]; + +#ifdef CONFIG_PAX_SOFTMODE +static ctl_table pax_table[] = { + { + .ctl_name = PAX_SOFTMODE, + .procname = "softmode", + .data = &pax_softmode, + .maxlen = sizeof(unsigned int), + .mode = 0600, + .proc_handler = &proc_dointvec, + }, + + { .ctl_name = 0 } +}; +#endif /* /proc declarations: */ @@ -683,6 +707,24 @@ static ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif + +#if defined(CONFIG_GRKERNSEC_SYSCTL) || defined(CONFIG_GRKERNSEC_MODSTOP) + { + .ctl_name = KERN_GRSECURITY, + .procname = "grsecurity", + .mode = 0500, + .child = grsecurity_table, + }, +#endif + +#ifdef CONFIG_PAX_SOFTMODE + { + .ctl_name = KERN_PAX, + .procname = "pax", + .mode = 0500, + .child = pax_table, + }, +#endif { .ctl_name = 0 } }; @@ -1180,6 +1222,10 @@ static int test_perm(int mode, int op) static inline int ctl_perm(ctl_table *table, int op) { int error; + if (table->de && gr_handle_sysctl_mod(table->de->parent->name, table->de->name, op)) + return -EACCES; + if (gr_handle_chroot_sysctl(op)) + return -EACCES; error = security_sysctl(table, op); if (error) return error; @@ -1216,6 +1262,10 @@ repeat: table = table->child; goto repeat; } + + if (!gr_handle_sysctl(table, oldval, newval)) + return -EPERM; + error = do_sysctl_strategy(table, name, nlen, oldval, oldlenp, newval, newlen, context); diff -urNp linux-2.6.17.11/kernel/time.c linux-2.6.17.11/kernel/time.c --- linux-2.6.17.11/kernel/time.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/kernel/time.c 2006-09-01 16:20:29.000000000 -0400 @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -93,6 +94,9 @@ asmlinkage long sys_stime(time_t __user return err; vx_settimeofday(&tv); + + gr_log_timechange(); + return 0; } @@ 
-199,6 +203,8 @@ asmlinkage long sys_settimeofday(struct return -EFAULT; } + gr_log_timechange(); + return do_sys_settimeofday(tv ? &new_ts : NULL, tz ? &new_tz : NULL); } diff -urNp linux-2.6.17.11/Makefile linux-2.6.17.11/Makefile --- linux-2.6.17.11/Makefile 2006-08-23 19:30:00.000000000 -0400 +++ linux-2.6.17.11/Makefile 2006-09-01 16:20:29.000000000 -0400 @@ -518,7 +518,7 @@ export MODLIB ifeq ($(KBUILD_EXTMOD),) -core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ +core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ grsecurity/ vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ diff -urNp linux-2.6.17.11/mm/filemap.c linux-2.6.17.11/mm/filemap.c --- linux-2.6.17.11/mm/filemap.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/mm/filemap.c 2006-09-01 16:20:29.000000000 -0400 @@ -30,6 +30,7 @@ #include #include #include +#include #include "filemap.h" #include "internal.h" @@ -1650,7 +1651,13 @@ int generic_file_mmap(struct file * file struct address_space *mapping = file->f_mapping; if (!mapping->a_ops->readpage) - return -ENOEXEC; + return -ENODEV; + +#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_X86_32) + if ((vma->vm_mm->pax_flags & MF_PAX_PAGEEXEC) && !(vma->vm_flags & VM_EXEC)) + vma->vm_page_prot = __pgprot(pte_val(pte_exprotect(__pte(pgprot_val(vma->vm_page_prot))))); +#endif + file_accessed(file); vma->vm_ops = &generic_file_vm_ops; return 0; @@ -1872,6 +1879,7 @@ inline int generic_write_checks(struct f *pos = i_size_read(inode); if (limit != RLIM_INFINITY) { + gr_learn_resource(current, RLIMIT_FSIZE,*pos, 0); if (*pos >= limit) { send_sig(SIGXFSZ, current, 0); return -EFBIG; diff -urNp linux-2.6.17.11/mm/madvise.c linux-2.6.17.11/mm/madvise.c --- linux-2.6.17.11/mm/madvise.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/mm/madvise.c 2006-09-01 16:20:29.000000000 -0400 @@ -15,9 +15,46 @@ * We can potentially split a vm area into separate * areas, each area 
with its own behavior. */ + +#ifdef CONFIG_PAX_SEGMEXEC +static long __madvise_behavior(struct vm_area_struct * vma, + struct vm_area_struct **prev, + unsigned long start, unsigned long end, int behavior); + +static long madvise_behavior(struct vm_area_struct * vma, + struct vm_area_struct **prev, + unsigned long start, unsigned long end, int behavior) +{ + if (vma->vm_flags & VM_MIRROR) { + struct vm_area_struct * vma_m, * prev_m; + unsigned long start_m, end_m; + int error; + + start_m = vma->vm_start + vma->vm_mirror; + vma_m = find_vma_prev(vma->vm_mm, start_m, &prev_m); + if (vma_m && vma_m->vm_start == start_m && (vma_m->vm_flags & VM_MIRROR)) { + start_m = start + vma->vm_mirror; + end_m = end + vma->vm_mirror; + error = __madvise_behavior(vma_m, &prev_m, start_m, end_m, behavior); + if (error) + return error; + } else { + printk("PAX: VMMIRROR: madvise bug in %s, %08lx\n", current->comm, vma->vm_start); + return -ENOMEM; + } + } + + return __madvise_behavior(vma, prev, start, end, behavior); +} + +static long __madvise_behavior(struct vm_area_struct * vma, + struct vm_area_struct **prev, + unsigned long start, unsigned long end, int behavior) +#else static long madvise_behavior(struct vm_area_struct * vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, int behavior) +#endif { struct mm_struct * mm = vma->vm_mm; int error = 0; diff -urNp linux-2.6.17.11/mm/memory.c linux-2.6.17.11/mm/memory.c --- linux-2.6.17.11/mm/memory.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/mm/memory.c 2006-09-01 16:20:29.000000000 -0400 @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -320,6 +321,11 @@ int __pte_alloc(struct mm_struct *mm, pm int __pte_alloc_kernel(pmd_t *pmd, unsigned long address) { + +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; +#endif + pte_t *new = pte_alloc_one_kernel(&init_mm, address); if (!new) return -ENOMEM; @@ -327,8 +333,19 @@ int __pte_alloc_kernel(pmd_t *pmd, unsig 
spin_lock(&init_mm.page_table_lock); if (pmd_present(*pmd)) /* Another has populated it */ pte_free_kernel(new); - else + else { + +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(cr0); +#endif + pmd_populate_kernel(&init_mm, pmd, new); + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + + } spin_unlock(&init_mm.page_table_lock); return 0; } @@ -1430,6 +1447,88 @@ static inline void cow_user_page(struct copy_user_highpage(dst, src, va); } +#ifdef CONFIG_PAX_SEGMEXEC +/* PaX: if vma is mirrored, synchronize the mirror's PTE + * + * the ptl of the lower mapped page is held on entry and is not released on exit + * or inside to ensure atomic changes to the PTE states (swapout, mremap, munmap, etc) + */ +static void pax_mirror_fault(struct vm_area_struct *vma, unsigned long address, pte_t *pte) +{ + struct mm_struct *mm = vma->vm_mm; + unsigned long address_m, pfn_m; + struct vm_area_struct * vma_m = NULL; + pte_t * pte_m, entry_m; + struct page * page_m = NULL; + + address_m = vma->vm_start + vma->vm_mirror; + vma_m = find_vma(mm, address_m); + BUG_ON(!vma_m || vma_m->vm_start != address_m); + + address_m = address + vma->vm_mirror; + pte_m = pte_offset_map_nested(pmd_offset(pud_offset(pgd_offset(mm, address_m), address_m), address_m), address_m); + + if (pte_same(*pte, *pte_m)) { + pte_unmap_nested(pte_m); + return; + } + + pfn_m = pte_pfn(*pte); + if (pte_present(*pte_m)) { + page_m = vm_normal_page(vma_m, address_m, *pte_m); + if (page_m) { + flush_cache_page(vma_m, address_m, pfn_m); + flush_icache_page(vma_m, page_m); + } + } + + if (pte_present(*pte_m)) + entry_m = ptep_clear_flush(vma_m, address_m, pte_m); + else + entry_m = ptep_get_and_clear(mm, address_m, pte_m); + + if (pte_none(entry_m)) { + } else if (pte_present(entry_m)) { + if (page_m) { + page_remove_rmap(page_m); + if (PageAnon(page_m)) + dec_mm_counter(mm, anon_rss); + else + dec_mm_counter(mm, file_rss); + page_cache_release(page_m); + } + } else if (!pte_file(entry_m)) { + 
free_swap_and_cache(pte_to_swp_entry(entry_m)); + } else { + printk(KERN_ERR "PAX: VMMIRROR: bug in mirror_fault: %08lx, %08lx, %08lx, %08lx\n", + address, vma->vm_start, address_m, vma_m->vm_start); + } + + page_m = vm_normal_page(vma, address, *pte); + entry_m = pfn_pte(pfn_m, vma_m->vm_page_prot); + if (pte_write(*pte)) + entry_m = maybe_mkwrite(pte_mkdirty(entry_m), vma_m); + if (page_m) { + page_cache_get(page_m); + /* + * we can test PAGE_MAPPING_ANON without holding page_map_lock because + * we hold the page table lock and have a reference to page_m + */ + if (PageAnon(page_m)) { + page_add_anon_rmap(page_m, vma_m, address_m); + inc_mm_counter(mm, anon_rss); + } else { + page_add_file_rmap(page_m); + inc_mm_counter(mm, file_rss); + } + } + set_pte_at(mm, address_m, pte_m, entry_m); + update_mmu_cache(vma_m, address_m, entry_m); + lazy_mmu_prot_update(entry_m); + pte_unmap_nested(pte_m); +} +#endif + /* * This routine handles present pages, when users try to write * to a shared page. It is done by copying the page to a new address @@ -1520,6 +1619,12 @@ gotten: /* Free the old page.. 
*/ new_page = old_page; ret |= VM_FAULT_WRITE; + +#ifdef CONFIG_PAX_SEGMEXEC + if (vma->vm_flags & VM_MIRROR) + pax_mirror_fault(vma, address, page_table); +#endif + } if (new_page) page_cache_release(new_page); @@ -1770,6 +1875,7 @@ int vmtruncate(struct inode * inode, lof do_expand: limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; + gr_learn_resource(current, RLIMIT_FSIZE, offset, 1); if (limit != RLIM_INFINITY && offset > limit) goto out_sig; if (offset > inode->i_sb->s_maxbytes) @@ -1963,6 +2069,12 @@ again: /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, address, pte); lazy_mmu_prot_update(pte); + +#ifdef CONFIG_PAX_SEGMEXEC + if (vma->vm_flags & VM_MIRROR) + pax_mirror_fault(vma, address, page_table); +#endif + unlock: pte_unmap_unlock(page_table, ptl); out: @@ -2027,6 +2139,12 @@ static int do_anonymous_page(struct mm_s /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, address, entry); lazy_mmu_prot_update(entry); + +#ifdef CONFIG_PAX_SEGMEXEC + if (vma->vm_flags & VM_MIRROR) + pax_mirror_fault(vma, address, page_table); +#endif + unlock: pte_unmap_unlock(page_table, ptl); return VM_FAULT_MINOR; @@ -2155,6 +2273,12 @@ retry: /* no need to invalidate: a not-present page shouldn't be cached */ update_mmu_cache(vma, address, entry); lazy_mmu_prot_update(entry); + +#ifdef CONFIG_PAX_SEGMEXEC + if (vma->vm_flags & VM_MIRROR) + pax_mirror_fault(vma, address, page_table); +#endif + unlock: pte_unmap_unlock(page_table, ptl); return ret; @@ -2278,6 +2402,12 @@ static inline int handle_pte_fault(struc flush_tlb_page(vma, address); } unlock: + +#ifdef CONFIG_PAX_SEGMEXEC + if (vma->vm_flags & VM_MIRROR) + pax_mirror_fault(vma, address, pte); +#endif + pte_unmap_unlock(pte, ptl); ret = VM_FAULT_MINOR; out: @@ -2303,6 +2433,49 @@ int __handle_mm_fault(struct mm_struct * if (unlikely(is_vm_hugetlb_page(vma))) return hugetlb_fault(mm, vma, address, write_access); +#ifdef CONFIG_PAX_SEGMEXEC + if (vma->vm_flags 
& VM_MIRROR) { + unsigned long address_m; + struct vm_area_struct * vma_m; + pgd_t *pgd_m; + pud_t *pud_m; + pmd_t *pmd_m; + + address_m = vma->vm_start + vma->vm_mirror; + vma_m = find_vma(mm, address_m); + + /* PaX: sanity checks */ + if (!vma_m) { + printk(KERN_ERR "PAX: VMMIRROR: fault bug, %08lx, %p, %08lx, %p\n", + address, vma, address_m, vma_m); + return VM_FAULT_SIGBUS; + } else if (!(vma_m->vm_flags & VM_MIRROR) || + vma_m->vm_start != address_m || + vma->vm_end - vma->vm_start != vma_m->vm_end - vma_m->vm_start) + { + printk(KERN_ERR "PAX: VMMIRROR: fault bug2, %08lx, %08lx, %08lx, %08lx, %08lx\n", + address, vma->vm_start, vma_m->vm_start, vma->vm_end, vma_m->vm_end); + return VM_FAULT_SIGBUS; + } + + if (address_m < address) { + address += vma->vm_mirror; + vma = vma_m; + } + + address_m = address + vma->vm_mirror; + pgd_m = pgd_offset(mm, address_m); + pud_m = pud_alloc(mm, pgd_m, address_m); + if (!pud_m) + return VM_FAULT_OOM; + pmd_m = pmd_alloc(mm, pud_m, address_m); + if (!pmd_m) + return VM_FAULT_OOM; + if (!pmd_present(*pmd_m) && __pte_alloc(mm, pmd_m, address_m)) + return VM_FAULT_OOM; + } +#endif + pgd = pgd_offset(mm, address); pud = pud_alloc(mm, pgd, address); if (!pud) diff -urNp linux-2.6.17.11/mm/mempolicy.c linux-2.6.17.11/mm/mempolicy.c --- linux-2.6.17.11/mm/mempolicy.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/mm/mempolicy.c 2006-09-01 16:20:29.000000000 -0400 @@ -346,6 +346,12 @@ check_range(struct mm_struct *mm, unsign if (prev && prev->vm_end < vma->vm_start) return ERR_PTR(-EFAULT); } + +#ifdef CONFIG_PAX_SEGMEXEC + if (vma->vm_flags & VM_MIRROR) + return ERR_PTR(-EFAULT); +#endif + if (!is_vm_hugetlb_page(vma) && ((flags & MPOL_MF_STRICT) || ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) && diff -urNp linux-2.6.17.11/mm/mlock.c linux-2.6.17.11/mm/mlock.c --- linux-2.6.17.11/mm/mlock.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/mm/mlock.c 2006-09-01 16:20:29.000000000 -0400 @@ -10,14 +10,85 @@ #include 
#include #include +#include +static int __mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, + unsigned long start, unsigned long end, unsigned int newflags); static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, unsigned int newflags) { struct mm_struct * mm = vma->vm_mm; - pgoff_t pgoff; int pages; + int ret; + +#ifdef CONFIG_PAX_SEGMEXEC + struct vm_area_struct * vma_m = NULL, *prev_m; + unsigned long start_m = 0UL, end_m = 0UL, newflags_m = 0UL; + + if (vma->vm_flags & VM_MIRROR) { + start_m = vma->vm_start + vma->vm_mirror; + vma_m = find_vma_prev(mm, start_m, &prev_m); + if (!vma_m || vma_m->vm_start != start_m || !(vma_m->vm_flags & VM_MIRROR)) { + printk("PAX: VMMIRROR: mlock bug in %s, %08lx\n", current->comm, vma->vm_start); + return -ENOMEM; + } + + start_m = start + vma->vm_mirror; + end_m = end + vma->vm_mirror; + if (newflags & VM_LOCKED) + newflags_m = vma_m->vm_flags | VM_LOCKED; + else + newflags_m = vma_m->vm_flags & ~VM_LOCKED; + ret = __mlock_fixup(vma_m, &prev_m, start_m, end_m, newflags_m); + if (ret) + return ret; + } +#endif + + ret = __mlock_fixup(vma, prev, start, end, newflags); + if (ret) + return ret; + + /* + * vm_flags is protected by the mmap_sem held in write mode. + * It's okay if try_to_unmap_one unmaps a page just after we + * set VM_LOCKED, make_pages_present below will bring it back. + */ + vma->vm_flags = newflags; + +#ifdef CONFIG_PAX_SEGMEXEC + if (vma->vm_flags & VM_MIRROR) + vma_m->vm_flags = newflags_m; +#endif + + /* + * Keep track of amount of locked VM. 
+ */ + pages = (end - start) >> PAGE_SHIFT; + if (newflags & VM_LOCKED) { + pages = -pages; + if (!(newflags & VM_IO)) + ret = make_pages_present(start, end); + } + + mm->locked_vm -= pages; + +#ifdef CONFIG_PAX_SEGMEXEC + if (vma->vm_flags & VM_MIRROR) + mm->locked_vm -= pages; +#endif + + if (ret == -ENOMEM) + ret = -EAGAIN; + return ret; +} + +static int __mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, + unsigned long start, unsigned long end, unsigned int newflags) +{ + struct mm_struct * mm = vma->vm_mm; + pgoff_t pgoff; int ret = 0; if (newflags == vma->vm_flags) { @@ -30,7 +101,7 @@ static int mlock_fixup(struct vm_area_st vma->vm_file, pgoff, vma_policy(vma)); if (*prev) { vma = *prev; - goto success; + goto out; } *prev = vma; @@ -41,31 +112,9 @@ static int mlock_fixup(struct vm_area_st goto out; } - if (end != vma->vm_end) { + if (end != vma->vm_end) ret = split_vma(mm, vma, end, 0); - if (ret) - goto out; - } -success: - /* - * vm_flags is protected by the mmap_sem held in write mode. - * It's okay if try_to_unmap_one unmaps a page just after we - * set VM_LOCKED, make_pages_present below will bring it back. - */ - vma->vm_flags = newflags; - - /* - * Keep track of amount of locked VM. 
- */ - pages = (end - start) >> PAGE_SHIFT; - if (newflags & VM_LOCKED) { - pages = -pages; - if (!(newflags & VM_IO)) - ret = make_pages_present(start, end); - } - - vx_vmlocked_sub(vma->vm_mm, pages); out: if (ret == -ENOMEM) ret = -EAGAIN; @@ -84,6 +133,17 @@ static int do_mlock(unsigned long start, return -EINVAL; if (end == start) return 0; + +#ifdef CONFIG_PAX_SEGMEXEC + if (current->mm->pax_flags & MF_PAX_SEGMEXEC) { + if (end > SEGMEXEC_TASK_SIZE) + return -EINVAL; + } else +#endif + + if (end > TASK_SIZE) + return -EINVAL; + vma = find_vma_prev(current->mm, start, &prev); if (!vma || vma->vm_start > start) return -ENOMEM; @@ -141,6 +201,7 @@ asmlinkage long sys_mlock(unsigned long lock_limit >>= PAGE_SHIFT; /* check against resource limits */ + gr_learn_resource(current, RLIMIT_MEMLOCK, (current->mm->locked_vm << PAGE_SHIFT) + len, 1); if ((locked <= lock_limit) || capable(CAP_IPC_LOCK)) error = do_mlock(start, len, 1); out: @@ -173,6 +234,16 @@ static int do_mlockall(int flags) for (vma = current->mm->mmap; vma ; vma = prev->vm_next) { unsigned int newflags; +#ifdef CONFIG_PAX_SEGMEXEC + if (current->mm->pax_flags & MF_PAX_SEGMEXEC) { + if (vma->vm_end > SEGMEXEC_TASK_SIZE) + break; + } else +#endif + + if (vma->vm_end > TASK_SIZE) + break; + newflags = vma->vm_flags | VM_LOCKED; if (!(flags & MCL_CURRENT)) newflags &= ~VM_LOCKED; @@ -202,6 +273,7 @@ asmlinkage long sys_mlockall(int flags) ret = -ENOMEM; if (!vx_vmlocked_avail(current->mm, current->mm->total_vm)) goto out; + gr_learn_resource(current, RLIMIT_MEMLOCK, current->mm->total_vm, 1); if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) || capable(CAP_IPC_LOCK)) ret = do_mlockall(flags); diff -urNp linux-2.6.17.11/mm/mmap.c linux-2.6.17.11/mm/mmap.c --- linux-2.6.17.11/mm/mmap.c 2006-08-23 19:30:01.000000000 -0400 +++ linux-2.6.17.11/mm/mmap.c 2006-09-01 16:20:29.000000000 -0400 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -240,6 +241,7 @@ asmlinkage 
unsigned long sys_brk(unsigne * not page aligned -Ram Gupta */ rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur; + gr_learn_resource(current, RLIMIT_DATA, brk - mm->start_data, 1); if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim) goto out; @@ -628,11 +630,17 @@ again: remove_next = 1 + (end > next-> * If the vma has a ->close operation then the driver probably needs to release * per-vma resources, so we don't attempt to merge those. */ +#ifdef CONFIG_PAX_SEGMEXEC +#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP | VM_MIRROR) +#else #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) +#endif static inline int is_mergeable_vma(struct vm_area_struct *vma, struct file *file, unsigned long vm_flags) { + if ((vma->vm_flags | vm_flags) & VM_SPECIAL) + return 0; if (vma->vm_flags != vm_flags) return 0; if (vma->vm_file != file) @@ -857,14 +865,11 @@ none: void vm_stat_account(struct mm_struct *mm, unsigned long flags, struct file *file, long pages) { - const unsigned long stack_flags - = VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN); - if (file) { mm->shared_vm += pages; if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC) mm->exec_vm += pages; - } else if (flags & stack_flags) + } else if (flags & (VM_GROWSUP|VM_GROWSDOWN)) mm->stack_vm += pages; if (flags & (VM_RESERVED|VM_IO)) mm->reserved_vm += pages; @@ -875,10 +880,55 @@ void vm_stat_account(struct mm_struct *m * The caller must hold down_write(current->mm->mmap_sem). 
*/ +#ifdef CONFIG_PAX_SEGMEXEC +static unsigned long __do_mmap_pgoff(struct file * file, unsigned long addr, + unsigned long len, unsigned long prot, + unsigned long flags, unsigned long pgoff); + unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff) { + unsigned long ret = -EINVAL; + + if (flags & MAP_MIRROR) + return ret; + + if ((current->mm->pax_flags & MF_PAX_SEGMEXEC) && + (len > SEGMEXEC_TASK_SIZE || (addr > SEGMEXEC_TASK_SIZE-len))) + return ret; + + ret = __do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + + if ((current->mm->pax_flags & MF_PAX_SEGMEXEC) && ret < TASK_SIZE && ((flags & MAP_TYPE) == MAP_PRIVATE) + +#ifdef CONFIG_PAX_MPROTECT + && (!(current->mm->pax_flags & MF_PAX_MPROTECT) || ((prot & PROT_EXEC) && file && !(prot & PROT_WRITE))) +#endif + + ) + { + unsigned long ret_m; + prot = prot & PROT_EXEC ? prot & ~PROT_WRITE : PROT_NONE; + ret_m = __do_mmap_pgoff(NULL, ret + SEGMEXEC_TASK_SIZE, 0UL, prot, flags | MAP_MIRROR | MAP_FIXED, ret); + if (ret_m >= TASK_SIZE) { + do_munmap(current->mm, ret, len); + ret = ret_m; + } + } + + return ret; +} + +static unsigned long __do_mmap_pgoff(struct file * file, unsigned long addr, + unsigned long len, unsigned long prot, + unsigned long flags, unsigned long pgoff) +#else +unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, + unsigned long len, unsigned long prot, + unsigned long flags, unsigned long pgoff) +#endif +{ struct mm_struct * mm = current->mm; struct vm_area_struct * vma, * prev; struct inode *inode; @@ -889,6 +939,28 @@ unsigned long do_mmap_pgoff(struct file int accountable = 1; unsigned long charged = 0, reqprot = prot; +#ifdef CONFIG_PAX_SEGMEXEC + struct vm_area_struct * vma_m = NULL; + + if (flags & MAP_MIRROR) { + /* PaX: sanity checks, to be removed when proved to be stable */ + if (file || len || ((flags & MAP_TYPE) != MAP_PRIVATE)) + return -EINVAL; + + vma_m = 
find_vma(mm, pgoff); + + if (!vma_m || is_vm_hugetlb_page(vma_m) || + vma_m->vm_start != pgoff || + (vma_m->vm_flags & VM_SPECIAL) || + (prot & PROT_WRITE)) + return -EINVAL; + + file = vma_m->vm_file; + pgoff = vma_m->vm_pgoff; + len = vma_m->vm_end - vma_m->vm_start; + } +#endif + if (file) { if (is_file_hugepages(file)) accountable = 0; @@ -906,7 +978,7 @@ unsigned long do_mmap_pgoff(struct file * (the exception is when the underlying filesystem is noexec * mounted, in which case we dont add PROT_EXEC.) */ - if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC)) + if ((prot & (PROT_READ | PROT_WRITE)) && (current->personality & READ_IMPLIES_EXEC)) if (!(file && (file->f_vfsmnt->mnt_flags & MNT_NOEXEC))) prot |= PROT_EXEC; @@ -933,7 +1005,7 @@ unsigned long do_mmap_pgoff(struct file /* Obtain the address to map to. we verify (or select) it and ensure * that it represents a valid section of the address space. */ - addr = get_unmapped_area(file, addr, len, pgoff, flags); + addr = get_unmapped_area(file, addr, len, pgoff, flags | ((prot & PROT_EXEC) ? 
MAP_EXECUTABLE : 0)); if (addr & ~PAGE_MASK) return addr; @@ -944,6 +1016,24 @@ unsigned long do_mmap_pgoff(struct file vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) | mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; + if (file && (file->f_vfsmnt->mnt_flags & MNT_NOEXEC)) + vm_flags &= ~VM_MAYEXEC; + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) + if (mm->pax_flags & (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) { + +#ifdef CONFIG_PAX_MPROTECT + if (mm->pax_flags & MF_PAX_MPROTECT) { + if ((prot & (PROT_WRITE | PROT_EXEC)) != PROT_EXEC) + vm_flags &= ~(VM_EXEC | VM_MAYEXEC); + else + vm_flags &= ~(VM_WRITE | VM_MAYWRITE); + } +#endif + + } +#endif + if (flags & MAP_LOCKED) { if (!can_do_mlock()) return -EPERM; @@ -956,6 +1046,7 @@ unsigned long do_mmap_pgoff(struct file locked += mm->locked_vm; lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; lock_limit >>= PAGE_SHIFT; + gr_learn_resource(current, RLIMIT_MEMLOCK, locked << PAGE_SHIFT, 1); if (locked > lock_limit && !capable(CAP_IPC_LOCK)) return -EAGAIN; } @@ -1003,6 +1094,11 @@ unsigned long do_mmap_pgoff(struct file /* * Set pgoff according to addr for anon_vma. */ + +#ifdef CONFIG_PAX_SEGMEXEC + if (!(flags & MAP_MIRROR)) +#endif + pgoff = addr >> PAGE_SHIFT; break; default: @@ -1014,14 +1110,17 @@ unsigned long do_mmap_pgoff(struct file if (error) return error; + if (!gr_acl_handle_mmap(file, prot)) + return -EACCES; + /* Clear old maps */ error = -ENOMEM; -munmap_back: vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent); if (vma && vma->vm_start < addr + len) { if (do_munmap(mm, addr, len)) return -ENOMEM; - goto munmap_back; + vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent); + BUG_ON(vma && vma->vm_start < addr + len); } /* Check against address space limit. 
*/ @@ -1069,7 +1168,14 @@ munmap_back: vma->vm_start = addr; vma->vm_end = addr + len; vma->vm_flags = vm_flags; - vma->vm_page_prot = protection_map[vm_flags & 0x0f]; + +#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_X86_32) + if ((file || !(mm->pax_flags & MF_PAX_PAGEEXEC)) && (vm_flags & (VM_READ|VM_WRITE))) + vma->vm_page_prot = protection_map[(vm_flags | VM_EXEC) & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]; + else +#endif + + vma->vm_page_prot = protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]; vma->vm_pgoff = pgoff; if (file) { @@ -1093,6 +1199,14 @@ munmap_back: goto free_vma; } +#ifdef CONFIG_PAX_SEGMEXEC + if (flags & MAP_MIRROR) { + vma_m->vm_flags |= VM_MIRROR; + vma_m->vm_mirror = vma->vm_start - vma_m->vm_start; + vma->vm_mirror = vma_m->vm_start - vma->vm_start; + } +#endif + /* We set VM_ACCOUNT in a shared mapping's vm_flags, to inform * shmem_zero_setup (perhaps called through /dev/zero's ->mmap) * that memory reservation must be checked; but that reservation @@ -1128,6 +1242,7 @@ munmap_back: out: vx_vmpages_add(mm, len >> PAGE_SHIFT); vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT); + track_exec_limit(mm, addr, addr + len, vm_flags); if (vm_flags & VM_LOCKED) { vx_vmlocked_add(mm, len >> PAGE_SHIFT); make_pages_present(addr, addr + len); @@ -1182,6 +1297,10 @@ arch_get_unmapped_area(struct file *filp if (len > TASK_SIZE) return -ENOMEM; +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP) || !filp) +#endif + if (addr) { addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); @@ -1192,7 +1311,7 @@ arch_get_unmapped_area(struct file *filp if (len > mm->cached_hole_size) { start_addr = addr = mm->free_area_cache; } else { - start_addr = addr = TASK_UNMAPPED_BASE; + start_addr = addr = mm->mmap_base; mm->cached_hole_size = 0; } @@ -1204,9 +1323,8 @@ full_search: * Start a new search - just in case we missed * some holes. 
*/ - if (start_addr != TASK_UNMAPPED_BASE) { - addr = TASK_UNMAPPED_BASE; - start_addr = addr; + if (start_addr != mm->mmap_base) { + start_addr = addr = mm->mmap_base; mm->cached_hole_size = 0; goto full_search; } @@ -1231,7 +1349,7 @@ void arch_unmap_area(struct mm_struct *m /* * Is this a new hole at the lowest possible address? */ - if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache) { + if (addr >= mm->mmap_base && addr < mm->free_area_cache) { mm->free_area_cache = addr; mm->cached_hole_size = ~0UL; } @@ -1249,12 +1367,16 @@ arch_get_unmapped_area_topdown(struct fi { struct vm_area_struct *vma; struct mm_struct *mm = current->mm; - unsigned long addr = addr0; + unsigned long base = mm->mmap_base, addr = addr0; /* requested length too big for entire address space */ if (len > TASK_SIZE) return -ENOMEM; +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP) || !filp) +#endif + /* requesting a specific address */ if (addr) { addr = PAGE_ALIGN(addr); @@ -1312,13 +1434,21 @@ bottomup: * can happen with large stack limits and large mmap() * allocations. 
*/ + mm->mmap_base = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + + mm->free_area_cache = mm->mmap_base; mm->cached_hole_size = ~0UL; - mm->free_area_cache = TASK_UNMAPPED_BASE; addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); /* * Restore the topdown base: */ - mm->free_area_cache = mm->mmap_base; + mm->mmap_base = base; + mm->free_area_cache = base; mm->cached_hole_size = ~0UL; return addr; @@ -1334,8 +1464,10 @@ void arch_unmap_area_topdown(struct mm_s mm->free_area_cache = addr; /* dont allow allocations above current base */ - if (mm->free_area_cache > mm->mmap_base) + if (mm->free_area_cache > mm->mmap_base) { mm->free_area_cache = mm->mmap_base; + mm->cached_hole_size = ~0UL; + } } unsigned long @@ -1468,6 +1600,7 @@ static int acct_stack_growth(struct vm_a return -ENOMEM; /* Stack limit test */ + gr_learn_resource(current, RLIMIT_STACK, size, 1); if (size > rlim[RLIMIT_STACK].rlim_cur) return -ENOMEM; @@ -1477,6 +1610,7 @@ static int acct_stack_growth(struct vm_a unsigned long limit; locked = mm->locked_vm + grow; limit = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; + gr_learn_resource(current, RLIMIT_MEMLOCK, locked << PAGE_SHIFT, 1); if (locked > limit && !capable(CAP_IPC_LOCK)) return -ENOMEM; } @@ -1594,13 +1728,49 @@ int expand_stack(struct vm_area_struct * if (address < vma->vm_start) { unsigned long size, grow; +#ifdef CONFIG_PAX_SEGMEXEC + struct vm_area_struct *vma_m = NULL; + unsigned long address_m = 0UL; + + if (vma->vm_flags & VM_MIRROR) { + address_m = vma->vm_start + vma->vm_mirror; + vma_m = find_vma(vma->vm_mm, address_m); + if (!vma_m || vma_m->vm_start != address_m || + !(vma_m->vm_flags & VM_MIRROR) || + vma->vm_end - vma->vm_start != + vma_m->vm_end - vma_m->vm_start || + vma->anon_vma != vma_m->anon_vma) { + printk(KERN_ERR "PAX: VMMIRROR: expand bug, %08lx, %08lx, %08lx, %08lx, %08lx\n", + address, vma->vm_start, vma_m->vm_start, 
vma->vm_end, vma_m->vm_end); + anon_vma_unlock(vma); + return -EFAULT; + } + address_m = address + vma->vm_mirror; + } +#endif + size = vma->vm_end - address; grow = (vma->vm_start - address) >> PAGE_SHIFT; +#ifdef CONFIG_PAX_SEGMEXEC + if (vma_m) + error = acct_stack_growth(vma, size, 2*grow); + else +#endif + error = acct_stack_growth(vma, size, grow); if (!error) { vma->vm_start = address; vma->vm_pgoff -= grow; + track_exec_limit(vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_flags); + +#ifdef CONFIG_PAX_SEGMEXEC + if (vma_m) { + vma_m->vm_start = address_m; + vma_m->vm_pgoff -= grow; + } +#endif + } } anon_vma_unlock(vma); @@ -1762,7 +1932,24 @@ int split_vma(struct mm_struct * mm, str * work. This now handles partial unmappings. * Jeremy Fitzhardinge */ +#ifdef CONFIG_PAX_SEGMEXEC +static int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len); + +int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) +{ + if (mm->pax_flags & MF_PAX_SEGMEXEC) { + int ret = __do_munmap(mm, start + SEGMEXEC_TASK_SIZE, len); + if (ret) + return ret; + } + + return __do_munmap(mm, start, len); +} + +static int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len) +#else int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) +#endif { unsigned long end; struct vm_area_struct *vma, *prev, *last; @@ -1816,6 +2003,8 @@ int do_munmap(struct mm_struct *mm, unsi /* Fix up all other VM information */ remove_vma_list(mm, vma); + track_exec_limit(mm, start, end, 0UL); + return 0; } @@ -1828,6 +2017,12 @@ asmlinkage long sys_munmap(unsigned long profile_munmap(addr); +#ifdef CONFIG_PAX_SEGMEXEC + if ((mm->pax_flags & MF_PAX_SEGMEXEC) && + (len > SEGMEXEC_TASK_SIZE || addr > SEGMEXEC_TASK_SIZE-len)) + return -EINVAL; +#endif + down_write(&mm->mmap_sem); ret = do_munmap(mm, addr, len); up_write(&mm->mmap_sem); @@ -1849,11 +2044,35 @@ static inline void verify_mm_writelocked * anonymous maps. 
eventually we may be able to do some * brk-specific accounting here. */ +#ifdef CONFIG_PAX_SEGMEXEC +static unsigned long __do_brk(unsigned long addr, unsigned long len); + +unsigned long do_brk(unsigned long addr, unsigned long len) +{ + unsigned long ret; + + ret = __do_brk(addr, len); + if (ret == addr && (current->mm->pax_flags & (MF_PAX_SEGMEXEC | MF_PAX_MPROTECT)) == MF_PAX_SEGMEXEC) { + unsigned long ret_m; + + ret_m = __do_mmap_pgoff(NULL, addr + SEGMEXEC_TASK_SIZE, 0UL, PROT_NONE, MAP_PRIVATE | MAP_FIXED | MAP_MIRROR, addr); + if (ret_m > TASK_SIZE) { + do_munmap(current->mm, addr, len); + ret = ret_m; + } + } + + return ret; +} + +static unsigned long __do_brk(unsigned long addr, unsigned long len) +#else unsigned long do_brk(unsigned long addr, unsigned long len) +#endif { struct mm_struct * mm = current->mm; struct vm_area_struct * vma, * prev; - unsigned long flags; + unsigned long flags, task_size = TASK_SIZE; struct rb_node ** rb_link, * rb_parent; pgoff_t pgoff = addr >> PAGE_SHIFT; int error; @@ -1862,11 +2081,28 @@ unsigned long do_brk(unsigned long addr, if (!len) return addr; - if ((addr + len) > TASK_SIZE || (addr + len) < addr) +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + task_size = SEGMEXEC_TASK_SIZE; +#endif + + if ((addr + len) > task_size || (addr + len) < addr) return -EINVAL; flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) + if (mm->pax_flags & (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) { + flags &= ~VM_EXEC; + +#ifdef CONFIG_PAX_MPROTECT + if (mm->pax_flags & MF_PAX_MPROTECT) + flags &= ~VM_MAYEXEC; +#endif + + } +#endif + error = arch_mmap_check(addr, len, flags); if (error) return error; @@ -1880,6 +2116,7 @@ unsigned long do_brk(unsigned long addr, locked += mm->locked_vm; lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; lock_limit >>= PAGE_SHIFT; + gr_learn_resource(current, RLIMIT_MEMLOCK, locked << PAGE_SHIFT, 1); if 
(locked > lock_limit && !capable(CAP_IPC_LOCK)) return -EAGAIN; if (!vx_vmlocked_avail(mm, len >> PAGE_SHIFT)) @@ -1895,12 +2132,12 @@ unsigned long do_brk(unsigned long addr, /* * Clear old maps. this also does some error checking for us */ - munmap_back: vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent); if (vma && vma->vm_start < addr + len) { if (do_munmap(mm, addr, len)) return -ENOMEM; - goto munmap_back; + vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent); + BUG_ON(vma && vma->vm_start < addr + len); } /* Check against address space limits *after* clearing old maps... */ @@ -1933,7 +2170,14 @@ unsigned long do_brk(unsigned long addr, vma->vm_end = addr + len; vma->vm_pgoff = pgoff; vma->vm_flags = flags; - vma->vm_page_prot = protection_map[flags & 0x0f]; + +#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_X86_32) + if (!(mm->pax_flags & MF_PAX_PAGEEXEC)) + vma->vm_page_prot = protection_map[(flags | VM_EXEC) & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]; + else +#endif + + vma->vm_page_prot = protection_map[flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]; vma_link(mm, vma, prev, rb_link, rb_parent); out: vx_vmpages_add(mm, len >> PAGE_SHIFT); @@ -1941,6 +2185,7 @@ out: vx_vmlocked_add(mm, len >> PAGE_SHIFT); make_pages_present(addr, addr + len); } + track_exec_limit(mm, addr, addr + len, flags); return addr; } @@ -2079,7 +2324,7 @@ int may_expand_vm(struct mm_struct *mm, unsigned long lim; lim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; - + gr_learn_resource(current, RLIMIT_AS, (cur + npages) << PAGE_SHIFT, 1); if (cur + npages > lim) return 0; if (!vx_vmpages_avail(mm, npages)) diff -urNp linux-2.6.17.11/mm/mprotect.c linux-2.6.17.11/mm/mprotect.c --- linux-2.6.17.11/mm/mprotect.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/mm/mprotect.c 2006-09-01 16:20:29.000000000 -0400 @@ -19,11 +19,18 @@ #include #include #include +#include + +#ifdef CONFIG_PAX_MPROTECT +#include +#include +#endif #include #include #include 
#include +#include static void change_pte_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, unsigned long end, pgprot_t newprot) @@ -98,6 +105,94 @@ static void change_protection(struct vm_ flush_tlb_range(vma, start, end); } +#ifdef CONFIG_ARCH_TRACK_EXEC_LIMIT +/* called while holding the mmap semaphore for writing */ +static inline void establish_user_cs_limit(struct mm_struct *mm, unsigned long start, unsigned long end) +{ + struct vm_area_struct *vma = find_vma(mm, start); + + for (; vma && vma->vm_start < end; vma = vma->vm_next) + change_protection(vma, vma->vm_start, vma->vm_end, vma->vm_page_prot); + +} + +void track_exec_limit(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long prot) +{ + unsigned long oldlimit, newlimit = 0UL; + + if (!(mm->pax_flags & MF_PAX_PAGEEXEC)) + return; + + spin_lock(&mm->page_table_lock); + oldlimit = mm->context.user_cs_limit; + if ((prot & VM_EXEC) && oldlimit < end) + /* USER_CS limit moved up */ + newlimit = end; + else if (!(prot & VM_EXEC) && start < oldlimit && oldlimit <= end) + /* USER_CS limit moved down */ + newlimit = start; + + if (newlimit) { + mm->context.user_cs_limit = newlimit; + +#ifdef CONFIG_SMP + wmb(); + cpus_clear(mm->context.cpu_user_cs_mask); + cpu_set(smp_processor_id(), mm->context.cpu_user_cs_mask); +#endif + + set_user_cs(mm, smp_processor_id()); + } + spin_unlock(&mm->page_table_lock); + if (newlimit == end) + establish_user_cs_limit(mm, oldlimit, end); +} +#endif + +#ifdef CONFIG_PAX_SEGMEXEC +static int __mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, + unsigned long start, unsigned long end, unsigned int newflags); + +static int mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, + unsigned long start, unsigned long end, unsigned int newflags) +{ + if (vma->vm_flags & VM_MIRROR) { + struct vm_area_struct * vma_m, * prev_m; + unsigned long start_m, end_m; + int error; + + start_m = vma->vm_start + vma->vm_mirror;
+ vma_m = find_vma_prev(vma->vm_mm, start_m, &prev_m); + if (vma_m && vma_m->vm_start == start_m && (vma_m->vm_flags & VM_MIRROR)) { + start_m = start + vma->vm_mirror; + end_m = end + vma->vm_mirror; + + if (vma_m->vm_start >= SEGMEXEC_TASK_SIZE && !(newflags & VM_EXEC)) + error = __mprotect_fixup(vma_m, &prev_m, start_m, end_m, vma_m->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC)); + else + error = __mprotect_fixup(vma_m, &prev_m, start_m, end_m, newflags); + if (error) + return error; + } else { + printk("PAX: VMMIRROR: mprotect bug in %s, %08lx\n", current->comm, vma->vm_start); + return -ENOMEM; + } + } + + return __mprotect_fixup(vma, pprev, start, end, newflags); +} + +static int __mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, + unsigned long start, unsigned long end, unsigned int newflags) +{ + struct mm_struct * mm = vma->vm_mm; + unsigned long oldflags = vma->vm_flags; + long nrpages = (end - start) >> PAGE_SHIFT; + unsigned long charged = 0; + pgprot_t newprot; + pgoff_t pgoff; + int error; +#else static int mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, unsigned long start, unsigned long end, unsigned long newflags) @@ -114,6 +209,7 @@ mprotect_fixup(struct vm_area_struct *vm *pprev = vma; return 0; } +#endif /* * If we make a private mapping writable we increase our commit; @@ -132,7 +228,13 @@ mprotect_fixup(struct vm_area_struct *vm } } - newprot = protection_map[newflags & 0xf]; +#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_X86_32) + if (!(mm->pax_flags & MF_PAX_PAGEEXEC) && (newflags & (VM_READ|VM_WRITE))) + newprot = protection_map[(newflags | VM_EXEC) & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]; + else +#endif + + newprot = protection_map[newflags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]; /* * First try to merge with previous and/or next vma. 
@@ -179,6 +281,69 @@ fail: return error; } +#ifdef CONFIG_PAX_MPROTECT +/* PaX: non-PIC ELF libraries need relocations on their executable segments + * therefore we'll grant them VM_MAYWRITE once during their life. + * + * The checks favour ld-linux.so behaviour which operates on a per ELF segment + * basis because we want to allow the common case and not the special ones. + */ +static inline void pax_handle_maywrite(struct vm_area_struct * vma, unsigned long start) +{ + struct elfhdr elf_h; + struct elf_phdr elf_p, p_dyn; + elf_dyn dyn; + unsigned long i, j = 65536UL / sizeof(struct elf_phdr); + +#ifndef CONFIG_PAX_NOELFRELOCS + if ((vma->vm_start != start) || + !vma->vm_file || + !(vma->vm_flags & VM_MAYEXEC) || + (vma->vm_flags & VM_MAYNOTWRITE)) +#endif + + return; + + if (sizeof(elf_h) != kernel_read(vma->vm_file, 0UL, (char*)&elf_h, sizeof(elf_h)) || + memcmp(elf_h.e_ident, ELFMAG, SELFMAG) || + +#ifdef CONFIG_PAX_ETEXECRELOCS + (elf_h.e_type != ET_DYN && elf_h.e_type != ET_EXEC) || +#else + elf_h.e_type != ET_DYN || +#endif + + !elf_check_arch(&elf_h) || + elf_h.e_phentsize != sizeof(struct elf_phdr) || + elf_h.e_phnum > j) + return; + + for (i = 0UL; i < elf_h.e_phnum; i++) { + if (sizeof(elf_p) != kernel_read(vma->vm_file, elf_h.e_phoff + i*sizeof(elf_p), (char*)&elf_p, sizeof(elf_p))) + return; + if (elf_p.p_type == PT_DYNAMIC) { + p_dyn = elf_p; + j = i; + } + } + if (elf_h.e_phnum <= j) + return; + + i = 0UL; + do { + if (sizeof(dyn) != kernel_read(vma->vm_file, p_dyn.p_offset + i*sizeof(dyn), (char*)&dyn, sizeof(dyn))) + return; + if (dyn.d_tag == DT_TEXTREL || (dyn.d_tag == DT_FLAGS && (dyn.d_un.d_val & DF_TEXTREL))) { + vma->vm_flags |= VM_MAYWRITE | VM_MAYNOTWRITE; + gr_log_textrel(vma); + return; + } + i++; + } while (dyn.d_tag != DT_NULL); + return; +} +#endif + asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot) { @@ -198,6 +363,17 @@ sys_mprotect(unsigned long start, size_t end = start + len; if (end <= start) 
return -ENOMEM; + +#ifdef CONFIG_PAX_SEGMEXEC + if (current->mm->pax_flags & MF_PAX_SEGMEXEC) { + if (end > SEGMEXEC_TASK_SIZE) + return -EINVAL; + } else +#endif + + if (end > TASK_SIZE) + return -EINVAL; + if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM)) return -EINVAL; @@ -205,7 +381,7 @@ sys_mprotect(unsigned long start, size_t /* * Does the application expect PROT_READ to imply PROT_EXEC: */ - if (unlikely((prot & PROT_READ) && + if (unlikely((prot & (PROT_READ | PROT_WRITE)) && (current->personality & READ_IMPLIES_EXEC))) prot |= PROT_EXEC; @@ -238,6 +414,16 @@ sys_mprotect(unsigned long start, size_t if (start > vma->vm_start) prev = vma; +#ifdef CONFIG_PAX_MPROTECT + if ((vma->vm_mm->pax_flags & MF_PAX_MPROTECT) && (prot & PROT_WRITE)) + pax_handle_maywrite(vma, start); +#endif + + if (!gr_acl_handle_mprotect(vma->vm_file, prot)) { + error = -EACCES; + goto out; + } + for (nstart = start ; ; ) { unsigned long newflags; @@ -251,6 +437,12 @@ sys_mprotect(unsigned long start, size_t goto out; } +#ifdef CONFIG_PAX_MPROTECT + /* PaX: disallow write access after relocs are done, hopefully no one else needs it... 
*/ + if ((vma->vm_mm->pax_flags & MF_PAX_MPROTECT) && !(prot & PROT_WRITE) && (vma->vm_flags & VM_MAYNOTWRITE)) + newflags &= ~VM_MAYWRITE; +#endif + error = security_file_mprotect(vma, reqprot, prot); if (error) goto out; @@ -274,6 +466,9 @@ sys_mprotect(unsigned long start, size_t goto out; } } + + track_exec_limit(current->mm, start, end, vm_flags); + out: up_write(&current->mm->mmap_sem); return error; diff -urNp linux-2.6.17.11/mm/mremap.c linux-2.6.17.11/mm/mremap.c --- linux-2.6.17.11/mm/mremap.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/mm/mremap.c 2006-09-01 16:20:29.000000000 -0400 @@ -106,6 +106,12 @@ static void move_ptes(struct vm_area_str pte = ptep_clear_flush(vma, old_addr, old_pte); /* ZERO_PAGE can be dependant on virtual addr */ pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr); + +#ifdef CONFIG_ARCH_TRACK_EXEC_LIMIT + if ((mm->pax_flags & MF_PAX_PAGEEXEC) && !(vma->vm_flags & VM_EXEC)) + pte_exprotect(pte); +#endif + set_pte_at(mm, new_addr, new_pte, pte); } @@ -253,6 +259,7 @@ unsigned long do_mremap(unsigned long ad struct vm_area_struct *vma; unsigned long ret = -EINVAL; unsigned long charged = 0; + unsigned long task_size = TASK_SIZE; if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) goto out; @@ -271,6 +278,15 @@ unsigned long do_mremap(unsigned long ad if (!new_len) goto out; +#ifdef CONFIG_PAX_SEGMEXEC + if (current->mm->pax_flags & MF_PAX_SEGMEXEC) + task_size = SEGMEXEC_TASK_SIZE; +#endif + + if (new_len > task_size || addr > task_size-new_len || + old_len > task_size || addr > task_size-old_len) + goto out; + /* new_addr is only valid if MREMAP_FIXED is specified */ if (flags & MREMAP_FIXED) { if (new_addr & ~PAGE_MASK) @@ -278,16 +294,13 @@ unsigned long do_mremap(unsigned long ad if (!(flags & MREMAP_MAYMOVE)) goto out; - if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len) + if (new_addr > task_size - new_len) goto out; /* Check if the location we're moving into overlaps the * old location at all, and fail if 
it does. */ - if ((new_addr <= addr) && (new_addr+new_len) > addr) - goto out; - - if ((addr <= new_addr) && (addr+old_len) > new_addr) + if (addr + old_len > new_addr && new_addr + new_len > addr) goto out; ret = do_munmap(mm, new_addr, new_len); @@ -321,6 +334,14 @@ unsigned long do_mremap(unsigned long ad ret = -EINVAL; goto out; } + +#ifdef CONFIG_PAX_SEGMEXEC + if (vma->vm_flags & VM_MIRROR) { + ret = -EINVAL; + goto out; + } +#endif + /* We can't remap across vm area boundaries */ if (old_len > vma->vm_end - addr) goto out; @@ -354,7 +375,7 @@ unsigned long do_mremap(unsigned long ad if (old_len == vma->vm_end - addr && !((flags & MREMAP_FIXED) && (addr != new_addr)) && (old_len != new_len || !(flags & MREMAP_MAYMOVE))) { - unsigned long max_addr = TASK_SIZE; + unsigned long max_addr = task_size; if (vma->vm_next) max_addr = vma->vm_next->vm_start; /* can we just expand the current mapping? */ @@ -372,6 +393,7 @@ unsigned long do_mremap(unsigned long ad addr + new_len); } ret = addr; + track_exec_limit(vma->vm_mm, vma->vm_start, addr + new_len, vma->vm_flags); goto out; } } @@ -382,8 +404,8 @@ unsigned long do_mremap(unsigned long ad */ ret = -ENOMEM; if (flags & MREMAP_MAYMOVE) { + unsigned long map_flags = 0; if (!(flags & MREMAP_FIXED)) { - unsigned long map_flags = 0; if (vma->vm_flags & VM_MAYSHARE) map_flags |= MAP_SHARED; @@ -393,7 +415,12 @@ unsigned long do_mremap(unsigned long ad if (new_addr & ~PAGE_MASK) goto out; } + map_flags = vma->vm_flags; ret = move_vma(vma, addr, old_len, new_len, new_addr); + if (!(ret & ~PAGE_MASK)) { + track_exec_limit(current->mm, addr, addr + old_len, 0UL); + track_exec_limit(current->mm, new_addr, new_addr + new_len, map_flags); + } } out: if (ret & ~PAGE_MASK) diff -urNp linux-2.6.17.11/mm/page_alloc.c linux-2.6.17.11/mm/page_alloc.c --- linux-2.6.17.11/mm/page_alloc.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/mm/page_alloc.c 2006-09-01 16:20:29.000000000 -0400 @@ -334,7 +334,7 @@ static inline int 
page_is_buddy(struct p static inline void __free_one_page(struct page *page, struct zone *zone, unsigned int order) { - unsigned long page_idx; + unsigned long page_idx, index; int order_size = 1 << order; if (unlikely(PageCompound(page))) @@ -345,6 +345,11 @@ static inline void __free_one_page(struc BUG_ON(page_idx & (order_size - 1)); BUG_ON(bad_range(zone, page)); +#ifdef CONFIG_PAX_MEMORY_SANITIZE + for (index = order_size; index; --index) + clear_highpage(page + index - 1); +#endif + zone->free_pages += order_size; while (order < MAX_ORDER-1) { unsigned long combined_idx; diff -urNp linux-2.6.17.11/mm/rmap.c linux-2.6.17.11/mm/rmap.c --- linux-2.6.17.11/mm/rmap.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/mm/rmap.c 2006-09-01 16:20:29.000000000 -0400 @@ -107,6 +107,19 @@ int anon_vma_prepare(struct vm_area_stru list_add(&vma->anon_vma_node, &anon_vma->head); allocated = NULL; } + +#ifdef CONFIG_PAX_SEGMEXEC + if (vma->vm_flags & VM_MIRROR) { + struct vm_area_struct *vma_m; + + vma_m = find_vma(vma->vm_mm, vma->vm_start + vma->vm_mirror); + BUG_ON(!vma_m || vma_m->vm_start != vma->vm_start + vma->vm_mirror); + BUG_ON(vma_m->anon_vma || vma->vm_pgoff != vma_m->vm_pgoff); + vma_m->anon_vma = anon_vma; + __anon_vma_link(vma_m); + } +#endif + spin_unlock(&mm->page_table_lock); if (locked) diff -urNp linux-2.6.17.11/mm/shmem.c linux-2.6.17.11/mm/shmem.c --- linux-2.6.17.11/mm/shmem.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/mm/shmem.c 2006-09-01 16:20:29.000000000 -0400 @@ -2244,7 +2244,7 @@ static struct file_system_type tmpfs_fs_ .get_sb = shmem_get_sb, .kill_sb = kill_litter_super, }; -static struct vfsmount *shm_mnt; +struct vfsmount *shm_mnt; static int __init init_tmpfs(void) { diff -urNp linux-2.6.17.11/mm/slab.c linux-2.6.17.11/mm/slab.c --- linux-2.6.17.11/mm/slab.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/mm/slab.c 2006-09-01 16:20:29.000000000 -0400 @@ -1534,6 +1534,11 @@ static void store_stackinfo(struct kmem_ 
while (!kstack_end(sptr)) { svalue = *sptr++; + +#ifdef CONFIG_PAX_KERNEXEC + svalue += __KERNEL_TEXT_OFFSET; +#endif + if (kernel_text_address(svalue)) { *addr++ = svalue; size -= sizeof(unsigned long); diff -urNp linux-2.6.17.11/mm/tiny-shmem.c linux-2.6.17.11/mm/tiny-shmem.c --- linux-2.6.17.11/mm/tiny-shmem.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/mm/tiny-shmem.c 2006-09-01 16:20:29.000000000 -0400 @@ -27,7 +27,7 @@ static struct file_system_type tmpfs_fs_ .kill_sb = kill_litter_super, }; -static struct vfsmount *shm_mnt; +struct vfsmount *shm_mnt; static int __init init_tmpfs(void) { diff -urNp linux-2.6.17.11/mm/vmalloc.c linux-2.6.17.11/mm/vmalloc.c --- linux-2.6.17.11/mm/vmalloc.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/mm/vmalloc.c 2006-09-01 16:20:29.000000000 -0400 @@ -193,6 +193,8 @@ struct vm_struct *__get_vm_area_node(uns write_lock(&vmlist_lock); for (p = &vmlist; (tmp = *p) != NULL ;p = &tmp->next) { + if (addr > end - size) + goto out; if ((unsigned long)tmp->addr < addr) { if((unsigned long)tmp->addr + tmp->size >= addr) addr = ALIGN(tmp->size + @@ -204,8 +206,6 @@ struct vm_struct *__get_vm_area_node(uns if (size + addr <= (unsigned long)tmp->addr) goto found; addr = ALIGN(tmp->size + (unsigned long)tmp->addr, align); - if (addr > end - size) - goto out; } found: diff -urNp linux-2.6.17.11/net/ipv4/inet_connection_sock.c linux-2.6.17.11/net/ipv4/inet_connection_sock.c --- linux-2.6.17.11/net/ipv4/inet_connection_sock.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/net/ipv4/inet_connection_sock.c 2006-09-01 16:20:29.000000000 -0400 @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff -urNp linux-2.6.17.11/net/ipv4/inet_hashtables.c linux-2.6.17.11/net/ipv4/inet_hashtables.c --- linux-2.6.17.11/net/ipv4/inet_hashtables.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/net/ipv4/inet_hashtables.c 2006-09-01 16:20:29.000000000 -0400 @@ -19,11 +19,14 @@ #include #include 
#include +#include #include #include #include +extern void gr_update_task_in_ip_table(struct task_struct *task, const struct inet_sock *inet); + /* * Allocate and initialize a new local port bind bucket. * The bindhash mutex for snum's hash chain must be held here. @@ -309,6 +312,8 @@ ok: } spin_unlock(&head->lock); + gr_update_task_in_ip_table(current, inet_sk(sk)); + if (tw) { inet_twsk_deschedule(tw, death_row); inet_twsk_put(tw); diff -urNp linux-2.6.17.11/net/ipv4/netfilter/ipt_stealth.c linux-2.6.17.11/net/ipv4/netfilter/ipt_stealth.c --- linux-2.6.17.11/net/ipv4/netfilter/ipt_stealth.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.17.11/net/ipv4/netfilter/ipt_stealth.c 2006-09-01 16:20:29.000000000 -0400 @@ -0,0 +1,116 @@ +/* Kernel module to add stealth support. + * + * Copyright (C) 2002,2005 Brad Spengler + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +MODULE_LICENSE("GPL"); + +extern struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif); + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, + const void *matchinfo, + int offset, + unsigned int protoff, + int *hotdrop) +{ + struct iphdr *ip = skb->nh.iph; + struct tcphdr th; + struct udphdr uh; + struct sock *sk = NULL; + + if (!ip || offset) return 0; + + switch(ip->protocol) { + case IPPROTO_TCP: + if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &th, sizeof(th)) < 0) { + *hotdrop = 1; + return 0; + } + if (!(th.syn && !th.ack)) return 0; + sk = inet_lookup_listener(&tcp_hashinfo, ip->daddr, ntohs(th.dest), ((struct rtable*)skb->dst)->rt_iif); + break; + case IPPROTO_UDP: + if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &uh, sizeof(uh)) < 0) { + *hotdrop = 1; + return 0; + } + sk = udp_v4_lookup(ip->saddr, uh.source, ip->daddr, uh.dest, skb->dev->ifindex); + break; + default: + return 0; 
+ } + + if(!sk) // port is not being listened on, match this + return 1; + else { + sock_put(sk); + return 0; + } +} + +/* Called when user tries to insert an entry of this type. */ +static int +checkentry(const char *tablename, + const void *nip, + const struct xt_match *match, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + const struct ipt_ip *ip = (const struct ipt_ip *)nip; + if (matchsize != IPT_ALIGN(0)) + return 0; + + if(((ip->proto == IPPROTO_TCP && !(ip->invflags & IPT_INV_PROTO)) || + ((ip->proto == IPPROTO_UDP) && !(ip->invflags & IPT_INV_PROTO))) + && (hook_mask & (1 << NF_IP_LOCAL_IN))) + return 1; + + printk("stealth: Only works on TCP and UDP for the INPUT chain.\n"); + + return 0; +} + + +static struct ipt_match stealth_match = { + .name = "stealth", + .match = match, + .checkentry = checkentry, + .destroy = NULL, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + return ipt_register_match(&stealth_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&stealth_match); +} + +module_init(init); +module_exit(fini); diff -urNp linux-2.6.17.11/net/ipv4/netfilter/Kconfig linux-2.6.17.11/net/ipv4/netfilter/Kconfig --- linux-2.6.17.11/net/ipv4/netfilter/Kconfig 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/net/ipv4/netfilter/Kconfig 2006-09-01 16:20:29.000000000 -0400 @@ -314,6 +314,21 @@ config IP_NF_MATCH_HASHLIMIT destination IP' or `500pps from any given source IP' with a single IPtables rule. +config IP_NF_MATCH_STEALTH + tristate "stealth match support" + depends on IP_NF_IPTABLES + help + Enabling this option will drop all syn packets coming to unserved tcp + ports as well as all packets coming to unserved udp ports. If you + are using your system to route any type of packets (ie. 
via NAT) + you should put this module at the end of your ruleset, since it will + drop packets that aren't going to ports that are listening on your + machine itself, it doesn't take into account that the packet might be + destined for someone on your internal network if you're using NAT for + instance. + + To compile it as a module, choose M here. If unsure, say N. + # `filter', generic and specific targets config IP_NF_FILTER tristate "Packet filtering" @@ -829,4 +844,3 @@ config IP_NF_ARP_MANGLE Documentation/modules.txt. If unsure, say `N'. endmenu - diff -urNp linux-2.6.17.11/net/ipv4/netfilter/Makefile linux-2.6.17.11/net/ipv4/netfilter/Makefile --- linux-2.6.17.11/net/ipv4/netfilter/Makefile 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/net/ipv4/netfilter/Makefile 2006-09-01 16:20:29.000000000 -0400 @@ -80,6 +80,7 @@ obj-$(CONFIG_IP_NF_MATCH_DSCP) += ipt_ds obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o +obj-$(CONFIG_IP_NF_MATCH_STEALTH) += ipt_stealth.o # targets obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o diff -urNp linux-2.6.17.11/net/ipv4/tcp_ipv4.c linux-2.6.17.11/net/ipv4/tcp_ipv4.c --- linux-2.6.17.11/net/ipv4/tcp_ipv4.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/net/ipv4/tcp_ipv4.c 2006-09-01 16:20:29.000000000 -0400 @@ -62,6 +62,7 @@ #include #include #include +#include #include #include diff -urNp linux-2.6.17.11/net/ipv4/udp.c linux-2.6.17.11/net/ipv4/udp.c --- linux-2.6.17.11/net/ipv4/udp.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/net/ipv4/udp.c 2006-09-01 16:20:29.000000000 -0400 @@ -102,6 +102,7 @@ #include #include #include +#include #include #include #include @@ -110,6 +111,12 @@ #include #include +extern int gr_search_udp_recvmsg(const struct sock *sk, + const struct sk_buff *skb); +extern int gr_search_udp_sendmsg(const struct sock *sk, + const struct sockaddr_in *addr); + + /* * Snmp MIB for the UDP layer */ 
@@ -266,8 +273,7 @@ static struct sock *udp_v4_lookup_longwa return result; } -static __inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, - u32 daddr, u16 dport, int dif) +struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif) { struct sock *sk; @@ -542,9 +548,16 @@ int udp_sendmsg(struct kiocb *iocb, stru dport = usin->sin_port; if (dport == 0) return -EINVAL; + + if (!gr_search_udp_sendmsg(sk, usin)) + return -EPERM; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; + + if (!gr_search_udp_sendmsg(sk, NULL)) + return -EPERM; + daddr = inet->daddr; dport = inet->dport; /* Open fast path for connected socket. @@ -798,6 +811,11 @@ try_again: if (!skb) goto out; + if (!gr_search_udp_recvmsg(sk, skb)) { + err = -EPERM; + goto out_free; + } + copied = skb->len - sizeof(struct udphdr); if (copied > len) { copied = len; diff -urNp linux-2.6.17.11/net/socket.c linux-2.6.17.11/net/socket.c --- linux-2.6.17.11/net/socket.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/net/socket.c 2006-09-01 16:20:29.000000000 -0400 @@ -86,6 +86,7 @@ #include #include #include +#include #include #include @@ -96,6 +97,21 @@ #include #include +extern void gr_attach_curr_ip(const struct sock *sk); +extern int gr_handle_sock_all(const int family, const int type, + const int protocol); +extern int gr_handle_sock_server(const struct sockaddr *sck); +extern int gr_handle_sock_server_other(const struct socket *sck); +extern int gr_handle_sock_client(const struct sockaddr *sck); +extern int gr_search_connect(const struct socket * sock, + const struct sockaddr_in * addr); +extern int gr_search_bind(const struct socket * sock, + const struct sockaddr_in * addr); +extern int gr_search_listen(const struct socket * sock); +extern int gr_search_accept(const struct socket * sock); +extern int gr_search_socket(const int domain, const int type, + const int protocol); + static int sock_no_open(struct inode *irrelevant, struct file *dontcare); static 
ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf, size_t size, loff_t pos); @@ -342,7 +358,7 @@ static struct super_block *sockfs_get_sb return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC); } -static struct vfsmount *sock_mnt __read_mostly; +struct vfsmount *sock_mnt __read_mostly; static struct file_system_type sock_fs_type = { .name = "sockfs", @@ -1269,6 +1285,16 @@ asmlinkage long sys_socket(int family, i int retval; struct socket *sock; + if(!gr_search_socket(family, type, protocol)) { + retval = -EACCES; + goto out; + } + + if (gr_handle_sock_all(family, type, protocol)) { + retval = -EACCES; + goto out; + } + retval = sock_create(family, type, protocol, &sock); if (retval < 0) goto out; @@ -1367,16 +1393,25 @@ asmlinkage long sys_bind(int fd, struct { struct socket *sock; char address[MAX_SOCK_ADDR]; + struct sockaddr *sck; int err, fput_needed; if((sock = sockfd_lookup_light(fd, &err, &fput_needed))!=NULL) { if((err=move_addr_to_kernel(umyaddr,addrlen,address))>=0) { + sck = (struct sockaddr *)address; + if (!gr_search_bind(sock, (struct sockaddr_in *)sck) || + gr_handle_sock_server(sck)) { + err = -EACCES; + goto error; + } + err = security_socket_bind(sock, (struct sockaddr *)address, addrlen); if (!err) err = sock->ops->bind(sock, (struct sockaddr *)address, addrlen); } +error: fput_light(sock->file, fput_needed); } return err; @@ -1400,10 +1435,17 @@ asmlinkage long sys_listen(int fd, int b if ((unsigned) backlog > sysctl_somaxconn) backlog = sysctl_somaxconn; + if (gr_handle_sock_server_other(sock) || + !gr_search_listen(sock)) { + err = -EPERM; + goto error; + } + err = security_socket_listen(sock, backlog); if (!err) err = sock->ops->listen(sock, backlog); +error: fput_light(sock->file, fput_needed); } return err; @@ -1440,6 +1482,13 @@ asmlinkage long sys_accept(int fd, struc newsock->type = sock->type; newsock->ops = sock->ops; + if (gr_handle_sock_server_other(sock) || + !gr_search_accept(sock)) { + err = -EPERM; + 
sock_release(newsock); + goto out_put; + } + /* * We don't need try_module_get here, as the listening socket (sock) * has the protocol module (sock->ops->owner) held. @@ -1481,6 +1530,7 @@ asmlinkage long sys_accept(int fd, struc err = newfd; security_socket_post_accept(sock, newsock); + gr_attach_curr_ip(newsock->sk); out_put: fput_light(sock->file, fput_needed); @@ -1509,6 +1559,7 @@ asmlinkage long sys_connect(int fd, stru { struct socket *sock; char address[MAX_SOCK_ADDR]; + struct sockaddr *sck; int err, fput_needed; sock = sockfd_lookup_light(fd, &err, &fput_needed); @@ -1518,6 +1569,13 @@ asmlinkage long sys_connect(int fd, stru if (err < 0) goto out_put; + sck = (struct sockaddr *)address; + if (!gr_search_connect(sock, (struct sockaddr_in *)sck) || + gr_handle_sock_client(sck)) { + err = -EACCES; + goto out_put; + } + err = security_socket_connect(sock, (struct sockaddr *)address, addrlen); if (err) goto out_put; @@ -1772,6 +1830,7 @@ asmlinkage long sys_shutdown(int fd, int err = sock->ops->shutdown(sock, how); fput_light(sock->file, fput_needed); } + return err; } diff -urNp linux-2.6.17.11/net/unix/af_unix.c linux-2.6.17.11/net/unix/af_unix.c --- linux-2.6.17.11/net/unix/af_unix.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/net/unix/af_unix.c 2006-09-01 16:20:29.000000000 -0400 @@ -120,6 +120,7 @@ #include #include #include +#include int sysctl_unix_max_dgram_qlen = 10; @@ -685,6 +686,11 @@ static struct sock *unix_find_other(stru if (err) goto put_fail; + if (!gr_acl_handle_unix(nd.dentry, nd.mnt)) { + err = -EACCES; + goto put_fail; + } + err = -ECONNREFUSED; if (!S_ISSOCK(nd.dentry->d_inode->i_mode)) goto put_fail; @@ -708,6 +714,13 @@ static struct sock *unix_find_other(stru if (u) { struct dentry *dentry; dentry = unix_sk(u)->dentry; + + if (!gr_handle_chroot_unix(u->sk_peercred.pid)) { + err = -EPERM; + sock_put(u); + goto fail; + } + if (dentry) touch_atime(unix_sk(u)->mnt, dentry); } else @@ -786,9 +799,18 @@ static int 
unix_bind(struct socket *sock */ mode = S_IFSOCK | (SOCK_INODE(sock)->i_mode & ~current->fs->umask); + + if (!gr_acl_handle_mknod(dentry, nd.dentry, nd.mnt, mode)) { + err = -EACCES; + goto out_mknod_dput; + } + err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0, NULL); if (err) goto out_mknod_dput; + + gr_handle_create(dentry, nd.mnt); + mutex_unlock(&nd.dentry->d_inode->i_mutex); dput(nd.dentry); nd.dentry = dentry; @@ -806,6 +828,10 @@ static int unix_bind(struct socket *sock goto out_unlock; } +#ifdef CONFIG_GRKERNSEC_CHROOT_UNIX + sk->sk_peercred.pid = current->pid; +#endif + list = &unix_socket_table[addr->hash]; } else { list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)]; diff -urNp linux-2.6.17.11/security/commoncap.c linux-2.6.17.11/security/commoncap.c --- linux-2.6.17.11/security/commoncap.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/security/commoncap.c 2006-09-01 16:20:29.000000000 -0400 @@ -24,6 +24,7 @@ #include #include #include +#include int cap_netlink_send(struct sock *sk, struct sk_buff *skb) { @@ -45,7 +46,15 @@ EXPORT_SYMBOL(cap_netlink_recv); int cap_capable (struct task_struct *tsk, int cap) { /* Derived from include/linux/sched.h:capable. */ - if (vx_cap_raised(tsk->vx_info, tsk->cap_effective, cap)) + if (vx_cap_raised(tsk->vx_info, tsk->cap_effective, cap) && gr_task_is_capable(tsk, cap)) + return 0; + return -EPERM; +} + +int cap_capable_nolog (struct task_struct *tsk, int cap) +{ + /* Derived from include/linux/sched.h:capable. 
*/ + if (cap_raised (tsk->cap_effective, cap)) return 0; return -EPERM; } @@ -165,8 +174,11 @@ void cap_bprm_apply_creds (struct linux_ } } - current->suid = current->euid = current->fsuid = bprm->e_uid; - current->sgid = current->egid = current->fsgid = bprm->e_gid; + if (!gr_check_user_change(-1, bprm->e_uid, bprm->e_uid)) + current->suid = current->euid = current->fsuid = bprm->e_uid; + + if (!gr_check_group_change(-1, bprm->e_gid, bprm->e_gid)) + current->sgid = current->egid = current->fsgid = bprm->e_gid; /* For init, we want to retain the capabilities set * in the init_task struct. Thus we skip the usual @@ -177,6 +189,8 @@ void cap_bprm_apply_creds (struct linux_ cap_intersect (new_permitted, bprm->cap_effective); } + gr_handle_chroot_caps(current); + /* AUD: Audit candidate if current->cap_effective is set */ current->keep_capabilities = 0; @@ -323,12 +337,13 @@ int cap_vm_enough_memory(long pages) { int cap_sys_admin = 0; - if (cap_capable(current, CAP_SYS_ADMIN) == 0) + if (cap_capable_nolog(current, CAP_SYS_ADMIN) == 0) cap_sys_admin = 1; return __vm_enough_memory(pages, cap_sys_admin); } EXPORT_SYMBOL(cap_capable); +EXPORT_SYMBOL(cap_capable_nolog); EXPORT_SYMBOL(cap_settime); EXPORT_SYMBOL(cap_ptrace); EXPORT_SYMBOL(cap_capget); diff -urNp linux-2.6.17.11/security/dummy.c linux-2.6.17.11/security/dummy.c --- linux-2.6.17.11/security/dummy.c 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/security/dummy.c 2006-09-01 16:20:29.000000000 -0400 @@ -29,6 +29,7 @@ #include #include #include +#include static int dummy_ptrace (struct task_struct *parent, struct task_struct *child) { @@ -139,8 +140,11 @@ static void dummy_bprm_apply_creds (stru } } - current->suid = current->euid = current->fsuid = bprm->e_uid; - current->sgid = current->egid = current->fsgid = bprm->e_gid; + if (!gr_check_user_change(-1, bprm->e_uid, bprm->e_uid)) + current->suid = current->euid = current->fsuid = bprm->e_uid; + + if (!gr_check_group_change(-1, bprm->e_gid, 
bprm->e_gid)) + current->sgid = current->egid = current->fsgid = bprm->e_gid; dummy_capget(current, &current->cap_effective, &current->cap_inheritable, &current->cap_permitted); } diff -urNp linux-2.6.17.11/security/Kconfig linux-2.6.17.11/security/Kconfig --- linux-2.6.17.11/security/Kconfig 2006-08-07 00:18:54.000000000 -0400 +++ linux-2.6.17.11/security/Kconfig 2006-09-01 16:20:29.000000000 -0400 @@ -4,6 +4,453 @@ menu "Security options" +menu "PaX" + +config PAX + bool "Enable various PaX features" + depends on GRKERNSEC && (ALPHA || ARM || IA64 || MIPS32 || MIPS64 || PARISC || PPC32 || PPC64 || SPARC32 || SPARC64 || X86 || X86_64) + help + This allows you to enable various PaX features. PaX adds + intrusion prevention mechanisms to the kernel that reduce + the risks posed by exploitable memory corruption bugs. + +menu "PaX Control" + depends on PAX + +config PAX_SOFTMODE + bool 'Support soft mode' + help + Enabling this option will allow you to run PaX in soft mode, that + is, PaX features will not be enforced by default, only on executables + marked explicitly. You must also enable PT_PAX_FLAGS support as it + is the only way to mark executables for soft mode use. + + Soft mode can be activated by using the "pax_softmode=1" kernel command + line option on boot. Furthermore you can control various PaX features + at runtime via the entries in /proc/sys/kernel/pax. + +config PAX_EI_PAX + bool 'Use legacy ELF header marking' + help + Enabling this option will allow you to control PaX features on + a per executable basis via the 'chpax' utility available at + http://pax.grsecurity.net/. The control flags will be read from + an otherwise reserved part of the ELF header. This marking has + numerous drawbacks (no support for soft-mode, toolchain does not + know about the non-standard use of the ELF header) therefore it + has been deprecated in favour of PT_PAX_FLAGS support. 
+ + If you have applications not marked by the PT_PAX_FLAGS ELF + program header then you MUST enable this option otherwise they + will not get any protection. + + Note that if you enable PT_PAX_FLAGS marking support as well, + the PT_PAX_FLAGS marks will override the legacy EI_PAX marks. + +config PAX_PT_PAX_FLAGS + bool 'Use ELF program header marking' + help + Enabling this option will allow you to control PaX features on + a per executable basis via the 'paxctl' utility available at + http://pax.grsecurity.net/. The control flags will be read from + a PaX specific ELF program header (PT_PAX_FLAGS). This marking + has the benefits of supporting both soft mode and being fully + integrated into the toolchain (the binutils patch is available + from http://pax.grsecurity.net). + + If you have applications not marked by the PT_PAX_FLAGS ELF + program header then you MUST enable the EI_PAX marking support + otherwise they will not get any protection. + + Note that if you enable the legacy EI_PAX marking support as well, + the EI_PAX marks will be overridden by the PT_PAX_FLAGS marks. + +choice + prompt 'MAC system integration' + default PAX_HAVE_ACL_FLAGS + help + Mandatory Access Control systems have the option of controlling + PaX flags on a per executable basis, choose the method supported + by your particular system. + + - "none": if your MAC system does not interact with PaX, + - "direct": if your MAC system defines pax_set_flags() itself, + - "hook": if your MAC system uses the pax_set_flags_func callback. + + NOTE: this option is for developers/integrators only. 
+ +config PAX_NO_ACL_FLAGS + bool 'none' + +config PAX_HAVE_ACL_FLAGS + bool 'direct' + +config PAX_HOOK_ACL_FLAGS + bool 'hook' +endchoice + +endmenu + +menu "Non-executable pages" + depends on PAX + +config PAX_NOEXEC + bool "Enforce non-executable pages" + depends on (PAX_EI_PAX || PAX_PT_PAX_FLAGS || PAX_HAVE_ACL_FLAGS || PAX_HOOK_ACL_FLAGS) && (ALPHA || IA64 || MIPS32 || MIPS64 || PARISC || PPC32 || PPC64 || SPARC32 || SPARC64 || X86 || X86_64) + help + By design some architectures do not allow for protecting memory + pages against execution or even if they do, Linux does not make + use of this feature. In practice this means that if a page is + readable (such as the stack or heap) it is also executable. + + There is a well known exploit technique that makes use of this + fact and a common programming mistake where an attacker can + introduce code of his choice somewhere in the attacked program's + memory (typically the stack or the heap) and then execute it. + + If the attacked program was running with different (typically + higher) privileges than that of the attacker, then he can elevate + his own privilege level (e.g. get a root shell, write to files to + which he does not have write access, etc). + + Enabling this option will let you choose from various features + that prevent the injection and execution of 'foreign' code in + a program. + + This will also break programs that rely on the old behaviour and + expect that dynamically allocated memory via the malloc() family + of functions is executable (which it is not). Notable examples + are the XFree86 4.x server, the java runtime and wine. 
+ +config PAX_PAGEEXEC + bool "Paging based non-executable pages" + depends on PAX_NOEXEC && (!X86_32 || M586 || M586TSC || M586MMX || M686 || MPENTIUMII || MPENTIUMIII || MPENTIUMM || MPENTIUM4 || MK7 || MK8 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MVIAC3_2) + select PAX_NOVSYSCALL if X86_32 + help + This implementation is based on the paging feature of the CPU. + On i386 and ppc there is a variable but usually low performance + impact on applications. On alpha, ia64, parisc, sparc, sparc64 + and x86_64 there is no performance impact. + +config PAX_SEGMEXEC + bool "Segmentation based non-executable pages" + depends on PAX_NOEXEC && X86_32 + select PAX_NOVSYSCALL + help + This implementation is based on the segmentation feature of the + CPU and has little performance impact, however applications will + be limited to a 1.5 GB address space instead of the normal 3 GB. + +choice + prompt "Default non-executable page method" + depends on PAX_PAGEEXEC && PAX_SEGMEXEC + default PAX_DEFAULT_SEGMEXEC + help + Select the default non-executable page method applied to applications + that do not select one themselves. + +config PAX_DEFAULT_PAGEEXEC + bool "PAGEEXEC" + +config PAX_DEFAULT_SEGMEXEC + bool "SEGMEXEC" +endchoice + +config PAX_EMUTRAMP + bool "Emulate trampolines" if (PAX_PAGEEXEC || PAX_SEGMEXEC) && (PARISC || PPC32 || X86_32) + default y if PARISC || PPC32 + help + There are some programs and libraries that for one reason or + another attempt to execute special small code snippets from + non-executable memory pages. Most notable examples are the + signal handler return code generated by the kernel itself and + the GCC trampolines. + + If you enabled CONFIG_PAX_PAGEEXEC or CONFIG_PAX_SEGMEXEC then + such programs will no longer work under your kernel. + + As a remedy you can say Y here and use the 'chpax' or 'paxctl' + utilities to enable trampoline emulation for the affected programs + yet still have the protection provided by the non-executable pages. 
+ + On parisc and ppc you MUST enable this option and EMUSIGRT as + well, otherwise your system will not even boot. + + Alternatively you can say N here and use the 'chpax' or 'paxctl' + utilities to disable CONFIG_PAX_PAGEEXEC and CONFIG_PAX_SEGMEXEC + for the affected files. + + NOTE: enabling this feature *may* open up a loophole in the + protection provided by non-executable pages that an attacker + could abuse. Therefore the best solution is to not have any + files on your system that would require this option. This can + be achieved by not using libc5 (which relies on the kernel + signal handler return code) and not using or rewriting programs + that make use of the nested function implementation of GCC. + Skilled users can just fix GCC itself so that it implements + nested function calls in a way that does not interfere with PaX. + +config PAX_EMUSIGRT + bool "Automatically emulate sigreturn trampolines" + depends on PAX_EMUTRAMP && (PARISC || PPC32) + default y + help + Enabling this option will have the kernel automatically detect + and emulate signal return trampolines executing on the stack + that would otherwise lead to task termination. + + This solution is intended as a temporary one for users with + legacy versions of libc (libc5, glibc 2.0, uClibc before 0.9.17, + Modula-3 runtime, etc) or executables linked to such, basically + everything that does not specify its own SA_RESTORER function in + normal executable memory like glibc 2.1+ does. + + On parisc and ppc you MUST enable this option, otherwise your + system will not even boot. + + NOTE: this feature cannot be disabled on a per executable basis + and since it *does* open up a loophole in the protection provided + by non-executable pages, the best solution is to not have any + files on your system that would require this option. 
+ +config PAX_MPROTECT + bool "Restrict mprotect()" + depends on (PAX_PAGEEXEC || PAX_SEGMEXEC) && !PPC64 + help + Enabling this option will prevent programs from + - changing the executable status of memory pages that were + not originally created as executable, + - making read-only executable pages writable again, + - creating executable pages from anonymous memory. + + You should say Y here to complete the protection provided by + the enforcement of non-executable pages. + + NOTE: you can use the 'chpax' or 'paxctl' utilities to control + this feature on a per file basis. + +config PAX_NOELFRELOCS + bool "Disallow ELF text relocations" + depends on PAX_MPROTECT && !PAX_ETEXECRELOCS && (IA64 || X86 || X86_64) + help + Non-executable pages and mprotect() restrictions are effective + in preventing the introduction of new executable code into an + attacked task's address space. There remain only two avenues + for this kind of attack: if the attacker can execute already + existing code in the attacked task then he can either have it + create and mmap() a file containing his code or have it mmap() + an already existing ELF library that does not have position + independent code in it and use mprotect() on it to make it + writable and copy his code there. While protecting against + the former approach is beyond PaX, the latter can be prevented + by having only PIC ELF libraries on one's system (which do not + need to relocate their code). If you are sure this is your case, + then enable this option otherwise be careful as you may not even + be able to boot or log on your system (for example, some PAM + modules are erroneously compiled as non-PIC by default). + + NOTE: if you are using dynamic ELF executables (as suggested + when using ASLR) then you must have made sure that you linked + your files using the PIC version of crt1 (the et_dyn.tar.gz package + referenced there has already been updated to support this).
+ +config PAX_ETEXECRELOCS + bool "Allow ELF ET_EXEC text relocations" + depends on PAX_MPROTECT && (ALPHA || IA64 || PARISC) + default y + help + On some architectures there are incorrectly created applications + that require text relocations and would not work without enabling + this option. If you are an alpha, ia64 or parisc user, you should + enable this option and disable it once you have made sure that + none of your applications need it. + +config PAX_EMUPLT + bool "Automatically emulate ELF PLT" + depends on PAX_MPROTECT && (ALPHA || PARISC || PPC32 || SPARC32 || SPARC64) + default y + help + Enabling this option will have the kernel automatically detect + and emulate the Procedure Linkage Table entries in ELF files. + On some architectures such entries are in writable memory, and + become non-executable leading to task termination. Therefore + it is mandatory that you enable this option on alpha, parisc, ppc, + sparc and sparc64, otherwise your system would not even boot. + + NOTE: this feature *does* open up a loophole in the protection + provided by the non-executable pages, therefore the proper + solution is to modify the toolchain to produce a PLT that does + not need to be writable. + +config PAX_DLRESOLVE + bool + depends on PAX_EMUPLT && (SPARC32 || SPARC64) + default y + +config PAX_SYSCALL + bool + depends on PAX_PAGEEXEC && PPC32 + default y + +config PAX_KERNEXEC + bool "Enforce non-executable kernel pages" + depends on PAX_NOEXEC && X86_32 && !HOTPLUG_PCI_COMPAQ_NVRAM && !PCI_BIOS && !EFI && !DEBUG_RODATA + select PAX_NOVSYSCALL + help + This is the kernel land equivalent of PAGEEXEC and MPROTECT, + that is, enabling this option will make it harder to inject + and execute 'foreign' code in kernel memory itself. 
+ +endmenu + +menu "Address Space Layout Randomization" + depends on PAX + +config PAX_ASLR + bool "Address Space Layout Randomization" + depends on PAX_EI_PAX || PAX_PT_PAX_FLAGS || PAX_HAVE_ACL_FLAGS || PAX_HOOK_ACL_FLAGS + help + Many if not most exploit techniques rely on the knowledge of + certain addresses in the attacked program. The following options + will allow the kernel to apply a certain amount of randomization + to specific parts of the program thereby forcing an attacker to + guess them in most cases. Any failed guess will most likely crash + the attacked program which allows the kernel to detect such attempts + and react to them. PaX itself provides no reaction mechanisms; + instead it is strongly encouraged that you make use of Nergal's + segvguard (ftp://ftp.pl.openwall.com/misc/segvguard/) or grsecurity's + (http://www.grsecurity.net/) built-in crash detection features or + develop one yourself. + + By saying Y here you can choose to randomize the following areas: + - top of the task's kernel stack + - top of the task's userland stack + - base address for mmap() requests that do not specify one + (this includes all libraries) + - base address of the main executable + + It is strongly recommended to say Y here as address space layout + randomization has negligible impact on performance yet it provides + a very effective protection. + + NOTE: you can use the 'chpax' or 'paxctl' utilities to control + this feature on a per file basis. + +config PAX_RANDKSTACK + bool "Randomize kernel stack base" + depends on PAX_ASLR && X86_TSC && X86_32 + help + By saying Y here the kernel will randomize every task's kernel + stack on every system call. This will not only force an attacker + to guess it but also prevent him from making use of possible + leaked information about it. + + Since the kernel stack is a rather scarce resource, randomization + may cause unexpected stack overflows, therefore you should very + carefully test your system.
Note that once enabled in the kernel + configuration, this feature cannot be disabled on a per file basis. + +config PAX_RANDUSTACK + bool "Randomize user stack base" + depends on PAX_ASLR + help + By saying Y here the kernel will randomize every task's userland + stack. The randomization is done in two steps where the second + one may apply a big amount of shift to the top of the stack and + cause problems for programs that want to use lots of memory (more + than 2.5 GB if SEGMEXEC is not active, or 1.25 GB when it is). + For this reason the second step can be controlled by 'chpax' or + 'paxctl' on a per file basis. + +config PAX_RANDMMAP + bool "Randomize mmap() base" + depends on PAX_ASLR + help + By saying Y here the kernel will use a randomized base address for + mmap() requests that do not specify one themselves. As a result + all dynamically loaded libraries will appear at random addresses + and therefore be harder to exploit by a technique where an attacker + attempts to execute library code for his purposes (e.g. spawn a + shell from an exploited program that is running at an elevated + privilege level). + + Furthermore, if a program is relinked as a dynamic ELF file, its + base address will be randomized as well, completing the full + randomization of the address space layout. Attacking such programs + becomes a guessing game. You can find an example of doing this at + http://pax.grsecurity.net/et_dyn.tar.gz and practical samples at + http://www.grsecurity.net/grsec-gcc-specs.tar.gz . + + NOTE: you can use the 'chpax' or 'paxctl' utilities to control this + feature on a per file basis. + +config PAX_NOVSYSCALL + bool "Disable the vsyscall page" + depends on PAX_ASLR && X86_32 + help + The Linux 2.6 kernel introduced a new feature that speeds up or + simplifies certain operations, such as system calls or returns + from signal handlers.
+ + Unfortunately the implementation also gives a powerful instrument + into the hands of exploit writers: the so-called vsyscall page exists + in every task at the same fixed address and it contains machine code + that is very useful in performing the return-to-libc style attack. + + Since this exploit technique cannot in general be protected against + via kernel solutions, this option will allow you to disable the use + of the vsyscall page and revert back to the old behaviour. + +endmenu + +menu "Miscellaneous hardening features" + +config PAX_MEMORY_SANITIZE + bool "Sanitize all freed memory" + help + By saying Y here the kernel will erase memory pages as soon as they + are freed. This in turn reduces the lifetime of data stored in the + pages, making it less likely that sensitive information such as + passwords, cryptographic secrets, etc stay in memory for too long. + + This is especially useful for programs whose runtime is short; long + lived processes and the kernel itself benefit from this as long as + they operate on whole memory pages and ensure timely freeing of pages + that may hold sensitive information. + + The tradeoff is performance impact: on a single CPU system kernel + compilation sees a 3% slowdown, other systems and workloads may vary + and you are advised to test this feature on your expected workload + before deploying it. + + Note that this feature does not protect data stored in live pages, + e.g., process memory swapped to disk may stay there for a long time. + +config PAX_MEMORY_UDEREF + bool "Prevent invalid userland pointer dereference" + depends on X86_32 + select PAX_NOVSYSCALL + help + By saying Y here the kernel will be prevented from dereferencing + userland pointers in contexts where the kernel expects only kernel + pointers. This is both a useful runtime debugging feature and a + security measure that prevents exploiting a class of kernel bugs.
+ + The tradeoff is that some virtualization solutions may experience + a huge slowdown and therefore you should not enable this feature + for kernels meant to run in such environments. Whether a given VM + solution is affected or not is best determined by simply trying it + out; the performance impact will be obvious right on boot as this + mechanism engages from very early on. A good rule of thumb is that + VMs running on CPUs without hardware virtualization support (i.e., + the majority of IA-32 CPUs) will likely experience the slowdown. + +endmenu + +endmenu + +source grsecurity/Kconfig + config KEYS bool "Enable access key retention support" help