From 2380c4865cc3612d22b7c8f21890de152daf79a1 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jan=20R=C4=99korajski?= Date: Tue, 31 Mar 2009 12:04:41 +0000 Subject: [PATCH] - merged from LINUX_2_6 Changed files: esfq-kernel.patch -> 1.2 hostap-kernel-2.6.18.patch -> 1.2 kernel-PF_RING.patch -> 1.2 kernel-alpha.config -> 1.95 kernel-apparmor-after-grsec_full.patch -> 1.2 kernel-apparmor.patch -> 1.2 kernel-atm-vbr.patch -> 1.3 kernel-atmdd.patch -> 1.3 kernel-aufs-support.patch -> 1.2 kernel-aufs.patch -> 1.2 kernel-autoconf.h -> 1.4 kernel-bzip2-lzma.patch -> 1.2 kernel-config.h -> 1.2 kernel-esfq.patch -> 1.2 kernel-fbcon-margins.patch -> 1.4 kernel-fbcondecor.patch -> 1.2 kernel-forcedeth-WON.patch -> 1.3 kernel-grsec-caps.patch -> 1.2 kernel-grsec-common.patch -> 1.2 kernel-grsec-minimal.patch -> 1.3 kernel-grsec-no-stupid-SbO.patch -> 1.2 kernel-grsec.config -> 1.30 kernel-grsec_fixes.patch -> 1.2 kernel-grsec_full.patch -> 1.2 kernel-grsec_minimal.config -> 1.2 kernel-hostap.patch -> 1.2 kernel-ia64.config -> 1.18 kernel-imq.config -> 1.2 kernel-imq.patch -> 1.2 kernel-inittmpfs.config -> 1.2 kernel-inittmpfs.patch -> 1.2 kernel-ipt_ACCOUNT.patch -> 1.2 kernel-ipt_account.patch -> 1.2 kernel-ipvs-nfct.patch -> 1.2 kernel-layer7.patch -> 1.2 kernel-module-build.pl -> 1.2 kernel-mpt-fusion.patch -> 1.2 kernel-multiarch.config -> 1.2 kernel-netfilter.config -> 1.45 kernel-nf_rtsp.patch -> 1.3 kernel-no-pax.config -> 1.2 kernel-owner-xid.patch -> 1.2 kernel-pax.config -> 1.2 kernel-pax.patch -> 1.2 kernel-pax_selinux_hooks.patch -> 1.2 kernel-pom-ng-IPV4OPTSSTRIP.patch -> 1.2 kernel-pom-ng-ROUTE.patch -> 1.2 kernel-pom-ng-ipv4options.patch -> 1.2 kernel-pom-ng-mms-conntrack-nat.patch -> 1.2 kernel-pom-ng-rpc.patch -> 1.2 kernel-pom-ng-rsh.patch -> 1.2 kernel-powerpc.config -> 1.4 kernel-ppc-ICE-hacks.patch -> 1.2 kernel-ppc-crtsavres.patch -> 1.3 kernel-ppclibs.Makefile -> 1.2 kernel-pwc-uncompress.patch -> 1.2 kernel-regressions.patch -> 1.3 kernel-reiser4.patch -> 1.3 
kernel-rndis_host-wm5.patch -> 1.2 kernel-routes.patch -> 1.2 kernel-small_fixes.patch -> 1.2 kernel-sparc.config -> 1.62 kernel-sparc64.config -> 1.93 kernel-suspend2-headers.patch -> 1.2 kernel-tomoyo-with-apparmor.patch -> 1.2 kernel-toshiba_acpi.patch -> 1.3 kernel-tuxonice-headers.patch -> 1.2 kernel-tuxonice.config -> 1.2 kernel-tuxonice.patch -> 1.2 kernel-unionfs-apparmor.patch -> 1.2 kernel-unionfs.patch -> 1.2 kernel-vserver-2.3.patch -> 1.3 kernel-vserver-fixes.patch -> 1.2 kernel-vserver.config -> 1.3 kernel-warnings.patch -> 1.2 kernel-wrr.patch -> 1.2 kernel-x86.config -> 1.4 kernel-zph.patch -> 1.2 linux-2.6-atm-vbr.patch -> 1.3 linux-2.6-atmdd.patch -> 1.3 linux-2.6-forcedeth-WON.patch -> 1.3 linux-2.6-grsec-caps.patch -> 1.2 linux-2.6-grsec-common.patch -> 1.2 linux-2.6-grsec-minimal.patch -> 1.3 linux-2.6-grsec-no-stupid-SoB.patch -> 1.2 linux-2.6-grsec_full.patch -> 1.2 linux-2.6-ppc-ICE-hacks.patch -> 1.2 linux-2.6-suspend2.patch -> 1.2 linux-2.6-toshiba_acpi.patch -> 1.3 linux-2.6-vs2.3.patch -> 1.3 linux-2.6-warnings.patch -> 1.2 linux-2.6.21.1-pwc-uncompress.patch -> 1.2 linux-PF_RING.patch -> 1.2 linux-fbcon-margins.patch -> 1.4 pax_selinux_hooks-2.6.20.patch -> 1.2 tahoe9xx-2.6.24.patch -> 1.2 --- esfq-kernel.patch | 796 + hostap-kernel-2.6.18.patch | 198 + kernel-PF_RING.patch | 3697 +-- kernel-alpha.config | 2445 +- kernel-apparmor-after-grsec_full.patch | 1182 +- kernel-apparmor.patch | 9063 ++++++ kernel-atm-vbr.patch | 162 + kernel-atmdd.patch | 954 + kernel-aufs-support.patch | 5 +- kernel-aufs.patch | 6 +- kernel-autoconf.h | 26 +- kernel-bzip2-lzma.patch | 3252 ++ kernel-config.h | 6 + kernel-esfq.patch | 796 + kernel-fbcon-margins.patch | 29 + kernel-fbcondecor.patch | 1839 ++ kernel-forcedeth-WON.patch | 14 +- kernel-grsec-caps.patch | 12 + kernel-grsec-common.patch | 39 + kernel-grsec-minimal.patch | 1189 + kernel-grsec-no-stupid-SbO.patch | 44 +- kernel-grsec.config | 38 +- kernel-grsec_fixes.patch | 149 + 
kernel-grsec_full.patch | 37330 +++++++++++++++++++++++ kernel-grsec_minimal.config | 20 + kernel-hostap.patch | 198 + kernel-ia64.config | 2454 +- kernel-imq.config | 9 + kernel-imq.patch | 960 + kernel-inittmpfs.config | 1 + kernel-inittmpfs.patch | 133 + kernel-ipt_ACCOUNT.patch | 1362 + kernel-ipt_account.patch | 1077 + kernel-ipvs-nfct.patch | 780 + kernel-layer7.patch | 2132 ++ kernel-module-build.pl | 38 + kernel-mpt-fusion.patch | 933 +- kernel-multiarch.config | 5715 ++++ kernel-netfilter.config | 177 + kernel-nf_rtsp.patch | 36 +- kernel-no-pax.config | 8 + kernel-owner-xid.patch | 133 + kernel-pax.config | 28 +- kernel-pax.patch | 24038 +++++++++++++++ kernel-pax_selinux_hooks.patch | 119 + kernel-pom-ng-IPV4OPTSSTRIP.patch | 118 + kernel-pom-ng-ROUTE.patch | 883 + kernel-pom-ng-ipv4options.patch | 239 + kernel-pom-ng-mms-conntrack-nat.patch | 751 + kernel-pom-ng-rpc.patch | 1707 ++ kernel-pom-ng-rsh.patch | 448 + kernel-powerpc.config | 8 +- kernel-ppc-ICE-hacks.patch | 33 + kernel-ppc-crtsavres.patch | 75 + kernel-ppclibs.Makefile | 68 + kernel-pwc-uncompress.patch | 10 + kernel-regressions.patch | 345 +- kernel-reiser4.patch | 1635 +- kernel-rndis_host-wm5.patch | 12 + kernel-routes.patch | 1333 + kernel-small_fixes.patch | 133 +- kernel-sparc.config | 1246 +- kernel-sparc64.config | 1865 +- kernel-suspend2-headers.patch | 46 + kernel-tomoyo-with-apparmor.patch | 36 +- kernel-toshiba_acpi.patch | 12 +- kernel-tuxonice-headers.patch | 46 + kernel-tuxonice.config | 29 + kernel-tuxonice.patch | 19751 ++++++++++++ kernel-unionfs-apparmor.patch | 303 + kernel-unionfs.patch | 11346 +++++++ kernel-vserver-2.3.patch | 32622 +++++++++----------- kernel-vserver-fixes.patch | 45 + kernel-vserver.config | 48 + kernel-warnings.patch | 149 + kernel-wrr.patch | 128 +- kernel-x86.config | 16 +- kernel-zph.patch | 64 +- linux-2.6-atm-vbr.patch | 162 + linux-2.6-atmdd.patch | 954 + linux-2.6-forcedeth-WON.patch | 14 +- linux-2.6-grsec-caps.patch | 12 + 
linux-2.6-grsec-common.patch | 39 + linux-2.6-grsec-minimal.patch | 1189 + linux-2.6-grsec-no-stupid-SoB.patch | 44 +- linux-2.6-grsec_full.patch | 37330 +++++++++++++++++++++++ linux-2.6-ppc-ICE-hacks.patch | 33 + linux-2.6-suspend2.patch | 19751 ++++++++++++ linux-2.6-toshiba_acpi.patch | 12 +- linux-2.6-vs2.3.patch | 32622 +++++++++----------- linux-2.6-warnings.patch | 149 + linux-2.6.21.1-pwc-uncompress.patch | 10 + linux-PF_RING.patch | 3697 +-- linux-fbcon-margins.patch | 29 + pax_selinux_hooks-2.6.20.patch | 119 + tahoe9xx-2.6.24.patch | 2 +- 96 files changed, 222257 insertions(+), 53083 deletions(-) create mode 100644 esfq-kernel.patch create mode 100644 hostap-kernel-2.6.18.patch create mode 100644 kernel-apparmor.patch create mode 100644 kernel-atm-vbr.patch create mode 100644 kernel-atmdd.patch create mode 100644 kernel-bzip2-lzma.patch create mode 100644 kernel-config.h create mode 100644 kernel-esfq.patch create mode 100644 kernel-fbcon-margins.patch create mode 100644 kernel-fbcondecor.patch create mode 100644 kernel-grsec-caps.patch create mode 100644 kernel-grsec-common.patch create mode 100644 kernel-grsec-minimal.patch create mode 100644 kernel-grsec_fixes.patch create mode 100644 kernel-grsec_full.patch create mode 100644 kernel-grsec_minimal.config create mode 100644 kernel-hostap.patch create mode 100644 kernel-imq.config create mode 100644 kernel-imq.patch create mode 100644 kernel-inittmpfs.config create mode 100644 kernel-inittmpfs.patch create mode 100644 kernel-ipt_ACCOUNT.patch create mode 100644 kernel-ipt_account.patch create mode 100644 kernel-ipvs-nfct.patch create mode 100644 kernel-layer7.patch create mode 100644 kernel-module-build.pl create mode 100644 kernel-multiarch.config create mode 100644 kernel-netfilter.config create mode 100644 kernel-owner-xid.patch create mode 100644 kernel-pax.patch create mode 100644 kernel-pax_selinux_hooks.patch create mode 100644 kernel-pom-ng-IPV4OPTSSTRIP.patch create mode 100644 
kernel-pom-ng-ROUTE.patch create mode 100644 kernel-pom-ng-ipv4options.patch create mode 100644 kernel-pom-ng-mms-conntrack-nat.patch create mode 100644 kernel-pom-ng-rpc.patch create mode 100644 kernel-pom-ng-rsh.patch create mode 100644 kernel-ppc-ICE-hacks.patch create mode 100644 kernel-ppc-crtsavres.patch create mode 100644 kernel-ppclibs.Makefile create mode 100644 kernel-pwc-uncompress.patch create mode 100644 kernel-rndis_host-wm5.patch create mode 100644 kernel-routes.patch create mode 100644 kernel-suspend2-headers.patch create mode 100644 kernel-tuxonice-headers.patch create mode 100644 kernel-tuxonice.config create mode 100644 kernel-tuxonice.patch create mode 100644 kernel-unionfs-apparmor.patch create mode 100644 kernel-unionfs.patch create mode 100644 kernel-vserver-fixes.patch create mode 100644 kernel-vserver.config create mode 100644 kernel-warnings.patch create mode 100644 linux-2.6-atm-vbr.patch create mode 100644 linux-2.6-atmdd.patch create mode 100644 linux-2.6-grsec-caps.patch create mode 100644 linux-2.6-grsec-common.patch create mode 100644 linux-2.6-grsec-minimal.patch create mode 100644 linux-2.6-grsec_full.patch create mode 100644 linux-2.6-ppc-ICE-hacks.patch create mode 100644 linux-2.6-suspend2.patch create mode 100644 linux-2.6-warnings.patch create mode 100644 linux-2.6.21.1-pwc-uncompress.patch create mode 100644 linux-fbcon-margins.patch create mode 100644 pax_selinux_hooks-2.6.20.patch diff --git a/esfq-kernel.patch b/esfq-kernel.patch new file mode 100644 index 00000000..4f5457da --- /dev/null +++ b/esfq-kernel.patch @@ -0,0 +1,796 @@ +diff -Naur linux-2.6.24.orig/include/linux/pkt_sched.h linux-2.6.24/include/linux/pkt_sched.h +--- linux-2.6.24.orig/include/linux/pkt_sched.h 2008-01-24 14:58:37.000000000 -0800 ++++ linux-2.6.24/include/linux/pkt_sched.h 2008-01-28 00:27:12.000000000 -0800 +@@ -157,6 +157,33 @@ + * to change these parameters in compile time. 
+ */ + ++/* ESFQ section */ ++ ++enum ++{ ++ /* traditional */ ++ TCA_ESFQ_HASH_CLASSIC, ++ TCA_ESFQ_HASH_DST, ++ TCA_ESFQ_HASH_SRC, ++ TCA_ESFQ_HASH_FWMARK, ++ /* conntrack */ ++ TCA_ESFQ_HASH_CTORIGDST, ++ TCA_ESFQ_HASH_CTORIGSRC, ++ TCA_ESFQ_HASH_CTREPLDST, ++ TCA_ESFQ_HASH_CTREPLSRC, ++ TCA_ESFQ_HASH_CTNATCHG, ++}; ++ ++struct tc_esfq_qopt ++{ ++ unsigned quantum; /* Bytes per round allocated to flow */ ++ int perturb_period; /* Period of hash perturbation */ ++ __u32 limit; /* Maximal packets in queue */ ++ unsigned divisor; /* Hash divisor */ ++ unsigned flows; /* Maximal number of flows */ ++ unsigned hash_kind; /* Hash function to use for flow identification */ ++}; ++ + /* RED section */ + + enum +diff -Naur linux-2.6.24.orig/net/sched/Kconfig linux-2.6.24/net/sched/Kconfig +--- linux-2.6.24.orig/net/sched/Kconfig 2008-01-24 14:58:37.000000000 -0800 ++++ linux-2.6.24/net/sched/Kconfig 2008-01-28 00:27:12.000000000 -0800 +@@ -139,6 +139,37 @@ + To compile this code as a module, choose M here: the + module will be called sch_sfq. + ++config NET_SCH_ESFQ ++ tristate "Enhanced Stochastic Fairness Queueing (ESFQ)" ++ ---help--- ++ Say Y here if you want to use the Enhanced Stochastic Fairness ++ Queueing (ESFQ) packet scheduling algorithm for some of your network ++ devices or as a leaf discipline for a classful qdisc such as HTB or ++ CBQ (see the top of for details and ++ references to the SFQ algorithm). ++ ++ This is an enchanced SFQ version which allows you to control some ++ hardcoded values in the SFQ scheduler. ++ ++ ESFQ also adds control of the hash function used to identify packet ++ flows. The original SFQ discipline hashes by connection; ESFQ add ++ several other hashing methods, such as by src IP or by dst IP, which ++ can be more fair to users in some networking situations. ++ ++ To compile this code as a module, choose M here: the ++ module will be called sch_esfq. 
++ ++config NET_SCH_ESFQ_NFCT ++ bool "Connection Tracking Hash Types" ++ depends on NET_SCH_ESFQ && NF_CONNTRACK ++ ---help--- ++ Say Y here to enable support for hashing based on netfilter connection ++ tracking information. This is useful for a router that is also using ++ NAT to connect privately-addressed hosts to the Internet. If you want ++ to provide fair distribution of upstream bandwidth, ESFQ must use ++ connection tracking information, since all outgoing packets will share ++ the same source address. ++ + config NET_SCH_TEQL + tristate "True Link Equalizer (TEQL)" + ---help--- +diff -Naur linux-2.6.24.orig/net/sched/Makefile linux-2.6.24/net/sched/Makefile +--- linux-2.6.24.orig/net/sched/Makefile 2008-01-24 14:58:37.000000000 -0800 ++++ linux-2.6.24/net/sched/Makefile 2008-01-28 00:27:12.000000000 -0800 +@@ -23,6 +23,7 @@ + obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o + obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o + obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o ++obj-$(CONFIG_NET_SCH_ESFQ) += sch_esfq.o + obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o + obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o + obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o +diff -Naur linux-2.6.24.orig/net/sched/sch_esfq.c linux-2.6.24/net/sched/sch_esfq.c +--- linux-2.6.24.orig/net/sched/sch_esfq.c 1969-12-31 16:00:00.000000000 -0800 ++++ linux-2.6.24/net/sched/sch_esfq.c 2008-01-28 00:27:22.000000000 -0800 +@@ -0,0 +1,703 @@ ++/* ++ * net/sched/sch_esfq.c Extended Stochastic Fairness Queueing discipline. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ * ++ * Authors: Alexey Kuznetsov, ++ * ++ * Changes: Alexander Atanasov, ++ * Added dynamic depth,limit,divisor,hash_kind options. ++ * Added dst and src hashes. ++ * ++ * Alexander Clouter, ++ * Ported ESFQ to Linux 2.6. 
++ * ++ * Corey Hickey, ++ * Maintenance of the Linux 2.6 port. ++ * Added fwmark hash (thanks to Robert Kurjata). ++ * Added usage of jhash. ++ * Added conntrack support. ++ * Added ctnatchg hash (thanks to Ben Pfountz). ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* Stochastic Fairness Queuing algorithm. ++ For more comments look at sch_sfq.c. ++ The difference is that you can change limit, depth, ++ hash table size and choose alternate hash types. ++ ++ classic: same as in sch_sfq.c ++ dst: destination IP address ++ src: source IP address ++ fwmark: netfilter mark value ++ ctorigdst: original destination IP address ++ ctorigsrc: original source IP address ++ ctrepldst: reply destination IP address ++ ctreplsrc: reply source IP ++ ++*/ ++ ++#define ESFQ_HEAD 0 ++#define ESFQ_TAIL 1 ++ ++/* This type should contain at least SFQ_DEPTH*2 values */ ++typedef unsigned int esfq_index; ++ ++struct esfq_head ++{ ++ esfq_index next; ++ esfq_index prev; ++}; ++ ++struct esfq_sched_data ++{ ++/* Parameters */ ++ int perturb_period; ++ unsigned quantum; /* Allotment per round: MUST BE >= MTU */ ++ int limit; ++ unsigned depth; ++ unsigned hash_divisor; ++ unsigned hash_kind; ++/* Variables */ ++ struct timer_list perturb_timer; ++ int perturbation; ++ esfq_index tail; /* Index of current slot in round */ ++ esfq_index max_depth; /* Maximal depth */ ++ ++ esfq_index *ht; /* Hash table */ ++ esfq_index *next; /* Active slots link */ ++ short *allot; /* Current allotment per slot */ ++ unsigned short *hash; /* Hash value indexed by slots */ ++ struct sk_buff_head *qs; /* Slot queue */ ++ struct esfq_head *dep; /* Linked list of slots, indexed by depth */ ++}; ++ ++/* This contains the info we 
will hash. */ ++struct esfq_packet_info ++{ ++ u32 proto; /* protocol or port */ ++ u32 src; /* source from packet header */ ++ u32 dst; /* destination from packet header */ ++ u32 ctorigsrc; /* original source from conntrack */ ++ u32 ctorigdst; /* original destination from conntrack */ ++ u32 ctreplsrc; /* reply source from conntrack */ ++ u32 ctrepldst; /* reply destination from conntrack */ ++ u32 mark; /* netfilter mark (fwmark) */ ++}; ++ ++static __inline__ unsigned esfq_jhash_1word(struct esfq_sched_data *q,u32 a) ++{ ++ return jhash_1word(a, q->perturbation) & (q->hash_divisor-1); ++} ++ ++static __inline__ unsigned esfq_jhash_2words(struct esfq_sched_data *q, u32 a, u32 b) ++{ ++ return jhash_2words(a, b, q->perturbation) & (q->hash_divisor-1); ++} ++ ++static __inline__ unsigned esfq_jhash_3words(struct esfq_sched_data *q, u32 a, u32 b, u32 c) ++{ ++ return jhash_3words(a, b, c, q->perturbation) & (q->hash_divisor-1); ++} ++ ++static unsigned esfq_hash(struct esfq_sched_data *q, struct sk_buff *skb) ++{ ++ struct esfq_packet_info info; ++#ifdef CONFIG_NET_SCH_ESFQ_NFCT ++ enum ip_conntrack_info ctinfo; ++ struct nf_conn *ct = nf_ct_get(skb, &ctinfo); ++#endif ++ ++ switch (skb->protocol) { ++ case __constant_htons(ETH_P_IP): ++ { ++ struct iphdr *iph = ip_hdr(skb); ++ info.dst = iph->daddr; ++ info.src = iph->saddr; ++ if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && ++ (iph->protocol == IPPROTO_TCP || ++ iph->protocol == IPPROTO_UDP || ++ iph->protocol == IPPROTO_SCTP || ++ iph->protocol == IPPROTO_DCCP || ++ iph->protocol == IPPROTO_ESP)) ++ info.proto = *(((u32*)iph) + iph->ihl); ++ else ++ info.proto = iph->protocol; ++ break; ++ } ++ case __constant_htons(ETH_P_IPV6): ++ { ++ struct ipv6hdr *iph = ipv6_hdr(skb); ++ /* Hash ipv6 addresses into a u32. This isn't ideal, ++ * but the code is simple. 
*/ ++ info.dst = jhash2(iph->daddr.s6_addr32, 4, q->perturbation); ++ info.src = jhash2(iph->saddr.s6_addr32, 4, q->perturbation); ++ if (iph->nexthdr == IPPROTO_TCP || ++ iph->nexthdr == IPPROTO_UDP || ++ iph->nexthdr == IPPROTO_SCTP || ++ iph->nexthdr == IPPROTO_DCCP || ++ iph->nexthdr == IPPROTO_ESP) ++ info.proto = *(u32*)&iph[1]; ++ else ++ info.proto = iph->nexthdr; ++ break; ++ } ++ default: ++ info.dst = (u32)(unsigned long)skb->dst; ++ info.src = (u32)(unsigned long)skb->sk; ++ info.proto = skb->protocol; ++ } ++ ++ info.mark = skb->mark; ++ ++#ifdef CONFIG_NET_SCH_ESFQ_NFCT ++ /* defaults if there is no conntrack info */ ++ info.ctorigsrc = info.src; ++ info.ctorigdst = info.dst; ++ info.ctreplsrc = info.dst; ++ info.ctrepldst = info.src; ++ /* collect conntrack info */ ++ if (ct && ct != &nf_conntrack_untracked) { ++ if (skb->protocol == __constant_htons(ETH_P_IP)) { ++ info.ctorigsrc = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; ++ info.ctorigdst = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip; ++ info.ctreplsrc = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip; ++ info.ctrepldst = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip; ++ } ++ else if (skb->protocol == __constant_htons(ETH_P_IPV6)) { ++ /* Again, hash ipv6 addresses into a single u32. 
*/ ++ info.ctorigsrc = jhash2(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6, 4, q->perturbation); ++ info.ctorigdst = jhash2(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip6, 4, q->perturbation); ++ info.ctreplsrc = jhash2(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6, 4, q->perturbation); ++ info.ctrepldst = jhash2(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6, 4, q->perturbation); ++ } ++ ++ } ++#endif ++ ++ switch(q->hash_kind) { ++ case TCA_ESFQ_HASH_CLASSIC: ++ return esfq_jhash_3words(q, info.dst, info.src, info.proto); ++ case TCA_ESFQ_HASH_DST: ++ return esfq_jhash_1word(q, info.dst); ++ case TCA_ESFQ_HASH_SRC: ++ return esfq_jhash_1word(q, info.src); ++ case TCA_ESFQ_HASH_FWMARK: ++ return esfq_jhash_1word(q, info.mark); ++#ifdef CONFIG_NET_SCH_ESFQ_NFCT ++ case TCA_ESFQ_HASH_CTORIGDST: ++ return esfq_jhash_1word(q, info.ctorigdst); ++ case TCA_ESFQ_HASH_CTORIGSRC: ++ return esfq_jhash_1word(q, info.ctorigsrc); ++ case TCA_ESFQ_HASH_CTREPLDST: ++ return esfq_jhash_1word(q, info.ctrepldst); ++ case TCA_ESFQ_HASH_CTREPLSRC: ++ return esfq_jhash_1word(q, info.ctreplsrc); ++ case TCA_ESFQ_HASH_CTNATCHG: ++ { ++ if (info.ctorigdst == info.ctreplsrc) ++ return esfq_jhash_1word(q, info.ctorigsrc); ++ return esfq_jhash_1word(q, info.ctreplsrc); ++ } ++#endif ++ default: ++ if (net_ratelimit()) ++ printk(KERN_WARNING "ESFQ: Unknown hash method. 
Falling back to classic.\n"); ++ } ++ return esfq_jhash_3words(q, info.dst, info.src, info.proto); ++} ++ ++static inline void esfq_link(struct esfq_sched_data *q, esfq_index x) ++{ ++ esfq_index p, n; ++ int d = q->qs[x].qlen + q->depth; ++ ++ p = d; ++ n = q->dep[d].next; ++ q->dep[x].next = n; ++ q->dep[x].prev = p; ++ q->dep[p].next = q->dep[n].prev = x; ++} ++ ++static inline void esfq_dec(struct esfq_sched_data *q, esfq_index x) ++{ ++ esfq_index p, n; ++ ++ n = q->dep[x].next; ++ p = q->dep[x].prev; ++ q->dep[p].next = n; ++ q->dep[n].prev = p; ++ ++ if (n == p && q->max_depth == q->qs[x].qlen + 1) ++ q->max_depth--; ++ ++ esfq_link(q, x); ++} ++ ++static inline void esfq_inc(struct esfq_sched_data *q, esfq_index x) ++{ ++ esfq_index p, n; ++ int d; ++ ++ n = q->dep[x].next; ++ p = q->dep[x].prev; ++ q->dep[p].next = n; ++ q->dep[n].prev = p; ++ d = q->qs[x].qlen; ++ if (q->max_depth < d) ++ q->max_depth = d; ++ ++ esfq_link(q, x); ++} ++ ++static unsigned int esfq_drop(struct Qdisc *sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ esfq_index d = q->max_depth; ++ struct sk_buff *skb; ++ unsigned int len; ++ ++ /* Queue is full! Find the longest slot and ++ drop a packet from it */ ++ ++ if (d > 1) { ++ esfq_index x = q->dep[d+q->depth].next; ++ skb = q->qs[x].prev; ++ len = skb->len; ++ __skb_unlink(skb, &q->qs[x]); ++ kfree_skb(skb); ++ esfq_dec(q, x); ++ sch->q.qlen--; ++ sch->qstats.drops++; ++ sch->qstats.backlog -= len; ++ return len; ++ } ++ ++ if (d == 1) { ++ /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. 
*/ ++ d = q->next[q->tail]; ++ q->next[q->tail] = q->next[d]; ++ q->allot[q->next[d]] += q->quantum; ++ skb = q->qs[d].prev; ++ len = skb->len; ++ __skb_unlink(skb, &q->qs[d]); ++ kfree_skb(skb); ++ esfq_dec(q, d); ++ sch->q.qlen--; ++ q->ht[q->hash[d]] = q->depth; ++ sch->qstats.drops++; ++ sch->qstats.backlog -= len; ++ return len; ++ } ++ ++ return 0; ++} ++ ++static void esfq_q_enqueue(struct sk_buff *skb, struct esfq_sched_data *q, unsigned int end) ++{ ++ unsigned hash = esfq_hash(q, skb); ++ unsigned depth = q->depth; ++ esfq_index x; ++ ++ x = q->ht[hash]; ++ if (x == depth) { ++ q->ht[hash] = x = q->dep[depth].next; ++ q->hash[x] = hash; ++ } ++ ++ if (end == ESFQ_TAIL) ++ __skb_queue_tail(&q->qs[x], skb); ++ else ++ __skb_queue_head(&q->qs[x], skb); ++ ++ esfq_inc(q, x); ++ if (q->qs[x].qlen == 1) { /* The flow is new */ ++ if (q->tail == depth) { /* It is the first flow */ ++ q->tail = x; ++ q->next[x] = x; ++ q->allot[x] = q->quantum; ++ } else { ++ q->next[x] = q->next[q->tail]; ++ q->next[q->tail] = x; ++ q->tail = x; ++ } ++ } ++} ++ ++static int esfq_enqueue(struct sk_buff *skb, struct Qdisc* sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ esfq_q_enqueue(skb, q, ESFQ_TAIL); ++ sch->qstats.backlog += skb->len; ++ if (++sch->q.qlen < q->limit-1) { ++ sch->bstats.bytes += skb->len; ++ sch->bstats.packets++; ++ return 0; ++ } ++ ++ sch->qstats.drops++; ++ esfq_drop(sch); ++ return NET_XMIT_CN; ++} ++ ++ ++static int esfq_requeue(struct sk_buff *skb, struct Qdisc* sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ esfq_q_enqueue(skb, q, ESFQ_HEAD); ++ sch->qstats.backlog += skb->len; ++ if (++sch->q.qlen < q->limit - 1) { ++ sch->qstats.requeues++; ++ return 0; ++ } ++ ++ sch->qstats.drops++; ++ esfq_drop(sch); ++ return NET_XMIT_CN; ++} ++ ++static struct sk_buff *esfq_q_dequeue(struct esfq_sched_data *q) ++{ ++ struct sk_buff *skb; ++ unsigned depth = q->depth; ++ esfq_index a, old_a; ++ ++ /* No active slots */ ++ if (q->tail == 
depth) ++ return NULL; ++ ++ a = old_a = q->next[q->tail]; ++ ++ /* Grab packet */ ++ skb = __skb_dequeue(&q->qs[a]); ++ esfq_dec(q, a); ++ ++ /* Is the slot empty? */ ++ if (q->qs[a].qlen == 0) { ++ q->ht[q->hash[a]] = depth; ++ a = q->next[a]; ++ if (a == old_a) { ++ q->tail = depth; ++ return skb; ++ } ++ q->next[q->tail] = a; ++ q->allot[a] += q->quantum; ++ } else if ((q->allot[a] -= skb->len) <= 0) { ++ q->tail = a; ++ a = q->next[a]; ++ q->allot[a] += q->quantum; ++ } ++ ++ return skb; ++} ++ ++static struct sk_buff *esfq_dequeue(struct Qdisc* sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ struct sk_buff *skb; ++ ++ skb = esfq_q_dequeue(q); ++ if (skb == NULL) ++ return NULL; ++ sch->q.qlen--; ++ sch->qstats.backlog -= skb->len; ++ return skb; ++} ++ ++static void esfq_q_destroy(struct esfq_sched_data *q) ++{ ++ del_timer(&q->perturb_timer); ++ if(q->ht) ++ kfree(q->ht); ++ if(q->dep) ++ kfree(q->dep); ++ if(q->next) ++ kfree(q->next); ++ if(q->allot) ++ kfree(q->allot); ++ if(q->hash) ++ kfree(q->hash); ++ if(q->qs) ++ kfree(q->qs); ++} ++ ++static void esfq_destroy(struct Qdisc *sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ esfq_q_destroy(q); ++} ++ ++ ++static void esfq_reset(struct Qdisc* sch) ++{ ++ struct sk_buff *skb; ++ ++ while ((skb = esfq_dequeue(sch)) != NULL) ++ kfree_skb(skb); ++} ++ ++static void esfq_perturbation(unsigned long arg) ++{ ++ struct Qdisc *sch = (struct Qdisc*)arg; ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ ++ q->perturbation = net_random()&0x1F; ++ ++ if (q->perturb_period) { ++ q->perturb_timer.expires = jiffies + q->perturb_period; ++ add_timer(&q->perturb_timer); ++ } ++} ++ ++static unsigned int esfq_check_hash(unsigned int kind) ++{ ++ switch (kind) { ++ case TCA_ESFQ_HASH_CTORIGDST: ++ case TCA_ESFQ_HASH_CTORIGSRC: ++ case TCA_ESFQ_HASH_CTREPLDST: ++ case TCA_ESFQ_HASH_CTREPLSRC: ++ case TCA_ESFQ_HASH_CTNATCHG: ++#ifndef CONFIG_NET_SCH_ESFQ_NFCT ++ { ++ if (net_ratelimit()) ++ 
printk(KERN_WARNING "ESFQ: Conntrack hash types disabled in kernel config. Falling back to classic.\n"); ++ return TCA_ESFQ_HASH_CLASSIC; ++ } ++#endif ++ case TCA_ESFQ_HASH_CLASSIC: ++ case TCA_ESFQ_HASH_DST: ++ case TCA_ESFQ_HASH_SRC: ++ case TCA_ESFQ_HASH_FWMARK: ++ return kind; ++ default: ++ { ++ if (net_ratelimit()) ++ printk(KERN_WARNING "ESFQ: Unknown hash type. Falling back to classic.\n"); ++ return TCA_ESFQ_HASH_CLASSIC; ++ } ++ } ++} ++ ++static int esfq_q_init(struct esfq_sched_data *q, struct nlattr *opt) ++{ ++ struct tc_esfq_qopt *ctl = nla_data(opt); ++ esfq_index p = ~0U/2; ++ int i; ++ ++ if (opt && opt->nla_len < nla_attr_size(sizeof(*ctl))) ++ return -EINVAL; ++ ++ q->perturbation = 0; ++ q->hash_kind = TCA_ESFQ_HASH_CLASSIC; ++ q->max_depth = 0; ++ if (opt == NULL) { ++ q->perturb_period = 0; ++ q->hash_divisor = 1024; ++ q->tail = q->limit = q->depth = 128; ++ ++ } else { ++ struct tc_esfq_qopt *ctl = nla_data(opt); ++ if (ctl->quantum) ++ q->quantum = ctl->quantum; ++ q->perturb_period = ctl->perturb_period*HZ; ++ q->hash_divisor = ctl->divisor ? : 1024; ++ q->tail = q->limit = q->depth = ctl->flows ? 
: 128; ++ ++ if ( q->depth > p - 1 ) ++ return -EINVAL; ++ ++ if (ctl->limit) ++ q->limit = min_t(u32, ctl->limit, q->depth); ++ ++ if (ctl->hash_kind) { ++ q->hash_kind = esfq_check_hash(ctl->hash_kind); ++ } ++ } ++ ++ q->ht = kmalloc(q->hash_divisor*sizeof(esfq_index), GFP_KERNEL); ++ if (!q->ht) ++ goto err_case; ++ q->dep = kmalloc((1+q->depth*2)*sizeof(struct esfq_head), GFP_KERNEL); ++ if (!q->dep) ++ goto err_case; ++ q->next = kmalloc(q->depth*sizeof(esfq_index), GFP_KERNEL); ++ if (!q->next) ++ goto err_case; ++ q->allot = kmalloc(q->depth*sizeof(short), GFP_KERNEL); ++ if (!q->allot) ++ goto err_case; ++ q->hash = kmalloc(q->depth*sizeof(unsigned short), GFP_KERNEL); ++ if (!q->hash) ++ goto err_case; ++ q->qs = kmalloc(q->depth*sizeof(struct sk_buff_head), GFP_KERNEL); ++ if (!q->qs) ++ goto err_case; ++ ++ for (i=0; i< q->hash_divisor; i++) ++ q->ht[i] = q->depth; ++ for (i=0; idepth; i++) { ++ skb_queue_head_init(&q->qs[i]); ++ q->dep[i+q->depth].next = i+q->depth; ++ q->dep[i+q->depth].prev = i+q->depth; ++ } ++ ++ for (i=0; idepth; i++) ++ esfq_link(q, i); ++ return 0; ++err_case: ++ esfq_q_destroy(q); ++ return -ENOBUFS; ++} ++ ++static int esfq_init(struct Qdisc *sch, struct nlattr *opt) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ int err; ++ ++ q->quantum = psched_mtu(qdisc_dev(sch)); /* default */ ++ if ((err = esfq_q_init(q, opt))) ++ return err; ++ ++ init_timer(&q->perturb_timer); ++ q->perturb_timer.data = (unsigned long)sch; ++ q->perturb_timer.function = esfq_perturbation; ++ if (q->perturb_period) { ++ q->perturb_timer.expires = jiffies + q->perturb_period; ++ add_timer(&q->perturb_timer); ++ } ++ ++ return 0; ++} ++ ++static int esfq_change(struct Qdisc *sch, struct nlattr *opt) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ struct esfq_sched_data new; ++ struct sk_buff *skb; ++ int err; ++ ++ /* set up new queue */ ++ memset(&new, 0, sizeof(struct esfq_sched_data)); ++ new.quantum = psched_mtu(qdisc_dev(sch)); /* 
default */ ++ if ((err = esfq_q_init(&new, opt))) ++ return err; ++ ++ /* copy all packets from the old queue to the new queue */ ++ sch_tree_lock(sch); ++ while ((skb = esfq_q_dequeue(q)) != NULL) ++ esfq_q_enqueue(skb, &new, ESFQ_TAIL); ++ ++ /* clean up the old queue */ ++ esfq_q_destroy(q); ++ ++ /* copy elements of the new queue into the old queue */ ++ q->perturb_period = new.perturb_period; ++ q->quantum = new.quantum; ++ q->limit = new.limit; ++ q->depth = new.depth; ++ q->hash_divisor = new.hash_divisor; ++ q->hash_kind = new.hash_kind; ++ q->tail = new.tail; ++ q->max_depth = new.max_depth; ++ q->ht = new.ht; ++ q->dep = new.dep; ++ q->next = new.next; ++ q->allot = new.allot; ++ q->hash = new.hash; ++ q->qs = new.qs; ++ ++ /* finish up */ ++ if (q->perturb_period) { ++ q->perturb_timer.expires = jiffies + q->perturb_period; ++ add_timer(&q->perturb_timer); ++ } else { ++ q->perturbation = 0; ++ } ++ sch_tree_unlock(sch); ++ return 0; ++} ++ ++static int esfq_dump(struct Qdisc *sch, struct sk_buff *skb) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ unsigned char *b = skb_tail_pointer(skb); ++ struct tc_esfq_qopt opt; ++ ++ opt.quantum = q->quantum; ++ opt.perturb_period = q->perturb_period/HZ; ++ ++ opt.limit = q->limit; ++ opt.divisor = q->hash_divisor; ++ opt.flows = q->depth; ++ opt.hash_kind = q->hash_kind; ++ ++ NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); ++ ++ return skb->len; ++ ++nla_put_failure: ++ nlmsg_trim(skb, b); ++ return -1; ++} ++ ++static struct Qdisc_ops esfq_qdisc_ops = ++{ ++ .next = NULL, ++ .cl_ops = NULL, ++ .id = "esfq", ++ .priv_size = sizeof(struct esfq_sched_data), ++ .enqueue = esfq_enqueue, ++ .dequeue = esfq_dequeue, ++ .requeue = esfq_requeue, ++ .drop = esfq_drop, ++ .init = esfq_init, ++ .reset = esfq_reset, ++ .destroy = esfq_destroy, ++ .change = esfq_change, ++ .dump = esfq_dump, ++ .owner = THIS_MODULE, ++}; ++ ++static int __init esfq_module_init(void) ++{ ++ return register_qdisc(&esfq_qdisc_ops); ++} 
++static void __exit esfq_module_exit(void) ++{ ++ unregister_qdisc(&esfq_qdisc_ops); ++} ++module_init(esfq_module_init) ++module_exit(esfq_module_exit) ++MODULE_LICENSE("GPL"); diff --git a/hostap-kernel-2.6.18.patch b/hostap-kernel-2.6.18.patch new file mode 100644 index 00000000..e1447fc1 --- /dev/null +++ b/hostap-kernel-2.6.18.patch @@ -0,0 +1,198 @@ +diff -ur linux-2.6.18-gentoo/drivers/net/wireless/hostap/hostap_80211_tx.c linux-2.6.18-gentoo-rawtx/drivers/net/wireless/hostap/hostap_80211_tx.c +--- linux-2.6.18-gentoo/drivers/net/wireless/hostap/hostap_80211_tx.c 2006-09-21 01:26:27.000000000 -0400 ++++ linux-2.6.18-gentoo-rawtx/drivers/net/wireless/hostap/hostap_80211_tx.c 2006-09-21 01:30:18.000000000 -0400 +@@ -69,6 +69,9 @@ + iface = netdev_priv(dev); + local = iface->local; + ++ if (local->iw_mode == IW_MODE_MONITOR) ++ goto xmit; ++ + if (skb->len < ETH_HLEN) { + printk(KERN_DEBUG "%s: hostap_data_start_xmit: short skb " + "(len=%d)\n", dev->name, skb->len); +@@ -234,6 +237,7 @@ + memcpy(skb_put(skb, ETH_ALEN), &hdr.addr4, ETH_ALEN); + } + ++xmit: + iface->stats.tx_packets++; + iface->stats.tx_bytes += skb->len; + +@@ -404,8 +408,6 @@ + } + + if (skb->len < 24) { +- printk(KERN_DEBUG "%s: hostap_master_start_xmit: short skb " +- "(len=%d)\n", dev->name, skb->len); + ret = 0; + iface->stats.tx_dropped++; + goto fail; +Only in linux-2.6.18-gentoo-rawtx/drivers/net/wireless/hostap: hostap_cs.c.orig +Only in linux-2.6.18-gentoo-rawtx/drivers/net/wireless/hostap: hostap_cs.c.rej +diff -ur linux-2.6.18-gentoo/drivers/net/wireless/hostap/hostap_hw.c linux-2.6.18-gentoo-rawtx/drivers/net/wireless/hostap/hostap_hw.c +--- linux-2.6.18-gentoo/drivers/net/wireless/hostap/hostap_hw.c 2006-09-21 01:26:27.000000000 -0400 ++++ linux-2.6.18-gentoo-rawtx/drivers/net/wireless/hostap/hostap_hw.c 2006-09-21 01:30:18.000000000 -0400 +@@ -1005,6 +1005,35 @@ + return fid; + } + ++static int prism2_monitor_enable(struct net_device *dev) ++{ ++ if (hostap_set_word(dev, 
HFA384X_RID_CNFPORTTYPE, 5)) { ++ printk(KERN_DEBUG "Port type setting for monitor mode " ++ "failed\n"); ++ return -EOPNOTSUPP; ++ } ++ ++ if (hfa384x_cmd(dev, HFA384X_CMDCODE_TEST | (0x0a << 8), ++ 0, NULL, NULL)) { ++ printk(KERN_DEBUG "Could not enter testmode 0x0a\n"); ++ return -EOPNOTSUPP; ++ } ++ ++ if (hostap_set_word(dev, HFA384X_RID_CNFWEPFLAGS, ++ HFA384X_WEPFLAGS_PRIVACYINVOKED | ++ HFA384X_WEPFLAGS_HOSTENCRYPT | ++ HFA384X_WEPFLAGS_HOSTDECRYPT)) { ++ printk(KERN_DEBUG "WEP flags setting failed\n"); ++ return -EOPNOTSUPP; ++ } ++ ++ if (hostap_set_word(dev, HFA384X_RID_PROMISCUOUSMODE, 1)) { ++ printk(KERN_DEBUG "Could not set promiscuous mode\n"); ++ return -EOPNOTSUPP; ++ } ++ ++ return 0; ++} + + static int prism2_reset_port(struct net_device *dev) + { +@@ -1031,6 +1060,10 @@ + "port\n", dev->name); + } + ++ if (local->iw_mode == IW_MODE_MONITOR) ++ /* force mode 0x0a after port 0 reset */ ++ return prism2_monitor_enable(dev); ++ + /* It looks like at least some STA firmware versions reset + * fragmentation threshold back to 2346 after enable command. Restore + * the configured value, if it differs from this default. 
*/ +@@ -1466,6 +1499,10 @@ + return 1; + } + ++ if (local->iw_mode == IW_MODE_MONITOR) ++ /* force mode 0x0a after port 0 reset */ ++ prism2_monitor_enable(dev); ++ + local->hw_ready = 1; + local->hw_reset_tries = 0; + local->hw_resetting = 0; +@@ -3156,6 +3193,7 @@ + local->func->hw_config = prism2_hw_config; + local->func->hw_reset = prism2_hw_reset; + local->func->hw_shutdown = prism2_hw_shutdown; ++ local->func->monitor_enable = prism2_monitor_enable; + local->func->reset_port = prism2_reset_port; + local->func->schedule_reset = prism2_schedule_reset; + #ifdef PRISM2_DOWNLOAD_SUPPORT +Only in linux-2.6.18-gentoo-rawtx/drivers/net/wireless/hostap: hostap_hw.c.orig +diff -ur linux-2.6.18-gentoo/drivers/net/wireless/hostap/hostap_ioctl.c linux-2.6.18-gentoo-rawtx/drivers/net/wireless/hostap/hostap_ioctl.c +--- linux-2.6.18-gentoo/drivers/net/wireless/hostap/hostap_ioctl.c 2006-09-21 01:26:27.000000000 -0400 ++++ linux-2.6.18-gentoo-rawtx/drivers/net/wireless/hostap/hostap_ioctl.c 2006-09-21 01:30:18.000000000 -0400 +@@ -1104,33 +1104,7 @@ + + printk(KERN_DEBUG "Enabling monitor mode\n"); + hostap_monitor_set_type(local); +- +- if (hostap_set_word(dev, HFA384X_RID_CNFPORTTYPE, +- HFA384X_PORTTYPE_PSEUDO_IBSS)) { +- printk(KERN_DEBUG "Port type setting for monitor mode " +- "failed\n"); +- return -EOPNOTSUPP; +- } +- +- /* Host decrypt is needed to get the IV and ICV fields; +- * however, monitor mode seems to remove WEP flag from frame +- * control field */ +- if (hostap_set_word(dev, HFA384X_RID_CNFWEPFLAGS, +- HFA384X_WEPFLAGS_HOSTENCRYPT | +- HFA384X_WEPFLAGS_HOSTDECRYPT)) { +- printk(KERN_DEBUG "WEP flags setting failed\n"); +- return -EOPNOTSUPP; +- } +- +- if (local->func->reset_port(dev) || +- local->func->cmd(dev, HFA384X_CMDCODE_TEST | +- (HFA384X_TEST_MONITOR << 8), +- 0, NULL, NULL)) { +- printk(KERN_DEBUG "Setting monitor mode failed\n"); +- return -EOPNOTSUPP; +- } +- +- return 0; ++ return local->func->reset_port(dev); + } + + +@@ -1199,7 +1173,7 @@ + 
local->iw_mode = *mode; + + if (local->iw_mode == IW_MODE_MONITOR) +- hostap_monitor_mode_enable(local); ++ return hostap_monitor_mode_enable(local); + else if (local->iw_mode == IW_MODE_MASTER && !local->host_encrypt && + !local->fw_encrypt_ok) { + printk(KERN_DEBUG "%s: defaulting to host-based encryption as " +diff -ur linux-2.6.18-gentoo/drivers/net/wireless/hostap/hostap_main.c linux-2.6.18-gentoo-rawtx/drivers/net/wireless/hostap/hostap_main.c +--- linux-2.6.18-gentoo/drivers/net/wireless/hostap/hostap_main.c 2006-09-21 01:26:27.000000000 -0400 ++++ linux-2.6.18-gentoo-rawtx/drivers/net/wireless/hostap/hostap_main.c 2006-09-21 01:30:18.000000000 -0400 +@@ -331,7 +331,7 @@ + if (local->iw_mode == IW_MODE_REPEAT) + return HFA384X_PORTTYPE_WDS; + if (local->iw_mode == IW_MODE_MONITOR) +- return HFA384X_PORTTYPE_PSEUDO_IBSS; ++ return 5; /*HFA384X_PORTTYPE_PSEUDO_IBSS;*/ + return HFA384X_PORTTYPE_HOSTAP; + } + +Only in linux-2.6.18-gentoo-rawtx/drivers/net/wireless/hostap: hostap_main.c.orig +diff -ur linux-2.6.18-gentoo/drivers/net/wireless/hostap/hostap_pci.c linux-2.6.18-gentoo-rawtx/drivers/net/wireless/hostap/hostap_pci.c +--- linux-2.6.18-gentoo/drivers/net/wireless/hostap/hostap_pci.c 2006-09-21 01:26:27.000000000 -0400 ++++ linux-2.6.18-gentoo-rawtx/drivers/net/wireless/hostap/hostap_pci.c 2006-09-21 01:30:18.000000000 -0400 +@@ -48,6 +48,8 @@ + { 0x1260, 0x3873, PCI_ANY_ID, PCI_ANY_ID }, + /* Samsung MagicLAN SWL-2210P */ + { 0x167d, 0xa000, PCI_ANY_ID, PCI_ANY_ID }, ++ /* NETGEAR MA311 */ ++ { 0x1385, 0x3872, PCI_ANY_ID, PCI_ANY_ID }, + { 0 } + }; + +Only in linux-2.6.18-gentoo-rawtx/drivers/net/wireless/hostap: hostap_pci.c.orig +diff -ur linux-2.6.18-gentoo/drivers/net/wireless/hostap/hostap_plx.c linux-2.6.18-gentoo-rawtx/drivers/net/wireless/hostap/hostap_plx.c +--- linux-2.6.18-gentoo/drivers/net/wireless/hostap/hostap_plx.c 2006-09-21 01:26:27.000000000 -0400 ++++ linux-2.6.18-gentoo-rawtx/drivers/net/wireless/hostap/hostap_plx.c 2006-09-21 
01:30:18.000000000 -0400 +@@ -101,6 +101,7 @@ + { 0xc250, 0x0002 } /* EMTAC A2424i */, + { 0xd601, 0x0002 } /* Z-Com XI300 */, + { 0xd601, 0x0005 } /* Zcomax XI-325H 200mW */, ++ { 0xd601, 0x0010 } /* Zcomax XI-325H 100mW */, + { 0, 0} + }; + +Only in linux-2.6.18-gentoo-rawtx/drivers/net/wireless/hostap: hostap_plx.c.orig +diff -ur linux-2.6.18-gentoo/drivers/net/wireless/hostap/hostap_wlan.h linux-2.6.18-gentoo-rawtx/drivers/net/wireless/hostap/hostap_wlan.h +--- linux-2.6.18-gentoo/drivers/net/wireless/hostap/hostap_wlan.h 2006-09-21 01:26:27.000000000 -0400 ++++ linux-2.6.18-gentoo-rawtx/drivers/net/wireless/hostap/hostap_wlan.h 2006-09-21 01:30:18.000000000 -0400 +@@ -575,6 +575,7 @@ + int (*hw_config)(struct net_device *dev, int initial); + void (*hw_reset)(struct net_device *dev); + void (*hw_shutdown)(struct net_device *dev, int no_disable); ++ int (*monitor_enable)(struct net_device *dev); + int (*reset_port)(struct net_device *dev); + void (*schedule_reset)(local_info_t *local); + int (*download)(local_info_t *local, diff --git a/kernel-PF_RING.patch b/kernel-PF_RING.patch index 7cedc71c..19de0f66 100644 --- a/kernel-PF_RING.patch +++ b/kernel-PF_RING.patch @@ -256,78 +256,20 @@ diff --unified --recursive --new-file linux-2.6.21.4/net/Kconfig linux-2.6.21.4- diff --unified --recursive --new-file linux-2.6.21.4/net/Makefile linux-2.6.21.4-1-686-smp-ring3/net/Makefile --- linux-2.6.21.4/net/Makefile 2007-06-07 21:27:31.000000000 +0000 +++ linux-2.6.21.4-1-686-smp-ring3/net/Makefile 2007-06-10 16:43:04.394423425 +0000 -@@ -42,6 +42,7 @@ - obj-$(CONFIG_DECNET) += decnet/ - obj-$(CONFIG_ECONET) += econet/ - obj-$(CONFIG_VLAN_8021Q) += 8021q/ +@@ -45,6 +45,7 @@ + ifneq ($(CONFIG_VLAN_8021Q),) + obj-y += 8021q/ + endif +obj-$(CONFIG_RING) += ring/ obj-$(CONFIG_IP_DCCP) += dccp/ obj-$(CONFIG_IP_SCTP) += sctp/ - obj-$(CONFIG_IEEE80211) += ieee80211/ -diff --unified --recursive --new-file linux-2.6.21.4/net/Makefile.ORG 
linux-2.6.21.4-1-686-smp-ring3/net/Makefile.ORG ---- linux-2.6.21.4/net/Makefile.ORG 1970-01-01 00:00:00.000000000 +0000 -+++ linux-2.6.21.4-1-686-smp-ring3/net/Makefile.ORG 2007-06-10 16:43:04.386423079 +0000 -@@ -0,0 +1,54 @@ -+# -+# Makefile for the linux networking. -+# -+# 2 Sep 2000, Christoph Hellwig -+# Rewritten to use lists instead of if-statements. -+# -+ -+obj-y := nonet.o -+ -+obj-$(CONFIG_NET) := socket.o core/ -+ -+tmp-$(CONFIG_COMPAT) := compat.o -+obj-$(CONFIG_NET) += $(tmp-y) -+ -+# LLC has to be linked before the files in net/802/ -+obj-$(CONFIG_LLC) += llc/ -+obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ -+obj-$(CONFIG_NETFILTER) += netfilter/ -+obj-$(CONFIG_INET) += ipv4/ -+obj-$(CONFIG_XFRM) += xfrm/ -+obj-$(CONFIG_UNIX) += unix/ -+ifneq ($(CONFIG_IPV6),) -+obj-y += ipv6/ -+endif -+obj-$(CONFIG_PACKET) += packet/ -+obj-$(CONFIG_NET_KEY) += key/ -+obj-$(CONFIG_NET_SCHED) += sched/ -+obj-$(CONFIG_BRIDGE) += bridge/ -+obj-$(CONFIG_IPX) += ipx/ -+obj-$(CONFIG_ATALK) += appletalk/ -+obj-$(CONFIG_WAN_ROUTER) += wanrouter/ -+obj-$(CONFIG_X25) += x25/ -+obj-$(CONFIG_LAPB) += lapb/ -+obj-$(CONFIG_NETROM) += netrom/ -+obj-$(CONFIG_ROSE) += rose/ -+obj-$(CONFIG_AX25) += ax25/ -+obj-$(CONFIG_IRDA) += irda/ -+obj-$(CONFIG_BT) += bluetooth/ -+obj-$(CONFIG_SUNRPC) += sunrpc/ -+obj-$(CONFIG_RXRPC) += rxrpc/ -+obj-$(CONFIG_ATM) += atm/ -+obj-$(CONFIG_DECNET) += decnet/ -+obj-$(CONFIG_ECONET) += econet/ -+obj-$(CONFIG_VLAN_8021Q) += 8021q/ -+obj-$(CONFIG_IP_DCCP) += dccp/ -+obj-$(CONFIG_IP_SCTP) += sctp/ -+obj-$(CONFIG_IEEE80211) += ieee80211/ -+obj-$(CONFIG_TIPC) += tipc/ -+obj-$(CONFIG_NETLABEL) += netlabel/ -+obj-$(CONFIG_IUCV) += iucv/ -+ -+ifeq ($(CONFIG_NET),y) -+obj-$(CONFIG_SYSCTL) += sysctl_net.o -+endif + obj-y += wireless/ diff --unified --recursive --new-file linux-2.6.21.4/net/core/dev.c linux-2.6.21.4-1-686-smp-ring3/net/core/dev.c --- linux-2.6.21.4/net/core/dev.c 2007-06-07 21:27:31.000000000 +0000 +++ 
linux-2.6.21.4-1-686-smp-ring3/net/core/dev.c 2007-06-10 16:43:04.382422906 +0000 -@@ -117,6 +117,56 @@ - #include - #include +@@ -133,6 +133,56 @@ + + #include "net-sysfs.h" +#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE) + @@ -382,18 +324,17 @@ diff --unified --recursive --new-file linux-2.6.21.4/net/core/dev.c linux-2.6.21 /* * The list of packet types we will receive (as opposed to discard) * and the routines to invoke. -@@ -1474,6 +1524,10 @@ +@@ -1809,6 +1859,9 @@ skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS); #endif if (q->enqueue) { +#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE) + if(ring_handler) ring_handler(skb, 0, 1); +#endif /* CONFIG_RING */ -+ - /* Grab device queue */ - spin_lock(&dev->queue_lock); - q = dev->qdisc; -@@ -1574,6 +1628,13 @@ + spinlock_t *root_lock = qdisc_lock(q); + + spin_lock(root_lock); +@@ -1908,6 +1961,13 @@ unsigned long flags; /* if netpoll wants it, pretend we never saw it */ @@ -407,8 +348,8 @@ diff --unified --recursive --new-file linux-2.6.21.4/net/core/dev.c linux-2.6.21 if (netpoll_rx(skb)) return NET_RX_DROP; -@@ -1764,6 +1825,13 @@ - struct net_device *orig_dev; +@@ -2193,6 +2253,13 @@ + struct net_device *null_or_orig; int ret = NET_RX_DROP; __be16 type; +#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE) @@ -420,3582 +361,7 @@ diff --unified --recursive --new-file linux-2.6.21.4/net/core/dev.c linux-2.6.21 + /* if we've gotten here through NAPI, check netpoll */ - if (skb->dev->poll && netpoll_rx(skb)) -diff --unified --recursive --new-file linux-2.6.21.4/net/core/dev.c.ORG linux-2.6.21.4-1-686-smp-ring3/net/core/dev.c.ORG ---- linux-2.6.21.4/net/core/dev.c.ORG 1970-01-01 00:00:00.000000000 +0000 -+++ linux-2.6.21.4-1-686-smp-ring3/net/core/dev.c.ORG 2007-06-10 16:43:04.354421694 +0000 -@@ -0,0 +1,3571 @@ -+/* -+ * NET3 Protocol independent device support routines. 
-+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. -+ * -+ * Derived from the non IP parts of dev.c 1.0.19 -+ * Authors: Ross Biro -+ * Fred N. van Kempen, -+ * Mark Evans, -+ * -+ * Additional Authors: -+ * Florian la Roche -+ * Alan Cox -+ * David Hinds -+ * Alexey Kuznetsov -+ * Adam Sulmicki -+ * Pekka Riikonen -+ * -+ * Changes: -+ * D.J. Barrow : Fixed bug where dev->refcnt gets set -+ * to 2 if register_netdev gets called -+ * before net_dev_init & also removed a -+ * few lines of code in the process. -+ * Alan Cox : device private ioctl copies fields back. -+ * Alan Cox : Transmit queue code does relevant -+ * stunts to keep the queue safe. -+ * Alan Cox : Fixed double lock. -+ * Alan Cox : Fixed promisc NULL pointer trap -+ * ???????? : Support the full private ioctl range -+ * Alan Cox : Moved ioctl permission check into -+ * drivers -+ * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI -+ * Alan Cox : 100 backlog just doesn't cut it when -+ * you start doing multicast video 8) -+ * Alan Cox : Rewrote net_bh and list manager. -+ * Alan Cox : Fix ETH_P_ALL echoback lengths. -+ * Alan Cox : Took out transmit every packet pass -+ * Saved a few bytes in the ioctl handler -+ * Alan Cox : Network driver sets packet type before -+ * calling netif_rx. Saves a function -+ * call a packet. -+ * Alan Cox : Hashed net_bh() -+ * Richard Kooijman: Timestamp fixes. -+ * Alan Cox : Wrong field in SIOCGIFDSTADDR -+ * Alan Cox : Device lock protection. -+ * Alan Cox : Fixed nasty side effect of device close -+ * changes. -+ * Rudi Cilibrasi : Pass the right thing to -+ * set_mac_address() -+ * Dave Miller : 32bit quantity for the device lock to -+ * make it work out on a Sparc. -+ * Bjorn Ekwall : Added KERNELD hack. -+ * Alan Cox : Cleaned up the backlog initialise. 
-+ * Craig Metz : SIOCGIFCONF fix if space for under -+ * 1 device. -+ * Thomas Bogendoerfer : Return ENODEV for dev_open, if there -+ * is no device open function. -+ * Andi Kleen : Fix error reporting for SIOCGIFCONF -+ * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF -+ * Cyrus Durgin : Cleaned for KMOD -+ * Adam Sulmicki : Bug Fix : Network Device Unload -+ * A network device unload needs to purge -+ * the backlog queue. -+ * Paul Rusty Russell : SIOCSIFNAME -+ * Pekka Riikonen : Netdev boot-time settings code -+ * Andrew Morton : Make unregister_netdevice wait -+ * indefinitely on dev->refcnt -+ * J Hadi Salim : - Backlog queue sampling -+ * - netif_rx() feedback -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* -+ * The list of packet types we will receive (as opposed to discard) -+ * and the routines to invoke. -+ * -+ * Why 16. Because with 16 the only overlap we get on a hash of the -+ * low nibble of the protocol value is RARP/SNAP/X.25. -+ * -+ * NOTE: That is no longer true with the addition of VLAN tags. Not -+ * sure which should go first, but I bet it won't make much -+ * difference if we are running VLANs. The good news is that -+ * this protocol won't be in the list unless compiled in, so -+ * the average user (w/out VLANs) will not be adversely affected. 
-+ * --BLG -+ * -+ * 0800 IP -+ * 8100 802.1Q VLAN -+ * 0001 802.3 -+ * 0002 AX.25 -+ * 0004 802.2 -+ * 8035 RARP -+ * 0005 SNAP -+ * 0805 X.25 -+ * 0806 ARP -+ * 8137 IPX -+ * 0009 Localtalk -+ * 86DD IPv6 -+ */ -+ -+static DEFINE_SPINLOCK(ptype_lock); -+static struct list_head ptype_base[16]; /* 16 way hashed list */ -+static struct list_head ptype_all; /* Taps */ -+ -+#ifdef CONFIG_NET_DMA -+static struct dma_client *net_dma_client; -+static unsigned int net_dma_count; -+static spinlock_t net_dma_event_lock; -+#endif -+ -+/* -+ * The @dev_base list is protected by @dev_base_lock and the rtnl -+ * semaphore. -+ * -+ * Pure readers hold dev_base_lock for reading. -+ * -+ * Writers must hold the rtnl semaphore while they loop through the -+ * dev_base list, and hold dev_base_lock for writing when they do the -+ * actual updates. This allows pure readers to access the list even -+ * while a writer is preparing to update it. -+ * -+ * To put it another way, dev_base_lock is held for writing only to -+ * protect against pure readers; the rtnl semaphore provides the -+ * protection against other writers. -+ * -+ * See, for example usages, register_netdevice() and -+ * unregister_netdevice(), which must be called with the rtnl -+ * semaphore held. -+ */ -+struct net_device *dev_base; -+static struct net_device **dev_tail = &dev_base; -+DEFINE_RWLOCK(dev_base_lock); -+ -+EXPORT_SYMBOL(dev_base); -+EXPORT_SYMBOL(dev_base_lock); -+ -+#define NETDEV_HASHBITS 8 -+static struct hlist_head dev_name_head[1<type == htons(ETH_P_ALL)) { -+ netdev_nit++; -+ list_add_rcu(&pt->list, &ptype_all); -+ } else { -+ hash = ntohs(pt->type) & 15; -+ list_add_rcu(&pt->list, &ptype_base[hash]); -+ } -+ spin_unlock_bh(&ptype_lock); -+} -+ -+/** -+ * __dev_remove_pack - remove packet handler -+ * @pt: packet type declaration -+ * -+ * Remove a protocol handler that was previously added to the kernel -+ * protocol handlers by dev_add_pack(). 
The passed &packet_type is removed -+ * from the kernel lists and can be freed or reused once this function -+ * returns. -+ * -+ * The packet type might still be in use by receivers -+ * and must not be freed until after all the CPU's have gone -+ * through a quiescent state. -+ */ -+void __dev_remove_pack(struct packet_type *pt) -+{ -+ struct list_head *head; -+ struct packet_type *pt1; -+ -+ spin_lock_bh(&ptype_lock); -+ -+ if (pt->type == htons(ETH_P_ALL)) { -+ netdev_nit--; -+ head = &ptype_all; -+ } else -+ head = &ptype_base[ntohs(pt->type) & 15]; -+ -+ list_for_each_entry(pt1, head, list) { -+ if (pt == pt1) { -+ list_del_rcu(&pt->list); -+ goto out; -+ } -+ } -+ -+ printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt); -+out: -+ spin_unlock_bh(&ptype_lock); -+} -+/** -+ * dev_remove_pack - remove packet handler -+ * @pt: packet type declaration -+ * -+ * Remove a protocol handler that was previously added to the kernel -+ * protocol handlers by dev_add_pack(). The passed &packet_type is removed -+ * from the kernel lists and can be freed or reused once this function -+ * returns. -+ * -+ * This call sleeps to guarantee that no CPU is looking at the packet -+ * type after return. -+ */ -+void dev_remove_pack(struct packet_type *pt) -+{ -+ __dev_remove_pack(pt); -+ -+ synchronize_net(); -+} -+ -+/****************************************************************************** -+ -+ Device Boot-time Settings Routines -+ -+*******************************************************************************/ -+ -+/* Boot time configuration table */ -+static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX]; -+ -+/** -+ * netdev_boot_setup_add - add new setup entry -+ * @name: name of the device -+ * @map: configured settings for the device -+ * -+ * Adds new setup entry to the dev_boot_setup list. The function -+ * returns 0 on error and 1 on success. This is a generic routine to -+ * all netdevices. 
-+ */ -+static int netdev_boot_setup_add(char *name, struct ifmap *map) -+{ -+ struct netdev_boot_setup *s; -+ int i; -+ -+ s = dev_boot_setup; -+ for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { -+ if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { -+ memset(s[i].name, 0, sizeof(s[i].name)); -+ strcpy(s[i].name, name); -+ memcpy(&s[i].map, map, sizeof(s[i].map)); -+ break; -+ } -+ } -+ -+ return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1; -+} -+ -+/** -+ * netdev_boot_setup_check - check boot time settings -+ * @dev: the netdevice -+ * -+ * Check boot time settings for the device. -+ * The found settings are set for the device to be used -+ * later in the device probing. -+ * Returns 0 if no settings found, 1 if they are. -+ */ -+int netdev_boot_setup_check(struct net_device *dev) -+{ -+ struct netdev_boot_setup *s = dev_boot_setup; -+ int i; -+ -+ for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { -+ if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && -+ !strncmp(dev->name, s[i].name, strlen(s[i].name))) { -+ dev->irq = s[i].map.irq; -+ dev->base_addr = s[i].map.base_addr; -+ dev->mem_start = s[i].map.mem_start; -+ dev->mem_end = s[i].map.mem_end; -+ return 1; -+ } -+ } -+ return 0; -+} -+ -+ -+/** -+ * netdev_boot_base - get address from boot time settings -+ * @prefix: prefix for network device -+ * @unit: id for network device -+ * -+ * Check boot time settings for the base address of device. -+ * The found settings are set for the device to be used -+ * later in the device probing. -+ * Returns 0 if no settings found. 
-+ */ -+unsigned long netdev_boot_base(const char *prefix, int unit) -+{ -+ const struct netdev_boot_setup *s = dev_boot_setup; -+ char name[IFNAMSIZ]; -+ int i; -+ -+ sprintf(name, "%s%d", prefix, unit); -+ -+ /* -+ * If device already registered then return base of 1 -+ * to indicate not to probe for this interface -+ */ -+ if (__dev_get_by_name(name)) -+ return 1; -+ -+ for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) -+ if (!strcmp(name, s[i].name)) -+ return s[i].map.base_addr; -+ return 0; -+} -+ -+/* -+ * Saves at boot time configured settings for any netdevice. -+ */ -+int __init netdev_boot_setup(char *str) -+{ -+ int ints[5]; -+ struct ifmap map; -+ -+ str = get_options(str, ARRAY_SIZE(ints), ints); -+ if (!str || !*str) -+ return 0; -+ -+ /* Save settings */ -+ memset(&map, 0, sizeof(map)); -+ if (ints[0] > 0) -+ map.irq = ints[1]; -+ if (ints[0] > 1) -+ map.base_addr = ints[2]; -+ if (ints[0] > 2) -+ map.mem_start = ints[3]; -+ if (ints[0] > 3) -+ map.mem_end = ints[4]; -+ -+ /* Add new entry to the list */ -+ return netdev_boot_setup_add(str, &map); -+} -+ -+__setup("netdev=", netdev_boot_setup); -+ -+/******************************************************************************* -+ -+ Device Interface Subroutines -+ -+*******************************************************************************/ -+ -+/** -+ * __dev_get_by_name - find a device by its name -+ * @name: name to find -+ * -+ * Find an interface by name. Must be called under RTNL semaphore -+ * or @dev_base_lock. If the name is found a pointer to the device -+ * is returned. If the name is not found then %NULL is returned. The -+ * reference counters are not incremented so the caller must be -+ * careful with locks. 
-+ */ -+ -+struct net_device *__dev_get_by_name(const char *name) -+{ -+ struct hlist_node *p; -+ -+ hlist_for_each(p, dev_name_hash(name)) { -+ struct net_device *dev -+ = hlist_entry(p, struct net_device, name_hlist); -+ if (!strncmp(dev->name, name, IFNAMSIZ)) -+ return dev; -+ } -+ return NULL; -+} -+ -+/** -+ * dev_get_by_name - find a device by its name -+ * @name: name to find -+ * -+ * Find an interface by name. This can be called from any -+ * context and does its own locking. The returned handle has -+ * the usage count incremented and the caller must use dev_put() to -+ * release it when it is no longer needed. %NULL is returned if no -+ * matching device is found. -+ */ -+ -+struct net_device *dev_get_by_name(const char *name) -+{ -+ struct net_device *dev; -+ -+ read_lock(&dev_base_lock); -+ dev = __dev_get_by_name(name); -+ if (dev) -+ dev_hold(dev); -+ read_unlock(&dev_base_lock); -+ return dev; -+} -+ -+/** -+ * __dev_get_by_index - find a device by its ifindex -+ * @ifindex: index of device -+ * -+ * Search for an interface by index. Returns %NULL if the device -+ * is not found or a pointer to the device. The device has not -+ * had its reference counter increased so the caller must be careful -+ * about locking. The caller must hold either the RTNL semaphore -+ * or @dev_base_lock. -+ */ -+ -+struct net_device *__dev_get_by_index(int ifindex) -+{ -+ struct hlist_node *p; -+ -+ hlist_for_each(p, dev_index_hash(ifindex)) { -+ struct net_device *dev -+ = hlist_entry(p, struct net_device, index_hlist); -+ if (dev->ifindex == ifindex) -+ return dev; -+ } -+ return NULL; -+} -+ -+ -+/** -+ * dev_get_by_index - find a device by its ifindex -+ * @ifindex: index of device -+ * -+ * Search for an interface by index. Returns NULL if the device -+ * is not found or a pointer to the device. The device returned has -+ * had a reference added and the pointer is safe until the user calls -+ * dev_put to indicate they have finished with it. 
-+ */ -+ -+struct net_device *dev_get_by_index(int ifindex) -+{ -+ struct net_device *dev; -+ -+ read_lock(&dev_base_lock); -+ dev = __dev_get_by_index(ifindex); -+ if (dev) -+ dev_hold(dev); -+ read_unlock(&dev_base_lock); -+ return dev; -+} -+ -+/** -+ * dev_getbyhwaddr - find a device by its hardware address -+ * @type: media type of device -+ * @ha: hardware address -+ * -+ * Search for an interface by MAC address. Returns NULL if the device -+ * is not found or a pointer to the device. The caller must hold the -+ * rtnl semaphore. The returned device has not had its ref count increased -+ * and the caller must therefore be careful about locking -+ * -+ * BUGS: -+ * If the API was consistent this would be __dev_get_by_hwaddr -+ */ -+ -+struct net_device *dev_getbyhwaddr(unsigned short type, char *ha) -+{ -+ struct net_device *dev; -+ -+ ASSERT_RTNL(); -+ -+ for (dev = dev_base; dev; dev = dev->next) -+ if (dev->type == type && -+ !memcmp(dev->dev_addr, ha, dev->addr_len)) -+ break; -+ return dev; -+} -+ -+EXPORT_SYMBOL(dev_getbyhwaddr); -+ -+struct net_device *dev_getfirstbyhwtype(unsigned short type) -+{ -+ struct net_device *dev; -+ -+ rtnl_lock(); -+ for (dev = dev_base; dev; dev = dev->next) { -+ if (dev->type == type) { -+ dev_hold(dev); -+ break; -+ } -+ } -+ rtnl_unlock(); -+ return dev; -+} -+ -+EXPORT_SYMBOL(dev_getfirstbyhwtype); -+ -+/** -+ * dev_get_by_flags - find any device with given flags -+ * @if_flags: IFF_* values -+ * @mask: bitmask of bits in if_flags to check -+ * -+ * Search for any interface with the given flags. Returns NULL if a device -+ * is not found or a pointer to the device. The device returned has -+ * had a reference added and the pointer is safe until the user calls -+ * dev_put to indicate they have finished with it. 
-+ */ -+ -+struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask) -+{ -+ struct net_device *dev; -+ -+ read_lock(&dev_base_lock); -+ for (dev = dev_base; dev != NULL; dev = dev->next) { -+ if (((dev->flags ^ if_flags) & mask) == 0) { -+ dev_hold(dev); -+ break; -+ } -+ } -+ read_unlock(&dev_base_lock); -+ return dev; -+} -+ -+/** -+ * dev_valid_name - check if name is okay for network device -+ * @name: name string -+ * -+ * Network device names need to be valid file names to -+ * to allow sysfs to work. We also disallow any kind of -+ * whitespace. -+ */ -+int dev_valid_name(const char *name) -+{ -+ if (*name == '\0') -+ return 0; -+ if (strlen(name) >= IFNAMSIZ) -+ return 0; -+ if (!strcmp(name, ".") || !strcmp(name, "..")) -+ return 0; -+ -+ while (*name) { -+ if (*name == '/' || isspace(*name)) -+ return 0; -+ name++; -+ } -+ return 1; -+} -+ -+/** -+ * dev_alloc_name - allocate a name for a device -+ * @dev: device -+ * @name: name format string -+ * -+ * Passed a format string - eg "lt%d" it will try and find a suitable -+ * id. It scans list of devices to build up a free map, then chooses -+ * the first empty slot. The caller must hold the dev_base or rtnl lock -+ * while allocating the name and adding the device in order to avoid -+ * duplicates. -+ * Limited to bits_per_byte * page size devices (ie 32K on most platforms). -+ * Returns the number of the unit assigned or a negative errno code. -+ */ -+ -+int dev_alloc_name(struct net_device *dev, const char *name) -+{ -+ int i = 0; -+ char buf[IFNAMSIZ]; -+ const char *p; -+ const int max_netdevices = 8*PAGE_SIZE; -+ long *inuse; -+ struct net_device *d; -+ -+ p = strnchr(name, IFNAMSIZ-1, '%'); -+ if (p) { -+ /* -+ * Verify the string as this thing may have come from -+ * the user. There must be either one "%d" and no other "%" -+ * characters. 
-+ */ -+ if (p[1] != 'd' || strchr(p + 2, '%')) -+ return -EINVAL; -+ -+ /* Use one page as a bit array of possible slots */ -+ inuse = (long *) get_zeroed_page(GFP_ATOMIC); -+ if (!inuse) -+ return -ENOMEM; -+ -+ for (d = dev_base; d; d = d->next) { -+ if (!sscanf(d->name, name, &i)) -+ continue; -+ if (i < 0 || i >= max_netdevices) -+ continue; -+ -+ /* avoid cases where sscanf is not exact inverse of printf */ -+ snprintf(buf, sizeof(buf), name, i); -+ if (!strncmp(buf, d->name, IFNAMSIZ)) -+ set_bit(i, inuse); -+ } -+ -+ i = find_first_zero_bit(inuse, max_netdevices); -+ free_page((unsigned long) inuse); -+ } -+ -+ snprintf(buf, sizeof(buf), name, i); -+ if (!__dev_get_by_name(buf)) { -+ strlcpy(dev->name, buf, IFNAMSIZ); -+ return i; -+ } -+ -+ /* It is possible to run out of possible slots -+ * when the name is long and there isn't enough space left -+ * for the digits, or if all bits are used. -+ */ -+ return -ENFILE; -+} -+ -+ -+/** -+ * dev_change_name - change name of a device -+ * @dev: device -+ * @newname: name (or format string) must be at least IFNAMSIZ -+ * -+ * Change name of a device, can pass format strings "eth%d". -+ * for wildcarding. 
-+ */ -+int dev_change_name(struct net_device *dev, char *newname) -+{ -+ int err = 0; -+ -+ ASSERT_RTNL(); -+ -+ if (dev->flags & IFF_UP) -+ return -EBUSY; -+ -+ if (!dev_valid_name(newname)) -+ return -EINVAL; -+ -+ if (strchr(newname, '%')) { -+ err = dev_alloc_name(dev, newname); -+ if (err < 0) -+ return err; -+ strcpy(newname, dev->name); -+ } -+ else if (__dev_get_by_name(newname)) -+ return -EEXIST; -+ else -+ strlcpy(dev->name, newname, IFNAMSIZ); -+ -+ device_rename(&dev->dev, dev->name); -+ hlist_del(&dev->name_hlist); -+ hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name)); -+ raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev); -+ -+ return err; -+} -+ -+/** -+ * netdev_features_change - device changes features -+ * @dev: device to cause notification -+ * -+ * Called to indicate a device has changed features. -+ */ -+void netdev_features_change(struct net_device *dev) -+{ -+ raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev); -+} -+EXPORT_SYMBOL(netdev_features_change); -+ -+/** -+ * netdev_state_change - device changes state -+ * @dev: device to cause notification -+ * -+ * Called to indicate a device has changed state. This function calls -+ * the notifier chains for netdev_chain and sends a NEWLINK message -+ * to the routing socket. -+ */ -+void netdev_state_change(struct net_device *dev) -+{ -+ if (dev->flags & IFF_UP) { -+ raw_notifier_call_chain(&netdev_chain, -+ NETDEV_CHANGE, dev); -+ rtmsg_ifinfo(RTM_NEWLINK, dev, 0); -+ } -+} -+ -+/** -+ * dev_load - load a network module -+ * @name: name of interface -+ * -+ * If a network interface is not present and the process has suitable -+ * privileges this function loads the module. If module loading is not -+ * available in this kernel then it becomes a nop. 
-+ */ -+ -+void dev_load(const char *name) -+{ -+ struct net_device *dev; -+ -+ read_lock(&dev_base_lock); -+ dev = __dev_get_by_name(name); -+ read_unlock(&dev_base_lock); -+ -+ if (!dev && capable(CAP_SYS_MODULE)) -+ request_module("%s", name); -+} -+ -+static int default_rebuild_header(struct sk_buff *skb) -+{ -+ printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", -+ skb->dev ? skb->dev->name : "NULL!!!"); -+ kfree_skb(skb); -+ return 1; -+} -+ -+ -+/** -+ * dev_open - prepare an interface for use. -+ * @dev: device to open -+ * -+ * Takes a device from down to up state. The device's private open -+ * function is invoked and then the multicast lists are loaded. Finally -+ * the device is moved into the up state and a %NETDEV_UP message is -+ * sent to the netdev notifier chain. -+ * -+ * Calling this function on an active interface is a nop. On a failure -+ * a negative errno code is returned. -+ */ -+int dev_open(struct net_device *dev) -+{ -+ int ret = 0; -+ -+ /* -+ * Is it already up? -+ */ -+ -+ if (dev->flags & IFF_UP) -+ return 0; -+ -+ /* -+ * Is it even present? -+ */ -+ if (!netif_device_present(dev)) -+ return -ENODEV; -+ -+ /* -+ * Call device private open method -+ */ -+ set_bit(__LINK_STATE_START, &dev->state); -+ if (dev->open) { -+ ret = dev->open(dev); -+ if (ret) -+ clear_bit(__LINK_STATE_START, &dev->state); -+ } -+ -+ /* -+ * If it went open OK then: -+ */ -+ -+ if (!ret) { -+ /* -+ * Set the flags. -+ */ -+ dev->flags |= IFF_UP; -+ -+ /* -+ * Initialize multicasting status -+ */ -+ dev_mc_upload(dev); -+ -+ /* -+ * Wakeup transmit queue engine -+ */ -+ dev_activate(dev); -+ -+ /* -+ * ... and announce new interface. -+ */ -+ raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev); -+ } -+ return ret; -+} -+ -+/** -+ * dev_close - shutdown an interface. -+ * @dev: device to shutdown -+ * -+ * This function moves an active device into down state. A -+ * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. 
The device -+ * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier -+ * chain. -+ */ -+int dev_close(struct net_device *dev) -+{ -+ if (!(dev->flags & IFF_UP)) -+ return 0; -+ -+ /* -+ * Tell people we are going down, so that they can -+ * prepare to death, when device is still operating. -+ */ -+ raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev); -+ -+ dev_deactivate(dev); -+ -+ clear_bit(__LINK_STATE_START, &dev->state); -+ -+ /* Synchronize to scheduled poll. We cannot touch poll list, -+ * it can be even on different cpu. So just clear netif_running(), -+ * and wait when poll really will happen. Actually, the best place -+ * for this is inside dev->stop() after device stopped its irq -+ * engine, but this requires more changes in devices. */ -+ -+ smp_mb__after_clear_bit(); /* Commit netif_running(). */ -+ while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) { -+ /* No hurry. */ -+ msleep(1); -+ } -+ -+ /* -+ * Call the device specific close. This cannot fail. -+ * Only if device is UP -+ * -+ * We allow it to be called even after a DETACH hot-plug -+ * event. -+ */ -+ if (dev->stop) -+ dev->stop(dev); -+ -+ /* -+ * Device is now down. -+ */ -+ -+ dev->flags &= ~IFF_UP; -+ -+ /* -+ * Tell people we are down -+ */ -+ raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev); -+ -+ return 0; -+} -+ -+ -+/* -+ * Device change register/unregister. These are not inline or static -+ * as we export them to the world. -+ */ -+ -+/** -+ * register_netdevice_notifier - register a network notifier block -+ * @nb: notifier -+ * -+ * Register a notifier to be called when network device events occur. -+ * The notifier passed is linked into the kernel structures and must -+ * not be reused until it has been unregistered. A negative errno code -+ * is returned on a failure. -+ * -+ * When registered all registration and up events are replayed -+ * to the new notifier to allow device to have a race free -+ * view of the network device list. 
-+ */ -+ -+int register_netdevice_notifier(struct notifier_block *nb) -+{ -+ struct net_device *dev; -+ int err; -+ -+ rtnl_lock(); -+ err = raw_notifier_chain_register(&netdev_chain, nb); -+ if (!err) { -+ for (dev = dev_base; dev; dev = dev->next) { -+ nb->notifier_call(nb, NETDEV_REGISTER, dev); -+ -+ if (dev->flags & IFF_UP) -+ nb->notifier_call(nb, NETDEV_UP, dev); -+ } -+ } -+ rtnl_unlock(); -+ return err; -+} -+ -+/** -+ * unregister_netdevice_notifier - unregister a network notifier block -+ * @nb: notifier -+ * -+ * Unregister a notifier previously registered by -+ * register_netdevice_notifier(). The notifier is unlinked into the -+ * kernel structures and may then be reused. A negative errno code -+ * is returned on a failure. -+ */ -+ -+int unregister_netdevice_notifier(struct notifier_block *nb) -+{ -+ int err; -+ -+ rtnl_lock(); -+ err = raw_notifier_chain_unregister(&netdev_chain, nb); -+ rtnl_unlock(); -+ return err; -+} -+ -+/** -+ * call_netdevice_notifiers - call all network notifier blocks -+ * @val: value passed unmodified to notifier function -+ * @v: pointer passed unmodified to notifier function -+ * -+ * Call all network notifier blocks. Parameters and return value -+ * are as for raw_notifier_call_chain(). 
-+ */ -+ -+int call_netdevice_notifiers(unsigned long val, void *v) -+{ -+ return raw_notifier_call_chain(&netdev_chain, val, v); -+} -+ -+/* When > 0 there are consumers of rx skb time stamps */ -+static atomic_t netstamp_needed = ATOMIC_INIT(0); -+ -+void net_enable_timestamp(void) -+{ -+ atomic_inc(&netstamp_needed); -+} -+ -+void net_disable_timestamp(void) -+{ -+ atomic_dec(&netstamp_needed); -+} -+ -+void __net_timestamp(struct sk_buff *skb) -+{ -+ struct timeval tv; -+ -+ do_gettimeofday(&tv); -+ skb_set_timestamp(skb, &tv); -+} -+EXPORT_SYMBOL(__net_timestamp); -+ -+static inline void net_timestamp(struct sk_buff *skb) -+{ -+ if (atomic_read(&netstamp_needed)) -+ __net_timestamp(skb); -+ else { -+ skb->tstamp.off_sec = 0; -+ skb->tstamp.off_usec = 0; -+ } -+} -+ -+/* -+ * Support routine. Sends outgoing frames to any network -+ * taps currently in use. -+ */ -+ -+static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) -+{ -+ struct packet_type *ptype; -+ -+ net_timestamp(skb); -+ -+ rcu_read_lock(); -+ list_for_each_entry_rcu(ptype, &ptype_all, list) { -+ /* Never send packets back to the socket -+ * they originated from - MvS (miquels@drinkel.ow.org) -+ */ -+ if ((ptype->dev == dev || !ptype->dev) && -+ (ptype->af_packet_priv == NULL || -+ (struct sock *)ptype->af_packet_priv != skb->sk)) { -+ struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC); -+ if (!skb2) -+ break; -+ -+ /* skb->nh should be correctly -+ set by sender, so that the second statement is -+ just protection against buggy protocols. 
-+ */ -+ skb2->mac.raw = skb2->data; -+ -+ if (skb2->nh.raw < skb2->data || -+ skb2->nh.raw > skb2->tail) { -+ if (net_ratelimit()) -+ printk(KERN_CRIT "protocol %04x is " -+ "buggy, dev %s\n", -+ skb2->protocol, dev->name); -+ skb2->nh.raw = skb2->data; -+ } -+ -+ skb2->h.raw = skb2->nh.raw; -+ skb2->pkt_type = PACKET_OUTGOING; -+ ptype->func(skb2, skb->dev, ptype, skb->dev); -+ } -+ } -+ rcu_read_unlock(); -+} -+ -+ -+void __netif_schedule(struct net_device *dev) -+{ -+ if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) { -+ unsigned long flags; -+ struct softnet_data *sd; -+ -+ local_irq_save(flags); -+ sd = &__get_cpu_var(softnet_data); -+ dev->next_sched = sd->output_queue; -+ sd->output_queue = dev; -+ raise_softirq_irqoff(NET_TX_SOFTIRQ); -+ local_irq_restore(flags); -+ } -+} -+EXPORT_SYMBOL(__netif_schedule); -+ -+void __netif_rx_schedule(struct net_device *dev) -+{ -+ unsigned long flags; -+ -+ local_irq_save(flags); -+ dev_hold(dev); -+ list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list); -+ if (dev->quota < 0) -+ dev->quota += dev->weight; -+ else -+ dev->quota = dev->weight; -+ __raise_softirq_irqoff(NET_RX_SOFTIRQ); -+ local_irq_restore(flags); -+} -+EXPORT_SYMBOL(__netif_rx_schedule); -+ -+void dev_kfree_skb_any(struct sk_buff *skb) -+{ -+ if (in_irq() || irqs_disabled()) -+ dev_kfree_skb_irq(skb); -+ else -+ dev_kfree_skb(skb); -+} -+EXPORT_SYMBOL(dev_kfree_skb_any); -+ -+ -+/* Hot-plugging. 
*/ -+void netif_device_detach(struct net_device *dev) -+{ -+ if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && -+ netif_running(dev)) { -+ netif_stop_queue(dev); -+ } -+} -+EXPORT_SYMBOL(netif_device_detach); -+ -+void netif_device_attach(struct net_device *dev) -+{ -+ if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && -+ netif_running(dev)) { -+ netif_wake_queue(dev); -+ __netdev_watchdog_up(dev); -+ } -+} -+EXPORT_SYMBOL(netif_device_attach); -+ -+ -+/* -+ * Invalidate hardware checksum when packet is to be mangled, and -+ * complete checksum manually on outgoing path. -+ */ -+int skb_checksum_help(struct sk_buff *skb) -+{ -+ __wsum csum; -+ int ret = 0, offset = skb->h.raw - skb->data; -+ -+ if (skb->ip_summed == CHECKSUM_COMPLETE) -+ goto out_set_summed; -+ -+ if (unlikely(skb_shinfo(skb)->gso_size)) { -+ /* Let GSO fix up the checksum. */ -+ goto out_set_summed; -+ } -+ -+ if (skb_cloned(skb)) { -+ ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); -+ if (ret) -+ goto out; -+ } -+ -+ BUG_ON(offset > (int)skb->len); -+ csum = skb_checksum(skb, offset, skb->len-offset, 0); -+ -+ offset = skb->tail - skb->h.raw; -+ BUG_ON(offset <= 0); -+ BUG_ON(skb->csum_offset + 2 > offset); -+ -+ *(__sum16*)(skb->h.raw + skb->csum_offset) = csum_fold(csum); -+ -+out_set_summed: -+ skb->ip_summed = CHECKSUM_NONE; -+out: -+ return ret; -+} -+ -+/** -+ * skb_gso_segment - Perform segmentation on skb. -+ * @skb: buffer to segment -+ * @features: features for the output path (see dev->features) -+ * -+ * This function segments the given skb and returns a list of segments. -+ * -+ * It may return NULL if the skb requires no segmentation. This is -+ * only possible when GSO is used for verifying header integrity. 
-+ */ -+struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) -+{ -+ struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); -+ struct packet_type *ptype; -+ __be16 type = skb->protocol; -+ int err; -+ -+ BUG_ON(skb_shinfo(skb)->frag_list); -+ -+ skb->mac.raw = skb->data; -+ skb->mac_len = skb->nh.raw - skb->data; -+ __skb_pull(skb, skb->mac_len); -+ -+ if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { -+ if (skb_header_cloned(skb) && -+ (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) -+ return ERR_PTR(err); -+ } -+ -+ rcu_read_lock(); -+ list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) { -+ if (ptype->type == type && !ptype->dev && ptype->gso_segment) { -+ if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { -+ err = ptype->gso_send_check(skb); -+ segs = ERR_PTR(err); -+ if (err || skb_gso_ok(skb, features)) -+ break; -+ __skb_push(skb, skb->data - skb->nh.raw); -+ } -+ segs = ptype->gso_segment(skb, features); -+ break; -+ } -+ } -+ rcu_read_unlock(); -+ -+ __skb_push(skb, skb->data - skb->mac.raw); -+ -+ return segs; -+} -+ -+EXPORT_SYMBOL(skb_gso_segment); -+ -+/* Take action when hardware reception checksum errors are detected. */ -+#ifdef CONFIG_BUG -+void netdev_rx_csum_fault(struct net_device *dev) -+{ -+ if (net_ratelimit()) { -+ printk(KERN_ERR "%s: hw csum failure.\n", -+ dev ? dev->name : ""); -+ dump_stack(); -+ } -+} -+EXPORT_SYMBOL(netdev_rx_csum_fault); -+#endif -+ -+/* Actually, we should eliminate this check as soon as we know, that: -+ * 1. IOMMU is present and allows to map all the memory. -+ * 2. No high memory really exists on this machine. 
-+ */ -+ -+static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) -+{ -+#ifdef CONFIG_HIGHMEM -+ int i; -+ -+ if (dev->features & NETIF_F_HIGHDMA) -+ return 0; -+ -+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) -+ if (PageHighMem(skb_shinfo(skb)->frags[i].page)) -+ return 1; -+ -+#endif -+ return 0; -+} -+ -+struct dev_gso_cb { -+ void (*destructor)(struct sk_buff *skb); -+}; -+ -+#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb) -+ -+static void dev_gso_skb_destructor(struct sk_buff *skb) -+{ -+ struct dev_gso_cb *cb; -+ -+ do { -+ struct sk_buff *nskb = skb->next; -+ -+ skb->next = nskb->next; -+ nskb->next = NULL; -+ kfree_skb(nskb); -+ } while (skb->next); -+ -+ cb = DEV_GSO_CB(skb); -+ if (cb->destructor) -+ cb->destructor(skb); -+} -+ -+/** -+ * dev_gso_segment - Perform emulated hardware segmentation on skb. -+ * @skb: buffer to segment -+ * -+ * This function segments the given skb and stores the list of segments -+ * in skb->next. -+ */ -+static int dev_gso_segment(struct sk_buff *skb) -+{ -+ struct net_device *dev = skb->dev; -+ struct sk_buff *segs; -+ int features = dev->features & ~(illegal_highdma(dev, skb) ? -+ NETIF_F_SG : 0); -+ -+ segs = skb_gso_segment(skb, features); -+ -+ /* Verifying header integrity only. 
*/ -+ if (!segs) -+ return 0; -+ -+ if (unlikely(IS_ERR(segs))) -+ return PTR_ERR(segs); -+ -+ skb->next = segs; -+ DEV_GSO_CB(skb)->destructor = skb->destructor; -+ skb->destructor = dev_gso_skb_destructor; -+ -+ return 0; -+} -+ -+int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) -+{ -+ if (likely(!skb->next)) { -+ if (netdev_nit) -+ dev_queue_xmit_nit(skb, dev); -+ -+ if (netif_needs_gso(dev, skb)) { -+ if (unlikely(dev_gso_segment(skb))) -+ goto out_kfree_skb; -+ if (skb->next) -+ goto gso; -+ } -+ -+ return dev->hard_start_xmit(skb, dev); -+ } -+ -+gso: -+ do { -+ struct sk_buff *nskb = skb->next; -+ int rc; -+ -+ skb->next = nskb->next; -+ nskb->next = NULL; -+ rc = dev->hard_start_xmit(nskb, dev); -+ if (unlikely(rc)) { -+ nskb->next = skb->next; -+ skb->next = nskb; -+ return rc; -+ } -+ if (unlikely(netif_queue_stopped(dev) && skb->next)) -+ return NETDEV_TX_BUSY; -+ } while (skb->next); -+ -+ skb->destructor = DEV_GSO_CB(skb)->destructor; -+ -+out_kfree_skb: -+ kfree_skb(skb); -+ return 0; -+} -+ -+#define HARD_TX_LOCK(dev, cpu) { \ -+ if ((dev->features & NETIF_F_LLTX) == 0) { \ -+ netif_tx_lock(dev); \ -+ } \ -+} -+ -+#define HARD_TX_UNLOCK(dev) { \ -+ if ((dev->features & NETIF_F_LLTX) == 0) { \ -+ netif_tx_unlock(dev); \ -+ } \ -+} -+ -+/** -+ * dev_queue_xmit - transmit a buffer -+ * @skb: buffer to transmit -+ * -+ * Queue a buffer for transmission to a network device. The caller must -+ * have set the device and priority and built the buffer before calling -+ * this function. The function can be called from an interrupt. -+ * -+ * A negative errno code is returned on a failure. A success does not -+ * guarantee the frame will be transmitted as it may be dropped due -+ * to congestion or traffic shaping. -+ * -+ * ----------------------------------------------------------------------------------- -+ * I notice this method can also return errors from the queue disciplines, -+ * including NET_XMIT_DROP, which is a positive value. 
So, errors can also -+ * be positive. -+ * -+ * Regardless of the return value, the skb is consumed, so it is currently -+ * difficult to retry a send to this method. (You can bump the ref count -+ * before sending to hold a reference for retry if you are careful.) -+ * -+ * When calling this method, interrupts MUST be enabled. This is because -+ * the BH enable code must have IRQs enabled so that it will not deadlock. -+ * --BLG -+ */ -+ -+int dev_queue_xmit(struct sk_buff *skb) -+{ -+ struct net_device *dev = skb->dev; -+ struct Qdisc *q; -+ int rc = -ENOMEM; -+ -+ /* GSO will handle the following emulations directly. */ -+ if (netif_needs_gso(dev, skb)) -+ goto gso; -+ -+ if (skb_shinfo(skb)->frag_list && -+ !(dev->features & NETIF_F_FRAGLIST) && -+ __skb_linearize(skb)) -+ goto out_kfree_skb; -+ -+ /* Fragmented skb is linearized if device does not support SG, -+ * or if at least one of fragments is in highmem and device -+ * does not support DMA from it. -+ */ -+ if (skb_shinfo(skb)->nr_frags && -+ (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) && -+ __skb_linearize(skb)) -+ goto out_kfree_skb; -+ -+ /* If packet is not checksummed and device does not support -+ * checksumming for this protocol, complete checksumming here. -+ */ -+ if (skb->ip_summed == CHECKSUM_PARTIAL && -+ (!(dev->features & NETIF_F_GEN_CSUM) && -+ (!(dev->features & NETIF_F_IP_CSUM) || -+ skb->protocol != htons(ETH_P_IP)))) -+ if (skb_checksum_help(skb)) -+ goto out_kfree_skb; -+ -+gso: -+ spin_lock_prefetch(&dev->queue_lock); -+ -+ /* Disable soft irqs for various locks below. Also -+ * stops preemption for RCU. -+ */ -+ rcu_read_lock_bh(); -+ -+ /* Updates of qdisc are serialized by queue_lock. -+ * The struct Qdisc which is pointed to by qdisc is now a -+ * rcu structure - it may be accessed without acquiring -+ * a lock (but the structure may be stale.) The freeing of the -+ * qdisc will be deferred until it's known that there are no -+ * more references to it. 
-+ * -+ * If the qdisc has an enqueue function, we still need to -+ * hold the queue_lock before calling it, since queue_lock -+ * also serializes access to the device queue. -+ */ -+ -+ q = rcu_dereference(dev->qdisc); -+#ifdef CONFIG_NET_CLS_ACT -+ skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS); -+#endif -+ if (q->enqueue) { -+ /* Grab device queue */ -+ spin_lock(&dev->queue_lock); -+ q = dev->qdisc; -+ if (q->enqueue) { -+ rc = q->enqueue(skb, q); -+ qdisc_run(dev); -+ spin_unlock(&dev->queue_lock); -+ -+ rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; -+ goto out; -+ } -+ spin_unlock(&dev->queue_lock); -+ } -+ -+ /* The device has no queue. Common case for software devices: -+ loopback, all the sorts of tunnels... -+ -+ Really, it is unlikely that netif_tx_lock protection is necessary -+ here. (f.e. loopback and IP tunnels are clean ignoring statistics -+ counters.) -+ However, it is possible, that they rely on protection -+ made by us here. -+ -+ Check this and shot the lock. It is not prone from deadlocks. -+ Either shot noqueue qdisc, it is even simpler 8) -+ */ -+ if (dev->flags & IFF_UP) { -+ int cpu = smp_processor_id(); /* ok because BHs are off */ -+ -+ if (dev->xmit_lock_owner != cpu) { -+ -+ HARD_TX_LOCK(dev, cpu); -+ -+ if (!netif_queue_stopped(dev)) { -+ rc = 0; -+ if (!dev_hard_start_xmit(skb, dev)) { -+ HARD_TX_UNLOCK(dev); -+ goto out; -+ } -+ } -+ HARD_TX_UNLOCK(dev); -+ if (net_ratelimit()) -+ printk(KERN_CRIT "Virtual device %s asks to " -+ "queue packet!\n", dev->name); -+ } else { -+ /* Recursion is detected! 
It is possible, -+ * unfortunately */ -+ if (net_ratelimit()) -+ printk(KERN_CRIT "Dead loop on virtual device " -+ "%s, fix it urgently!\n", dev->name); -+ } -+ } -+ -+ rc = -ENETDOWN; -+ rcu_read_unlock_bh(); -+ -+out_kfree_skb: -+ kfree_skb(skb); -+ return rc; -+out: -+ rcu_read_unlock_bh(); -+ return rc; -+} -+ -+ -+/*======================================================================= -+ Receiver routines -+ =======================================================================*/ -+ -+int netdev_max_backlog = 1000; -+int netdev_budget = 300; -+int weight_p = 64; /* old backlog weight */ -+ -+DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; -+ -+ -+/** -+ * netif_rx - post buffer to the network code -+ * @skb: buffer to post -+ * -+ * This function receives a packet from a device driver and queues it for -+ * the upper (protocol) levels to process. It always succeeds. The buffer -+ * may be dropped during processing for congestion control or by the -+ * protocol layers. -+ * -+ * return values: -+ * NET_RX_SUCCESS (no congestion) -+ * NET_RX_CN_LOW (low congestion) -+ * NET_RX_CN_MOD (moderate congestion) -+ * NET_RX_CN_HIGH (high congestion) -+ * NET_RX_DROP (packet was dropped) -+ * -+ */ -+ -+int netif_rx(struct sk_buff *skb) -+{ -+ struct softnet_data *queue; -+ unsigned long flags; -+ -+ /* if netpoll wants it, pretend we never saw it */ -+ if (netpoll_rx(skb)) -+ return NET_RX_DROP; -+ -+ if (!skb->tstamp.off_sec) -+ net_timestamp(skb); -+ -+ /* -+ * The code is rearranged so that the path is the most -+ * short when CPU is congested, but is still operating. 
-+ */ -+ local_irq_save(flags); -+ queue = &__get_cpu_var(softnet_data); -+ -+ __get_cpu_var(netdev_rx_stat).total++; -+ if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { -+ if (queue->input_pkt_queue.qlen) { -+enqueue: -+ dev_hold(skb->dev); -+ __skb_queue_tail(&queue->input_pkt_queue, skb); -+ local_irq_restore(flags); -+ return NET_RX_SUCCESS; -+ } -+ -+ netif_rx_schedule(&queue->backlog_dev); -+ goto enqueue; -+ } -+ -+ __get_cpu_var(netdev_rx_stat).dropped++; -+ local_irq_restore(flags); -+ -+ kfree_skb(skb); -+ return NET_RX_DROP; -+} -+ -+int netif_rx_ni(struct sk_buff *skb) -+{ -+ int err; -+ -+ preempt_disable(); -+ err = netif_rx(skb); -+ if (local_softirq_pending()) -+ do_softirq(); -+ preempt_enable(); -+ -+ return err; -+} -+ -+EXPORT_SYMBOL(netif_rx_ni); -+ -+static inline struct net_device *skb_bond(struct sk_buff *skb) -+{ -+ struct net_device *dev = skb->dev; -+ -+ if (dev->master) { -+ if (skb_bond_should_drop(skb)) { -+ kfree_skb(skb); -+ return NULL; -+ } -+ skb->dev = dev->master; -+ } -+ -+ return dev; -+} -+ -+static void net_tx_action(struct softirq_action *h) -+{ -+ struct softnet_data *sd = &__get_cpu_var(softnet_data); -+ -+ if (sd->completion_queue) { -+ struct sk_buff *clist; -+ -+ local_irq_disable(); -+ clist = sd->completion_queue; -+ sd->completion_queue = NULL; -+ local_irq_enable(); -+ -+ while (clist) { -+ struct sk_buff *skb = clist; -+ clist = clist->next; -+ -+ BUG_TRAP(!atomic_read(&skb->users)); -+ __kfree_skb(skb); -+ } -+ } -+ -+ if (sd->output_queue) { -+ struct net_device *head; -+ -+ local_irq_disable(); -+ head = sd->output_queue; -+ sd->output_queue = NULL; -+ local_irq_enable(); -+ -+ while (head) { -+ struct net_device *dev = head; -+ head = head->next_sched; -+ -+ smp_mb__before_clear_bit(); -+ clear_bit(__LINK_STATE_SCHED, &dev->state); -+ -+ if (spin_trylock(&dev->queue_lock)) { -+ qdisc_run(dev); -+ spin_unlock(&dev->queue_lock); -+ } else { -+ netif_schedule(dev); -+ } -+ } -+ } -+} -+ -+static 
__inline__ int deliver_skb(struct sk_buff *skb, -+ struct packet_type *pt_prev, -+ struct net_device *orig_dev) -+{ -+ atomic_inc(&skb->users); -+ return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); -+} -+ -+#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) -+int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb); -+struct net_bridge; -+struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br, -+ unsigned char *addr); -+void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent); -+ -+static __inline__ int handle_bridge(struct sk_buff **pskb, -+ struct packet_type **pt_prev, int *ret, -+ struct net_device *orig_dev) -+{ -+ struct net_bridge_port *port; -+ -+ if ((*pskb)->pkt_type == PACKET_LOOPBACK || -+ (port = rcu_dereference((*pskb)->dev->br_port)) == NULL) -+ return 0; -+ -+ if (*pt_prev) { -+ *ret = deliver_skb(*pskb, *pt_prev, orig_dev); -+ *pt_prev = NULL; -+ } -+ -+ return br_handle_frame_hook(port, pskb); -+} -+#else -+#define handle_bridge(skb, pt_prev, ret, orig_dev) (0) -+#endif -+ -+#ifdef CONFIG_NET_CLS_ACT -+/* TODO: Maybe we should just force sch_ingress to be compiled in -+ * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions -+ * a compare and 2 stores extra right now if we dont have it on -+ * but have CONFIG_NET_CLS_ACT -+ * NOTE: This doesnt stop any functionality; if you dont have -+ * the ingress scheduler, you just cant add policies on ingress. 
-+ * -+ */ -+static int ing_filter(struct sk_buff *skb) -+{ -+ struct Qdisc *q; -+ struct net_device *dev = skb->dev; -+ int result = TC_ACT_OK; -+ -+ if (dev->qdisc_ingress) { -+ __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd); -+ if (MAX_RED_LOOP < ttl++) { -+ printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n", -+ skb->iif, skb->dev->ifindex); -+ return TC_ACT_SHOT; -+ } -+ -+ skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl); -+ -+ skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS); -+ -+ spin_lock(&dev->queue_lock); -+ if ((q = dev->qdisc_ingress) != NULL) -+ result = q->enqueue(skb, q); -+ spin_unlock(&dev->queue_lock); -+ -+ } -+ -+ return result; -+} -+#endif -+ -+int netif_receive_skb(struct sk_buff *skb) -+{ -+ struct packet_type *ptype, *pt_prev; -+ struct net_device *orig_dev; -+ int ret = NET_RX_DROP; -+ __be16 type; -+ -+ /* if we've gotten here through NAPI, check netpoll */ -+ if (skb->dev->poll && netpoll_rx(skb)) -+ return NET_RX_DROP; -+ -+ if (!skb->tstamp.off_sec) -+ net_timestamp(skb); -+ -+ if (!skb->iif) -+ skb->iif = skb->dev->ifindex; -+ -+ orig_dev = skb_bond(skb); -+ -+ if (!orig_dev) -+ return NET_RX_DROP; -+ -+ __get_cpu_var(netdev_rx_stat).total++; -+ -+ skb->h.raw = skb->nh.raw = skb->data; -+ skb->mac_len = skb->nh.raw - skb->mac.raw; -+ -+ pt_prev = NULL; -+ -+ rcu_read_lock(); -+ -+#ifdef CONFIG_NET_CLS_ACT -+ if (skb->tc_verd & TC_NCLS) { -+ skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); -+ goto ncls; -+ } -+#endif -+ -+ list_for_each_entry_rcu(ptype, &ptype_all, list) { -+ if (!ptype->dev || ptype->dev == skb->dev) { -+ if (pt_prev) -+ ret = deliver_skb(skb, pt_prev, orig_dev); -+ pt_prev = ptype; -+ } -+ } -+ -+#ifdef CONFIG_NET_CLS_ACT -+ if (pt_prev) { -+ ret = deliver_skb(skb, pt_prev, orig_dev); -+ pt_prev = NULL; /* noone else should process this after*/ -+ } else { -+ skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); -+ } -+ -+ ret = ing_filter(skb); -+ -+ if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) { -+ 
kfree_skb(skb); -+ goto out; -+ } -+ -+ skb->tc_verd = 0; -+ncls: -+#endif -+ -+ if (handle_bridge(&skb, &pt_prev, &ret, orig_dev)) -+ goto out; -+ -+ type = skb->protocol; -+ list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) { -+ if (ptype->type == type && -+ (!ptype->dev || ptype->dev == skb->dev)) { -+ if (pt_prev) -+ ret = deliver_skb(skb, pt_prev, orig_dev); -+ pt_prev = ptype; -+ } -+ } -+ -+ if (pt_prev) { -+ ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); -+ } else { -+ kfree_skb(skb); -+ /* Jamal, now you will not able to escape explaining -+ * me how you were going to use this. :-) -+ */ -+ ret = NET_RX_DROP; -+ } -+ -+out: -+ rcu_read_unlock(); -+ return ret; -+} -+ -+static int process_backlog(struct net_device *backlog_dev, int *budget) -+{ -+ int work = 0; -+ int quota = min(backlog_dev->quota, *budget); -+ struct softnet_data *queue = &__get_cpu_var(softnet_data); -+ unsigned long start_time = jiffies; -+ -+ backlog_dev->weight = weight_p; -+ for (;;) { -+ struct sk_buff *skb; -+ struct net_device *dev; -+ -+ local_irq_disable(); -+ skb = __skb_dequeue(&queue->input_pkt_queue); -+ if (!skb) -+ goto job_done; -+ local_irq_enable(); -+ -+ dev = skb->dev; -+ -+ netif_receive_skb(skb); -+ -+ dev_put(dev); -+ -+ work++; -+ -+ if (work >= quota || jiffies - start_time > 1) -+ break; -+ -+ } -+ -+ backlog_dev->quota -= work; -+ *budget -= work; -+ return -1; -+ -+job_done: -+ backlog_dev->quota -= work; -+ *budget -= work; -+ -+ list_del(&backlog_dev->poll_list); -+ smp_mb__before_clear_bit(); -+ netif_poll_enable(backlog_dev); -+ -+ local_irq_enable(); -+ return 0; -+} -+ -+static void net_rx_action(struct softirq_action *h) -+{ -+ struct softnet_data *queue = &__get_cpu_var(softnet_data); -+ unsigned long start_time = jiffies; -+ int budget = netdev_budget; -+ void *have; -+ -+ local_irq_disable(); -+ -+ while (!list_empty(&queue->poll_list)) { -+ struct net_device *dev; -+ -+ if (budget <= 0 || jiffies - start_time > 1) -+ goto 
softnet_break; -+ -+ local_irq_enable(); -+ -+ dev = list_entry(queue->poll_list.next, -+ struct net_device, poll_list); -+ have = netpoll_poll_lock(dev); -+ -+ if (dev->quota <= 0 || dev->poll(dev, &budget)) { -+ netpoll_poll_unlock(have); -+ local_irq_disable(); -+ list_move_tail(&dev->poll_list, &queue->poll_list); -+ if (dev->quota < 0) -+ dev->quota += dev->weight; -+ else -+ dev->quota = dev->weight; -+ } else { -+ netpoll_poll_unlock(have); -+ dev_put(dev); -+ local_irq_disable(); -+ } -+ } -+out: -+#ifdef CONFIG_NET_DMA -+ /* -+ * There may not be any more sk_buffs coming right now, so push -+ * any pending DMA copies to hardware -+ */ -+ if (net_dma_client) { -+ struct dma_chan *chan; -+ rcu_read_lock(); -+ list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node) -+ dma_async_memcpy_issue_pending(chan); -+ rcu_read_unlock(); -+ } -+#endif -+ local_irq_enable(); -+ return; -+ -+softnet_break: -+ __get_cpu_var(netdev_rx_stat).time_squeeze++; -+ __raise_softirq_irqoff(NET_RX_SOFTIRQ); -+ goto out; -+} -+ -+static gifconf_func_t * gifconf_list [NPROTO]; -+ -+/** -+ * register_gifconf - register a SIOCGIF handler -+ * @family: Address family -+ * @gifconf: Function handler -+ * -+ * Register protocol dependent address dumping routines. The handler -+ * that is passed must not be freed or reused until it has been replaced -+ * by another handler. -+ */ -+int register_gifconf(unsigned int family, gifconf_func_t * gifconf) -+{ -+ if (family >= NPROTO) -+ return -EINVAL; -+ gifconf_list[family] = gifconf; -+ return 0; -+} -+ -+ -+/* -+ * Map an interface index to its name (SIOCGIFNAME) -+ */ -+ -+/* -+ * We need this ioctl for efficient implementation of the -+ * if_indextoname() function required by the IPv6 API. Without -+ * it, we would have to search all the interfaces to find a -+ * match. --pb -+ */ -+ -+static int dev_ifname(struct ifreq __user *arg) -+{ -+ struct net_device *dev; -+ struct ifreq ifr; -+ -+ /* -+ * Fetch the caller's info block. 
-+ */ -+ -+ if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) -+ return -EFAULT; -+ -+ read_lock(&dev_base_lock); -+ dev = __dev_get_by_index(ifr.ifr_ifindex); -+ if (!dev) { -+ read_unlock(&dev_base_lock); -+ return -ENODEV; -+ } -+ -+ strcpy(ifr.ifr_name, dev->name); -+ read_unlock(&dev_base_lock); -+ -+ if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) -+ return -EFAULT; -+ return 0; -+} -+ -+/* -+ * Perform a SIOCGIFCONF call. This structure will change -+ * size eventually, and there is nothing I can do about it. -+ * Thus we will need a 'compatibility mode'. -+ */ -+ -+static int dev_ifconf(char __user *arg) -+{ -+ struct ifconf ifc; -+ struct net_device *dev; -+ char __user *pos; -+ int len; -+ int total; -+ int i; -+ -+ /* -+ * Fetch the caller's info block. -+ */ -+ -+ if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) -+ return -EFAULT; -+ -+ pos = ifc.ifc_buf; -+ len = ifc.ifc_len; -+ -+ /* -+ * Loop over the interfaces, and write an info block for each. -+ */ -+ -+ total = 0; -+ for (dev = dev_base; dev; dev = dev->next) { -+ for (i = 0; i < NPROTO; i++) { -+ if (gifconf_list[i]) { -+ int done; -+ if (!pos) -+ done = gifconf_list[i](dev, NULL, 0); -+ else -+ done = gifconf_list[i](dev, pos + total, -+ len - total); -+ if (done < 0) -+ return -EFAULT; -+ total += done; -+ } -+ } -+ } -+ -+ /* -+ * All done. Write the updated control block back to the caller. -+ */ -+ ifc.ifc_len = total; -+ -+ /* -+ * Both BSD and Solaris return 0 here, so we do too. -+ */ -+ return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0; -+} -+ -+#ifdef CONFIG_PROC_FS -+/* -+ * This is invoked by the /proc filesystem handler to display a device -+ * in detail. -+ */ -+static __inline__ struct net_device *dev_get_idx(loff_t pos) -+{ -+ struct net_device *dev; -+ loff_t i; -+ -+ for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next); -+ -+ return i == pos ? 
dev : NULL; -+} -+ -+void *dev_seq_start(struct seq_file *seq, loff_t *pos) -+{ -+ read_lock(&dev_base_lock); -+ return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN; -+} -+ -+void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) -+{ -+ ++*pos; -+ return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next; -+} -+ -+void dev_seq_stop(struct seq_file *seq, void *v) -+{ -+ read_unlock(&dev_base_lock); -+} -+ -+static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) -+{ -+ if (dev->get_stats) { -+ struct net_device_stats *stats = dev->get_stats(dev); -+ -+ seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " -+ "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", -+ dev->name, stats->rx_bytes, stats->rx_packets, -+ stats->rx_errors, -+ stats->rx_dropped + stats->rx_missed_errors, -+ stats->rx_fifo_errors, -+ stats->rx_length_errors + stats->rx_over_errors + -+ stats->rx_crc_errors + stats->rx_frame_errors, -+ stats->rx_compressed, stats->multicast, -+ stats->tx_bytes, stats->tx_packets, -+ stats->tx_errors, stats->tx_dropped, -+ stats->tx_fifo_errors, stats->collisions, -+ stats->tx_carrier_errors + -+ stats->tx_aborted_errors + -+ stats->tx_window_errors + -+ stats->tx_heartbeat_errors, -+ stats->tx_compressed); -+ } else -+ seq_printf(seq, "%6s: No statistics available.\n", dev->name); -+} -+ -+/* -+ * Called from the PROCfs module. 
This now uses the new arbitrary sized -+ * /proc/net interface to create /proc/net/dev -+ */ -+static int dev_seq_show(struct seq_file *seq, void *v) -+{ -+ if (v == SEQ_START_TOKEN) -+ seq_puts(seq, "Inter-| Receive " -+ " | Transmit\n" -+ " face |bytes packets errs drop fifo frame " -+ "compressed multicast|bytes packets errs " -+ "drop fifo colls carrier compressed\n"); -+ else -+ dev_seq_printf_stats(seq, v); -+ return 0; -+} -+ -+static struct netif_rx_stats *softnet_get_online(loff_t *pos) -+{ -+ struct netif_rx_stats *rc = NULL; -+ -+ while (*pos < NR_CPUS) -+ if (cpu_online(*pos)) { -+ rc = &per_cpu(netdev_rx_stat, *pos); -+ break; -+ } else -+ ++*pos; -+ return rc; -+} -+ -+static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) -+{ -+ return softnet_get_online(pos); -+} -+ -+static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) -+{ -+ ++*pos; -+ return softnet_get_online(pos); -+} -+ -+static void softnet_seq_stop(struct seq_file *seq, void *v) -+{ -+} -+ -+static int softnet_seq_show(struct seq_file *seq, void *v) -+{ -+ struct netif_rx_stats *s = v; -+ -+ seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", -+ s->total, s->dropped, s->time_squeeze, 0, -+ 0, 0, 0, 0, /* was fastroute */ -+ s->cpu_collision ); -+ return 0; -+} -+ -+static struct seq_operations dev_seq_ops = { -+ .start = dev_seq_start, -+ .next = dev_seq_next, -+ .stop = dev_seq_stop, -+ .show = dev_seq_show, -+}; -+ -+static int dev_seq_open(struct inode *inode, struct file *file) -+{ -+ return seq_open(file, &dev_seq_ops); -+} -+ -+static const struct file_operations dev_seq_fops = { -+ .owner = THIS_MODULE, -+ .open = dev_seq_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = seq_release, -+}; -+ -+static struct seq_operations softnet_seq_ops = { -+ .start = softnet_seq_start, -+ .next = softnet_seq_next, -+ .stop = softnet_seq_stop, -+ .show = softnet_seq_show, -+}; -+ -+static int softnet_seq_open(struct inode *inode, struct 
file *file) -+{ -+ return seq_open(file, &softnet_seq_ops); -+} -+ -+static const struct file_operations softnet_seq_fops = { -+ .owner = THIS_MODULE, -+ .open = softnet_seq_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = seq_release, -+}; -+ -+#ifdef CONFIG_WIRELESS_EXT -+extern int wireless_proc_init(void); -+#else -+#define wireless_proc_init() 0 -+#endif -+ -+static int __init dev_proc_init(void) -+{ -+ int rc = -ENOMEM; -+ -+ if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops)) -+ goto out; -+ if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops)) -+ goto out_dev; -+ if (wireless_proc_init()) -+ goto out_softnet; -+ rc = 0; -+out: -+ return rc; -+out_softnet: -+ proc_net_remove("softnet_stat"); -+out_dev: -+ proc_net_remove("dev"); -+ goto out; -+} -+#else -+#define dev_proc_init() 0 -+#endif /* CONFIG_PROC_FS */ -+ -+ -+/** -+ * netdev_set_master - set up master/slave pair -+ * @slave: slave device -+ * @master: new master device -+ * -+ * Changes the master device of the slave. Pass %NULL to break the -+ * bonding. The caller must hold the RTNL semaphore. On a failure -+ * a negative errno code is returned. On success the reference counts -+ * are adjusted, %RTM_NEWLINK is sent to the routing socket and the -+ * function returns zero. -+ */ -+int netdev_set_master(struct net_device *slave, struct net_device *master) -+{ -+ struct net_device *old = slave->master; -+ -+ ASSERT_RTNL(); -+ -+ if (master) { -+ if (old) -+ return -EBUSY; -+ dev_hold(master); -+ } -+ -+ slave->master = master; -+ -+ synchronize_net(); -+ -+ if (old) -+ dev_put(old); -+ -+ if (master) -+ slave->flags |= IFF_SLAVE; -+ else -+ slave->flags &= ~IFF_SLAVE; -+ -+ rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); -+ return 0; -+} -+ -+/** -+ * dev_set_promiscuity - update promiscuity count on a device -+ * @dev: device -+ * @inc: modifier -+ * -+ * Add or remove promiscuity from a device. 
While the count in the device -+ * remains above zero the interface remains promiscuous. Once it hits zero -+ * the device reverts back to normal filtering operation. A negative inc -+ * value is used to drop promiscuity on the device. -+ */ -+void dev_set_promiscuity(struct net_device *dev, int inc) -+{ -+ unsigned short old_flags = dev->flags; -+ -+ if ((dev->promiscuity += inc) == 0) -+ dev->flags &= ~IFF_PROMISC; -+ else -+ dev->flags |= IFF_PROMISC; -+ if (dev->flags != old_flags) { -+ dev_mc_upload(dev); -+ printk(KERN_INFO "device %s %s promiscuous mode\n", -+ dev->name, (dev->flags & IFF_PROMISC) ? "entered" : -+ "left"); -+ audit_log(current->audit_context, GFP_ATOMIC, -+ AUDIT_ANOM_PROMISCUOUS, -+ "dev=%s prom=%d old_prom=%d auid=%u", -+ dev->name, (dev->flags & IFF_PROMISC), -+ (old_flags & IFF_PROMISC), -+ audit_get_loginuid(current->audit_context)); -+ } -+} -+ -+/** -+ * dev_set_allmulti - update allmulti count on a device -+ * @dev: device -+ * @inc: modifier -+ * -+ * Add or remove reception of all multicast frames to a device. While the -+ * count in the device remains above zero the interface remains listening -+ * to all interfaces. Once it hits zero the device reverts back to normal -+ * filtering operation. A negative @inc value is used to drop the counter -+ * when releasing a resource needing all multicasts. 
-+ */ -+ -+void dev_set_allmulti(struct net_device *dev, int inc) -+{ -+ unsigned short old_flags = dev->flags; -+ -+ dev->flags |= IFF_ALLMULTI; -+ if ((dev->allmulti += inc) == 0) -+ dev->flags &= ~IFF_ALLMULTI; -+ if (dev->flags ^ old_flags) -+ dev_mc_upload(dev); -+} -+ -+unsigned dev_get_flags(const struct net_device *dev) -+{ -+ unsigned flags; -+ -+ flags = (dev->flags & ~(IFF_PROMISC | -+ IFF_ALLMULTI | -+ IFF_RUNNING | -+ IFF_LOWER_UP | -+ IFF_DORMANT)) | -+ (dev->gflags & (IFF_PROMISC | -+ IFF_ALLMULTI)); -+ -+ if (netif_running(dev)) { -+ if (netif_oper_up(dev)) -+ flags |= IFF_RUNNING; -+ if (netif_carrier_ok(dev)) -+ flags |= IFF_LOWER_UP; -+ if (netif_dormant(dev)) -+ flags |= IFF_DORMANT; -+ } -+ -+ return flags; -+} -+ -+int dev_change_flags(struct net_device *dev, unsigned flags) -+{ -+ int ret; -+ int old_flags = dev->flags; -+ -+ /* -+ * Set the flags on our device. -+ */ -+ -+ dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP | -+ IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL | -+ IFF_AUTOMEDIA)) | -+ (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC | -+ IFF_ALLMULTI)); -+ -+ /* -+ * Load in the correct multicast list now the flags have changed. -+ */ -+ -+ dev_mc_upload(dev); -+ -+ /* -+ * Have we downed the interface. We handle IFF_UP ourselves -+ * according to user attempts to set it, rather than blindly -+ * setting it. -+ */ -+ -+ ret = 0; -+ if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */ -+ ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev); -+ -+ if (!ret) -+ dev_mc_upload(dev); -+ } -+ -+ if (dev->flags & IFF_UP && -+ ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI | -+ IFF_VOLATILE))) -+ raw_notifier_call_chain(&netdev_chain, -+ NETDEV_CHANGE, dev); -+ -+ if ((flags ^ dev->gflags) & IFF_PROMISC) { -+ int inc = (flags & IFF_PROMISC) ? 
+1 : -1; -+ dev->gflags ^= IFF_PROMISC; -+ dev_set_promiscuity(dev, inc); -+ } -+ -+ /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI -+ is important. Some (broken) drivers set IFF_PROMISC, when -+ IFF_ALLMULTI is requested not asking us and not reporting. -+ */ -+ if ((flags ^ dev->gflags) & IFF_ALLMULTI) { -+ int inc = (flags & IFF_ALLMULTI) ? +1 : -1; -+ dev->gflags ^= IFF_ALLMULTI; -+ dev_set_allmulti(dev, inc); -+ } -+ -+ if (old_flags ^ dev->flags) -+ rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags); -+ -+ return ret; -+} -+ -+int dev_set_mtu(struct net_device *dev, int new_mtu) -+{ -+ int err; -+ -+ if (new_mtu == dev->mtu) -+ return 0; -+ -+ /* MTU must be positive. */ -+ if (new_mtu < 0) -+ return -EINVAL; -+ -+ if (!netif_device_present(dev)) -+ return -ENODEV; -+ -+ err = 0; -+ if (dev->change_mtu) -+ err = dev->change_mtu(dev, new_mtu); -+ else -+ dev->mtu = new_mtu; -+ if (!err && dev->flags & IFF_UP) -+ raw_notifier_call_chain(&netdev_chain, -+ NETDEV_CHANGEMTU, dev); -+ return err; -+} -+ -+int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) -+{ -+ int err; -+ -+ if (!dev->set_mac_address) -+ return -EOPNOTSUPP; -+ if (sa->sa_family != dev->type) -+ return -EINVAL; -+ if (!netif_device_present(dev)) -+ return -ENODEV; -+ err = dev->set_mac_address(dev, sa); -+ if (!err) -+ raw_notifier_call_chain(&netdev_chain, -+ NETDEV_CHANGEADDR, dev); -+ return err; -+} -+ -+/* -+ * Perform the SIOCxIFxxx calls. 
-+ */ -+static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) -+{ -+ int err; -+ struct net_device *dev = __dev_get_by_name(ifr->ifr_name); -+ -+ if (!dev) -+ return -ENODEV; -+ -+ switch (cmd) { -+ case SIOCGIFFLAGS: /* Get interface flags */ -+ ifr->ifr_flags = dev_get_flags(dev); -+ return 0; -+ -+ case SIOCSIFFLAGS: /* Set interface flags */ -+ return dev_change_flags(dev, ifr->ifr_flags); -+ -+ case SIOCGIFMETRIC: /* Get the metric on the interface -+ (currently unused) */ -+ ifr->ifr_metric = 0; -+ return 0; -+ -+ case SIOCSIFMETRIC: /* Set the metric on the interface -+ (currently unused) */ -+ return -EOPNOTSUPP; -+ -+ case SIOCGIFMTU: /* Get the MTU of a device */ -+ ifr->ifr_mtu = dev->mtu; -+ return 0; -+ -+ case SIOCSIFMTU: /* Set the MTU of a device */ -+ return dev_set_mtu(dev, ifr->ifr_mtu); -+ -+ case SIOCGIFHWADDR: -+ if (!dev->addr_len) -+ memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); -+ else -+ memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, -+ min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); -+ ifr->ifr_hwaddr.sa_family = dev->type; -+ return 0; -+ -+ case SIOCSIFHWADDR: -+ return dev_set_mac_address(dev, &ifr->ifr_hwaddr); -+ -+ case SIOCSIFHWBROADCAST: -+ if (ifr->ifr_hwaddr.sa_family != dev->type) -+ return -EINVAL; -+ memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, -+ min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); -+ raw_notifier_call_chain(&netdev_chain, -+ NETDEV_CHANGEADDR, dev); -+ return 0; -+ -+ case SIOCGIFMAP: -+ ifr->ifr_map.mem_start = dev->mem_start; -+ ifr->ifr_map.mem_end = dev->mem_end; -+ ifr->ifr_map.base_addr = dev->base_addr; -+ ifr->ifr_map.irq = dev->irq; -+ ifr->ifr_map.dma = dev->dma; -+ ifr->ifr_map.port = dev->if_port; -+ return 0; -+ -+ case SIOCSIFMAP: -+ if (dev->set_config) { -+ if (!netif_device_present(dev)) -+ return -ENODEV; -+ return dev->set_config(dev, &ifr->ifr_map); -+ } -+ return -EOPNOTSUPP; -+ -+ case SIOCADDMULTI: -+ if (!dev->set_multicast_list || 
-+ ifr->ifr_hwaddr.sa_family != AF_UNSPEC) -+ return -EINVAL; -+ if (!netif_device_present(dev)) -+ return -ENODEV; -+ return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data, -+ dev->addr_len, 1); -+ -+ case SIOCDELMULTI: -+ if (!dev->set_multicast_list || -+ ifr->ifr_hwaddr.sa_family != AF_UNSPEC) -+ return -EINVAL; -+ if (!netif_device_present(dev)) -+ return -ENODEV; -+ return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data, -+ dev->addr_len, 1); -+ -+ case SIOCGIFINDEX: -+ ifr->ifr_ifindex = dev->ifindex; -+ return 0; -+ -+ case SIOCGIFTXQLEN: -+ ifr->ifr_qlen = dev->tx_queue_len; -+ return 0; -+ -+ case SIOCSIFTXQLEN: -+ if (ifr->ifr_qlen < 0) -+ return -EINVAL; -+ dev->tx_queue_len = ifr->ifr_qlen; -+ return 0; -+ -+ case SIOCSIFNAME: -+ ifr->ifr_newname[IFNAMSIZ-1] = '\0'; -+ return dev_change_name(dev, ifr->ifr_newname); -+ -+ /* -+ * Unknown or private ioctl -+ */ -+ -+ default: -+ if ((cmd >= SIOCDEVPRIVATE && -+ cmd <= SIOCDEVPRIVATE + 15) || -+ cmd == SIOCBONDENSLAVE || -+ cmd == SIOCBONDRELEASE || -+ cmd == SIOCBONDSETHWADDR || -+ cmd == SIOCBONDSLAVEINFOQUERY || -+ cmd == SIOCBONDINFOQUERY || -+ cmd == SIOCBONDCHANGEACTIVE || -+ cmd == SIOCGMIIPHY || -+ cmd == SIOCGMIIREG || -+ cmd == SIOCSMIIREG || -+ cmd == SIOCBRADDIF || -+ cmd == SIOCBRDELIF || -+ cmd == SIOCWANDEV) { -+ err = -EOPNOTSUPP; -+ if (dev->do_ioctl) { -+ if (netif_device_present(dev)) -+ err = dev->do_ioctl(dev, ifr, -+ cmd); -+ else -+ err = -ENODEV; -+ } -+ } else -+ err = -EINVAL; -+ -+ } -+ return err; -+} -+ -+/* -+ * This function handles all "interface"-type I/O control requests. The actual -+ * 'doing' part of this is dev_ifsioc above. -+ */ -+ -+/** -+ * dev_ioctl - network device ioctl -+ * @cmd: command to issue -+ * @arg: pointer to a struct ifreq in user space -+ * -+ * Issue ioctl functions to devices. This is normally called by the -+ * user space syscall interfaces but can sometimes be useful for -+ * other purposes. 
The return value is the return from the syscall if -+ * positive or a negative errno code on error. -+ */ -+ -+int dev_ioctl(unsigned int cmd, void __user *arg) -+{ -+ struct ifreq ifr; -+ int ret; -+ char *colon; -+ -+ /* One special case: SIOCGIFCONF takes ifconf argument -+ and requires shared lock, because it sleeps writing -+ to user space. -+ */ -+ -+ if (cmd == SIOCGIFCONF) { -+ rtnl_lock(); -+ ret = dev_ifconf((char __user *) arg); -+ rtnl_unlock(); -+ return ret; -+ } -+ if (cmd == SIOCGIFNAME) -+ return dev_ifname((struct ifreq __user *)arg); -+ -+ if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) -+ return -EFAULT; -+ -+ ifr.ifr_name[IFNAMSIZ-1] = 0; -+ -+ colon = strchr(ifr.ifr_name, ':'); -+ if (colon) -+ *colon = 0; -+ -+ /* -+ * See which interface the caller is talking about. -+ */ -+ -+ switch (cmd) { -+ /* -+ * These ioctl calls: -+ * - can be done by all. -+ * - atomic and do not require locking. -+ * - return a value -+ */ -+ case SIOCGIFFLAGS: -+ case SIOCGIFMETRIC: -+ case SIOCGIFMTU: -+ case SIOCGIFHWADDR: -+ case SIOCGIFSLAVE: -+ case SIOCGIFMAP: -+ case SIOCGIFINDEX: -+ case SIOCGIFTXQLEN: -+ dev_load(ifr.ifr_name); -+ read_lock(&dev_base_lock); -+ ret = dev_ifsioc(&ifr, cmd); -+ read_unlock(&dev_base_lock); -+ if (!ret) { -+ if (colon) -+ *colon = ':'; -+ if (copy_to_user(arg, &ifr, -+ sizeof(struct ifreq))) -+ ret = -EFAULT; -+ } -+ return ret; -+ -+ case SIOCETHTOOL: -+ dev_load(ifr.ifr_name); -+ rtnl_lock(); -+ ret = dev_ethtool(&ifr); -+ rtnl_unlock(); -+ if (!ret) { -+ if (colon) -+ *colon = ':'; -+ if (copy_to_user(arg, &ifr, -+ sizeof(struct ifreq))) -+ ret = -EFAULT; -+ } -+ return ret; -+ -+ /* -+ * These ioctl calls: -+ * - require superuser power. -+ * - require strict serialization. 
-+ * - return a value -+ */ -+ case SIOCGMIIPHY: -+ case SIOCGMIIREG: -+ case SIOCSIFNAME: -+ if (!capable(CAP_NET_ADMIN)) -+ return -EPERM; -+ dev_load(ifr.ifr_name); -+ rtnl_lock(); -+ ret = dev_ifsioc(&ifr, cmd); -+ rtnl_unlock(); -+ if (!ret) { -+ if (colon) -+ *colon = ':'; -+ if (copy_to_user(arg, &ifr, -+ sizeof(struct ifreq))) -+ ret = -EFAULT; -+ } -+ return ret; -+ -+ /* -+ * These ioctl calls: -+ * - require superuser power. -+ * - require strict serialization. -+ * - do not return a value -+ */ -+ case SIOCSIFFLAGS: -+ case SIOCSIFMETRIC: -+ case SIOCSIFMTU: -+ case SIOCSIFMAP: -+ case SIOCSIFHWADDR: -+ case SIOCSIFSLAVE: -+ case SIOCADDMULTI: -+ case SIOCDELMULTI: -+ case SIOCSIFHWBROADCAST: -+ case SIOCSIFTXQLEN: -+ case SIOCSMIIREG: -+ case SIOCBONDENSLAVE: -+ case SIOCBONDRELEASE: -+ case SIOCBONDSETHWADDR: -+ case SIOCBONDCHANGEACTIVE: -+ case SIOCBRADDIF: -+ case SIOCBRDELIF: -+ if (!capable(CAP_NET_ADMIN)) -+ return -EPERM; -+ /* fall through */ -+ case SIOCBONDSLAVEINFOQUERY: -+ case SIOCBONDINFOQUERY: -+ dev_load(ifr.ifr_name); -+ rtnl_lock(); -+ ret = dev_ifsioc(&ifr, cmd); -+ rtnl_unlock(); -+ return ret; -+ -+ case SIOCGIFMEM: -+ /* Get the per device memory space. We can add this but -+ * currently do not support it */ -+ case SIOCSIFMEM: -+ /* Set the per device memory buffer space. -+ * Not applicable in our case */ -+ case SIOCSIFLINK: -+ return -EINVAL; -+ -+ /* -+ * Unknown or private ioctl. 
-+ */ -+ default: -+ if (cmd == SIOCWANDEV || -+ (cmd >= SIOCDEVPRIVATE && -+ cmd <= SIOCDEVPRIVATE + 15)) { -+ dev_load(ifr.ifr_name); -+ rtnl_lock(); -+ ret = dev_ifsioc(&ifr, cmd); -+ rtnl_unlock(); -+ if (!ret && copy_to_user(arg, &ifr, -+ sizeof(struct ifreq))) -+ ret = -EFAULT; -+ return ret; -+ } -+#ifdef CONFIG_WIRELESS_EXT -+ /* Take care of Wireless Extensions */ -+ if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { -+ /* If command is `set a parameter', or -+ * `get the encoding parameters', check if -+ * the user has the right to do it */ -+ if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE -+ || cmd == SIOCGIWENCODEEXT) { -+ if (!capable(CAP_NET_ADMIN)) -+ return -EPERM; -+ } -+ dev_load(ifr.ifr_name); -+ rtnl_lock(); -+ /* Follow me in net/core/wireless.c */ -+ ret = wireless_process_ioctl(&ifr, cmd); -+ rtnl_unlock(); -+ if (IW_IS_GET(cmd) && -+ copy_to_user(arg, &ifr, -+ sizeof(struct ifreq))) -+ ret = -EFAULT; -+ return ret; -+ } -+#endif /* CONFIG_WIRELESS_EXT */ -+ return -EINVAL; -+ } -+} -+ -+ -+/** -+ * dev_new_index - allocate an ifindex -+ * -+ * Returns a suitable unique value for a new device interface -+ * number. The caller must hold the rtnl semaphore or the -+ * dev_base_lock to be sure it remains unique. 
-+ */ -+static int dev_new_index(void) -+{ -+ static int ifindex; -+ for (;;) { -+ if (++ifindex <= 0) -+ ifindex = 1; -+ if (!__dev_get_by_index(ifindex)) -+ return ifindex; -+ } -+} -+ -+static int dev_boot_phase = 1; -+ -+/* Delayed registration/unregisteration */ -+static DEFINE_SPINLOCK(net_todo_list_lock); -+static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list); -+ -+static inline void net_set_todo(struct net_device *dev) -+{ -+ spin_lock(&net_todo_list_lock); -+ list_add_tail(&dev->todo_list, &net_todo_list); -+ spin_unlock(&net_todo_list_lock); -+} -+ -+/** -+ * register_netdevice - register a network device -+ * @dev: device to register -+ * -+ * Take a completed network device structure and add it to the kernel -+ * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier -+ * chain. 0 is returned on success. A negative errno code is returned -+ * on a failure to set up the device, or if the name is a duplicate. -+ * -+ * Callers must hold the rtnl semaphore. You may want -+ * register_netdev() instead of this. -+ * -+ * BUGS: -+ * The locking appears insufficient to guarantee two parallel registers -+ * will not get the same name. -+ */ -+ -+int register_netdevice(struct net_device *dev) -+{ -+ struct hlist_head *head; -+ struct hlist_node *p; -+ int ret; -+ -+ BUG_ON(dev_boot_phase); -+ ASSERT_RTNL(); -+ -+ might_sleep(); -+ -+ /* When net_device's are persistent, this will be fatal. 
*/ -+ BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); -+ -+ spin_lock_init(&dev->queue_lock); -+ spin_lock_init(&dev->_xmit_lock); -+ dev->xmit_lock_owner = -1; -+#ifdef CONFIG_NET_CLS_ACT -+ spin_lock_init(&dev->ingress_lock); -+#endif -+ -+ dev->iflink = -1; -+ -+ /* Init, if this function is available */ -+ if (dev->init) { -+ ret = dev->init(dev); -+ if (ret) { -+ if (ret > 0) -+ ret = -EIO; -+ goto out; -+ } -+ } -+ -+ if (!dev_valid_name(dev->name)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ dev->ifindex = dev_new_index(); -+ if (dev->iflink == -1) -+ dev->iflink = dev->ifindex; -+ -+ /* Check for existence of name */ -+ head = dev_name_hash(dev->name); -+ hlist_for_each(p, head) { -+ struct net_device *d -+ = hlist_entry(p, struct net_device, name_hlist); -+ if (!strncmp(d->name, dev->name, IFNAMSIZ)) { -+ ret = -EEXIST; -+ goto out; -+ } -+ } -+ -+ /* Fix illegal SG+CSUM combinations. */ -+ if ((dev->features & NETIF_F_SG) && -+ !(dev->features & NETIF_F_ALL_CSUM)) { -+ printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n", -+ dev->name); -+ dev->features &= ~NETIF_F_SG; -+ } -+ -+ /* TSO requires that SG is present as well. */ -+ if ((dev->features & NETIF_F_TSO) && -+ !(dev->features & NETIF_F_SG)) { -+ printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n", -+ dev->name); -+ dev->features &= ~NETIF_F_TSO; -+ } -+ if (dev->features & NETIF_F_UFO) { -+ if (!(dev->features & NETIF_F_HW_CSUM)) { -+ printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no " -+ "NETIF_F_HW_CSUM feature.\n", -+ dev->name); -+ dev->features &= ~NETIF_F_UFO; -+ } -+ if (!(dev->features & NETIF_F_SG)) { -+ printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no " -+ "NETIF_F_SG feature.\n", -+ dev->name); -+ dev->features &= ~NETIF_F_UFO; -+ } -+ } -+ -+ /* -+ * nil rebuild_header routine, -+ * that should be never called and used as just bug trap. 
-+ */ -+ -+ if (!dev->rebuild_header) -+ dev->rebuild_header = default_rebuild_header; -+ -+ ret = netdev_register_sysfs(dev); -+ if (ret) -+ goto out; -+ dev->reg_state = NETREG_REGISTERED; -+ -+ /* -+ * Default initial state at registry is that the -+ * device is present. -+ */ -+ -+ set_bit(__LINK_STATE_PRESENT, &dev->state); -+ -+ dev->next = NULL; -+ dev_init_scheduler(dev); -+ write_lock_bh(&dev_base_lock); -+ *dev_tail = dev; -+ dev_tail = &dev->next; -+ hlist_add_head(&dev->name_hlist, head); -+ hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex)); -+ dev_hold(dev); -+ write_unlock_bh(&dev_base_lock); -+ -+ /* Notify protocols, that a new device appeared. */ -+ raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); -+ -+ ret = 0; -+ -+out: -+ return ret; -+} -+ -+/** -+ * register_netdev - register a network device -+ * @dev: device to register -+ * -+ * Take a completed network device structure and add it to the kernel -+ * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier -+ * chain. 0 is returned on success. A negative errno code is returned -+ * on a failure to set up the device, or if the name is a duplicate. -+ * -+ * This is a wrapper around register_netdev that takes the rtnl semaphore -+ * and expands the device name if you passed a format string to -+ * alloc_netdev. -+ */ -+int register_netdev(struct net_device *dev) -+{ -+ int err; -+ -+ rtnl_lock(); -+ -+ /* -+ * If the name is a format string the caller wants us to do a -+ * name allocation. -+ */ -+ if (strchr(dev->name, '%')) { -+ err = dev_alloc_name(dev, dev->name); -+ if (err < 0) -+ goto out; -+ } -+ -+ err = register_netdevice(dev); -+out: -+ rtnl_unlock(); -+ return err; -+} -+EXPORT_SYMBOL(register_netdev); -+ -+/* -+ * netdev_wait_allrefs - wait until all references are gone. -+ * -+ * This is called when unregistering network devices. 
-+ * -+ * Any protocol or device that holds a reference should register -+ * for netdevice notification, and cleanup and put back the -+ * reference if they receive an UNREGISTER event. -+ * We can get stuck here if buggy protocols don't correctly -+ * call dev_put. -+ */ -+static void netdev_wait_allrefs(struct net_device *dev) -+{ -+ unsigned long rebroadcast_time, warning_time; -+ -+ rebroadcast_time = warning_time = jiffies; -+ while (atomic_read(&dev->refcnt) != 0) { -+ if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { -+ rtnl_lock(); -+ -+ /* Rebroadcast unregister notification */ -+ raw_notifier_call_chain(&netdev_chain, -+ NETDEV_UNREGISTER, dev); -+ -+ if (test_bit(__LINK_STATE_LINKWATCH_PENDING, -+ &dev->state)) { -+ /* We must not have linkwatch events -+ * pending on unregister. If this -+ * happens, we simply run the queue -+ * unscheduled, resulting in a noop -+ * for this device. -+ */ -+ linkwatch_run_queue(); -+ } -+ -+ __rtnl_unlock(); -+ -+ rebroadcast_time = jiffies; -+ } -+ -+ msleep(250); -+ -+ if (time_after(jiffies, warning_time + 10 * HZ)) { -+ printk(KERN_EMERG "unregister_netdevice: " -+ "waiting for %s to become free. Usage " -+ "count = %d\n", -+ dev->name, atomic_read(&dev->refcnt)); -+ warning_time = jiffies; -+ } -+ } -+} -+ -+/* The sequence is: -+ * -+ * rtnl_lock(); -+ * ... -+ * register_netdevice(x1); -+ * register_netdevice(x2); -+ * ... -+ * unregister_netdevice(y1); -+ * unregister_netdevice(y2); -+ * ... -+ * rtnl_unlock(); -+ * free_netdev(y1); -+ * free_netdev(y2); -+ * -+ * We are invoked by rtnl_unlock() after it drops the semaphore. -+ * This allows us to deal with problems: -+ * 1) We can delete sysfs objects which invoke hotplug -+ * without deadlocking with linkwatch via keventd. -+ * 2) Since we run with the RTNL semaphore not held, we can sleep -+ * safely in order to wait for the netdev refcnt to drop to zero. 
-+ */ -+static DEFINE_MUTEX(net_todo_run_mutex); -+void netdev_run_todo(void) -+{ -+ struct list_head list; -+ -+ /* Need to guard against multiple cpu's getting out of order. */ -+ mutex_lock(&net_todo_run_mutex); -+ -+ /* Not safe to do outside the semaphore. We must not return -+ * until all unregister events invoked by the local processor -+ * have been completed (either by this todo run, or one on -+ * another cpu). -+ */ -+ if (list_empty(&net_todo_list)) -+ goto out; -+ -+ /* Snapshot list, allow later requests */ -+ spin_lock(&net_todo_list_lock); -+ list_replace_init(&net_todo_list, &list); -+ spin_unlock(&net_todo_list_lock); -+ -+ while (!list_empty(&list)) { -+ struct net_device *dev -+ = list_entry(list.next, struct net_device, todo_list); -+ list_del(&dev->todo_list); -+ -+ if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { -+ printk(KERN_ERR "network todo '%s' but state %d\n", -+ dev->name, dev->reg_state); -+ dump_stack(); -+ continue; -+ } -+ -+ netdev_unregister_sysfs(dev); -+ dev->reg_state = NETREG_UNREGISTERED; -+ -+ netdev_wait_allrefs(dev); -+ -+ /* paranoia */ -+ BUG_ON(atomic_read(&dev->refcnt)); -+ BUG_TRAP(!dev->ip_ptr); -+ BUG_TRAP(!dev->ip6_ptr); -+ BUG_TRAP(!dev->dn_ptr); -+ -+ /* It must be the very last action, -+ * after this 'dev' may point to freed up memory. -+ */ -+ if (dev->destructor) -+ dev->destructor(dev); -+ } -+ -+out: -+ mutex_unlock(&net_todo_run_mutex); -+} -+ -+/** -+ * alloc_netdev - allocate network device -+ * @sizeof_priv: size of private data to allocate space for -+ * @name: device name format string -+ * @setup: callback to initialize device -+ * -+ * Allocates a struct net_device with private data area for driver use -+ * and performs basic initialization. 
-+ */ -+struct net_device *alloc_netdev(int sizeof_priv, const char *name, -+ void (*setup)(struct net_device *)) -+{ -+ void *p; -+ struct net_device *dev; -+ int alloc_size; -+ -+ BUG_ON(strlen(name) >= sizeof(dev->name)); -+ -+ /* ensure 32-byte alignment of both the device and private area */ -+ alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; -+ alloc_size += sizeof_priv + NETDEV_ALIGN_CONST; -+ -+ p = kzalloc(alloc_size, GFP_KERNEL); -+ if (!p) { -+ printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n"); -+ return NULL; -+ } -+ -+ dev = (struct net_device *) -+ (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); -+ dev->padded = (char *)dev - (char *)p; -+ -+ if (sizeof_priv) -+ dev->priv = netdev_priv(dev); -+ -+ setup(dev); -+ strcpy(dev->name, name); -+ return dev; -+} -+EXPORT_SYMBOL(alloc_netdev); -+ -+/** -+ * free_netdev - free network device -+ * @dev: device -+ * -+ * This function does the last stage of destroying an allocated device -+ * interface. The reference to the device object is released. -+ * If this is the last reference then it will be freed. -+ */ -+void free_netdev(struct net_device *dev) -+{ -+#ifdef CONFIG_SYSFS -+ /* Compatibility with error handling in drivers */ -+ if (dev->reg_state == NETREG_UNINITIALIZED) { -+ kfree((char *)dev - dev->padded); -+ return; -+ } -+ -+ BUG_ON(dev->reg_state != NETREG_UNREGISTERED); -+ dev->reg_state = NETREG_RELEASED; -+ -+ /* will free via device release */ -+ put_device(&dev->dev); -+#else -+ kfree((char *)dev - dev->padded); -+#endif -+} -+ -+/* Synchronize with packet receive processing. */ -+void synchronize_net(void) -+{ -+ might_sleep(); -+ synchronize_rcu(); -+} -+ -+/** -+ * unregister_netdevice - remove device from the kernel -+ * @dev: device -+ * -+ * This function shuts down a device interface and removes it -+ * from the kernel tables. On success 0 is returned, on a failure -+ * a negative errno code is returned. 
-+ * -+ * Callers must hold the rtnl semaphore. You may want -+ * unregister_netdev() instead of this. -+ */ -+ -+void unregister_netdevice(struct net_device *dev) -+{ -+ struct net_device *d, **dp; -+ -+ BUG_ON(dev_boot_phase); -+ ASSERT_RTNL(); -+ -+ /* Some devices call without registering for initialization unwind. */ -+ if (dev->reg_state == NETREG_UNINITIALIZED) { -+ printk(KERN_DEBUG "unregister_netdevice: device %s/%p never " -+ "was registered\n", dev->name, dev); -+ -+ WARN_ON(1); -+ return; -+ } -+ -+ BUG_ON(dev->reg_state != NETREG_REGISTERED); -+ -+ /* If device is running, close it first. */ -+ if (dev->flags & IFF_UP) -+ dev_close(dev); -+ -+ /* And unlink it from device chain. */ -+ for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) { -+ if (d == dev) { -+ write_lock_bh(&dev_base_lock); -+ hlist_del(&dev->name_hlist); -+ hlist_del(&dev->index_hlist); -+ if (dev_tail == &dev->next) -+ dev_tail = dp; -+ *dp = d->next; -+ write_unlock_bh(&dev_base_lock); -+ break; -+ } -+ } -+ BUG_ON(!d); -+ -+ dev->reg_state = NETREG_UNREGISTERING; -+ -+ synchronize_net(); -+ -+ /* Shutdown queueing discipline. */ -+ dev_shutdown(dev); -+ -+ -+ /* Notify protocols, that we are about to destroy -+ this device. They should clean all the things. -+ */ -+ raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); -+ -+ /* -+ * Flush the multicast chain -+ */ -+ dev_mc_discard(dev); -+ -+ if (dev->uninit) -+ dev->uninit(dev); -+ -+ /* Notifier chain MUST detach us from master device. */ -+ BUG_TRAP(!dev->master); -+ -+ /* Finish processing unregister after unlock */ -+ net_set_todo(dev); -+ -+ synchronize_net(); -+ -+ dev_put(dev); -+} -+ -+/** -+ * unregister_netdev - remove device from the kernel -+ * @dev: device -+ * -+ * This function shuts down a device interface and removes it -+ * from the kernel tables. On success 0 is returned, on a failure -+ * a negative errno code is returned. 
-+ * -+ * This is just a wrapper for unregister_netdevice that takes -+ * the rtnl semaphore. In general you want to use this and not -+ * unregister_netdevice. -+ */ -+void unregister_netdev(struct net_device *dev) -+{ -+ rtnl_lock(); -+ unregister_netdevice(dev); -+ rtnl_unlock(); -+} -+ -+EXPORT_SYMBOL(unregister_netdev); -+ -+static int dev_cpu_callback(struct notifier_block *nfb, -+ unsigned long action, -+ void *ocpu) -+{ -+ struct sk_buff **list_skb; -+ struct net_device **list_net; -+ struct sk_buff *skb; -+ unsigned int cpu, oldcpu = (unsigned long)ocpu; -+ struct softnet_data *sd, *oldsd; -+ -+ if (action != CPU_DEAD) -+ return NOTIFY_OK; -+ -+ local_irq_disable(); -+ cpu = smp_processor_id(); -+ sd = &per_cpu(softnet_data, cpu); -+ oldsd = &per_cpu(softnet_data, oldcpu); -+ -+ /* Find end of our completion_queue. */ -+ list_skb = &sd->completion_queue; -+ while (*list_skb) -+ list_skb = &(*list_skb)->next; -+ /* Append completion queue from offline CPU. */ -+ *list_skb = oldsd->completion_queue; -+ oldsd->completion_queue = NULL; -+ -+ /* Find end of our output_queue. */ -+ list_net = &sd->output_queue; -+ while (*list_net) -+ list_net = &(*list_net)->next_sched; -+ /* Append output queue from offline CPU. */ -+ *list_net = oldsd->output_queue; -+ oldsd->output_queue = NULL; -+ -+ raise_softirq_irqoff(NET_TX_SOFTIRQ); -+ local_irq_enable(); -+ -+ /* Process offline CPU's input_pkt_queue */ -+ while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) -+ netif_rx(skb); -+ -+ return NOTIFY_OK; -+} -+ -+#ifdef CONFIG_NET_DMA -+/** -+ * net_dma_rebalance - -+ * This is called when the number of channels allocated to the net_dma_client -+ * changes. The net_dma_client tries to have one DMA channel per CPU. 
-+ */ -+static void net_dma_rebalance(void) -+{ -+ unsigned int cpu, i, n; -+ struct dma_chan *chan; -+ -+ if (net_dma_count == 0) { -+ for_each_online_cpu(cpu) -+ rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL); -+ return; -+ } -+ -+ i = 0; -+ cpu = first_cpu(cpu_online_map); -+ -+ rcu_read_lock(); -+ list_for_each_entry(chan, &net_dma_client->channels, client_node) { -+ n = ((num_online_cpus() / net_dma_count) -+ + (i < (num_online_cpus() % net_dma_count) ? 1 : 0)); -+ -+ while(n) { -+ per_cpu(softnet_data, cpu).net_dma = chan; -+ cpu = next_cpu(cpu, cpu_online_map); -+ n--; -+ } -+ i++; -+ } -+ rcu_read_unlock(); -+} -+ -+/** -+ * netdev_dma_event - event callback for the net_dma_client -+ * @client: should always be net_dma_client -+ * @chan: DMA channel for the event -+ * @event: event type -+ */ -+static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan, -+ enum dma_event event) -+{ -+ spin_lock(&net_dma_event_lock); -+ switch (event) { -+ case DMA_RESOURCE_ADDED: -+ net_dma_count++; -+ net_dma_rebalance(); -+ break; -+ case DMA_RESOURCE_REMOVED: -+ net_dma_count--; -+ net_dma_rebalance(); -+ break; -+ default: -+ break; -+ } -+ spin_unlock(&net_dma_event_lock); -+} -+ -+/** -+ * netdev_dma_regiser - register the networking subsystem as a DMA client -+ */ -+static int __init netdev_dma_register(void) -+{ -+ spin_lock_init(&net_dma_event_lock); -+ net_dma_client = dma_async_client_register(netdev_dma_event); -+ if (net_dma_client == NULL) -+ return -ENOMEM; -+ -+ dma_async_client_chan_request(net_dma_client, num_online_cpus()); -+ return 0; -+} -+ -+#else -+static int __init netdev_dma_register(void) { return -ENODEV; } -+#endif /* CONFIG_NET_DMA */ -+ -+/* -+ * Initialize the DEV module. At boot time this walks the device list and -+ * unhooks any devices that fail to initialise (normally hardware not -+ * present) and leaves us with a valid list of present and active devices. 
-+ * -+ */ -+ -+/* -+ * This is called single threaded during boot, so no need -+ * to take the rtnl semaphore. -+ */ -+static int __init net_dev_init(void) -+{ -+ int i, rc = -ENOMEM; -+ -+ BUG_ON(!dev_boot_phase); -+ -+ if (dev_proc_init()) -+ goto out; -+ -+ if (netdev_sysfs_init()) -+ goto out; -+ -+ INIT_LIST_HEAD(&ptype_all); -+ for (i = 0; i < 16; i++) -+ INIT_LIST_HEAD(&ptype_base[i]); -+ -+ for (i = 0; i < ARRAY_SIZE(dev_name_head); i++) -+ INIT_HLIST_HEAD(&dev_name_head[i]); -+ -+ for (i = 0; i < ARRAY_SIZE(dev_index_head); i++) -+ INIT_HLIST_HEAD(&dev_index_head[i]); -+ -+ /* -+ * Initialise the packet receive queues. -+ */ -+ -+ for_each_possible_cpu(i) { -+ struct softnet_data *queue; -+ -+ queue = &per_cpu(softnet_data, i); -+ skb_queue_head_init(&queue->input_pkt_queue); -+ queue->completion_queue = NULL; -+ INIT_LIST_HEAD(&queue->poll_list); -+ set_bit(__LINK_STATE_START, &queue->backlog_dev.state); -+ queue->backlog_dev.weight = weight_p; -+ queue->backlog_dev.poll = process_backlog; -+ atomic_set(&queue->backlog_dev.refcnt, 1); -+ } -+ -+ netdev_dma_register(); -+ -+ dev_boot_phase = 0; -+ -+ open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); -+ open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL); -+ -+ hotcpu_notifier(dev_cpu_callback, 0); -+ dst_init(); -+ dev_mcast_init(); -+ rc = 0; -+out: -+ return rc; -+} -+ -+subsys_initcall(net_dev_init); -+ -+EXPORT_SYMBOL(__dev_get_by_index); -+EXPORT_SYMBOL(__dev_get_by_name); -+EXPORT_SYMBOL(__dev_remove_pack); -+EXPORT_SYMBOL(dev_valid_name); -+EXPORT_SYMBOL(dev_add_pack); -+EXPORT_SYMBOL(dev_alloc_name); -+EXPORT_SYMBOL(dev_close); -+EXPORT_SYMBOL(dev_get_by_flags); -+EXPORT_SYMBOL(dev_get_by_index); -+EXPORT_SYMBOL(dev_get_by_name); -+EXPORT_SYMBOL(dev_open); -+EXPORT_SYMBOL(dev_queue_xmit); -+EXPORT_SYMBOL(dev_remove_pack); -+EXPORT_SYMBOL(dev_set_allmulti); -+EXPORT_SYMBOL(dev_set_promiscuity); -+EXPORT_SYMBOL(dev_change_flags); -+EXPORT_SYMBOL(dev_set_mtu); -+EXPORT_SYMBOL(dev_set_mac_address); 
-+EXPORT_SYMBOL(free_netdev); -+EXPORT_SYMBOL(netdev_boot_setup_check); -+EXPORT_SYMBOL(netdev_set_master); -+EXPORT_SYMBOL(netdev_state_change); -+EXPORT_SYMBOL(netif_receive_skb); -+EXPORT_SYMBOL(netif_rx); -+EXPORT_SYMBOL(register_gifconf); -+EXPORT_SYMBOL(register_netdevice); -+EXPORT_SYMBOL(register_netdevice_notifier); -+EXPORT_SYMBOL(skb_checksum_help); -+EXPORT_SYMBOL(synchronize_net); -+EXPORT_SYMBOL(unregister_netdevice); -+EXPORT_SYMBOL(unregister_netdevice_notifier); -+EXPORT_SYMBOL(net_enable_timestamp); -+EXPORT_SYMBOL(net_disable_timestamp); -+EXPORT_SYMBOL(dev_get_flags); -+ -+#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) -+EXPORT_SYMBOL(br_handle_frame_hook); -+EXPORT_SYMBOL(br_fdb_get_hook); -+EXPORT_SYMBOL(br_fdb_put_hook); -+#endif -+ -+#ifdef CONFIG_KMOD -+EXPORT_SYMBOL(dev_load); -+#endif -+ -+EXPORT_PER_CPU_SYMBOL(softnet_data); + if (netpoll_receive_skb(skb)) diff --unified --recursive --new-file linux-2.6.21.4/net/ring/Kconfig linux-2.6.21.4-1-686-smp-ring3/net/ring/Kconfig --- linux-2.6.21.4/net/ring/Kconfig 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.21.4-1-686-smp-ring3/net/ring/Kconfig 2007-06-10 16:43:04.406423944 +0000 @@ -4028,7 +394,7 @@ diff --unified --recursive --new-file linux-2.6.21.4/net/ring/Makefile linux-2.6 diff --unified --recursive --new-file linux-2.6.21.4/net/ring/ring_packet.c linux-2.6.21.4-1-686-smp-ring3/net/ring/ring_packet.c --- linux-2.6.21.4/net/ring/ring_packet.c 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.21.4-1-686-smp-ring3/net/ring/ring_packet.c 2007-06-10 16:43:04.354421694 +0000 -@@ -0,0 +1,4257 @@ +@@ -0,0 +1,4258 @@ +/* *************************************************************** + * + * (C) 2004-07 - Luca Deri @@ -6264,12 +2630,12 @@ diff --unified --recursive --new-file linux-2.6.21.4/net/ring/ring_packet.c linu + return; + } + -+ BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); -+ BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); ++ BUG_ON(!atomic_read(&sk->sk_rmem_alloc)); ++ 
BUG_ON(!atomic_read(&sk->sk_wmem_alloc)); +#else + -+ BUG_TRAP(atomic_read(&sk->rmem_alloc)==0); -+ BUG_TRAP(atomic_read(&sk->wmem_alloc)==0); ++ BUG_ON(atomic_read(&sk->rmem_alloc)==0); ++ BUG_ON(atomic_read(&sk->wmem_alloc)==0); + + if (!sk->dead) { +#if defined(RING_DEBUG) @@ -6371,7 +2737,7 @@ diff --unified --recursive --new-file linux-2.6.21.4/net/ring/ring_packet.c linu +/* ********************************** */ + +static void ring_proc_init(void) { -+ ring_proc_dir = proc_mkdir("pf_ring", proc_net); ++ ring_proc_dir = proc_mkdir("pf_ring", init_net.proc_net); + + if(ring_proc_dir) { + ring_proc_dir->owner = THIS_MODULE; @@ -6392,7 +2758,7 @@ diff --unified --recursive --new-file linux-2.6.21.4/net/ring/ring_packet.c linu +static void ring_proc_term(void) { + if(ring_proc != NULL) { + remove_proc_entry("info", ring_proc_dir); -+ if(ring_proc_dir != NULL) remove_proc_entry("pf_ring", proc_net); ++ if(ring_proc_dir != NULL) remove_proc_entry("pf_ring", init_net.proc_net); + + printk("PF_RING: deregistered /proc/net/pf_ring\n"); + } @@ -6922,7 +3288,7 @@ diff --unified --recursive --new-file linux-2.6.21.4/net/ring/ring_packet.c linu + skb->data -= displ; + + /* send it */ -+ if (pfr->reflector_dev->xmit_lock_owner != cpu) { ++ if (netdev_get_tx_queue(pfr->reflector_dev, 0)->xmit_lock_owner != cpu) { + /* Patch below courtesy of Matthew J. 
Roth */ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18)) + spin_lock_bh(&pfr->reflector_dev->xmit_lock); @@ -6992,9 +3358,10 @@ diff --unified --recursive --new-file linux-2.6.21.4/net/ring/ring_packet.c linu + + hdr->ts.tv_sec = skb->stamp.tv_sec, hdr->ts.tv_usec = skb->stamp.tv_usec; +#else -+ if(skb->tstamp.off_sec == 0) __net_timestamp(skb); ++ if(skb->tstamp.tv64 == 0) __net_timestamp(skb); + -+ hdr->ts.tv_sec = skb->tstamp.off_sec, hdr->ts.tv_usec = skb->tstamp.off_usec; ++ struct timeval tv = ktime_to_timeval(skb->tstamp); ++ hdr->ts.tv_sec = tv.tv_sec, hdr->ts.tv_usec = tv.tv_usec; +#endif + hdr->caplen = skb->len+displ; + @@ -7341,7 +3708,7 @@ diff --unified --recursive --new-file linux-2.6.21.4/net/ring/ring_packet.c linu + skb.data_len = len, skb.stamp.tv_sec = 0; /* Calculate the time */ +#else + skb.dev = dev, skb.len = len, skb.data = data, -+ skb.data_len = len, skb.tstamp.off_sec = 0; /* Calculate the time */ ++ skb.data_len = len, skb.tstamp.tv64 = 0; /* Calculate the time */ +#endif + + skb_ring_handler(&skb, 1, 0 /* fake skb */); @@ -7351,7 +3718,7 @@ diff --unified --recursive --new-file linux-2.6.21.4/net/ring/ring_packet.c linu + +/* ********************************** */ + -+static int ring_create(struct socket *sock, int protocol) { ++static int ring_create(struct net *net, struct socket *sock, int protocol) { + struct sock *sk; + struct ring_opt *pfr; + int err; @@ -7384,7 +3751,7 @@ diff --unified --recursive --new-file linux-2.6.21.4/net/ring/ring_packet.c linu +#else + // BD: API changed in 2.6.12, ref: + // http://svn.clkao.org/svnweb/linux/revision/?rev=28201 -+ sk = sk_alloc(PF_RING, GFP_ATOMIC, &ring_proto, 1); ++ sk = sk_alloc(net, PF_RING, GFP_ATOMIC, &ring_proto); +#endif +#else + /* Kernel 2.4 */ @@ -7644,7 +4011,7 @@ diff --unified --recursive --new-file linux-2.6.21.4/net/ring/ring_packet.c linu + printk("RING: searching device %s\n", sa->sa_data); +#endif + -+ if((dev = __dev_get_by_name(sa->sa_data)) == NULL) { ++ if((dev = 
__dev_get_by_name(&init_net, sa->sa_data)) == NULL) { +#if defined(RING_DEBUG) + printk("RING: search failed\n"); +#endif @@ -8028,7 +4395,7 @@ diff --unified --recursive --new-file linux-2.6.21.4/net/ring/ring_packet.c linu +#endif + + write_lock(&ring_mgmt_lock); -+ pfr->reflector_dev = dev_get_by_name(devName); ++ pfr->reflector_dev = dev_get_by_name(&init_net, devName); + write_unlock(&ring_mgmt_lock); + +#if defined(RING_DEBUG) diff --git a/kernel-alpha.config b/kernel-alpha.config index cdfec7d2..4ab8e6d2 100644 --- a/kernel-alpha.config +++ b/kernel-alpha.config @@ -1,2355 +1,92 @@ -# -# Automatically generated make config: don't edit -# -CONFIG_ALPHA=y -CONFIG_64BIT=y -CONFIG_MMU=y -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_GENERIC_ISA_DMA=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y -CONFIG_CLEAN_COMPILE=y -# CONFIG_STANDALONE is not set -CONFIG_BROKEN_ON_SMP=y - -# -# General setup -# -CONFIG_LOCALVERSION="" -CONFIG_SWAP=y -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_SYSCTL=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_HOTPLUG=y -CONFIG_KOBJECT_UEVENT=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -# CONFIG_EMBEDDED is not set -CONFIG_KALLSYMS=y -CONFIG_FUTEX=y -CONFIG_EPOLL=y -CONFIG_IOSCHED_NOOP=y -CONFIG_IOSCHED_AS=y -CONFIG_IOSCHED_DEADLINE=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_OBSOLETE_MODPARM=y -# CONFIG_MODVERSIONS is not set -# CONFIG_MODULE_SRCVERSION_ALL is not set -CONFIG_KMOD=y - -# -# System setup -# -CONFIG_ALPHA_GENERIC=y -# CONFIG_ALPHA_ALCOR is not set -# CONFIG_ALPHA_XL is not set -# CONFIG_ALPHA_BOOK1 is not set -# CONFIG_ALPHA_AVANTI_CH is not set -# CONFIG_ALPHA_CABRIOLET is not set -# CONFIG_ALPHA_DP264 is not set -# CONFIG_ALPHA_EB164 is not set -# CONFIG_ALPHA_EB64P_CH is not set -# CONFIG_ALPHA_EB66 is not set -# CONFIG_ALPHA_EB66P is not set -# CONFIG_ALPHA_EIGER is not set -# CONFIG_ALPHA_JENSEN 
is not set -# CONFIG_ALPHA_LX164 is not set -# CONFIG_ALPHA_LYNX is not set -# CONFIG_ALPHA_MARVEL is not set -# CONFIG_ALPHA_MIATA is not set -# CONFIG_ALPHA_MIKASA is not set -# CONFIG_ALPHA_NAUTILUS is not set -# CONFIG_ALPHA_NONAME_CH is not set -# CONFIG_ALPHA_NORITAKE is not set -# CONFIG_ALPHA_PC164 is not set -# CONFIG_ALPHA_P2K is not set -# CONFIG_ALPHA_RAWHIDE is not set -# CONFIG_ALPHA_RUFFIAN is not set -# CONFIG_ALPHA_RX164 is not set -# CONFIG_ALPHA_SX164 is not set -# CONFIG_ALPHA_SABLE is not set -# CONFIG_ALPHA_SHARK is not set -# CONFIG_ALPHA_TAKARA is not set -# CONFIG_ALPHA_TITAN is not set -# CONFIG_ALPHA_WILDFIRE is not set -CONFIG_ISA=y -CONFIG_PCI=y -CONFIG_PCI_DOMAINS=y -CONFIG_ALPHA_CORE_AGP=y -CONFIG_ALPHA_BROKEN_IRQ_MASK=y -CONFIG_EARLY_PRINTK=y -CONFIG_EISA=y -# CONFIG_SMP is not set -# CONFIG_DISCONTIGMEM is not set -# CONFIG_VERBOSE_MCHECK is not set -CONFIG_PCI_LEGACY_PROC=y -CONFIG_PCI_NAMES=y -CONFIG_EISA_PCI_EISA=y -CONFIG_EISA_VIRTUAL_ROOT=y -CONFIG_EISA_NAMES=y - -# -# PCMCIA/CardBus support -# -CONFIG_PCMCIA=m -CONFIG_PCCARD=m -# CONFIG_PCMCIA_DEBUG is not set -# CONFIG_PCMCIA_OBSOLETE is not set -CONFIG_YENTA=m -CONFIG_CARDBUS=y -CONFIG_PD6729=m -CONFIG_I82092=m -CONFIG_I82365=m -CONFIG_TCIC=m -CONFIG_PCMCIA_PROBE=y -CONFIG_SRM_ENV=m -CONFIG_BINFMT_ELF=y -CONFIG_BINFMT_AOUT=m -# CONFIG_OSF4_COMPAT is not set -# CONFIG_BINFMT_EM86 is not set -CONFIG_BINFMT_MISC=m - -# -# Device Drivers -# - -# -# Generic Driver Options -# -CONFIG_FW_LOADER=m -# CONFIG_DEBUG_DRIVER is not set - -# -# Memory Technology Devices (MTD) -# -# CONFIG_MTD is not set - -# -# Parallel port support -# -CONFIG_PARPORT=m -CONFIG_PARPORT_PC=m -CONFIG_PARPORT_PC_CML1=m -CONFIG_PARPORT_SERIAL=m -CONFIG_PARPORT_PC_FIFO=y -CONFIG_PARPORT_PC_SUPERIO=y -CONFIG_PARPORT_PC_PCMCIA=m -CONFIG_PARPORT_PC_PCMCIA=m -# CONFIG_PARPORT_OTHER is not set -CONFIG_PARPORT_1284=y - -# -# Plug and Play support -# -CONFIG_PNP=y -# CONFIG_PNP_DEBUG is not set - -# -# Protocols -# 
-CONFIG_ISAPNP=y -# CONFIG_PNPBIOS is not set - -# -# Block devices -# -CONFIG_BLK_DEV_FD=m -CONFIG_BLK_DEV_XD=m -CONFIG_PARIDE=m -CONFIG_PARIDE_PARPORT=m - -# -# Parallel IDE high-level drivers -# -CONFIG_PARIDE_PD=m -CONFIG_PARIDE_PCD=m -CONFIG_PARIDE_PF=m -CONFIG_PARIDE_PT=m -CONFIG_PARIDE_PG=m - -# -# Parallel IDE protocol modules -# -CONFIG_PARIDE_ATEN=m -CONFIG_PARIDE_BPCK=m -CONFIG_PARIDE_COMM=m -CONFIG_PARIDE_DSTR=m -CONFIG_PARIDE_FIT2=m -CONFIG_PARIDE_FIT3=m -CONFIG_PARIDE_EPAT=m -CONFIG_PARIDE_EPATC8=y -CONFIG_PARIDE_EPIA=m -CONFIG_PARIDE_FRIQ=m -CONFIG_PARIDE_FRPW=m -CONFIG_PARIDE_KBIC=m -CONFIG_PARIDE_KTTI=m -CONFIG_PARIDE_ON20=m -CONFIG_PARIDE_ON26=m -CONFIG_BLK_CPQ_DA=m -CONFIG_BLK_CPQ_CISS_DA=m -CONFIG_CISS_SCSI_TAPE=y -CONFIG_BLK_DEV_DAC960=m -CONFIG_BLK_DEV_UMEM=m -CONFIG_BLK_DEV_LOOP=m -# CONFIG_BLK_DEV_CRYPTOLOOP is not set -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_CARMEL=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_COUNT=16 -CONFIG_BLK_DEV_RAM_SIZE=4096 -CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" - -# -# ATA/ATAPI/MFM/RLL support -# -CONFIG_IDE=y -CONFIG_IDE_MAX_HWIFS=4 -CONFIG_BLK_DEV_IDE=y - -# -# Please see Documentation/ide.txt for help/info on IDE drives -# -CONFIG_BLK_DEV_IDEDISK=y -CONFIG_IDEDISK_MULTI_MODE=y -# CONFIG_IDEDISK_STROKE is not set -CONFIG_BLK_DEV_IDECS=m -CONFIG_BLK_DEV_IDECD=m -CONFIG_BLK_DEV_IDETAPE=m -CONFIG_BLK_DEV_IDEFLOPPY=m -CONFIG_BLK_DEV_IDESCSI=m -# CONFIG_IDE_TASK_IOCTL is not set -CONFIG_IDE_TASKFILE_IO=y - -# -# IDE chipset support/bugfixes -# -CONFIG_IDE_GENERIC=m -CONFIG_BLK_DEV_IDEPNP=y -CONFIG_BLK_DEV_IDEPCI=y -CONFIG_IDEPCI_SHARE_IRQ=y -# CONFIG_BLK_DEV_OFFBOARD is not set -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_OPTI621=m -CONFIG_BLK_DEV_IDEDMA_PCI=y -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -CONFIG_IDEDMA_PCI_AUTO=y -# CONFIG_IDEDMA_ONLYDISK is not set -CONFIG_BLK_DEV_ADMA=y -CONFIG_BLK_DEV_AEC62XX=m -CONFIG_BLK_DEV_ALI15X3=m -# CONFIG_WDC_ALI15X3 is not set -CONFIG_BLK_DEV_AMD74XX=m 
-CONFIG_BLK_DEV_CMD64X=m -CONFIG_BLK_DEV_TRIFLEX=m -CONFIG_BLK_DEV_CY82C693=m -CONFIG_BLK_DEV_CS5520=m -CONFIG_BLK_DEV_CS5530=m -CONFIG_BLK_DEV_HPT34X=m -# CONFIG_HPT34X_AUTODMA is not set -CONFIG_BLK_DEV_HPT366=m -CONFIG_BLK_DEV_SC1200=m -CONFIG_BLK_DEV_PIIX=m -CONFIG_BLK_DEV_NS87415=m -CONFIG_BLK_DEV_PDC202XX_OLD=m -CONFIG_PDC202XX_BURST=y -CONFIG_BLK_DEV_PDC202XX_NEW=m -CONFIG_PDC202XX_FORCE=y -CONFIG_BLK_DEV_SVWKS=m -CONFIG_BLK_DEV_SIIMAGE=m -CONFIG_BLK_DEV_SLC90E66=m -CONFIG_BLK_DEV_TRM290=m -CONFIG_BLK_DEV_VIA82CXXX=y -CONFIG_IDE_CHIPSETS=y - -# -# Note: most of these also require special kernel boot parameters -# -CONFIG_BLK_DEV_4DRIVES=y -CONFIG_BLK_DEV_ALI14XX=m -CONFIG_BLK_DEV_DTC2278=m -CONFIG_BLK_DEV_HT6560B=m -CONFIG_BLK_DEV_PDC4030=m -CONFIG_BLK_DEV_QD65XX=m -CONFIG_BLK_DEV_UMC8672=m -CONFIG_BLK_DEV_IDEDMA=y -CONFIG_IDEDMA_IVB=y -CONFIG_IDEDMA_AUTO=y -# CONFIG_DMA_NONPCI is not set -# CONFIG_BLK_DEV_HD is not set - -# -# SCSI device support -# -CONFIG_SCSI=m -CONFIG_SCSI_PROC_FS=y - -# -# SCSI support type (disk, tape, CD-ROM) -# -CONFIG_BLK_DEV_SD=m -CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m -CONFIG_BLK_DEV_SR=m -CONFIG_BLK_DEV_SR_VENDOR=y -CONFIG_CHR_DEV_SG=m - -# -# Some SCSI devices (e.g. 
CD jukebox) support multiple LUNs -# -# CONFIG_SCSI_MULTI_LUN is not set -CONFIG_SCSI_REPORT_LUNS=y -# CONFIG_SCSI_CONSTANTS is not set -CONFIG_SCSI_LOGGING=y -CONFIG_SCSI_SPI_ATTRS=m -CONFIG_SCSI_FC_ATTRS=m - -# -# SCSI low-level drivers -# -CONFIG_BLK_DEV_3W_XXXX_RAID=m -CONFIG_SCSI_7000FASST=m -CONFIG_SCSI_ACARD=m -# CONFIG_SCSI_AHA1542 is not set -# CONFIG_SCSI_AHA1740 is not set -CONFIG_SCSI_AACRAID=m -CONFIG_SCSI_AIC7XXX=m -CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_RESET_DELAY_MS=15000 -# CONFIG_AIC7XXX_PROBE_EISA_VL is not set -# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set -# CONFIG_AIC7XXX_DEBUG_ENABLE is not set -CONFIG_AIC7XXX_DEBUG_MASK=0 -CONFIG_AIC7XXX_REG_PRETTY_PRINT=y -CONFIG_SCSI_AIC7XXX_OLD=m -CONFIG_SCSI_AIC79XX=m -CONFIG_AIC79XX_CMDS_PER_DEVICE=32 -CONFIG_AIC79XX_RESET_DELAY_MS=15000 -# CONFIG_AIC79XX_BUILD_FIRMWARE is not set -# CONFIG_AIC79XX_ENABLE_RD_STRM is not set -# CONFIG_AIC79XX_DEBUG_ENABLE is not set -CONFIG_AIC79XX_DEBUG_MASK=0 -CONFIG_AIC79XX_REG_PRETTY_PRINT=y -CONFIG_SCSI_ADVANSYS=m -CONFIG_SCSI_IN2000=m -CONFIG_SCSI_MEGARAID=m -CONFIG_MEGARAID_NEWGEN=y -CONFIG_MEGARAID_MM=m -CONFIG_MEGARAID_MAILBOX=m -# CONFIG_SCSI_SATA is not set -CONFIG_SCSI_BUSLOGIC=m -# CONFIG_SCSI_OMIT_FLASHPOINT is not set -# CONFIG_SCSI_CPQFCTS is not set -CONFIG_SCSI_DMX3191D=m -CONFIG_SCSI_DTC3280=m -CONFIG_SCSI_EATA=m -CONFIG_SCSI_EATA_TAGGED_QUEUE=y -CONFIG_SCSI_EATA_LINKED_COMMANDS=y -CONFIG_SCSI_EATA_MAX_TAGS=16 -CONFIG_SCSI_EATA_PIO=m -CONFIG_SCSI_FUTURE_DOMAIN=m -CONFIG_SCSI_GDTH=m -CONFIG_SCSI_GENERIC_NCR5380=m -CONFIG_SCSI_GENERIC_NCR5380_MMIO=m -CONFIG_SCSI_GENERIC_NCR53C400=y -CONFIG_SCSI_IPS=m -CONFIG_SCSI_INITIO=m -CONFIG_SCSI_INIA100=m -CONFIG_SCSI_PPA=m -CONFIG_SCSI_IMM=m -# CONFIG_SCSI_IZIP_EPP16 is not set -# CONFIG_SCSI_IZIP_SLOW_CTR is not set -CONFIG_SCSI_NCR53C406A=m -CONFIG_SCSI_IPR=m -# CONFIG_SCSI_IPR_TRACE is not set -# CONFIG_SCSI_IPR_DUMP is not set -CONFIG_SCSI_SYM53C8XX_2=m -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 
-CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set -CONFIG_SCSI_PAS16=m -CONFIG_SCSI_PSI240I=m -CONFIG_SCSI_QLOGIC_FAS=m -CONFIG_SCSI_QLOGIC_ISP=m -CONFIG_SCSI_QLOGIC_FC=m -CONFIG_SCSI_QLOGIC_FC_FIRMWARE=y -CONFIG_SCSI_QLOGIC_1280=m -CONFIG_SCSI_QLOGIC_1280_1040=y -CONFIG_SCSI_QLA2XXX=m -CONFIG_SCSI_QLA21XX=m -CONFIG_SCSI_QLA22XX=m -CONFIG_SCSI_QLA2300=m -CONFIG_SCSI_QLA2322=m -CONFIG_SCSI_QLA6312=m -CONFIG_SCSI_QLA6322=m -# CONFIG_SCSI_SIM710 is not set -CONFIG_SCSI_SYM53C416=m -CONFIG_SCSI_DC395x=m -# CONFIG_SCSI_DC390T is not set -CONFIG_SCSI_T128=m -CONFIG_SCSI_U14_34F=m -CONFIG_SCSI_U14_34F_TAGGED_QUEUE=y -CONFIG_SCSI_U14_34F_LINKED_COMMANDS=y -CONFIG_SCSI_U14_34F_MAX_TAGS=8 -# CONFIG_SCSI_DEBUG is not set - -# -# PCMCIA SCSI adapter support -# -CONFIG_PCMCIA_FDOMAIN=m -CONFIG_PCMCIA_QLOGIC=m -CONFIG_PCMCIA_SYM53C500=m - -# -# Old CD-ROM drivers (not SCSI, not IDE) -# -CONFIG_CD_NO_IDESCSI=y -CONFIG_AZTCD=m -CONFIG_GSCD=m -CONFIG_SBPCD=m -# CONFIG_MCD is not set -# CONFIG_MCDX is not set -CONFIG_OPTCD=m -CONFIG_CM206=m -CONFIG_SJCD=m -CONFIG_ISP16_CDI=m -CONFIG_CDU31A=m -CONFIG_CDU535=m - -# -# Multi-device support (RAID and LVM) -# -CONFIG_MD=y -CONFIG_BLK_DEV_MD=m -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID10=m -CONFIG_MD_RAID5=m -CONFIG_MD_RAID6=m -CONFIG_MD_MULTIPATH=m -CONFIG_MD_FAULTY=m -CONFIG_BLK_DEV_DM=m -CONFIG_DM_IOCTL_V4=y -CONFIG_DM_CRYPT=m - -# -# Fusion MPT device support -# -CONFIG_FUSION=m -CONFIG_FUSION_MAX_SGE=40 -CONFIG_FUSION_ISENSE=m -CONFIG_FUSION_CTL=m - -# -# IEEE 1394 (FireWire) support -# -CONFIG_IEEE1394=m - -# -# Subsystem Options -# -# CONFIG_IEEE1394_VERBOSEDEBUG is not set -CONFIG_IEEE1394_OUI_DB=y -CONFIG_IEEE1394_EXTRA_CONFIG_ROMS=y -CONFIG_IEEE1394_CONFIG_ROM_IP1394=y - -# -# Device Drivers -# -CONFIG_IEEE1394_PCILYNX=m -CONFIG_IEEE1394_OHCI1394=m - -# -# Protocol Drivers -# -CONFIG_IEEE1394_VIDEO1394=m -CONFIG_IEEE1394_SBP2=m -# 
CONFIG_IEEE1394_SBP2_PHYS_DMA is not set -CONFIG_IEEE1394_ETH1394=m -CONFIG_IEEE1394_DV1394=m -CONFIG_IEEE1394_RAWIO=m -CONFIG_IEEE1394_CMP=m -CONFIG_IEEE1394_AMDTP=m - -# -# I2O device support -# - -# -# Macintosh device drivers -# - -# -# Networking support -# -CONFIG_NET=y - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_MMAP=y -CONFIG_NETLINK_DEV=m -CONFIG_UNIX=y -CONFIG_NET_KEY=m -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -# CONFIG_IP_ADVANCED_ROUTER is not set -# CONFIG_IP_PNP is not set -CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE=m -CONFIG_NET_IPGRE_BROADCAST=y -# CONFIG_IP_MROUTE is not set -# CONFIG_ARPD is not set -# CONFIG_INET_ECN is not set -CONFIG_SYN_COOKIES=y -CONFIG_INET_AH=m -CONFIG_INET_ESP=m -CONFIG_INET_IPCOMP=m -CONFIG_INET_TUNNEL=m -CONFIG_IP_TCPDIAG=m - -# -# IP: Virtual Server Configuration -# -CONFIG_IP_VS=m -# CONFIG_IP_VS_DEBUG is not set -CONFIG_IP_VS_TAB_BITS=12 - -# -# IPVS transport protocol load balancing support -# -CONFIG_IP_VS_PROTO_TCP=y -CONFIG_IP_VS_PROTO_UDP=y -CONFIG_IP_VS_PROTO_ESP=y -CONFIG_IP_VS_PROTO_AH=y - -# -# IPVS scheduler -# -CONFIG_IP_VS_RR=m -CONFIG_IP_VS_WRR=m -CONFIG_IP_VS_LC=m -CONFIG_IP_VS_WLC=m -CONFIG_IP_VS_LBLC=m -CONFIG_IP_VS_LBLCR=m -CONFIG_IP_VS_DH=m -CONFIG_IP_VS_SH=m -CONFIG_IP_VS_SED=m -CONFIG_IP_VS_NQ=m - -# -# IPVS application helper -# -CONFIG_IP_VS_FTP=m -CONFIG_IPV6=m -CONFIG_IPV6_PRIVACY=y -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_TUNNEL=m -CONFIG_DECNET=m -# CONFIG_DECNET_SIOCGIFCONF is not set -# CONFIG_DECNET_ROUTER is not set -CONFIG_BRIDGE=m -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set -CONFIG_BRIDGE_NETFILTER=y - -# -# SCTP Configuration (EXPERIMENTAL) -# -CONFIG_IPV6_SCTP__=m -CONFIG_IP_SCTP=m -# CONFIG_SCTP_DBG_MSG is not set -# CONFIG_SCTP_DBG_OBJCNT is not set -# CONFIG_SCTP_HMAC_NONE is not set -# CONFIG_SCTP_HMAC_SHA1 is not set -CONFIG_SCTP_HMAC_MD5=y -CONFIG_ATM=m -CONFIG_ATM_CLIP=m -CONFIG_ATM_CLIP_NO_ICMP=y -CONFIG_ATM_LANE=m 
-CONFIG_ATM_MPOA=m -CONFIG_ATM_BR2684=m -# CONFIG_ATM_BR2684_IPFILTER is not set -CONFIG_VLAN_8021Q=m -CONFIG_LLC=y -CONFIG_LLC2=m -CONFIG_IPX=m -# CONFIG_IPX_INTERN is not set -CONFIG_ATALK=m -CONFIG_DEV_APPLETALK=y -CONFIG_LTPC=m -CONFIG_COPS=m -CONFIG_COPS_DAYNA=y -CONFIG_COPS_TANGENT=y -CONFIG_IPDDP=m -# CONFIG_IPDDP_ENCAP is not set -# CONFIG_IPDDP_DECAP is not set -CONFIG_X25=m -CONFIG_LAPB=m -# CONFIG_NET_DIVERT is not set -CONFIG_ECONET=m -CONFIG_ECONET_AUNUDP=y -# CONFIG_ECONET_NATIVE is not set -CONFIG_WAN_ROUTER=m -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -CONFIG_NET_SCHED=y -# CONFIG_NET_SCH_CLK_JIFFIES is not set -# CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set -CONFIG_NET_SCH_CLK_CPU=y -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_WRR=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_HFSC=m -CONFIG_NET_SCH_ATM=m -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_ESFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_QOS=y -CONFIG_NET_ESTIMATOR=y -CONFIG_NET_CLS=y -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_ROUTE=y -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_POLICE=y - -# -# Network testing -# -CONFIG_NET_PKTGEN=m -CONFIG_NETDEVICES=y - -# -# ARCnet devices -# -CONFIG_ARCNET=m -CONFIG_ARCNET_1201=m -CONFIG_ARCNET_1051=m -CONFIG_ARCNET_RAW=m -CONFIG_ARCNET_COM90xx=m -CONFIG_ARCNET_COM90xxIO=m -CONFIG_ARCNET_RIM_I=m -CONFIG_ARCNET_COM20020=m -CONFIG_ARCNET_COM20020_ISA=m -CONFIG_ARCNET_COM20020_PCI=m -CONFIG_DUMMY=m -CONFIG_BONDING=m -CONFIG_EQUALIZER=m -CONFIG_IMQ=y -CONFIG_TUN=m -CONFIG_ETHERTAP=m -# CONFIG_NET_SB1000 is not set - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -CONFIG_MII=m -CONFIG_HAPPYMEAL=m -CONFIG_SUNGEM=m -CONFIG_NET_VENDOR_3COM=y -CONFIG_EL1=m -CONFIG_EL2=m -CONFIG_ELPLUS=m -CONFIG_EL16=m -# CONFIG_EL3 is not 
set -CONFIG_3C515=m -CONFIG_VORTEX=m -CONFIG_TYPHOON=m -# CONFIG_LANCE is not set -CONFIG_NET_VENDOR_SMC=y -CONFIG_WD80x3=m -CONFIG_ULTRA=m -# CONFIG_ULTRA32 is not set -CONFIG_SMC9194=m -CONFIG_NET_VENDOR_RACAL=y -CONFIG_NI5010=m -CONFIG_NI52=m -CONFIG_NI65=m - -# -# Tulip family network device support -# -CONFIG_NET_TULIP=y -CONFIG_DE2104X=m -CONFIG_TULIP=m -# CONFIG_TULIP_MWI is not set -# CONFIG_TULIP_MMIO is not set -CONFIG_TULIP_NAPI=y -CONFIG_TULIP_NAPI_HW_MITIGATION=y -CONFIG_DE4X5=m -CONFIG_WINBOND_840=m -CONFIG_DM9102=m -CONFIG_PCMCIA_XIRCOM=m -CONFIG_PCMCIA_XIRTULIP=m -# CONFIG_AT1700 is not set -CONFIG_DEPCA=m -CONFIG_HP100=m -CONFIG_NET_ISA=y -CONFIG_E2100=m -CONFIG_EWRK3=m -# CONFIG_EEXPRESS is not set -CONFIG_EEXPRESS_PRO=m -CONFIG_HPLAN_PLUS=m -CONFIG_HPLAN=m -CONFIG_LP486E=m -CONFIG_ETH16I=m -CONFIG_NE2000=m -CONFIG_ZNET=m -# CONFIG_SEEQ8005 is not set -CONFIG_NET_PCI=y -CONFIG_PCNET32=m -CONFIG_AMD8111_ETH=m -CONFIG_ADAPTEC_STARFIRE=m -# CONFIG_ADAPTEC_STARFIRE_NAPI is not set -CONFIG_AC3200=m -CONFIG_APRICOT=m -CONFIG_B44=m -# CONFIG_FORCEDETH is not set -CONFIG_CS89x0=m -CONFIG_DGRS=m -CONFIG_EEPRO100=m -# CONFIG_EEPRO100_PIO is not set -CONFIG_E100=m -CONFIG_E100_NAPI=y -# CONFIG_LNE390 is not set -CONFIG_FEALNX=m -CONFIG_NATSEMI=m -CONFIG_NE2K_PCI=m -# CONFIG_NE3210 is not set -# CONFIG_ES3210 is not set -CONFIG_8139CP=m -CONFIG_8139TOO=m -# CONFIG_8139TOO_PIO is not set -# CONFIG_8139TOO_TUNE_TWISTER is not set -# CONFIG_8139TOO_8129 is not set -# CONFIG_8139_OLD_RX_RESET is not set -CONFIG_8139_RXBUF_IDX=2 -CONFIG_SIS900=m -CONFIG_EPIC100=m -CONFIG_SUNDANCE=m -# CONFIG_SUNDANCE_MMIO is not set -CONFIG_VIA_RHINE=m -# CONFIG_VIA_RHINE_MMIO is not set -CONFIG_NET_POCKET=y -CONFIG_DE600=m -CONFIG_DE620=m - -# -# Ethernet (1000 Mbit) -# -CONFIG_NET_GIGE=y -CONFIG_ACENIC=m -# CONFIG_ACENIC_OMIT_TIGON_I is not set -CONFIG_DL2K=m -CONFIG_E1000=m -CONFIG_E1000_NAPI=y -CONFIG_NS83820=m -CONFIG_HAMACHI=m -CONFIG_YELLOWFIN=m -CONFIG_R8169=m -# 
CONFIG_R8169_NAPI is not set -CONFIG_SIS190=m -CONFIG_SK98LIN=m -CONFIG_TIGON3=m - -# -# Ethernet (10000 Mbit) -# -CONFIG_IXGB=m -# CONFIG_IXGB_NAPI is not set -CONFIG_FDDI=y -# CONFIG_DEFXX is not set -CONFIG_SKFP=m -CONFIG_HIPPI=y -CONFIG_ROADRUNNER=m -# CONFIG_ROADRUNNER_LARGE_RINGS is not set -CONFIG_PLIP=m -CONFIG_PPP=m -CONFIG_PPP_MULTILINK=y -CONFIG_PPP_FILTER=y -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_BSDCOMP=m -CONFIG_PPP_MPPE=m -CONFIG_PPPOE=m -CONFIG_PPPOATM=m -CONFIG_SLIP=m -CONFIG_SLIP_COMPRESSED=y -CONFIG_SLIP_SMART=y -CONFIG_SLIP_MODE_SLIP6=y - -# -# Wireless LAN (non-hamradio) -# -CONFIG_NET_RADIO=y - -# -# Obsolete Wireless cards support (pre-802.11) -# -CONFIG_STRIP=m -# CONFIG_ARLAN is not set -CONFIG_WAVELAN=m -CONFIG_PCMCIA_WAVELAN=m -CONFIG_PCMCIA_NETWAVE=m - -# -# Wireless 802.11 Frequency Hopping cards support -# -CONFIG_PCMCIA_RAYCS=m - -# -# Wireless 802.11b ISA/PCI cards support -# -CONFIG_AIRO=m -CONFIG_HERMES=m -CONFIG_PLX_HERMES=m -CONFIG_TMD_HERMES=m -CONFIG_PCI_HERMES=m -CONFIG_ATMEL=m -CONFIG_PCI_ATMEL=m - -# -# Wireless 802.11b Pcmcia/Cardbus cards support -# -CONFIG_PCMCIA_HERMES=m -CONFIG_AIRO_CS=m -CONFIG_PCMCIA_ATMEL=m -CONFIG_PCMCIA_WL3501=m -CONFIG_NET_WIRELESS=y -CONFIG_PRISM54=m - -# -# Token Ring devices -# -CONFIG_TR=y -CONFIG_IBMTR=m -CONFIG_IBMOL=m -CONFIG_IBMLS=m -CONFIG_3C359=m -CONFIG_TMS380TR=m -CONFIG_TMSPCI=m -CONFIG_SKISA=m -CONFIG_PROTEON=m -CONFIG_ABYSS=m -# CONFIG_SMCTR is not set -# CONFIG_NET_FC is not set -CONFIG_SHAPER=m -CONFIG_NETCONSOLE=m -CONFIG_NETPOLL_RX=y -CONFIG_NETPOLL_TRAP=y - -# -# Wan interfaces -# -# CONFIG_WAN is not set - -# -# PCMCIA network device support -# -CONFIG_NET_PCMCIA=y -CONFIG_PCMCIA_3C589=m -CONFIG_PCMCIA_3C574=m -CONFIG_PCMCIA_FMVJ18X=m -CONFIG_PCMCIA_PCNET=m -CONFIG_PCMCIA_NMCLAN=m -CONFIG_PCMCIA_SMC91C92=m -CONFIG_PCMCIA_XIRC2PS=m -CONFIG_PCMCIA_AXNET=m -CONFIG_ARCNET_COM20020_CS=m - -# -# ATM drivers -# -CONFIG_ATM_TCP=m 
-CONFIG_ATM_LANAI=m -CONFIG_ATM_ENI=m -# CONFIG_ATM_ENI_DEBUG is not set -# CONFIG_ATM_ENI_TUNE_BURST is not set -CONFIG_ATM_FIRESTREAM=m -# CONFIG_ATM_ZATM is not set -CONFIG_ATM_IDT77252=m -# CONFIG_ATM_IDT77252_DEBUG is not set -# CONFIG_ATM_IDT77252_RCV_ALL is not set -CONFIG_ATM_IDT77252_USE_SUNI=y -CONFIG_ATM_AMBASSADOR=m -# CONFIG_ATM_AMBASSADOR_DEBUG is not set -CONFIG_ATM_HORIZON=m -# CONFIG_ATM_HORIZON_DEBUG is not set -CONFIG_ATM_FORE200E_MAYBE=m -CONFIG_ATM_FORE200E_PCA=y -CONFIG_ATM_FORE200E_PCA_DEFAULT_FW=y -CONFIG_ATM_FORE200E_TX_RETRY=16 -CONFIG_ATM_FORE200E_DEBUG=0 -CONFIG_ATM_FORE200E=m -CONFIG_ATM_HE=m -CONFIG_ATM_HE_USE_SUNI=y - -# -# Amateur Radio support -# -CONFIG_HAMRADIO=y - -# -# Packet Radio protocols -# -CONFIG_AX25=m -# CONFIG_AX25_DAMA_SLAVE is not set -CONFIG_NETROM=m -CONFIG_ROSE=m - -# -# AX.25 network device drivers -# -CONFIG_MKISS=m -CONFIG_6PACK=m -CONFIG_BPQETHER=m -CONFIG_DMASCC=m -CONFIG_SCC=m -# CONFIG_SCC_DELAY is not set -CONFIG_SCC_TRXECHO=y -CONFIG_BAYCOM_SER_FDX=m -CONFIG_BAYCOM_SER_HDX=m -CONFIG_BAYCOM_PAR=m -CONFIG_YAM=m - -# -# IrDA (infrared) support -# -CONFIG_IRDA=m - -# -# IrDA protocols -# -CONFIG_IRLAN=m -CONFIG_IRNET=m -CONFIG_IRCOMM=m -CONFIG_IRDA_ULTRA=y - -# -# IrDA options -# -# CONFIG_IRDA_CACHE_LAST_LSAP is not set -# CONFIG_IRDA_FAST_RR is not set -# CONFIG_IRDA_DEBUG is not set - -# -# Infrared-port device drivers -# - -# -# SIR device drivers -# -CONFIG_IRTTY_SIR=m - -# -# Dongle support -# -CONFIG_DONGLE=y -CONFIG_ESI_DONGLE=m -CONFIG_ACTISYS_DONGLE=m -CONFIG_TEKRAM_DONGLE=m -CONFIG_LITELINK_DONGLE=m -CONFIG_MA600_DONGLE=m -CONFIG_GIRBIL_DONGLE=m -CONFIG_MCP2120_DONGLE=m -CONFIG_OLD_BELKIN_DONGLE=m -CONFIG_ACT200L_DONGLE=m - -# -# Old SIR device drivers -# -CONFIG_IRPORT_SIR=m - -# -# Old Serial dongle support -# -CONFIG_DONGLE_OLD=y -CONFIG_ESI_DONGLE_OLD=m -CONFIG_ACTISYS_DONGLE_OLD=m -CONFIG_TEKRAM_DONGLE_OLD=m -CONFIG_GIRBIL_DONGLE_OLD=m -CONFIG_LITELINK_DONGLE_OLD=m -CONFIG_MCP2120_DONGLE_OLD=m 
-CONFIG_OLD_BELKIN_DONGLE_OLD=m -CONFIG_ACT200L_DONGLE_OLD=m -CONFIG_MA600_DONGLE_OLD=m - -# -# FIR device drivers -# -CONFIG_USB_IRDA=m -CONFIG_SIGMATEL_FIR=m -CONFIG_NSC_FIR=m -CONFIG_WINBOND_FIR=m -CONFIG_TOSHIBA_FIR=m -CONFIG_SMC_IRCC_FIR=m -CONFIG_ALI_FIR=m -CONFIG_VLSI_FIR=m -CONFIG_VIA_FIR=m - -# -# Bluetooth support -# -CONFIG_BT=m -CONFIG_BT_L2CAP=m -CONFIG_BT_SCO=m -CONFIG_BT_RFCOMM=m -CONFIG_BT_RFCOMM_TTY=y -CONFIG_BT_BNEP=m -CONFIG_BT_BNEP_MC_FILTER=y -CONFIG_BT_BNEP_PROTO_FILTER=y - -# -# Bluetooth device drivers -# -CONFIG_BT_HCIUSB=m -CONFIG_BT_HCIUSB_SCO=y -CONFIG_BT_HCIUART=m -CONFIG_BT_HCIUART_H4=y -CONFIG_BT_HCIUART_BCSP=y -CONFIG_BT_HCIUART_BCSP_TXCRC=y -CONFIG_BT_HCIBCM203X=m -CONFIG_BT_HCIBFUSB=m -CONFIG_BT_HCIDTL1=m -CONFIG_BT_HCIBT3C=m -CONFIG_BT_HCIBLUECARD=m -CONFIG_BT_HCIBTUART=m -CONFIG_BT_HCIVHCI=m - -# -# ISDN subsystem -# -# CONFIG_ISDN is not set - -# -# Telephony Support -# -CONFIG_PHONE=m -CONFIG_PHONE_IXJ=m -CONFIG_PHONE_IXJ_PCMCIA=m - -# -# Input device support -# -CONFIG_INPUT=y - -# -# Userland interfaces -# -CONFIG_INPUT_MOUSEDEV=y -CONFIG_INPUT_MOUSEDEV_PSAUX=y -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -CONFIG_INPUT_JOYDEV=m -CONFIG_INPUT_TSDEV=m -CONFIG_INPUT_TSDEV_SCREEN_X=240 -CONFIG_INPUT_TSDEV_SCREEN_Y=320 -CONFIG_INPUT_EVDEV=m -# CONFIG_INPUT_EVBUG is not set - -# -# Input I/O drivers -# -CONFIG_GAMEPORT=m -CONFIG_SOUND_GAMEPORT=m -CONFIG_GAMEPORT_NS558=m -CONFIG_GAMEPORT_L4=m -CONFIG_GAMEPORT_EMU10K1=m -CONFIG_GAMEPORT_VORTEX=m -CONFIG_GAMEPORT_FM801=m -CONFIG_GAMEPORT_CS461x=m -CONFIG_SERIO=y -CONFIG_SERIO_I8042=y -CONFIG_SERIO_SERPORT=m -CONFIG_SERIO_CT82C710=m -CONFIG_SERIO_PARKBD=m -CONFIG_SERIO_PCIPS2=m -CONFIG_SERIO_RAW=m - -# -# Input Device Drivers -# -CONFIG_INPUT_KEYBOARD=y -CONFIG_KEYBOARD_ATKBD=y -CONFIG_KEYBOARD_SUNKBD=m -CONFIG_KEYBOARD_LKKBD=m -CONFIG_KEYBOARD_XTKBD=m -CONFIG_KEYBOARD_NEWTON=m -CONFIG_INPUT_MOUSE=y -CONFIG_MOUSE_PS2=m -CONFIG_MOUSE_SERIAL=m 
-CONFIG_MOUSE_INPORT=m -CONFIG_MOUSE_ATIXL=y -CONFIG_MOUSE_LOGIBM=m -CONFIG_MOUSE_PC110PAD=m -CONFIG_MOUSE_VSXXXAA=m -CONFIG_INPUT_JOYSTICK=y -# CONFIG_JOYSTICK_ANALOG is not set -CONFIG_JOYSTICK_A3D=m -CONFIG_JOYSTICK_ADI=m -CONFIG_JOYSTICK_COBRA=m -CONFIG_JOYSTICK_GF2K=m -CONFIG_JOYSTICK_GRIP=m -CONFIG_JOYSTICK_GRIP_MP=m -CONFIG_JOYSTICK_GUILLEMOT=m -CONFIG_JOYSTICK_INTERACT=m -CONFIG_JOYSTICK_SIDEWINDER=m -CONFIG_JOYSTICK_TMDC=m -CONFIG_JOYSTICK_IFORCE=m -CONFIG_JOYSTICK_IFORCE_USB=y -CONFIG_JOYSTICK_IFORCE_232=y -CONFIG_JOYSTICK_WARRIOR=m -CONFIG_JOYSTICK_MAGELLAN=m -CONFIG_JOYSTICK_SPACEORB=m -CONFIG_JOYSTICK_SPACEBALL=m -CONFIG_JOYSTICK_STINGER=m -CONFIG_JOYSTICK_TWIDDLER=m -CONFIG_JOYSTICK_DB9=m -CONFIG_JOYSTICK_GAMECON=m -CONFIG_JOYSTICK_TURBOGRAFX=m -CONFIG_JOYSTICK_JOYDUMP=m -CONFIG_INPUT_JOYDUMP=m -CONFIG_INPUT_TOUCHSCREEN=y -CONFIG_TOUCHSCREEN_GUNZE=m -CONFIG_INPUT_MISC=y -CONFIG_INPUT_PCSPKR=m -CONFIG_INPUT_UINPUT=m - -# -# Character devices -# -CONFIG_VT=y -CONFIG_VT_CONSOLE=y -CONFIG_HW_CONSOLE=y -CONFIG_SERIAL_NONSTANDARD=y -# CONFIG_COMPUTONE is not set -CONFIG_ROCKETPORT=m -CONFIG_CYCLADES=m -# CONFIG_CYZ_INTR is not set -# CONFIG_DIGIEPCA is not set -# CONFIG_DIGI is not set -# CONFIG_ESPSERIAL is not set -# CONFIG_MOXA_INTELLIO is not set -# CONFIG_MOXA_SMARTIO is not set -# CONFIG_ISI is not set -CONFIG_SYNCLINK=m -CONFIG_SYNCLINKMP=m -CONFIG_N_HDLC=m -# CONFIG_RISCOM8 is not set -# CONFIG_SPECIALIX is not set -# CONFIG_SX is not set -# CONFIG_RIO is not set -CONFIG_STALDRV=y -CONFIG_STALLION=m -CONFIG_ISTALLION=m - -# -# Serial drivers -# -CONFIG_SERIAL_8250=m -CONFIG_SERIAL_8250_CS=m -CONFIG_SERIAL_8250_NR_UARTS=8 -CONFIG_SERIAL_8250_EXTENDED=y -CONFIG_SERIAL_8250_MANY_PORTS=y -CONFIG_SERIAL_8250_SHARE_IRQ=y -CONFIG_SERIAL_8250_DETECT_IRQ=y -CONFIG_SERIAL_8250_MULTIPORT=y -CONFIG_SERIAL_8250_RSA=y - -# -# Non-8250 serial port support -# -CONFIG_SERIAL_CORE=m -CONFIG_UNIX98_PTYS=y -CONFIG_LEGACY_PTYS=y -CONFIG_LEGACY_PTY_COUNT=256 
-CONFIG_PRINTER=m -# CONFIG_LP_CONSOLE is not set -CONFIG_PPDEV=m -# CONFIG_TIPAR is not set - -# -# Mice -# -CONFIG_BUSMOUSE=m -CONFIG_QIC02_TAPE=m -CONFIG_QIC02_DYNCONF=y - -# -# Setting runtime QIC-02 configuration is done with qic02conf -# - -# -# from the tpqic02-support package. It is available at -# - -# -# metalab.unc.edu or ftp://titus.cfw.com/pub/Linux/util/ -# - -# -# IPMI -# -CONFIG_IPMI_HANDLER=m -# CONFIG_IPMI_PANIC_EVENT is not set -CONFIG_IPMI_DEVICE_INTERFACE=m -CONFIG_IPMI_KCS=m -CONFIG_IPMI_WATCHDOG=m -CONFIG_IPMI_POWEROFF=m - -# -# Watchdog Cards -# -CONFIG_WATCHDOG=y -# CONFIG_WATCHDOG_NOWAYOUT is not set - -# -# Watchdog Device Drivers -# -CONFIG_SOFT_WATCHDOG=m - -# -# ISA-based Watchdog Cards -# -CONFIG_PCWATCHDOG=m -CONFIG_MIXCOMWD=m -CONFIG_WDT=m -CONFIG_WDT_501=y -CONFIG_WDT_501_FAN=y - -# -# PCI-based Watchdog Cards -# -CONFIG_PCIPCWATCHDOG=m -# CONFIG_WDTPCI is not set - -# -# USB-based Watchdog Cards -# -CONFIG_USBPCWATCHDOG=m -# CONFIG_NVRAM is not set -CONFIG_RTC=m -CONFIG_GEN_RTC=m -CONFIG_GEN_RTC_X=y -CONFIG_DTLK=m -CONFIG_R3964=m -# CONFIG_APPLICOM is not set - -# -# Ftape, the floppy tape device driver -# -CONFIG_FTAPE=m -CONFIG_ZFTAPE=m -CONFIG_ZFT_DFLT_BLK_SZ=10240 - -# -# The compressor will be built as a module only! 
-# -CONFIG_ZFT_COMPRESSOR=m -CONFIG_FT_NR_BUFFERS=3 -CONFIG_FT_PROC_FS=y -CONFIG_FT_NORMAL_DEBUG=y -# CONFIG_FT_FULL_DEBUG is not set -# CONFIG_FT_NO_TRACE is not set -# CONFIG_FT_NO_TRACE_AT_ALL is not set - -# -# Hardware configuration -# -CONFIG_FT_STD_FDC=y -# CONFIG_FT_MACH2 is not set -# CONFIG_FT_PROBE_FC10 is not set -# CONFIG_FT_ALT_FDC is not set -CONFIG_FT_FDC_THR=8 -CONFIG_FT_FDC_MAX_RATE=2000 -CONFIG_FT_ALPHA_CLOCK=1 -CONFIG_AGP=m -CONFIG_AGP_ALPHA_CORE=m -CONFIG_DRM=y -CONFIG_DRM_TDFX=m -CONFIG_DRM_GAMMA=m -CONFIG_DRM_R128=m -CONFIG_DRM_RADEON=m -CONFIG_DRM_MGA=m -CONFIG_DRM_SIS=m - -# -# PCMCIA character devices -# -CONFIG_SYNCLINK_CS=m -CONFIG_RAW_DRIVER=m -CONFIG_MAX_RAW_DEVS=1024 - -# -# I2C support -# -CONFIG_I2C=m -CONFIG_I2C_CHARDEV=m - -# -# I2C Algorithms -# -CONFIG_I2C_ALGOBIT=m -CONFIG_I2C_ALGOPCF=m -CONFIG_I2C_ALGOPCA=m - -# -# I2C Hardware Bus support -# -# CONFIG_I2C_ALI1535 is not set -# CONFIG_I2C_ALI15X3 is not set -# CONFIG_I2C_AMD756 is not set -# CONFIG_I2C_AMD8111 is not set -CONFIG_I2C_ELEKTOR=m -CONFIG_I2C_ELV=m -# CONFIG_I2C_I801 is not set -# CONFIG_I2C_I810 is not set -CONFIG_I2C_ISA=m -# CONFIG_I2C_NFORCE2 is not set -CONFIG_I2C_PHILIPSPAR=m -CONFIG_I2C_PARPORT=m -CONFIG_I2C_PARPORT_LIGHT=m -# CONFIG_I2C_PROSAVAGE is not set -# CONFIG_I2C_SAVAGE4 is not set -CONFIG_SCx200_ACB=m -CONFIG_I2C_SIS5595=m -CONFIG_I2C_SIS630=m -CONFIG_I2C_SIS96X=m -CONFIG_I2C_STUB=m -CONFIG_I2C_VELLEMAN=m -# CONFIG_I2C_VIA is not set -# CONFIG_I2C_VIAPRO is not set -CONFIG_I2C_VOODOO3=m -# CONFIG_I2C_PCA_ISA is not set - -# -# I2C Hardware Sensors Chip support -# -CONFIG_I2C_SENSOR=m -CONFIG_SENSORS_ADM1021=m -CONFIG_SENSORS_ADM1025=m -CONFIG_SENSORS_ADM1026=m -CONFIG_SENSORS_ADM1031=m -CONFIG_SENSORS_ASB100=m -CONFIG_SENSORS_DS1621=m -CONFIG_SENSORS_EEPROM=m -CONFIG_SENSORS_FSCHER=m -CONFIG_SENSORS_GL518SM=m -CONFIG_SENSORS_IT87=m -CONFIG_SENSORS_LM63=m -CONFIG_SENSORS_LM75=m -CONFIG_SENSORS_LM77=m -CONFIG_SENSORS_LM78=m -CONFIG_SENSORS_LM80=m 
-CONFIG_SENSORS_LM83=m -CONFIG_SENSORS_LM85=m -CONFIG_SENSORS_LM87=m -CONFIG_SENSORS_LM90=m -CONFIG_SENSORS_MAX1619=m -CONFIG_SENSORS_PC87360=m -CONFIG_SENSORS_SMSC47M1=m -# CONFIG_SENSORS_VIA686A is not set -CONFIG_SENSORS_W83781D=m -CONFIG_SENSORS_W83L785TS=m -CONFIG_SENSORS_W83627HF=m -# CONFIG_I2C_DEBUG_CORE is not set -# CONFIG_I2C_DEBUG_ALGO is not set -# CONFIG_I2C_DEBUG_BUS is not set -# CONFIG_I2C_DEBUG_CHIP is not set - -# -# Dallas's 1-wire bus -# -CONFIG_W1=m -CONFIG_W1_MATROX=m -CONFIG_W1_DS9490=m -CONFIG_W1_DS9490_BRIDGE=m -CONFIG_W1_DS9490R_BRIDGE=m -CONFIG_W1_THERM=m -CONFIG_W1_SMEM=m - -# -# Misc devices -# - -# -# Multimedia devices -# -CONFIG_VIDEO_DEV=m - -# -# Video For Linux -# - -# -# Video Adapters -# -CONFIG_VIDEO_BT848=m -CONFIG_VIDEO_PMS=m -CONFIG_VIDEO_BWQCAM=m -CONFIG_VIDEO_CQCAM=m -CONFIG_VIDEO_W9966=m -CONFIG_VIDEO_CPIA=m -CONFIG_VIDEO_CPIA_PP=m -CONFIG_VIDEO_CPIA_USB=m -CONFIG_VIDEO_SAA5246A=m -CONFIG_VIDEO_SAA5249=m -CONFIG_TUNER_3036=m -CONFIG_VIDEO_STRADIS=m -CONFIG_VIDEO_ZORAN=m -CONFIG_VIDEO_ZORAN_BUZ=m -CONFIG_VIDEO_ZORAN_DC10=m -CONFIG_VIDEO_ZORAN_DC30=m -CONFIG_VIDEO_ZORAN_LML33=m -CONFIG_VIDEO_ZORAN_LML33R10=m -CONFIG_VIDEO_SAA7134=m -CONFIG_VIDEO_MXB=m -CONFIG_VIDEO_DPC=m -# CONFIG_VIDEO_HEXIUM_ORION is not set -# CONFIG_VIDEO_HEXIUM_GEMINI is not set -CONFIG_VIDEO_CX88=m - -# -# Radio Adapters -# -CONFIG_RADIO_CADET=m -CONFIG_RADIO_RTRACK=m -CONFIG_RADIO_RTRACK2=m -CONFIG_RADIO_AZTECH=m -CONFIG_RADIO_GEMTEK=m -CONFIG_RADIO_GEMTEK_PCI=m -CONFIG_RADIO_MAXIRADIO=m -CONFIG_RADIO_MAESTRO=m -CONFIG_RADIO_MIROPCM20=m -CONFIG_RADIO_MIROPCM20_RDS=m -CONFIG_RADIO_SF16FMI=m -CONFIG_RADIO_SF16FMR2=m -CONFIG_RADIO_TERRATEC=m -CONFIG_RADIO_TRUST=m -CONFIG_RADIO_TYPHOON=m -CONFIG_RADIO_TYPHOON_PROC_FS=y -CONFIG_RADIO_ZOLTRIX=m - -# -# Digital Video Broadcasting Devices -# -CONFIG_DVB=y -CONFIG_DVB_CORE=m - -# -# Supported Frontend Modules -# -CONFIG_DVB_TWINHAN_DST=m -CONFIG_DVB_STV0299=m -CONFIG_DVB_SP887X=m 
-CONFIG_DVB_SP887X_FIRMWARE_FILE="/etc/dvb/sc_main.mc" -CONFIG_DVB_ALPS_TDLB7=m -CONFIG_DVB_ALPS_TDMB7=m -CONFIG_DVB_CX22702=m -CONFIG_DVB_ATMEL_AT76C651=m -CONFIG_DVB_CX24110=m -CONFIG_DVB_TDA8083=m -CONFIG_DVB_TDA80XX=m -CONFIG_DVB_GRUNDIG_29504_491=m -CONFIG_DVB_GRUNDIG_29504_401=m -# CONFIG_DVB_MT312 is not set -CONFIG_DVB_VES1820=m -CONFIG_DVB_VES1X93=m -# CONFIG_DVB_TDA1004X is not set -CONFIG_DVB_NXT6000=m -CONFIG_DVB_MT352=m -CONFIG_DVB_DIB3000MB=m - -# -# Supported SAA7146 based PCI Adapters -# -CONFIG_DVB_AV7110=m -# CONFIG_DVB_AV7110_FIRMWARE is not set -CONFIG_DVB_AV7110_OSD=y -CONFIG_DVB_BUDGET=m -CONFIG_DVB_BUDGET_CI=m -CONFIG_DVB_BUDGET_AV=m -CONFIG_DVB_BUDGET_PATCH=m - -# -# Supported USB Adapters -# -# CONFIG_DVB_TTUSB_BUDGET is not set -# CONFIG_DVB_TTUSB_DEC is not set - -# -# Supported FlexCopII (B2C2) Adapters -# -CONFIG_DVB_B2C2_SKYSTAR=m -CONFIG_DVB_B2C2_USB=m - -# -# Supported BT878 Adapters -# -CONFIG_DVB_BT8XX=m -CONFIG_VIDEO_SAA7146=m -CONFIG_VIDEO_SAA7146_VV=m -CONFIG_VIDEO_VIDEOBUF=m -CONFIG_VIDEO_TUNER=m -CONFIG_VIDEO_BUF=m -CONFIG_VIDEO_BTCX=m -CONFIG_VIDEO_IR=m - -# -# Graphics support -# -CONFIG_FB=y -CONFIG_FB_PM2=m -CONFIG_FB_PM2_FIFO_DISCONNECT=y -CONFIG_FB_CYBER2000=m -# CONFIG_FB_IMSTT is not set -# CONFIG_FB_TGA is not set -CONFIG_FB_RIVA=m -CONFIG_FB_RIVA_I2C=y -# CONFIG_FB_RIVA_DEBUG is not set -CONFIG_FB_MATROX=m -CONFIG_FB_MATROX_MILLENIUM=y -CONFIG_FB_MATROX_MYSTIQUE=y -CONFIG_FB_MATROX_G450=y -CONFIG_FB_MATROX_G100=y -CONFIG_FB_MATROX_I2C=m -CONFIG_FB_MATROX_MAVEN=m -CONFIG_FB_MATROX_MULTIHEAD=y -CONFIG_FB_RADEON_OLD=m -CONFIG_FB_RADEON=m -CONFIG_FB_RADEON_I2C=y -# CONFIG_FB_RADEON_DEBUG is not set -CONFIG_FB_ATY128=m -CONFIG_FB_ATY=m -CONFIG_FB_ATY_CT=y -CONFIG_FB_ATY_GENERIC_LCD=y -CONFIG_FB_ATY_GX=y -CONFIG_FB_SAVAGE=m -CONFIG_FB_SAVAGE_I2C=m -CONFIG_FB_SAVAGE_ACCEL=m -# CONFIG_FB_ATY_XL_INIT is not set -CONFIG_FB_SIS=m -CONFIG_FB_SIS_300=y -CONFIG_FB_SIS_315=y -CONFIG_FB_NEOMAGIC=m -CONFIG_FB_KYRO=m -CONFIG_FB_3DFX=m 
-# CONFIG_FB_3DFX_ACCEL is not set -CONFIG_FB_VOODOO1=m -CONFIG_FB_TRIDENT=m -# CONFIG_FB_TRIDENT_ACCEL is not set -CONFIG_FB_VIRTUAL=m - -# -# Console display driver support -# -CONFIG_VGA_CONSOLE=y -CONFIG_MDA_CONSOLE=m -CONFIG_DUMMY_CONSOLE=y -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_PCI_CONSOLE=y -# CONFIG_FONTS is not set -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y - -# -# Logo configuration -# -CONFIG_LOGO=y -# CONFIG_LOGO_LINUX_MONO is not set -# CONFIG_LOGO_LINUX_VGA16 is not set -CONFIG_LOGO_LINUX_CLUT224=y -# CONFIG_LOGO_DEC_CLUT224 is not set - -# -# Bootsplash configuration -# -# CONFIG_BOOTSPLASH is not set - -# -# Sound -# -CONFIG_SOUND=m - -# -# Advanced Linux Sound Architecture -# -CONFIG_SND=m -CONFIG_SND_SEQUENCER=m -CONFIG_SND_SEQ_DUMMY=m -CONFIG_SND_OSSEMUL=y -CONFIG_SND_MIXER_OSS=m -CONFIG_SND_PCM_OSS=m -CONFIG_SND_SEQUENCER_OSS=y -CONFIG_SND_RTCTIMER=m -# CONFIG_SND_VERBOSE_PRINTK is not set -# CONFIG_SND_DEBUG is not set - -# -# Generic devices -# -# CONFIG_SND_DUMMY is not set -CONFIG_SND_VIRMIDI=m -CONFIG_SND_MTPAV=m -CONFIG_SND_SERIAL_U16550=m -CONFIG_SND_MPU401=m - -# -# ISA devices -# -CONFIG_SND_AD1816A=m -CONFIG_SND_AD1848=m -CONFIG_SND_CS4231=m -CONFIG_SND_CS4232=m -CONFIG_SND_CS4236=m -CONFIG_SND_ES968=m -CONFIG_SND_ES1688=m -CONFIG_SND_ES18XX=m -CONFIG_SND_GUSCLASSIC=m -CONFIG_SND_GUSEXTREME=m -CONFIG_SND_GUSMAX=m -CONFIG_SND_INTERWAVE=m -CONFIG_SND_INTERWAVE_STB=m -CONFIG_SND_OPTI92X_AD1848=m -CONFIG_SND_OPTI92X_CS4231=m -CONFIG_SND_OPTI93X=m -CONFIG_SND_SB8=m -CONFIG_SND_SB16=m -CONFIG_SND_SBAWE=m -CONFIG_SND_SB16_CSP=y -CONFIG_SND_WAVEFRONT=m -CONFIG_SND_ALS100=m -CONFIG_SND_AZT2320=m -CONFIG_SND_CMI8330=m -CONFIG_SND_DT019X=m -CONFIG_SND_OPL3SA2=m -CONFIG_SND_SGALAXY=m -CONFIG_SND_SSCAPE=m - -# -# PCI devices -# -CONFIG_SND_ALI5451=m -CONFIG_SND_ATIIXP=m -CONFIG_SND_ATIIXP_MODEM=m -CONFIG_SND_AU8810=m -CONFIG_SND_AU8820=m -CONFIG_SND_AU8830=m -CONFIG_SND_AZT3328=m -CONFIG_SND_BT87X=m -CONFIG_SND_BT87X_OVERCLOCK=y -CONFIG_SND_CS46XX=m 
-CONFIG_SND_CS46XX_NEW_DSP=y -CONFIG_SND_CS4281=m -CONFIG_SND_EMU10K1=m -CONFIG_SND_KORG1212=m -CONFIG_SND_MIXART=m -CONFIG_SND_NM256=m -CONFIG_SND_RME32=m -CONFIG_SND_RME96=m -CONFIG_SND_RME9652=m -CONFIG_SND_HDSP=m -CONFIG_SND_TRIDENT=m -CONFIG_SND_YMFPCI=m -CONFIG_SND_ALS4000=m -CONFIG_SND_CMIPCI=m -CONFIG_SND_ENS1370=m -CONFIG_SND_ENS1371=m -CONFIG_SND_ES1938=m -CONFIG_SND_ES1968=m -CONFIG_SND_MAESTRO3=m -CONFIG_SND_FM801=m -CONFIG_SND_FM801_TEA575X=m -CONFIG_SND_ICE1712=m -CONFIG_SND_ICE1724=m -CONFIG_SND_INTEL8X0=m -CONFIG_SND_INTEL8X0M=m -CONFIG_SND_SONICVIBES=m -CONFIG_SND_VIA82XX=m -CONFIG_SND_VX222=m - -# -# ALSA USB devices -# -CONFIG_SND_USB_AUDIO=m -CONFIG_SND_USB_USX2Y=m - -# -# PCMCIA devices -# -CONFIG_SND_VXPOCKET=m -CONFIG_SND_VXP440=m -# CONFIG_SND_PDAUDIOCF is not set - -# -# Open Sound System -# -CONFIG_SOUND_PRIME=m -CONFIG_SOUND_BT878=m -CONFIG_SOUND_CMPCI=m -# CONFIG_SOUND_CMPCI_FM is not set -# CONFIG_SOUND_CMPCI_MIDI is not set -CONFIG_SOUND_CMPCI_JOYSTICK=y -CONFIG_SOUND_CMPCI_CM8738=y -# CONFIG_SOUND_CMPCI_SPDIFINVERSE is not set -# CONFIG_SOUND_CMPCI_SPDIFLOOP is not set -CONFIG_SOUND_CMPCI_SPEAKERS=2 -CONFIG_SOUND_EMU10K1=m -# CONFIG_MIDI_EMU10K1 is not set -CONFIG_SOUND_FUSION=m -CONFIG_SOUND_CS4281=m -CONFIG_SOUND_ES1370=m -CONFIG_SOUND_ES1371=m -CONFIG_SOUND_ESSSOLO1=m -CONFIG_SOUND_MAESTRO=m -CONFIG_SOUND_MAESTRO3=m -CONFIG_SOUND_ICH=m -CONFIG_SOUND_SONICVIBES=m -CONFIG_SOUND_TRIDENT=m -# CONFIG_SOUND_MSNDCLAS is not set -# CONFIG_SOUND_MSNDPIN is not set -CONFIG_SOUND_VIA82CXXX=m -# CONFIG_MIDI_VIA82CXXX is not set -CONFIG_SOUND_OSS=m -# CONFIG_SOUND_TRACEINIT is not set -# CONFIG_SOUND_DMAP is not set -CONFIG_SOUND_AD1816=m -CONFIG_SOUND_AD1889=m -CONFIG_SOUND_SGALAXY=m -CONFIG_SOUND_ADLIB=m -CONFIG_SOUND_ACI_MIXER=m -CONFIG_SOUND_CS4232=m -CONFIG_SOUND_SSCAPE=m -CONFIG_SOUND_GUS=m -# CONFIG_SOUND_GUS16 is not set -CONFIG_SOUND_GUSMAX=y -CONFIG_SOUND_VMIDI=m -CONFIG_SOUND_TRIX=m -CONFIG_SOUND_MSS=m -CONFIG_SOUND_MPU401=m 
-CONFIG_SOUND_NM256=m -CONFIG_SOUND_MAD16=m -# CONFIG_MAD16_OLDCARD is not set -CONFIG_SOUND_PAS=m -CONFIG_SOUND_PSS=m -CONFIG_PSS_MIXER=y -# CONFIG_PSS_HAVE_BOOT is not set -CONFIG_SOUND_SB=m -CONFIG_SOUND_AWE32_SYNTH=m -CONFIG_SOUND_WAVEFRONT=m -CONFIG_SOUND_MAUI=m -CONFIG_SOUND_YM3812=m -CONFIG_SOUND_OPL3SA1=m -CONFIG_SOUND_OPL3SA2=m -CONFIG_SOUND_YMFPCI=m -# CONFIG_SOUND_YMFPCI_LEGACY is not set -CONFIG_SOUND_UART6850=m -CONFIG_SOUND_AEDSP16=m -CONFIG_SC6600=y -CONFIG_SC6600_JOY=y -CONFIG_SC6600_CDROM=4 -CONFIG_SC6600_CDROMBASE=0x0 -# CONFIG_AEDSP16_MSS is not set -CONFIG_AEDSP16_SBPRO=y -CONFIG_AEDSP16_MPU401=y -CONFIG_SOUND_TVMIXER=m -CONFIG_SOUND_KAHLUA=m -CONFIG_SOUND_ALI5455=m -CONFIG_SOUND_FORTE=m -CONFIG_SOUND_RME96XX=m -CONFIG_SOUND_AD1980=m - -# -# USB support -# -CONFIG_USB=m -# CONFIG_USB_DEBUG is not set - -# -# Miscellaneous USB options -# -CONFIG_USB_DEVICEFS=y -CONFIG_USB_BANDWIDTH=y -CONFIG_USB_DYNAMIC_MINORS=y -CONFIG_USB_SUSPEND=y - -# -# USB Host Controller Drivers -# -CONFIG_USB_EHCI_HCD=m -CONFIG_USB_EHCI_SPLIT_ISO=y -CONFIG_USB_OHCI_HCD=m -CONFIG_USB_UHCI_HCD=m -CONFIG_USB_SL811_HCD=m - -# -# USB Device Class drivers -# -CONFIG_USB_AUDIO=m - -# -# USB Bluetooth TTY can only be used with disabled Bluetooth subsystem -# -CONFIG_USB_MIDI=m -CONFIG_USB_ACM=m -CONFIG_USB_PRINTER=m -CONFIG_USB_STORAGE=m -# CONFIG_USB_STORAGE_DEBUG is not set -CONFIG_USB_STORAGE_DATAFAB=y -CONFIG_USB_STORAGE_FREECOM=y -CONFIG_USB_STORAGE_ISD200=y -CONFIG_USB_STORAGE_DPCM=y -CONFIG_USB_STORAGE_HP8200e=y -CONFIG_USB_STORAGE_SDDR09=y -CONFIG_USB_STORAGE_SDDR55=y -CONFIG_USB_STORAGE_JUMPSHOT=y - -# -# USB Human Interface Devices (HID) -# -CONFIG_USB_HID=m -CONFIG_USB_HIDINPUT=y -CONFIG_HID_FF=y -CONFIG_HID_PID=y -CONFIG_LOGITECH_FF=y -CONFIG_THRUSTMASTER_FF=y -CONFIG_USB_HIDDEV=y - -# -# USB HID Boot Protocol drivers -# -CONFIG_USB_KBD=m -CONFIG_USB_MOUSE=m -CONFIG_USB_AIPTEK=m -CONFIG_USB_WACOM=m -CONFIG_USB_KBTAB=m -CONFIG_USB_POWERMATE=m -CONFIG_USB_MTOUCH=m 
-CONFIG_USB_XPAD=m -CONFIG_USB_ATI_REMOTE=m -# -# USB Imaging devices -# -CONFIG_USB_MDC800=m -CONFIG_USB_MICROTEK=m -CONFIG_USB_HPUSBSCSI=m - -# -# USB Multimedia devices -# -CONFIG_USB_DABUSB=m -CONFIG_USB_VICAM=m -CONFIG_USB_DSBR=m -CONFIG_USB_IBMCAM=m -CONFIG_USB_KONICAWC=m -CONFIG_USB_OV511=m -CONFIG_USB_PWC=m -CONFIG_USB_SE401=m -CONFIG_USB_SN9C102=m -CONFIG_USB_STV680=m -CONFIG_USB_W9968CF=m - -# -# USB Network adaptors -# -CONFIG_USB_CATC=m -CONFIG_USB_KAWETH=m -CONFIG_USB_PEGASUS=m -CONFIG_USB_RTL8150=m -CONFIG_USB_USBNET=m - -# -# USB Host-to-Host Cables -# -CONFIG_USB_ALI_M5632=y -CONFIG_USB_AN2720=y -CONFIG_USB_BELKIN=y -CONFIG_USB_GENESYS=y -CONFIG_USB_NET1080=y -CONFIG_USB_PL2301=y -CONFIG_USB_KC2190=y - -# -# Intelligent USB Devices/Gadgets -# -CONFIG_USB_ARMLINUX=y -CONFIG_USB_EPSON2888=y -CONFIG_USB_ZAURUS=y -CONFIG_USB_CDCETHER=y - -# -# USB Network Adapters -# -CONFIG_USB_AX8817X=y - -# -# USB port drivers -# -CONFIG_USB_USS720=m - -# -# USB Serial Converter support -# -CONFIG_USB_SERIAL=m -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_BELKIN=m -# CONFIG_USB_SERIAL_WHITEHEAT is not set -CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m -CONFIG_USB_SERIAL_CYPRESS_M8=m -CONFIG_USB_SERIAL_EMPEG=m -CONFIG_USB_SERIAL_FTDI_SIO=m -CONFIG_USB_SERIAL_VISOR=m -CONFIG_USB_SERIAL_IPAQ=m -CONFIG_USB_SERIAL_IR=m -CONFIG_USB_SERIAL_EDGEPORT=m -CONFIG_USB_SERIAL_EDGEPORT_TI=m -CONFIG_USB_SERIAL_IPW=m -CONFIG_USB_SERIAL_KEYSPAN_PDA=m -CONFIG_USB_SERIAL_KEYSPAN=m -CONFIG_USB_SERIAL_KEYSPAN_MPR=y -CONFIG_USB_SERIAL_KEYSPAN_USA28=y -CONFIG_USB_SERIAL_KEYSPAN_USA28X=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y -CONFIG_USB_SERIAL_KEYSPAN_USA19=y -CONFIG_USB_SERIAL_KEYSPAN_USA18X=y -CONFIG_USB_SERIAL_KEYSPAN_USA19W=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y -CONFIG_USB_SERIAL_KEYSPAN_USA49W=y -CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y -CONFIG_USB_SERIAL_KLSI=m -CONFIG_USB_SERIAL_KOBIL_SCT=m -CONFIG_USB_SERIAL_MCT_U232=m 
-CONFIG_USB_SERIAL_PL2303=m -CONFIG_USB_SERIAL_SAFE=m -CONFIG_USB_SERIAL_SAFE_PADDED=y -CONFIG_USB_SERIAL_CYBERJACK=m -CONFIG_USB_SERIAL_XIRCOM=m -CONFIG_USB_SERIAL_OMNINET=m -CONFIG_USB_EZUSB=y - -# -# USB Miscellaneous drivers -# -CONFIG_USB_EMI62=m -CONFIG_USB_EMI26=m -CONFIG_USB_TIGL=m -CONFIG_USB_AUERSWALD=m -CONFIG_USB_RIO500=m -CONFIG_USB_LEGOTOWER=m -CONFIG_USB_BRLVGER=m -CONFIG_USB_LCD=m -CONFIG_USB_LED=m -CONFIG_USB_SPEEDTOUCH=m -CONFIG_USB_TEST=m - -# -# USB Gadget Support -# -# CONFIG_USB_GADGET is not set - -# -# MMC/SD Card support -# -CONFIG_MMC=m -# CONFIG_MMC_DEBUG is not set -CONFIG_MMC_BLOCK=m -CONFIG_MMC_WBSD=m - -# -# File systems - -# -CONFIG_EXT2_FS=m -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=m -CONFIG_EXT3_FS_XATTR=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y -CONFIG_JBD=m -# CONFIG_JBD_DEBUG is not set -CONFIG_FS_MBCACHE=m -CONFIG_REISERFS_FS=m -# CONFIG_REISERFS_CHECK is not set -# CONFIG_REISERFS_PROC_INFO is not set -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y -CONFIG_JFS_FS=m -CONFIG_JFS_POSIX_ACL=y -# CONFIG_JFS_DEBUG is not set -# CONFIG_JFS_STATISTICS is not set -CONFIG_FS_POSIX_ACL=y -CONFIG_XFS_FS=m -# CONFIG_XFS_RT is not set -CONFIG_XFS_QUOTA=y -CONFIG_XFS_SECURITY=y -CONFIG_XFS_POSIX_ACL=y -CONFIG_MINIX_FS=m -CONFIG_ROMFS_FS=y -CONFIG_QUOTA=y -CONFIG_QFMT_V1=m -CONFIG_QFMT_V2=m -CONFIG_QUOTACTL=y -CONFIG_AUTOFS_FS=m -CONFIG_AUTOFS4_FS=m - -# -# CD-ROM/DVD Filesystems -# -CONFIG_ISO9660_FS=m -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_ZISOFS_FS=m -CONFIG_UDF_FS=m - -# -# DOS/FAT/NT Filesystems -# -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_VFAT_FS=m -# CONFIG_UMSDOS_FS is not set -CONFIG_NTFS_FS=m -# CONFIG_NTFS_DEBUG is not set -CONFIG_NTFS_RW=y - -# -# Pseudo filesystems -# -CONFIG_PROC_FS=y -CONFIG_PROC_KCORE=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVFS_MOUNT is not set -# CONFIG_DEVFS_DEBUG is not set -# 
CONFIG_DEVPTS_FS_XATTR is not set -CONFIG_TMPFS=y -CONFIG_TMPFS_XATTR=y -CONFIG_TMPFS_SECURITY=y -CONFIG_SUPERMOUNT=m -CONFIG_SUPERMOUNT_DEBUG=n -# CONFIG_HUGETLB_PAGE is not set -CONFIG_RAMFS=y - -# -# Miscellaneous filesystems -# -CONFIG_ADFS_FS=m -CONFIG_ADFS_FS_RW=y -CONFIG_AFFS_FS=m -CONFIG_HFS_FS=m -CONFIG_HFSPLUS_FS=m -CONFIG_BEFS_FS=m -# CONFIG_BEFS_DEBUG is not set -CONFIG_BFS_FS=m -CONFIG_EFS_FS=m -CONFIG_CRAMFS=m -CONFIG_SQUASHFS=m -CONFIG_VXFS_FS=m -CONFIG_HPFS_FS=m -CONFIG_QNX4FS_FS=m -CONFIG_QNX4FS_RW=y -CONFIG_SYSV_FS=m -CONFIG_UFS_FS=m -CONFIG_UFS_FS_WRITE=y - -# -# Network File Systems -# -CONFIG_NFS_FS=m -CONFIG_NFS_V3=y -CONFIG_NFS_V4=y -CONFIG_NFS_DIRECTIO=y -CONFIG_NFSD=m -CONFIG_NFSD_V3=y -CONFIG_NFSD_V4=y -CONFIG_NFSD_TCP=y -CONFIG_LOCKD=m -CONFIG_LOCKD_V4=y -CONFIG_EXPORTFS=m -CONFIG_SUNRPC=m -CONFIG_SUNRPC_GSS=m -CONFIG_RPCSEC_GSS_KRB5=m -CONFIG_RPCSEC_GSS_SPKM3=m -CONFIG_SMB_FS=m -CONFIG_SMB_NLS_DEFAULT=y -CONFIG_SMB_NLS_REMOTE="iso8859-2" -CONFIG_CIFS=m -CONFIG_CIFS_STATS=n -CONFIG_CIFS_XATTR=y -CONFIG_NCP_FS=m -CONFIG_NCPFS_PACKET_SIGNING=y -CONFIG_NCPFS_IOCTL_LOCKING=y -CONFIG_NCPFS_STRONG=y -CONFIG_NCPFS_NFS_NS=y -CONFIG_NCPFS_OS2_NS=y -CONFIG_NCPFS_SMALLDOS=y -CONFIG_NCPFS_NLS=y -CONFIG_NCPFS_EXTRAS=y -CONFIG_CODA_FS=m -# CONFIG_CODA_FS_OLD_API is not set -CONFIG_INTERMEZZO_FS=m -CONFIG_AFS_FS=m -CONFIG_LOCK_HARNESS=m -CONFIG_GFS_FS=m -CONFIG_LOCK_NOLOCK=m -CONFIG_LOCK_DLM=m -CONFIG_LOCK_GULM=m -CONFIG_RXRPC=m - - -# -# Cluster Support -# -CONFIG_CLUSTER=m -CONFIG_CLUSTER_DLM=m -CONFIG_CLUSTER_DLM_PROCLOCKS=y - -# -# Partition Types -# -# CONFIG_PARTITION_ADVANCED is not set -CONFIG_OSF_PARTITION=y -CONFIG_MSDOS_PARTITION=y - -# -# Native Language Support -# -CONFIG_NLS=y -CONFIG_NLS_DEFAULT="iso8859-2" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m 
-CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m -CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -CONFIG_NLS_CODEPAGE_1250=m -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_UTF8=m - -# -# Profiling support -# -CONFIG_PROFILING=y -CONFIG_OPROFILE=m - -# -# Kernel hacking -# -CONFIG_ALPHA_LEGACY_START_ADDRESS=y -CONFIG_DEBUG_KERNEL=y -CONFIG_MATHEMU=y -# CONFIG_DEBUG_SLAB is not set -CONFIG_MAGIC_SYSRQ=y -# CONFIG_DEBUG_SPINLOCK is not set -# CONFIG_DEBUG_RWLOCK is not set -# CONFIG_DEBUG_SEMAPHORE is not set -# CONFIG_DEBUG_INFO is not set - -# -# Security options -# -CONFIG_SECURITY=y -CONFIG_SECURITY_NETWORK=y -CONFIG_SECURITY_CAPABILITIES=m -CONFIG_SECURITY_ROOTPLUG=m -CONFIG_SECURITY_SECLVL=m -CONFIG_SECURITY_SELINUX=y -CONFIG_SECURITY_SELINUX_BOOTPARAM=y -CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1 -CONFIG_SECURITY_SELINUX_DEVELOP=y -# CONFIG_SECURITY_SELINUX_MLS is not set - -# -# Cryptographic options -# -CONFIG_CRYPTO=y -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_NULL=m -CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_SHA1=m -CONFIG_CRYPTO_SHA256=m -CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_WHIRLPOOL=m -CONFIG_CRYPTO_DES=m -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_AES=m -CONFIG_CRYPTO_CAST5=m -CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_ARC4=m -CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_DEFLATE=m -# CONFIG_CRYPTO_TEST is not set - -# -# 
Library routines -# -CONFIG_CRC_CCITT=m -CONFIG_CRC32=m -CONFIG_ZLIB_INFLATE=m -CONFIG_ZLIB_DEFLATE=m -CONFIG_CRYPTO_MICHAEL_MIC=m - -# CONFIG_AGP_INTEL_MCH is not set -# -# Config for 2.6.6-rc2+cset-20040422_0013 -# - -CONFIG_POSIX_MQUEUE=y -CONFIG_AUDIT=y -CONFIG_AMD8111E_NAPI=y -CONFIG_S2IO=m -# CONFIG_S2IO_NAPI is not set -CONFIG_IPMI_SI=m -CONFIG_I2C_ALI1563=m -CONFIG_SENSORS_PCF8574=m -CONFIG_SENSORS_PCF8591=m -CONFIG_USB_EHCI_ROOT_HUB_TT=y -CONFIG_USB_CYTHERM=m -CONFIG_USB_PHIDGETKIT=m -# CONFIG_SECURITY_SELINUX_DISABLE is not set - -CONFIG_CRYPTO_CRC32C=m -CONFIG_LIBCRC32C=m - -# CONFIG_ATM_FORE200E_USE_TASKLET is not set - -# for pramfs filesystem - -# CONFIG_PRAMFS is not set -# CONFIG_PRAMFS_NOWP is not set -# CONFIG_ROOT_PRAMFS is not set - -# CONFIG_LIRC_SUPPORT is not set - -CONFIG_SCSI_IPR=m -# CONFIG_SCSI_IPR_TRACE is not set -# CONFIG_SCSI_IPR_DUMP is not set - -CONFIG_SENSORS_RTC8564=m -CONFIG_USB_EGALAX=m -CONFIG_USB_PHIDGETSERVO=m - -CONFIG_KALLSYMS_ALL=y - -# CONFIG_I2O is not set -# CONFIG_FB_ASILIANT is not set -# 2.6.7 + cset-20040620_0609 - -CONFIG_BSD_PROCESS_ACCT_V3=y - -CONFIG_SCSI_3W_9XXX=m - -# CONFIG_DM_SNAPSHOT is not set -# CONFIG_DM_MIRROR is not set -# CONFIG_DM_ZERO is not set - -CONFIG_VIA_VELOCITY=m - -# CONFIG_CIFS_POSIX is not set - -# CONFIG_USB_STORAGE_RW_DETECT is not set - -# 2.6.7 + cset-20040621_0519 - -CONFIG_PREVENT_FIRMWARE_BUILD=y - -CONFIG_FAT_DEFAULT_CODEPAGE=437 -CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" - -CONFIG_NLS_ASCII=m - -CONFIG_VIDEO_OVCAMCHIP=m -# 2.6.7 + cset-20040622_1819 - -CONFIG_BLK_DEV_SX8=m -CONFIG_BLK_DEV_UB=m - -# CONFIG_BLK_DEV_IDE_SATA is not set - -CONFIG_SCSI_SATA_NV=m - -CONFIG_KALLSYMS_EXTRA_PASS=y -CONFIG_BLK_DEV_GNBD=m -CONFIG_CRC16=m - -# cset-20040701_0409 -CONFIG_CLS_U32_PERF=y -CONFIG_NET_CLS_IND=y -# CONFIG_NET_CLS_ACT is not set -# CONFIG_NET_ACT_POLICE is not set -# CONFIG_FB_CIRRUS is not set -# cset-20040702_0108 -CONFIG_CRYPTO_TEA=m - - -CONFIG_NET_SCH_NETEM=m -CONFIG_BT_HIDP=m 
+#- +#- *** FILE: arch/Kconfig *** +#- +OPROFILE alpha=m +OPROFILE_IBS alpha=n + +#- +#- *** FILE: arch/alpha/Kconfig *** +#- +ALPHA alpha=y +64BIT alpha=y +MMU alpha=y +RWSEM_XCHGADD_ALGORITHM alpha=y +ARCH_HAS_ILOG2_U32 alpha=n +ARCH_HAS_ILOG2_U64 alpha=n +GENERIC_FIND_NEXT_BIT alpha=y +GENERIC_CALIBRATE_DELAY alpha=y +ZONE_DMA alpha=y +GENERIC_ISA_DMA alpha=y +GENERIC_IOMAP alpha=n +GENERIC_HARDIRQS alpha=y +GENERIC_IRQ_PROBE alpha=y +AUTO_IRQ_AFFINITY alpha=y +#- file init/Kconfig goes here +ALPHA_GENERIC alpha=y +ALPHA_ALCOR alpha=n +ALPHA_XL alpha=n +ALPHA_BOOK1 alpha=n +ALPHA_AVANTI_CH alpha=n +ALPHA_CABRIOLET alpha=n +ALPHA_DP264 alpha=n +ALPHA_EB164 alpha=n +ALPHA_EB64P_CH alpha=n +ALPHA_EB66 alpha=n +ALPHA_EB66P alpha=n +ALPHA_EIGER alpha=n +ALPHA_JENSEN alpha=n +ALPHA_LX164 alpha=n +ALPHA_LYNX alpha=n +ALPHA_MARVEL alpha=n +ALPHA_MIATA alpha=n +ALPHA_MIKASA alpha=n +ALPHA_NAUTILUS alpha=n +ALPHA_NONAME_CH alpha=n +ALPHA_NORITAKE alpha=n +ALPHA_PC164 alpha=n +ALPHA_P2K alpha=n +ALPHA_RAWHIDE alpha=n +ALPHA_RUFFIAN alpha=n +ALPHA_RX164 alpha=n +ALPHA_SX164 alpha=n +ALPHA_SABLE alpha=n +ALPHA_SHARK alpha=n +ALPHA_TAKARA alpha=n +ALPHA_TITAN alpha=n +ALPHA_WILDFIRE alpha=n +ISA alpha=y +ISA_DMA_API alpha=y +PCI_DOMAINS alpha=y +ALPHA_CORE_AGP alpha=y +GENERIC_HWEIGHT alpha=y +ALPHA_BROKEN_IRQ_MASK alpha=y +EISA alpha=y +ARCH_MAY_HAVE_PC_FDC alpha=y +SMP alpha=y +HAVE_DEC_LOCK alpha=y +NR_CPUS alpha=32 +ARCH_DISCONTIGMEM_ENABLE alpha=n +#- file mm/Kconfig goes here +VERBOSE_MCHECK alpha=n +#- file drivers/pci/Kconfig goes here +#- file drivers/eisa/Kconfig goes here +#- file drivers/pcmcia/Kconfig goes here +SRM_ENV alpha=m +#- file fs/Kconfig.binfmt goes here +#- file net/Kconfig goes here +#- file drivers/Kconfig goes here +#- file fs/Kconfig goes here +#- file arch/alpha/Kconfig.debug goes here +#- file kernel/vserver/Kconfig goes here +#- file security/Kconfig goes here +#- file crypto/Kconfig goes here +#- file lib/Kconfig goes here + +#- +#- *** FILE: 
arch/alpha/Kconfig.debug *** +#- +#- file lib/Kconfig.debug goes here +EARLY_PRINTK alpha=y +ALPHA_LEGACY_START_ADDRESS alpha=y +MATHEMU alpha=y diff --git a/kernel-apparmor-after-grsec_full.patch b/kernel-apparmor-after-grsec_full.patch index 525925b1..72d50c3e 100644 --- a/kernel-apparmor-after-grsec_full.patch +++ b/kernel-apparmor-after-grsec_full.patch @@ -1,17 +1,17 @@ -diff -uprN linux-2.6.27./fs/afs/dir.c linux-2.6.27/fs/afs/dir.c ---- linux-2.6.27./fs/afs/dir.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/fs/afs/dir.c 2008-10-29 14:28:53.282780285 +0100 -@@ -45,6 +45,7 @@ const struct file_operations afs_dir_fil - .release = afs_release, +diff -uprN a/fs/afs/dir.c b/fs/afs/dir.c +--- a/fs/afs/dir.c 2008-12-24 23:26:37.000000000 +0000 ++++ b/fs/afs/dir.c 2009-02-08 13:26:38.882622899 +0000 +@@ -46,6 +46,7 @@ const struct file_operations afs_dir_fil .readdir = afs_readdir, .lock = afs_lock, -+ .fsetattr = afs_fsetattr, + .llseek = generic_file_llseek, ++ .fsetattr = afs_fsetattr, }; const struct inode_operations afs_dir_inode_operations = { -diff -uprN linux-2.6.27./fs/afs/file.c linux-2.6.27/fs/afs/file.c ---- linux-2.6.27./fs/afs/file.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/fs/afs/file.c 2008-10-29 14:28:53.282780285 +0100 +diff -uprN a/fs/afs/file.c b/fs/afs/file.c +--- a/fs/afs/file.c 2008-12-24 23:26:37.000000000 +0000 ++++ b/fs/afs/file.c 2009-02-08 13:26:38.882622899 +0000 @@ -36,6 +36,7 @@ const struct file_operations afs_file_op .fsync = afs_fsync, .lock = afs_lock, @@ -20,9 +20,9 @@ diff -uprN linux-2.6.27./fs/afs/file.c linux-2.6.27/fs/afs/file.c }; const struct inode_operations afs_file_inode_operations = { -diff -uprN linux-2.6.27./fs/afs/inode.c linux-2.6.27/fs/afs/inode.c ---- linux-2.6.27./fs/afs/inode.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/fs/afs/inode.c 2008-10-29 14:28:53.282780285 +0100 +diff -uprN a/fs/afs/inode.c b/fs/afs/inode.c +--- a/fs/afs/inode.c 2008-12-24 23:26:37.000000000 +0000 ++++ 
b/fs/afs/inode.c 2009-02-08 13:26:38.882622899 +0000 @@ -358,7 +358,8 @@ void afs_clear_inode(struct inode *inode /* * set the attributes of an inode @@ -66,9 +66,9 @@ diff -uprN linux-2.6.27./fs/afs/inode.c linux-2.6.27/fs/afs/inode.c +{ + return afs_do_setattr(file->f_path.dentry, attr, file); +} -diff -uprN linux-2.6.27./fs/afs/internal.h linux-2.6.27/fs/afs/internal.h ---- linux-2.6.27./fs/afs/internal.h 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/fs/afs/internal.h 2008-10-29 14:28:53.282780285 +0100 +diff -uprN a/fs/afs/internal.h b/fs/afs/internal.h +--- a/fs/afs/internal.h 2008-12-24 23:26:37.000000000 +0000 ++++ b/fs/afs/internal.h 2009-02-08 13:26:38.882622899 +0000 @@ -548,6 +548,7 @@ extern void afs_zap_data(struct afs_vnod extern int afs_validate(struct afs_vnode *, struct key *); extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *); @@ -77,9 +77,9 @@ diff -uprN linux-2.6.27./fs/afs/internal.h linux-2.6.27/fs/afs/internal.h extern void afs_clear_inode(struct inode *); /* -diff -uprN linux-2.6.27./fs/attr.c linux-2.6.27/fs/attr.c ---- linux-2.6.27./fs/attr.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/fs/attr.c 2008-10-29 14:28:53.289441230 +0100 +diff -uprN a/fs/attr.c b/fs/attr.c +--- a/fs/attr.c 2008-12-24 23:26:37.000000000 +0000 ++++ b/fs/attr.c 2009-02-08 13:26:38.889289652 +0000 @@ -100,7 +100,8 @@ int inode_setattr(struct inode * inode, } EXPORT_SYMBOL(inode_setattr); @@ -90,40 +90,39 @@ diff -uprN linux-2.6.27./fs/attr.c linux-2.6.27/fs/attr.c { struct inode *inode = dentry->d_inode; mode_t mode = inode->i_mode; -@@ -163,13 +164,28 @@ int notify_change(struct dentry * dentry +@@ -159,7 +160,7 @@ int notify_change(struct dentry * dentry + if (!(attr->ia_valid & ~(ATTR_KILL_SUID | ATTR_KILL_SGID))) + return 0; + +- error = security_inode_setattr(dentry, attr); ++ error = security_inode_setattr(dentry, mnt, attr); + if (error) + return error; + +@@ -167,7 +168,21 @@ int notify_change(struct dentry * dentry 
down_write(&dentry->d_inode->i_alloc_sem); if (inode->i_op && inode->i_op->setattr) { -- error = security_inode_setattr(dentry, attr); -- if (!error) -- error = inode->i_op->setattr(dentry, attr); -+ error = security_inode_setattr(dentry, mnt, attr); -+ if (!error) { -+ if (file && file->f_op && file->f_op->fsetattr) -+ error = file->f_op->fsetattr(file, attr); -+ else { -+ /* External file system still expect to be -+ * passed a file pointer via ia_file and -+ * have it announced via ATTR_FILE. This -+ * just makes it so they don't need to -+ * change their API just for us. External -+ * callers will have set these themselves. */ -+ if (file) { -+ attr->ia_valid |= ATTR_FILE; -+ attr->ia_file = file; -+ } -+ error = inode->i_op->setattr(dentry, attr); +- error = inode->i_op->setattr(dentry, attr); ++ if (file && file->f_op && file->f_op->fsetattr) { ++ error = file->f_op->fsetattr(file, attr); ++ } else { ++ /* External file system still expect to be ++ * passed a file pointer via ia_file and ++ * have it announced via ATTR_FILE. This ++ * just makes it so they don't need to ++ * change their API just for us. External ++ * callers will have set these themselves. 
*/ ++ if (file) { ++ attr->ia_valid |= ATTR_FILE; ++ attr->ia_file = file; + } ++ error = inode->i_op->setattr(dentry, attr); + } } else { error = inode_change_ok(inode, attr); - if (!error) -- error = security_inode_setattr(dentry, attr); -+ error = security_inode_setattr(dentry, mnt, attr); if (!error) { - if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || - (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) -@@ -187,5 +203,12 @@ int notify_change(struct dentry * dentry +@@ -187,5 +202,12 @@ int notify_change(struct dentry * dentry return error; } @@ -136,10 +135,10 @@ diff -uprN linux-2.6.27./fs/attr.c linux-2.6.27/fs/attr.c +} EXPORT_SYMBOL(notify_change); -diff -uprN linux-2.6.27./fs/dcache.c linux-2.6.27/fs/dcache.c ---- linux-2.6.27./fs/dcache.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/fs/dcache.c 2008-10-29 14:28:53.279441727 +0100 -@@ -1897,44 +1897,46 @@ static int prepend_name(char **buffer, i +diff -uprN a/fs/dcache.c b/fs/dcache.c +--- a/fs/dcache.c 2009-02-08 13:22:56.398269260 +0000 ++++ b/fs/dcache.c 2009-02-08 13:26:38.879284319 +0000 +@@ -1908,45 +1908,46 @@ static int prepend_name(char **buffer, i * @root: root vfsmnt/dentry (may be modified by this function) * @buffer: buffer to return value in * @buflen: buffer length @@ -148,7 +147,8 @@ diff -uprN linux-2.6.27./fs/dcache.c linux-2.6.27/fs/dcache.c - * Convert a dentry into an ASCII path name. If the entry has been deleted - * the string " (deleted)" is appended. Note that this is ambiguous. - * -- * Returns the buffer or an error code if the path was too long. +- * Returns a pointer into the buffer or an error code if the +- * path was too long. - * - * "buflen" should be positive. Caller holds the dcache_lock. + * Convert a dentry into an ASCII path name. 
If the entry has been deleted, @@ -207,7 +207,7 @@ diff -uprN linux-2.6.27./fs/dcache.c linux-2.6.27/fs/dcache.c if (vfsmnt->mnt_parent == vfsmnt) { goto global_root; } -@@ -1944,27 +1946,51 @@ char *__d_path(const struct path *path, +@@ -1955,27 +1957,51 @@ char *__d_path(const struct path *path, } parent = dentry->d_parent; prefetch(parent); @@ -266,7 +266,7 @@ diff -uprN linux-2.6.27./fs/dcache.c linux-2.6.27/fs/dcache.c goto out; } -@@ -2001,10 +2027,8 @@ char *d_path(const struct path *path, ch +@@ -2012,10 +2038,8 @@ char *d_path(const struct path *path, ch root = current->fs->root; path_get(&root); read_unlock(&current->fs->lock); @@ -278,9 +278,9 @@ diff -uprN linux-2.6.27./fs/dcache.c linux-2.6.27/fs/dcache.c path_put(&root); return res; } -@@ -2087,9 +2111,9 @@ Elong: +@@ -2098,9 +2122,9 @@ Elong: */ - asmlinkage long sys_getcwd(char __user *buf, unsigned long size) + SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) { - int error; - struct path pwd, root; @@ -291,7 +291,7 @@ diff -uprN linux-2.6.27./fs/dcache.c linux-2.6.27/fs/dcache.c if (!page) return -ENOMEM; -@@ -2101,30 +2125,20 @@ asmlinkage long sys_getcwd(char __user * +@@ -2112,30 +2136,20 @@ SYSCALL_DEFINE2(getcwd, char __user *, b path_get(&root); read_unlock(&current->fs->lock); @@ -334,9 +334,9 @@ diff -uprN linux-2.6.27./fs/dcache.c linux-2.6.27/fs/dcache.c out: path_put(&pwd); -diff -uprN linux-2.6.27./fs/ecryptfs/inode.c linux-2.6.27/fs/ecryptfs/inode.c ---- linux-2.6.27./fs/ecryptfs/inode.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/fs/ecryptfs/inode.c 2008-10-29 14:28:53.242566343 +0100 +diff -uprN a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c +--- a/fs/ecryptfs/inode.c 2009-02-08 13:22:56.398269260 +0000 ++++ b/fs/ecryptfs/inode.c 2009-02-08 13:26:38.839289678 +0000 @@ -403,19 +403,24 @@ static int ecryptfs_link(struct dentry * struct dentry *new_dentry) { @@ -364,7 +364,7 @@ diff -uprN linux-2.6.27./fs/ecryptfs/inode.c linux-2.6.27/fs/ecryptfs/inode.c if (rc ||
!lower_new_dentry->d_inode) goto out_lock; rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb, 0); -@@ -440,11 +445,12 @@ static int ecryptfs_unlink(struct inode +@@ -474,11 +474,12 @@ { int rc = 0; struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); @@ -394,12 +394,12 @@ diff -uprN linux-2.6.27./fs/ecryptfs/inode.c linux-2.6.27/fs/ecryptfs/inode.c lower_dir_dentry = lock_parent(lower_dentry); encoded_symlen = ecryptfs_encode_filename(crypt_stat, symname, strlen(symname), -@@ -479,7 +487,7 @@ static int ecryptfs_symlink(struct inode - rc = encoded_symlen; +@@ -517,7 +518,7 @@ + strlen(symname)); + if (rc) goto out_lock; - } - rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry, -+ rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry, lower_mnt, ++ rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry, lower_mnt, encoded_symname); kfree(encoded_symname); if (rc || !lower_dentry->d_inode) @@ -479,7 +479,7 @@ diff -uprN linux-2.6.27./fs/ecryptfs/inode.c linux-2.6.27/fs/ecryptfs/inode.c if (rc) goto out_lock; fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode, NULL); -@@ -849,6 +870,7 @@ static int ecryptfs_setattr(struct dentr +@@ -850,6 +871,7 @@ static int ecryptfs_setattr(struct dentr { int rc = 0; struct dentry *lower_dentry; @@ -487,7 +487,7 @@ diff -uprN linux-2.6.27./fs/ecryptfs/inode.c linux-2.6.27/fs/ecryptfs/inode.c struct inode *inode; struct inode *lower_inode; struct ecryptfs_crypt_stat *crypt_stat; -@@ -859,6 +881,7 @@ static int ecryptfs_setattr(struct dentr +@@ -860,6 +882,7 @@ static int ecryptfs_setattr(struct dentr inode = dentry->d_inode; lower_inode = ecryptfs_inode_to_lower(inode); lower_dentry = ecryptfs_dentry_to_lower(dentry); @@ -495,7 +495,7 @@ diff -uprN linux-2.6.27./fs/ecryptfs/inode.c linux-2.6.27/fs/ecryptfs/inode.c mutex_lock(&crypt_stat->cs_mutex); if (S_ISDIR(dentry->d_inode->i_mode)) crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); -@@ -910,7 +933,7 @@ static int ecryptfs_setattr(struct dentr
+@@ -911,7 +934,7 @@ static int ecryptfs_setattr(struct dentr ia->ia_valid &= ~ATTR_MODE; mutex_lock(&lower_dentry->d_inode->i_mutex); @@ -504,10 +504,10 @@ diff -uprN linux-2.6.27./fs/ecryptfs/inode.c linux-2.6.27/fs/ecryptfs/inode.c mutex_unlock(&lower_dentry->d_inode->i_mutex); out: fsstack_copy_attr_all(inode, lower_inode, NULL); -diff -uprN linux-2.6.27./fs/exec.c linux-2.6.27/fs/exec.c ---- linux-2.6.27./fs/exec.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/fs/exec.c 2008-10-29 14:28:53.172364339 +0100 -@@ -1827,7 +1827,8 @@ int do_coredump(long signr, int exit_cod +diff -uprN a/fs/exec.c b/fs/exec.c +--- a/fs/exec.c 2009-02-08 13:22:56.401605319 +0000 ++++ b/fs/exec.c 2009-02-08 13:26:38.765741762 +0000 +@@ -1829,7 +1829,8 @@ int do_coredump(long signr, int exit_cod goto close_fail; if (!file->f_op->write) goto close_fail; @@ -517,10 +517,10 @@ diff -uprN linux-2.6.27./fs/exec.c linux-2.6.27/fs/exec.c goto close_fail; retval = binfmt->core_dump(signr, regs, file, core_limit); -diff -uprN linux-2.6.27./fs/fat/file.c linux-2.6.27/fs/fat/file.c ---- linux-2.6.27./fs/fat/file.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/fs/fat/file.c 2008-10-29 14:28:53.175696536 +0100 -@@ -98,7 +98,7 @@ int fat_generic_ioctl(struct inode *inod +diff -uprN a/fs/fat/file.c b/fs/fat/file.c +--- a/fs/fat/file.c 2008-12-24 23:26:37.000000000 +0000 ++++ b/fs/fat/file.c 2009-02-08 13:26:38.769075001 +0000 +@@ -93,7 +93,7 @@ int fat_generic_ioctl(struct inode *inod * out the RO attribute for checking by the security * module, just because it maps to a file mode. 
*/ @@ -529,9 +529,9 @@ diff -uprN linux-2.6.27./fs/fat/file.c linux-2.6.27/fs/fat/file.c if (err) goto up; -diff -uprN linux-2.6.27./fs/fuse/dir.c linux-2.6.27/fs/fuse/dir.c ---- linux-2.6.27./fs/fuse/dir.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/fs/fuse/dir.c 2008-10-29 14:28:53.282780285 +0100 +diff -uprN a/fs/fuse/dir.c b/fs/fuse/dir.c +--- a/fs/fuse/dir.c 2008-12-24 23:26:37.000000000 +0000 ++++ b/fs/fuse/dir.c 2009-02-08 13:26:38.882622899 +0000 @@ -1105,21 +1105,22 @@ static int fuse_dir_fsync(struct file *f return file ? fuse_fsync_common(file, de, datasync, 1) : 0; } @@ -599,10 +599,10 @@ diff -uprN linux-2.6.27./fs/fuse/dir.c linux-2.6.27/fs/fuse/dir.c } static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry, -diff -uprN linux-2.6.27./fs/fuse/file.c linux-2.6.27/fs/fuse/file.c ---- linux-2.6.27./fs/fuse/file.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/fs/fuse/file.c 2008-10-29 14:28:53.282780285 +0100 -@@ -1465,6 +1465,11 @@ static loff_t fuse_file_llseek(struct fi +diff -uprN a/fs/fuse/file.c b/fs/fuse/file.c +--- a/fs/fuse/file.c 2009-02-08 13:22:56.404930729 +0000 ++++ b/fs/fuse/file.c 2009-02-08 13:26:38.882622899 +0000 +@@ -1470,6 +1470,11 @@ static loff_t fuse_file_llseek(struct fi return retval; } @@ -614,7 +614,7 @@ diff -uprN linux-2.6.27./fs/fuse/file.c linux-2.6.27/fs/fuse/file.c static const struct file_operations fuse_file_operations = { .llseek = fuse_file_llseek, .read = do_sync_read, -@@ -1478,6 +1483,7 @@ static const struct file_operations fuse +@@ -1483,6 +1488,7 @@ static const struct file_operations fuse .fsync = fuse_fsync, .lock = fuse_file_lock, .flock = fuse_file_flock, @@ -622,18 +622,18 @@ diff -uprN linux-2.6.27./fs/fuse/file.c linux-2.6.27/fs/fuse/file.c .splice_read = generic_file_splice_read, }; -@@ -1491,6 +1497,7 @@ static const struct file_operations fuse - .fsync = fuse_fsync, - .lock = fuse_file_lock, - .flock = fuse_file_flock, +@@ -1496,6 +1502,7 @@ static const struct 
file_operations fuse + .unlocked_ioctl = fuse_file_ioctl, + .compat_ioctl = fuse_file_compat_ioctl, + .poll = fuse_file_poll, + .fsetattr = fuse_fsetattr, /* no mmap and splice_read */ }; -diff -uprN linux-2.6.27./fs/fuse/fuse_i.h linux-2.6.27/fs/fuse/fuse_i.h ---- linux-2.6.27./fs/fuse/fuse_i.h 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/fs/fuse/fuse_i.h 2008-10-29 14:28:53.282780285 +0100 -@@ -551,6 +551,10 @@ void fuse_truncate(struct address_space +diff -uprN a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h +--- a/fs/fuse/fuse_i.h 2008-12-24 23:26:37.000000000 +0000 ++++ b/fs/fuse/fuse_i.h 2009-02-08 13:26:38.882622899 +0000 +@@ -554,6 +554,10 @@ void fuse_truncate(struct address_space */ int fuse_dev_init(void); @@ -644,9 +644,9 @@ diff -uprN linux-2.6.27./fs/fuse/fuse_i.h linux-2.6.27/fs/fuse/fuse_i.h /** * Cleanup the client device */ -diff -uprN linux-2.6.27./fs/hpfs/namei.c linux-2.6.27/fs/hpfs/namei.c ---- linux-2.6.27./fs/hpfs/namei.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/fs/hpfs/namei.c 2008-10-29 14:28:53.172364339 +0100 +diff -uprN a/fs/hpfs/namei.c b/fs/hpfs/namei.c +--- a/fs/hpfs/namei.c 2008-12-24 23:26:37.000000000 +0000 ++++ b/fs/hpfs/namei.c 2009-02-08 13:26:38.765741762 +0000 @@ -426,7 +426,7 @@ again: /*printk("HPFS: truncating file before delete.\n");*/ newattrs.ia_size = 0; @@ -656,9 +656,9 @@ diff -uprN linux-2.6.27./fs/hpfs/namei.c linux-2.6.27/fs/hpfs/namei.c put_write_access(inode); if (!err) goto again; -diff -uprN linux-2.6.27./fs/inotify_user.c linux-2.6.27/fs/inotify_user.c ---- linux-2.6.27./fs/inotify_user.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/fs/inotify_user.c 2008-10-29 14:28:53.306108405 +0100 +diff -uprN a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c +--- a/fs/notify/inotify/inotify_user.c 2009-02-08 13:22:56.408263679 +0000 ++++ b/fs/notify/inotify/inotify_user.c 2009-02-08 13:26:38.905951221 +0000 @@ -372,7 +372,7 @@ static int find_inode(const char __user if (error) 
return error; @@ -668,10 +668,10 @@ diff -uprN linux-2.6.27./fs/inotify_user.c linux-2.6.27/fs/inotify_user.c if (error) path_put(path); return error; -diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c ---- linux-2.6.27./fs/namei.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/fs/namei.c 2008-10-29 14:28:53.309441302 +0100 -@@ -227,7 +227,7 @@ int generic_permission(struct inode *ino +diff -uprN a/fs/namei.c b/fs/namei.c +--- a/fs/namei.c 2009-02-08 13:22:56.411597278 +0000 ++++ b/fs/namei.c 2009-02-08 13:26:38.909283473 +0000 +@@ -226,7 +226,7 @@ int generic_permission(struct inode *ino return -EACCES; } @@ -680,7 +680,7 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c { int retval; -@@ -269,7 +269,12 @@ int inode_permission(struct inode *inode +@@ -256,7 +256,12 @@ int inode_permission(struct inode *inode if (retval) return retval; @@ -694,7 +694,7 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c if (retval) return retval; -@@ -277,6 +282,15 @@ int inode_permission(struct inode *inode +@@ -264,6 +269,15 @@ int inode_permission(struct inode *inode mask & (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND)); } @@ -710,7 +710,7 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c /** * vfs_permission - check for access rights to a given path * @nd: lookup result that describes the path -@@ -289,7 +303,7 @@ int inode_permission(struct inode *inode +@@ -276,7 +290,7 @@ int inode_permission(struct inode *inode */ int vfs_permission(struct nameidata *nd, int mask) { @@ -719,7 +719,7 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c } /** -@@ -306,7 +320,7 @@ int vfs_permission(struct nameidata *nd, +@@ -293,7 +307,7 @@ int vfs_permission(struct nameidata *nd, */ int file_permission(struct file *file, int mask) { @@ -728,7 +728,7 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c } /* -@@ -447,8 +461,9 @@ static struct dentry * cached_lookup(str +@@ -434,8 +448,9 @@ static struct dentry * 
cached_lookup(str * short-cut DAC fails, then call permission() to do more * complete permission check. */ @@ -739,7 +739,7 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c umode_t mode = inode->i_mode; if (inode->i_op && inode->i_op->permission) -@@ -473,7 +488,7 @@ static int exec_permission_lite(struct i +@@ -460,7 +475,7 @@ static int exec_permission_lite(struct i return -EACCES; ok: @@ -748,7 +748,7 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c } /* -@@ -870,7 +885,7 @@ static int __link_path_walk(const char * +@@ -857,7 +872,7 @@ static int __link_path_walk(const char * unsigned int c; nd->flags |= LOOKUP_CONTINUE; @@ -757,7 +757,7 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c if (err == -EAGAIN) err = vfs_permission(nd, MAY_EXEC); if (err) -@@ -1065,24 +1080,21 @@ static int do_path_lookup(int dfd, const +@@ -1052,24 +1067,21 @@ static int do_path_lookup(int dfd, const path_get(&fs->pwd); read_unlock(&fs->lock); } else { @@ -784,7 +784,7 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c path_get(&file->f_path); fput_light(file, fput_needed); -@@ -1245,7 +1257,7 @@ static struct dentry *lookup_hash(struct +@@ -1216,7 +1228,7 @@ static struct dentry *lookup_hash(struct { int err; @@ -793,7 +793,7 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c if (err) return ERR_PTR(err); return __lookup_hash(&nd->last, nd->path.dentry, nd); -@@ -1512,7 +1524,7 @@ int vfs_create(struct inode *dir, struct +@@ -1481,7 +1493,7 @@ int vfs_create(struct inode *dir, struct return -EACCES; /* shouldn't it be ENOSYS? 
*/ mode &= S_IALLUGO; mode |= S_IFREG; @@ -802,7 +802,7 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c if (error) return error; DQUOT_INIT(dir); -@@ -1588,7 +1600,7 @@ int may_open(struct nameidata *nd, int a +@@ -1557,7 +1569,7 @@ int may_open(struct nameidata *nd, int a if (!error) { DQUOT_INIT(inode); @@ -811,7 +811,7 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c ATTR_MTIME|ATTR_CTIME|ATTR_OPEN, NULL); } -@@ -1945,7 +1957,8 @@ fail: +@@ -1924,7 +1936,8 @@ fail: } EXPORT_SYMBOL_GPL(lookup_create); @@ -821,7 +821,7 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c { int error = may_create(dir, dentry); -@@ -1962,7 +1975,7 @@ int vfs_mknod(struct inode *dir, struct +@@ -1941,7 +1954,7 @@ int vfs_mknod(struct inode *dir, struct if (error) return error; @@ -830,7 +830,7 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c if (error) return error; -@@ -2023,11 +2036,12 @@ asmlinkage long sys_mknodat(int dfd, con +@@ -2002,11 +2015,12 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd); break; case S_IFCHR: case S_IFBLK: @@ -846,7 +846,7 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c break; } mnt_drop_write(nd.path.mnt); -@@ -2046,7 +2060,8 @@ asmlinkage long sys_mknod(const char __u +@@ -2025,7 +2039,8 @@ SYSCALL_DEFINE3(mknod, const char __user return sys_mknodat(AT_FDCWD, filename, mode, dev); } @@ -856,7 +856,7 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c { int error = may_create(dir, dentry); -@@ -2057,7 +2072,7 @@ int vfs_mkdir(struct inode *dir, struct +@@ -2036,7 +2051,7 @@ int vfs_mkdir(struct inode *dir, struct return -EPERM; mode &= (S_IRWXUGO|S_ISVTX); @@ -865,16 +865,16 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c if (error) return error; -@@ -2089,7 +2104,7 @@ asmlinkage long sys_mkdirat(int dfd, con +@@ -2068,7 +2083,7 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const error = 
mnt_want_write(nd.path.mnt); if (error) goto out_dput; - error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode); + error = vfs_mkdir(nd.path.dentry->d_inode, dentry, nd.path.mnt, mode); mnt_drop_write(nd.path.mnt); - out_dput: - dput(dentry); -@@ -2133,7 +2148,7 @@ void dentry_unhash(struct dentry *dentry + + if (!error) +@@ -2112,7 +2127,7 @@ void dentry_unhash(struct dentry *dentry spin_unlock(&dcache_lock); } @@ -883,7 +883,7 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c { int error = may_delete(dir, dentry, 1); -@@ -2143,6 +2158,10 @@ int vfs_rmdir(struct inode *dir, struct +@@ -2122,6 +2137,10 @@ int vfs_rmdir(struct inode *dir, struct if (!dir->i_op || !dir->i_op->rmdir) return -EPERM; @@ -894,7 +894,7 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c DQUOT_INIT(dir); mutex_lock(&dentry->d_inode->i_mutex); -@@ -2150,12 +2169,9 @@ int vfs_rmdir(struct inode *dir, struct +@@ -2129,12 +2148,9 @@ int vfs_rmdir(struct inode *dir, struct if (d_mountpoint(dentry)) error = -EBUSY; else { @@ -910,16 +910,16 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c } mutex_unlock(&dentry->d_inode->i_mutex); if (!error) { -@@ -2196,7 +2212,7 @@ static long do_rmdir(int dfd, const char +@@ -2178,7 +2194,7 @@ static long do_rmdir(int dfd, const char error = mnt_want_write(nd.path.mnt); if (error) goto exit3; - error = vfs_rmdir(nd.path.dentry->d_inode, dentry); + error = vfs_rmdir(nd.path.dentry->d_inode, dentry, nd.path.mnt); mnt_drop_write(nd.path.mnt); - exit3: - dput(dentry); -@@ -2213,7 +2229,7 @@ asmlinkage long sys_rmdir(const char __u + if (!error && (saved_dev || saved_ino)) + gr_handle_delete(saved_ino, saved_dev); +@@ -2195,7 +2211,7 @@ SYSCALL_DEFINE1(rmdir, const char __user return do_rmdir(AT_FDCWD, pathname); } @@ -928,7 +928,7 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c { int error = may_delete(dir, dentry, 0); -@@ -2229,7 +2245,7 @@ int vfs_unlink(struct inode *dir, struct +@@ -2211,7 +2227,7 @@ int 
vfs_unlink(struct inode *dir, struct if (d_mountpoint(dentry)) error = -EBUSY; else { @@ -937,16 +937,16 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c if (!error) error = dir->i_op->unlink(dir, dentry); } -@@ -2278,7 +2294,7 @@ static long do_unlinkat(int dfd, const c +@@ -2263,7 +2279,7 @@ static long do_unlinkat(int dfd, const c error = mnt_want_write(nd.path.mnt); if (error) goto exit2; - error = vfs_unlink(nd.path.dentry->d_inode, dentry); + error = vfs_unlink(nd.path.dentry->d_inode, dentry, nd.path.mnt); + if (!error && (saved_ino || saved_dev)) + gr_handle_delete(saved_ino, saved_dev); mnt_drop_write(nd.path.mnt); - exit2: - dput(dentry); -@@ -2313,7 +2329,8 @@ asmlinkage long sys_unlink(const char __ +@@ -2298,7 +2314,8 @@ SYSCALL_DEFINE1(unlink, const char __use return do_unlinkat(AT_FDCWD, pathname); } @@ -956,7 +956,7 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c { int error = may_create(dir, dentry); -@@ -2323,7 +2340,7 @@ int vfs_symlink(struct inode *dir, struc +@@ -2308,7 +2325,7 @@ int vfs_symlink(struct inode *dir, struc if (!dir->i_op || !dir->i_op->symlink) return -EPERM; @@ -965,16 +965,16 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c if (error) return error; -@@ -2359,7 +2376,7 @@ asmlinkage long sys_symlinkat(const char +@@ -2344,7 +2361,7 @@ SYSCALL_DEFINE3(symlinkat, const char __ error = mnt_want_write(nd.path.mnt); if (error) goto out_dput; - error = vfs_symlink(nd.path.dentry->d_inode, dentry, from); + error = vfs_symlink(nd.path.dentry->d_inode, dentry, nd.path.mnt, from); + if (!error) + gr_handle_create(dentry, nd.path.mnt); mnt_drop_write(nd.path.mnt); - out_dput: - dput(dentry); -@@ -2377,7 +2394,7 @@ asmlinkage long sys_symlink(const char _ +@@ -2362,7 +2379,7 @@ SYSCALL_DEFINE2(symlink, const char __us return sys_symlinkat(oldname, AT_FDCWD, newname); } @@ -983,7 +983,7 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c { struct inode *inode = old_dentry->d_inode; int 
error; -@@ -2402,7 +2419,8 @@ int vfs_link(struct dentry *old_dentry, +@@ -2387,7 +2404,8 @@ int vfs_link(struct dentry *old_dentry, if (S_ISDIR(inode->i_mode)) return -EPERM; @@ -993,7 +993,7 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c if (error) return error; -@@ -2456,7 +2474,9 @@ asmlinkage long sys_linkat(int olddfd, c +@@ -2440,7 +2458,9 @@ SYSCALL_DEFINE5(linkat, int, olddfd, con error = mnt_want_write(nd.path.mnt); if (error) goto out_dput; @@ -1001,10 +1001,10 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c + error = vfs_link(old_path.dentry, old_path.mnt, + nd.path.dentry->d_inode, + new_dentry, nd.path.mnt); + if (!error) + gr_handle_create(new_dentry, nd.path.mnt); mnt_drop_write(nd.path.mnt); - out_dput: - dput(new_dentry); -@@ -2509,7 +2529,8 @@ asmlinkage long sys_link(const char __us +@@ -2493,7 +2513,8 @@ SYSCALL_DEFINE2(link, const char __user * locking]. */ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, @@ -1014,7 +1014,7 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c { int error = 0; struct inode *target; -@@ -2524,7 +2545,8 @@ static int vfs_rename_dir(struct inode * +@@ -2508,7 +2529,8 @@ static int vfs_rename_dir(struct inode * return error; } @@ -1024,7 +1024,7 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c if (error) return error; -@@ -2552,12 +2574,14 @@ static int vfs_rename_dir(struct inode * +@@ -2536,12 +2558,14 @@ static int vfs_rename_dir(struct inode * } static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, @@ -1041,7 +1041,7 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c if (error) return error; -@@ -2580,7 +2604,8 @@ static int vfs_rename_other(struct inode +@@ -2564,7 +2588,8 @@ static int vfs_rename_other(struct inode } int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, @@ -1051,7 +1051,7 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c { int error; int is_dir = 
S_ISDIR(old_dentry->d_inode->i_mode); -@@ -2609,9 +2634,11 @@ int vfs_rename(struct inode *old_dir, st +@@ -2593,9 +2618,11 @@ int vfs_rename(struct inode *old_dir, st old_name = fsnotify_oldname_init(old_dentry->d_name.name); if (is_dir) @@ -1062,10 +1062,10 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c - error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); + error = vfs_rename_other(old_dir, old_dentry, old_mnt, + new_dir, new_dentry, new_mnt); + if (!error) { const char *new_name = old_dentry->d_name.name; - fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir, -@@ -2688,8 +2715,8 @@ asmlinkage long sys_renameat(int olddfd, +@@ -2676,8 +2703,8 @@ SYSCALL_DEFINE4(renameat, int, olddfd, c error = mnt_want_write(oldnd.path.mnt); if (error) goto exit5; @@ -1073,21 +1073,21 @@ diff -uprN linux-2.6.27./fs/namei.c linux-2.6.27/fs/namei.c - new_dir->d_inode, new_dentry); + error = vfs_rename(old_dir->d_inode, old_dentry, oldnd.path.mnt, + new_dir->d_inode, new_dentry, newnd.path.mnt); - mnt_drop_write(oldnd.path.mnt); - exit5: - dput(new_dentry); -@@ -2857,6 +2884,7 @@ EXPORT_SYMBOL(page_symlink_inode_operati - EXPORT_SYMBOL(path_lookup); + if (!error) + gr_handle_rename(old_dir->d_inode, new_dir->d_inode, old_dentry, + new_dentry, oldnd.path.mnt, new_dentry->d_inode ? 
1 : 0); +@@ -2851,6 +2878,7 @@ EXPORT_SYMBOL(path_lookup); + EXPORT_SYMBOL(kern_path); EXPORT_SYMBOL(vfs_path_lookup); EXPORT_SYMBOL(inode_permission); +EXPORT_SYMBOL(path_permission); EXPORT_SYMBOL(vfs_permission); EXPORT_SYMBOL(file_permission); EXPORT_SYMBOL(unlock_rename); -diff -uprN linux-2.6.27./fs/namespace.c linux-2.6.27/fs/namespace.c ---- linux-2.6.27./fs/namespace.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/fs/namespace.c 2008-10-29 14:28:53.279441727 +0100 -@@ -2352,3 +2352,33 @@ void __put_mnt_ns(struct mnt_namespace * +diff -uprN a/fs/namespace.c b/fs/namespace.c +--- a/fs/namespace.c 2009-02-08 13:22:56.411597278 +0000 ++++ b/fs/namespace.c 2009-02-08 13:26:38.935950875 +0000 +@@ -2348,3 +2348,33 @@ void __put_mnt_ns(struct mnt_namespace * release_mounts(&umount_list); kfree(ns); } @@ -1104,7 +1104,7 @@ diff -uprN linux-2.6.27./fs/namespace.c linux-2.6.27/fs/namespace.c + path_get(¤t->fs->root); + read_unlock(¤t->fs->lock); + spin_lock(&vfsmount_lock); -+ if (root.mnt) ++ if (root.mnt && root.mnt->mnt_ns) + ns_root.mnt = mntget(root.mnt->mnt_ns->root); + if (ns_root.mnt) + ns_root.dentry = dget(ns_root.mnt->mnt_root); @@ -1121,17 +1121,17 @@ diff -uprN linux-2.6.27./fs/namespace.c linux-2.6.27/fs/namespace.c + return res; +} +EXPORT_SYMBOL(d_namespace_path); -diff -uprN linux-2.6.27./fs/nfsd/nfs4recover.c linux-2.6.27/fs/nfsd/nfs4recover.c ---- linux-2.6.27./fs/nfsd/nfs4recover.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/fs/nfsd/nfs4recover.c 2008-10-29 14:28:53.232566922 +0100 +diff -uprN a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c +--- a/fs/nfsd/nfs4recover.c 2008-12-24 23:26:37.000000000 +0000 ++++ b/fs/nfsd/nfs4recover.c 2009-02-08 13:26:38.829075718 +0000 @@ -158,7 +158,8 @@ nfsd4_create_clid_dir(struct nfs4_client - status = mnt_want_write(rec_dir.path.mnt); + status = mnt_want_write(rec_dir.mnt); if (status) goto out_put; -- status = vfs_mkdir(rec_dir.path.dentry->d_inode, dentry, S_IRWXU); -+ status = 
vfs_mkdir(rec_dir.path.dentry->d_inode, dentry, -+ rec_dir.path.mnt, S_IRWXU); - mnt_drop_write(rec_dir.path.mnt); +- status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU); ++ status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, rec_dir.mnt, ++ S_IRWXU); + mnt_drop_write(rec_dir.mnt); out_put: dput(dentry); @@ -263,7 +264,7 @@ nfsd4_remove_clid_file(struct dentry *di @@ -1139,7 +1139,7 @@ diff -uprN linux-2.6.27./fs/nfsd/nfs4recover.c linux-2.6.27/fs/nfsd/nfs4recover. } mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); - status = vfs_unlink(dir->d_inode, dentry); -+ status = vfs_unlink(dir->d_inode, dentry, rec_dir.path.mnt); ++ status = vfs_unlink(dir->d_inode, dentry, rec_dir.mnt); mutex_unlock(&dir->d_inode->i_mutex); return status; } @@ -1148,14 +1148,14 @@ diff -uprN linux-2.6.27./fs/nfsd/nfs4recover.c linux-2.6.27/fs/nfsd/nfs4recover. nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file); mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); - status = vfs_rmdir(dir->d_inode, dentry); -+ status = vfs_rmdir(dir->d_inode, dentry, rec_dir.path.mnt); ++ status = vfs_rmdir(dir->d_inode, dentry, rec_dir.mnt); mutex_unlock(&dir->d_inode->i_mutex); return status; } -diff -uprN linux-2.6.27./fs/nfsd/nfs4xdr.c linux-2.6.27/fs/nfsd/nfs4xdr.c ---- linux-2.6.27./fs/nfsd/nfs4xdr.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/fs/nfsd/nfs4xdr.c 2008-10-29 14:28:53.256107551 +0100 -@@ -1446,7 +1446,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s +diff -uprN a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c +--- a/fs/nfsd/nfs4xdr.c 2008-12-24 23:26:37.000000000 +0000 ++++ b/fs/nfsd/nfs4xdr.c 2009-02-08 13:26:38.855950336 +0000 +@@ -1458,7 +1458,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s } if (bmval0 & (FATTR4_WORD0_ACL | FATTR4_WORD0_ACLSUPPORT | FATTR4_WORD0_SUPPORTED_ATTRS)) { @@ -1164,10 +1164,10 @@ diff -uprN linux-2.6.27./fs/nfsd/nfs4xdr.c linux-2.6.27/fs/nfsd/nfs4xdr.c aclsupport = (err == 0); if (bmval0 & FATTR4_WORD0_ACL) { if (err == -EOPNOTSUPP) 
-diff -uprN linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c ---- linux-2.6.27./fs/nfsd/vfs.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/fs/nfsd/vfs.c 2008-10-29 14:28:53.289441230 +0100 -@@ -388,7 +388,7 @@ nfsd_setattr(struct svc_rqst *rqstp, str +diff -uprN a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c +--- a/fs/nfsd/vfs.c 2008-12-24 23:26:37.000000000 +0000 ++++ b/fs/nfsd/vfs.c 2009-02-08 13:26:38.892617808 +0000 +@@ -387,7 +387,7 @@ nfsd_setattr(struct svc_rqst *rqstp, str err = nfserr_notsync; if (!check_guard || guardtime == inode->i_ctime.tv_sec) { fh_lock(fhp); @@ -1176,7 +1176,7 @@ diff -uprN linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c err = nfserrno(host_err); fh_unlock(fhp); } -@@ -408,11 +408,12 @@ out_nfserr: +@@ -407,12 +407,13 @@ out_nfserr: #if defined(CONFIG_NFSD_V2_ACL) || \ defined(CONFIG_NFSD_V3_ACL) || \ defined(CONFIG_NFSD_V4) @@ -1185,20 +1185,23 @@ diff -uprN linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c + char *key, void **buf) { ssize_t buflen; + ssize_t ret; - buflen = vfs_getxattr(dentry, key, NULL, 0); + buflen = vfs_getxattr(dentry, mnt, key, NULL, 0, NULL); if (buflen <= 0) return buflen; -@@ -420,13 +421,14 @@ static ssize_t nfsd_getxattr(struct dent +@@ -420,7 +421,7 @@ static ssize_t nfsd_getxattr(struct dent if (!*buf) return -ENOMEM; -- return vfs_getxattr(dentry, key, *buf, buflen); -+ return vfs_getxattr(dentry, mnt, key, *buf, buflen, NULL); - } - #endif +- ret = vfs_getxattr(dentry, key, *buf, buflen); ++ ret = vfs_getxattr(dentry, mnt, key, *buf, buflen, NULL); + if (ret < 0) + kfree(*buf); + return ret; +@@ -429,7 +430,8 @@ static ssize_t nfsd_getxattr(struct dent #if defined(CONFIG_NFSD_V4) static int @@ -1208,7 +1211,7 @@ diff -uprN linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c { int len; size_t buflen; -@@ -445,7 +447,7 @@ set_nfsv4_acl_one(struct dentry *dentry, +@@ -448,7 +450,7 @@ set_nfsv4_acl_one(struct dentry *dentry, goto out; } @@ -1217,7 +1220,7 @@ diff -uprN 
linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c out: kfree(buf); return error; -@@ -458,6 +460,7 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqst +@@ -461,6 +463,7 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqst __be32 error; int host_error; struct dentry *dentry; @@ -1225,7 +1228,7 @@ diff -uprN linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c struct inode *inode; struct posix_acl *pacl = NULL, *dpacl = NULL; unsigned int flags = 0; -@@ -468,6 +471,7 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqst +@@ -471,6 +474,7 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqst return error; dentry = fhp->fh_dentry; @@ -1233,7 +1236,7 @@ diff -uprN linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c inode = dentry->d_inode; if (S_ISDIR(inode->i_mode)) flags = NFS4_ACL_DIR; -@@ -478,12 +482,14 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqst +@@ -481,12 +485,14 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqst } else if (host_error < 0) goto out_nfserr; @@ -1250,7 +1253,7 @@ diff -uprN linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c out_release: posix_acl_release(pacl); -@@ -496,13 +502,13 @@ out_nfserr: +@@ -499,13 +505,13 @@ out_nfserr: } static struct posix_acl * @@ -1266,7 +1269,7 @@ diff -uprN linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c if (!buflen) buflen = -ENODATA; if (buflen <= 0) -@@ -514,14 +520,15 @@ _get_posix_acl(struct dentry *dentry, ch +@@ -517,14 +523,15 @@ _get_posix_acl(struct dentry *dentry, ch } int @@ -1284,7 +1287,7 @@ diff -uprN linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA) pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); if (IS_ERR(pacl)) { -@@ -531,7 +538,7 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqst +@@ -534,7 +541,7 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqst } if (S_ISDIR(inode->i_mode)) { @@ -1293,7 +1296,7 @@ diff -uprN linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA) dpacl = NULL; else if (IS_ERR(dpacl)) { -@@ -944,13 +951,13 
@@ out: +@@ -947,13 +954,13 @@ out: return err; } @@ -1309,7 +1312,7 @@ diff -uprN linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c mutex_unlock(&dentry->d_inode->i_mutex); } -@@ -1009,7 +1016,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, s +@@ -1012,7 +1019,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, s /* clear setuid/setgid flag after write */ if (host_err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID))) @@ -1318,7 +1321,7 @@ diff -uprN linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c if (host_err >= 0 && stable) { static ino_t last_ino; -@@ -1187,6 +1194,7 @@ nfsd_create(struct svc_rqst *rqstp, stru +@@ -1190,6 +1197,7 @@ nfsd_create(struct svc_rqst *rqstp, stru int type, dev_t rdev, struct svc_fh *resfhp) { struct dentry *dentry, *dchild = NULL; @@ -1326,7 +1329,7 @@ diff -uprN linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c struct inode *dirp; __be32 err; __be32 err2; -@@ -1204,6 +1212,7 @@ nfsd_create(struct svc_rqst *rqstp, stru +@@ -1207,6 +1215,7 @@ nfsd_create(struct svc_rqst *rqstp, stru goto out; dentry = fhp->fh_dentry; @@ -1334,7 +1337,7 @@ diff -uprN linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c dirp = dentry->d_inode; err = nfserr_notdir; -@@ -1220,7 +1229,7 @@ nfsd_create(struct svc_rqst *rqstp, stru +@@ -1223,7 +1232,7 @@ nfsd_create(struct svc_rqst *rqstp, stru host_err = PTR_ERR(dchild); if (IS_ERR(dchild)) goto out_nfserr; @@ -1343,7 +1346,7 @@ diff -uprN linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c if (err) goto out; } else { -@@ -1270,13 +1279,14 @@ nfsd_create(struct svc_rqst *rqstp, stru +@@ -1273,13 +1282,14 @@ nfsd_create(struct svc_rqst *rqstp, stru host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); break; case S_IFDIR: @@ -1360,7 +1363,7 @@ diff -uprN linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c break; } if (host_err < 0) { -@@ -1284,7 +1294,7 @@ nfsd_create(struct svc_rqst *rqstp, stru +@@ -1287,7 +1297,7 @@ nfsd_create(struct svc_rqst *rqstp, stru goto out_nfserr; } @@ -1369,7 +1372,7 
@@ diff -uprN linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c err = nfserrno(nfsd_sync_dir(dentry)); write_inode_now(dchild->d_inode, 1); } -@@ -1514,6 +1524,7 @@ nfsd_symlink(struct svc_rqst *rqstp, str +@@ -1517,6 +1527,7 @@ nfsd_symlink(struct svc_rqst *rqstp, str struct iattr *iap) { struct dentry *dentry, *dnew; @@ -1377,7 +1380,7 @@ diff -uprN linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c __be32 err, cerr; int host_err; -@@ -1538,6 +1549,7 @@ nfsd_symlink(struct svc_rqst *rqstp, str +@@ -1541,6 +1552,7 @@ nfsd_symlink(struct svc_rqst *rqstp, str if (host_err) goto out_nfserr; @@ -1385,7 +1388,7 @@ diff -uprN linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c if (unlikely(path[plen] != 0)) { char *path_alloced = kmalloc(plen+1, GFP_KERNEL); if (path_alloced == NULL) -@@ -1545,14 +1557,16 @@ nfsd_symlink(struct svc_rqst *rqstp, str +@@ -1548,14 +1560,16 @@ nfsd_symlink(struct svc_rqst *rqstp, str else { strncpy(path_alloced, path, plen); path_alloced[plen] = 0; @@ -1405,7 +1408,7 @@ diff -uprN linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c host_err = nfsd_sync_dir(dentry); } err = nfserrno(host_err); -@@ -1560,7 +1574,7 @@ nfsd_symlink(struct svc_rqst *rqstp, str +@@ -1563,7 +1577,7 @@ nfsd_symlink(struct svc_rqst *rqstp, str mnt_drop_write(fhp->fh_export->ex_path.mnt); @@ -1414,7 +1417,7 @@ diff -uprN linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c dput(dnew); if (err==0) err = cerr; out: -@@ -1615,7 +1629,8 @@ nfsd_link(struct svc_rqst *rqstp, struct +@@ -1618,7 +1632,8 @@ nfsd_link(struct svc_rqst *rqstp, struct err = nfserrno(host_err); goto out_dput; } @@ -1424,7 +1427,7 @@ diff -uprN linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c if (!host_err) { if (EX_ISSYNC(ffhp->fh_export)) { err = nfserrno(nfsd_sync_dir(ddir)); -@@ -1716,7 +1731,8 @@ nfsd_rename(struct svc_rqst *rqstp, stru +@@ -1719,7 +1734,8 @@ nfsd_rename(struct svc_rqst *rqstp, stru if (host_err) goto out_dput_new; @@ -1434,7 +1437,7 @@ diff -uprN 
linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c if (!host_err && EX_ISSYNC(tfhp->fh_export)) { host_err = nfsd_sync_dir(tdentry); if (!host_err) -@@ -1754,6 +1770,7 @@ nfsd_unlink(struct svc_rqst *rqstp, stru +@@ -1757,6 +1773,7 @@ nfsd_unlink(struct svc_rqst *rqstp, stru char *fname, int flen) { struct dentry *dentry, *rdentry; @@ -1442,7 +1445,7 @@ diff -uprN linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c struct inode *dirp; __be32 err; int host_err; -@@ -1768,6 +1785,7 @@ nfsd_unlink(struct svc_rqst *rqstp, stru +@@ -1771,6 +1788,7 @@ nfsd_unlink(struct svc_rqst *rqstp, stru fh_lock_nested(fhp, I_MUTEX_PARENT); dentry = fhp->fh_dentry; dirp = dentry->d_inode; @@ -1450,7 +1453,7 @@ diff -uprN linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c rdentry = lookup_one_len(fname, dentry, flen); host_err = PTR_ERR(rdentry); -@@ -1789,21 +1807,21 @@ nfsd_unlink(struct svc_rqst *rqstp, stru +@@ -1792,21 +1810,21 @@ nfsd_unlink(struct svc_rqst *rqstp, stru if (type != S_IFDIR) { /* It's UNLINK */ #ifdef MSNFS @@ -1476,7 +1479,7 @@ diff -uprN linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c host_err = nfsd_sync_dir(dentry); out_drop: -@@ -2036,7 +2054,8 @@ nfsd_get_posix_acl(struct svc_fh *fhp, i +@@ -2143,7 +2161,8 @@ nfsd_get_posix_acl(struct svc_fh *fhp, i return ERR_PTR(-EOPNOTSUPP); } @@ -1486,7 +1489,7 @@ diff -uprN linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c if (size < 0) return ERR_PTR(size); -@@ -2048,6 +2067,7 @@ nfsd_get_posix_acl(struct svc_fh *fhp, i +@@ -2155,6 +2174,7 @@ nfsd_get_posix_acl(struct svc_fh *fhp, i int nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl) { @@ -1494,7 +1497,7 @@ diff -uprN linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c struct inode *inode = fhp->fh_dentry->d_inode; char *name; void *value = NULL; -@@ -2080,21 +2100,24 @@ nfsd_set_posix_acl(struct svc_fh *fhp, i +@@ -2187,21 +2207,24 @@ nfsd_set_posix_acl(struct svc_fh *fhp, i } else size = 0; @@ -1523,10 +1526,10 @@ 
diff -uprN linux-2.6.27./fs/nfsd/vfs.c linux-2.6.27/fs/nfsd/vfs.c getout: kfree(value); -diff -uprN linux-2.6.27./fs/open.c linux-2.6.27/fs/open.c ---- linux-2.6.27./fs/open.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/fs/open.c 2008-10-29 14:28:53.306108405 +0100 -@@ -197,8 +197,8 @@ out: +diff -uprN a/fs/open.c b/fs/open.c +--- a/fs/open.c 2009-02-08 13:22:56.414930486 +0000 ++++ b/fs/open.c 2009-02-08 13:26:38.905951221 +0000 +@@ -195,8 +195,8 @@ out: return error; } @@ -1537,7 +1540,7 @@ diff -uprN linux-2.6.27./fs/open.c linux-2.6.27/fs/open.c { int err; struct iattr newattrs; -@@ -209,16 +209,15 @@ int do_truncate(struct dentry *dentry, l +@@ -207,16 +207,15 @@ int do_truncate(struct dentry *dentry, l newattrs.ia_size = length; newattrs.ia_valid = ATTR_SIZE | time_attrs; @@ -1557,7 +1560,7 @@ diff -uprN linux-2.6.27./fs/open.c linux-2.6.27/fs/open.c mutex_unlock(&dentry->d_inode->i_mutex); return err; } -@@ -251,7 +250,7 @@ static long do_sys_truncate(const char _ +@@ -249,7 +248,7 @@ static long do_sys_truncate(const char _ if (error) goto dput_and_out; @@ -1566,7 +1569,7 @@ diff -uprN linux-2.6.27./fs/open.c linux-2.6.27/fs/open.c if (error) goto mnt_drop_write_and_out; -@@ -274,7 +273,7 @@ static long do_sys_truncate(const char _ +@@ -272,7 +271,7 @@ static long do_sys_truncate(const char _ error = locks_verify_truncate(inode, NULL, length); if (!error) { DQUOT_INIT(inode); @@ -1575,7 +1578,7 @@ diff -uprN linux-2.6.27./fs/open.c linux-2.6.27/fs/open.c } put_write_and_out: -@@ -329,7 +328,8 @@ static long do_sys_ftruncate(unsigned in +@@ -327,7 +326,8 @@ static long do_sys_ftruncate(unsigned in error = locks_verify_truncate(inode, file, length); if (!error) @@ -1585,7 +1588,7 @@ diff -uprN linux-2.6.27./fs/open.c linux-2.6.27/fs/open.c out_putf: fput(file); out: -@@ -474,7 +474,7 @@ asmlinkage long sys_faccessat(int dfd, c +@@ -493,7 +493,7 @@ SYSCALL_DEFINE3(faccessat, int, dfd, con goto out_path_release; } @@ -1594,7 +1597,7 @@ diff -uprN 
linux-2.6.27./fs/open.c linux-2.6.27/fs/open.c /* SuS v2 requires we report a read only fs too */ if (res || !(mode & S_IWOTH) || special_file(inode->i_mode)) goto out_path_release; -@@ -517,7 +517,7 @@ asmlinkage long sys_chdir(const char __u +@@ -536,7 +536,7 @@ SYSCALL_DEFINE1(chdir, const char __user if (error) goto out; @@ -1603,16 +1606,16 @@ diff -uprN linux-2.6.27./fs/open.c linux-2.6.27/fs/open.c if (error) goto dput_and_out; -@@ -546,7 +546,7 @@ asmlinkage long sys_fchdir(unsigned int +@@ -565,7 +565,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd if (!S_ISDIR(inode->i_mode)) goto out_putf; - error = inode_permission(inode, MAY_EXEC | MAY_ACCESS); + error = path_permission(&file->f_path, MAY_EXEC | MAY_ACCESS); - if (!error) - set_fs_pwd(current->fs, &file->f_path); - out_putf: -@@ -564,7 +564,7 @@ asmlinkage long sys_chroot(const char __ + + if (!error && !gr_chroot_fchdir(file->f_path.dentry, file->f_path.mnt)) + error = -EPERM; +@@ -583,7 +583,7 @@ SYSCALL_DEFINE1(chroot, const char __use if (error) goto out; @@ -1621,37 +1624,37 @@ diff -uprN linux-2.6.27./fs/open.c linux-2.6.27/fs/open.c if (error) goto dput_and_out; -@@ -604,8 +604,8 @@ asmlinkage long sys_fchmod(unsigned int - if (mode == (mode_t) -1) - mode = inode->i_mode; +@@ -623,8 +623,8 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd + } + newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); - newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - err = notify_change(dentry, &newattrs); + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME | ATTR_FILE; + err = fnotify_change(dentry, file->f_path.mnt, &newattrs, file); mutex_unlock(&inode->i_mutex); - mnt_drop_write(file->f_path.mnt); - out_putf: -@@ -635,7 +635,7 @@ asmlinkage long sys_fchmodat(int dfd, co - mode = inode->i_mode; + + out_drop_write: +@@ -653,7 +653,7 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, cons + newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - error = 
notify_change(path.dentry, &newattrs); + error = notify_change(path.dentry, path.mnt, &newattrs); mutex_unlock(&inode->i_mutex); - mnt_drop_write(path.mnt); - dput_and_out: -@@ -649,7 +649,8 @@ asmlinkage long sys_chmod(const char __u + + out_drop_write: +@@ -667,7 +667,8 @@ SYSCALL_DEFINE2(chmod, const char __user return sys_fchmodat(AT_FDCWD, filename, mode); } --static int chown_common(struct dentry * dentry, uid_t user, gid_t group) +-static int chown_common(struct dentry * dentry, uid_t user, gid_t group, struct vfsmount *mnt) +static int chown_common(struct dentry * dentry, struct vfsmount *mnt, + uid_t user, gid_t group, struct file *file) { struct inode *inode = dentry->d_inode; int error; -@@ -667,8 +668,11 @@ static int chown_common(struct dentry * +@@ -685,8 +686,11 @@ static int chown_common(struct dentry * if (!S_ISDIR(inode->i_mode)) newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; @@ -1664,45 +1667,45 @@ diff -uprN linux-2.6.27./fs/open.c linux-2.6.27/fs/open.c mutex_unlock(&inode->i_mutex); return error; -@@ -685,7 +689,7 @@ asmlinkage long sys_chown(const char __u - error = mnt_want_write(path.mnt); - if (error) - goto out_release; -- error = chown_common(path.dentry, user, group); -+ error = chown_common(path.dentry, path.mnt, user, group, NULL); +@@ -703,7 +707,7 @@ SYSCALL_DEFINE3(chown, const char __user + error = cow_check_and_break(&path); + if (!error) + #endif +- error = chown_common(path.dentry, user, group, path.mnt); ++ error = chown_common(path.dentry, path.mnt, user, group, NULL); mnt_drop_write(path.mnt); out_release: path_put(&path); -@@ -710,7 +714,7 @@ asmlinkage long sys_fchownat(int dfd, co - error = mnt_want_write(path.mnt); - if (error) - goto out_release; -- error = chown_common(path.dentry, user, group); -+ error = chown_common(path.dentry, path.mnt, user, group, NULL); +@@ -728,7 +732,7 @@ SYSCALL_DEFINE5(fchownat, int, dfd, cons + error = cow_check_and_break(&path); + if (!error) + #endif +- error = 
chown_common(path.dentry, user, group, path.mnt); ++ error = chown_common(path.dentry, path.mnt, user, group, NULL); mnt_drop_write(path.mnt); out_release: path_put(&path); -@@ -729,7 +733,7 @@ asmlinkage long sys_lchown(const char __ - error = mnt_want_write(path.mnt); - if (error) - goto out_release; -- error = chown_common(path.dentry, user, group); -+ error = chown_common(path.dentry, path.mnt, user, group, NULL); +@@ -747,7 +751,7 @@ SYSCALL_DEFINE3(lchown, const char __use + error = cow_check_and_break(&path); + if (!error) + #endif +- error = chown_common(path.dentry, user, group, path.mnt); ++ error = chown_common(path.dentry, path.mnt, user, group, NULL); mnt_drop_write(path.mnt); out_release: path_put(&path); -@@ -753,7 +757,7 @@ asmlinkage long sys_fchown(unsigned int +@@ -770,7 +774,7 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd goto out_fput; dentry = file->f_path.dentry; audit_inode(NULL, dentry); -- error = chown_common(dentry, user, group); +- error = chown_common(dentry, user, group, file->f_path.mnt); + error = chown_common(dentry, file->f_path.mnt, user, group, file); mnt_drop_write(file->f_path.mnt); out_fput: fput(file); -diff -uprN linux-2.6.27./fs/reiserfs/xattr.c linux-2.6.27/fs/reiserfs/xattr.c ---- linux-2.6.27./fs/reiserfs/xattr.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/fs/reiserfs/xattr.c 2008-10-29 14:28:53.222566406 +0100 +diff -uprN a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c +--- a/fs/reiserfs/xattr.c 2008-12-24 23:26:37.000000000 +0000 ++++ b/fs/reiserfs/xattr.c 2009-02-08 13:26:38.819075170 +0000 @@ -459,7 +459,7 @@ reiserfs_xattr_set(struct inode *inode, newattrs.ia_size = buffer_size; newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; @@ -1739,9 +1742,9 @@ diff -uprN linux-2.6.27./fs/reiserfs/xattr.c linux-2.6.27/fs/reiserfs/xattr.c unlock_kernel(); out_dir: -diff -uprN linux-2.6.27./fs/seq_file.c linux-2.6.27/fs/seq_file.c ---- linux-2.6.27./fs/seq_file.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/fs/seq_file.c 
2008-10-29 14:28:53.279441727 +0100 +diff -uprN a/fs/seq_file.c b/fs/seq_file.c +--- a/fs/seq_file.c 2008-12-24 23:26:37.000000000 +0000 ++++ b/fs/seq_file.c 2009-02-08 13:26:38.879284319 +0000 @@ -412,9 +412,7 @@ int seq_path_root(struct seq_file *m, st char *s = m->buf + m->count; char *p; @@ -1753,10 +1756,10 @@ diff -uprN linux-2.6.27./fs/seq_file.c linux-2.6.27/fs/seq_file.c err = PTR_ERR(p); if (!IS_ERR(p)) { s = mangle_path(s, p, esc); -diff -uprN linux-2.6.27./fs/stat.c linux-2.6.27/fs/stat.c ---- linux-2.6.27./fs/stat.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/fs/stat.c 2008-10-29 14:28:53.205900152 +0100 -@@ -306,7 +306,7 @@ asmlinkage long sys_readlinkat(int dfd, +diff -uprN a/fs/stat.c b/fs/stat.c +--- a/fs/stat.c 2009-02-08 13:22:56.418263704 +0000 ++++ b/fs/stat.c 2009-02-08 13:26:38.801576209 +0000 +@@ -308,7 +308,7 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, co error = -EINVAL; if (inode->i_op && inode->i_op->readlink) { @@ -1765,9 +1768,9 @@ diff -uprN linux-2.6.27./fs/stat.c linux-2.6.27/fs/stat.c if (!error) { touch_atime(path.mnt, path.dentry); error = inode->i_op->readlink(path.dentry, -diff -uprN linux-2.6.27./fs/utimes.c linux-2.6.27/fs/utimes.c ---- linux-2.6.27./fs/utimes.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/fs/utimes.c 2008-10-29 14:28:53.286112985 +0100 +diff -uprN a/fs/utimes.c b/fs/utimes.c +--- a/fs/utimes.c 2009-02-08 13:22:56.421602476 +0000 ++++ b/fs/utimes.c 2009-02-08 13:26:38.885950086 +0000 @@ -48,7 +48,8 @@ static bool nsec_valid(long nsec) return nsec >= 0 && nsec <= 999999999; } @@ -1779,8 +1782,8 @@ diff -uprN linux-2.6.27./fs/utimes.c linux-2.6.27/fs/utimes.c int error; struct iattr newattrs; @@ -102,7 +103,7 @@ static int utimes_common(struct path *pa - } } + mutex_lock(&inode->i_mutex); - error = notify_change(path->dentry, &newattrs); + error = fnotify_change(path->dentry, path->mnt, &newattrs, f); @@ -1805,9 +1808,9 @@ diff -uprN linux-2.6.27./fs/utimes.c linux-2.6.27/fs/utimes.c 
path_put(&path); } -diff -uprN linux-2.6.27./fs/xattr.c linux-2.6.27/fs/xattr.c ---- linux-2.6.27./fs/xattr.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/fs/xattr.c 2008-10-29 14:28:53.292774801 +0100 +diff -uprN a/fs/xattr.c b/fs/xattr.c +--- a/fs/xattr.c 2009-02-08 13:22:56.421602476 +0000 ++++ b/fs/xattr.c 2009-02-08 13:26:38.892617808 +0000 @@ -67,8 +67,8 @@ xattr_permission(struct inode *inode, co } @@ -1922,7 +1925,7 @@ diff -uprN linux-2.6.27./fs/xattr.c linux-2.6.27/fs/xattr.c kfree(kvalue); return error; } -@@ -263,7 +267,7 @@ sys_setxattr(const char __user *pathname +@@ -263,7 +267,7 @@ SYSCALL_DEFINE5(setxattr, const char __u return error; error = mnt_want_write(path.mnt); if (!error) { @@ -1931,7 +1934,7 @@ diff -uprN linux-2.6.27./fs/xattr.c linux-2.6.27/fs/xattr.c mnt_drop_write(path.mnt); } path_put(&path); -@@ -282,7 +286,7 @@ sys_lsetxattr(const char __user *pathnam +@@ -282,7 +286,7 @@ SYSCALL_DEFINE5(lsetxattr, const char __ return error; error = mnt_want_write(path.mnt); if (!error) { @@ -1940,7 +1943,7 @@ diff -uprN linux-2.6.27./fs/xattr.c linux-2.6.27/fs/xattr.c mnt_drop_write(path.mnt); } path_put(&path); -@@ -304,7 +308,8 @@ sys_fsetxattr(int fd, const char __user +@@ -303,7 +307,8 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, cons audit_inode(NULL, dentry); error = mnt_want_write(f->f_path.mnt); if (!error) { @@ -1950,7 +1953,7 @@ diff -uprN linux-2.6.27./fs/xattr.c linux-2.6.27/fs/xattr.c mnt_drop_write(f->f_path.mnt); } fput(f); -@@ -315,8 +320,8 @@ sys_fsetxattr(int fd, const char __user +@@ -314,8 +319,8 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, cons * Extended attribute GET operations */ static ssize_t @@ -1961,7 +1964,7 @@ diff -uprN linux-2.6.27./fs/xattr.c linux-2.6.27/fs/xattr.c { ssize_t error; void *kvalue = NULL; -@@ -336,7 +341,7 @@ getxattr(struct dentry *d, const char __ +@@ -335,7 +340,7 @@ getxattr(struct dentry *d, const char __ return -ENOMEM; } @@ -1970,7 +1973,7 @@ diff -uprN linux-2.6.27./fs/xattr.c 
linux-2.6.27/fs/xattr.c if (error > 0) { if (size && copy_to_user(value, kvalue, error)) error = -EFAULT; -@@ -359,7 +364,7 @@ sys_getxattr(const char __user *pathname +@@ -357,7 +362,7 @@ SYSCALL_DEFINE4(getxattr, const char __u error = user_path(pathname, &path); if (error) return error; @@ -1979,7 +1982,7 @@ diff -uprN linux-2.6.27./fs/xattr.c linux-2.6.27/fs/xattr.c path_put(&path); return error; } -@@ -374,7 +379,7 @@ sys_lgetxattr(const char __user *pathnam +@@ -371,7 +376,7 @@ SYSCALL_DEFINE4(lgetxattr, const char __ error = user_lpath(pathname, &path); if (error) return error; @@ -1988,7 +1991,7 @@ diff -uprN linux-2.6.27./fs/xattr.c linux-2.6.27/fs/xattr.c path_put(&path); return error; } -@@ -389,7 +394,7 @@ sys_fgetxattr(int fd, const char __user +@@ -386,7 +391,7 @@ SYSCALL_DEFINE4(fgetxattr, int, fd, cons if (!f) return error; audit_inode(NULL, f->f_path.dentry); @@ -1997,7 +2000,7 @@ diff -uprN linux-2.6.27./fs/xattr.c linux-2.6.27/fs/xattr.c fput(f); return error; } -@@ -398,7 +403,8 @@ sys_fgetxattr(int fd, const char __user +@@ -395,7 +400,8 @@ SYSCALL_DEFINE4(fgetxattr, int, fd, cons * Extended attribute LIST operations */ static ssize_t @@ -2007,7 +2010,7 @@ diff -uprN linux-2.6.27./fs/xattr.c linux-2.6.27/fs/xattr.c { ssize_t error; char *klist = NULL; -@@ -411,7 +417,7 @@ listxattr(struct dentry *d, char __user +@@ -408,7 +414,7 @@ listxattr(struct dentry *d, char __user return -ENOMEM; } @@ -2016,7 +2019,7 @@ diff -uprN linux-2.6.27./fs/xattr.c linux-2.6.27/fs/xattr.c if (error > 0) { if (size && copy_to_user(list, klist, error)) error = -EFAULT; -@@ -433,7 +439,7 @@ sys_listxattr(const char __user *pathnam +@@ -430,7 +436,7 @@ SYSCALL_DEFINE3(listxattr, const char __ error = user_path(pathname, &path); if (error) return error; @@ -2025,7 +2028,7 @@ diff -uprN linux-2.6.27./fs/xattr.c linux-2.6.27/fs/xattr.c path_put(&path); return error; } -@@ -447,7 +453,7 @@ sys_llistxattr(const char __user *pathna +@@ -444,7 +450,7 @@ 
SYSCALL_DEFINE3(llistxattr, const char _ error = user_lpath(pathname, &path); if (error) return error; @@ -2034,7 +2037,7 @@ diff -uprN linux-2.6.27./fs/xattr.c linux-2.6.27/fs/xattr.c path_put(&path); return error; } -@@ -462,7 +468,7 @@ sys_flistxattr(int fd, char __user *list +@@ -458,7 +464,7 @@ SYSCALL_DEFINE3(flistxattr, int, fd, cha if (!f) return error; audit_inode(NULL, f->f_path.dentry); @@ -2043,7 +2046,7 @@ diff -uprN linux-2.6.27./fs/xattr.c linux-2.6.27/fs/xattr.c fput(f); return error; } -@@ -471,7 +477,8 @@ sys_flistxattr(int fd, char __user *list +@@ -467,7 +473,8 @@ SYSCALL_DEFINE3(flistxattr, int, fd, cha * Extended attribute REMOVE operations */ static long @@ -2053,7 +2056,7 @@ diff -uprN linux-2.6.27./fs/xattr.c linux-2.6.27/fs/xattr.c { int error; char kname[XATTR_NAME_MAX + 1]; -@@ -482,7 +489,7 @@ removexattr(struct dentry *d, const char +@@ -478,7 +485,7 @@ removexattr(struct dentry *d, const char if (error < 0) return error; @@ -2061,8 +2064,8 @@ diff -uprN linux-2.6.27./fs/xattr.c linux-2.6.27/fs/xattr.c + return vfs_removexattr(dentry, mnt, kname, file); } - asmlinkage long -@@ -496,7 +503,7 @@ sys_removexattr(const char __user *pathn + SYSCALL_DEFINE2(removexattr, const char __user *, pathname, +@@ -492,7 +499,7 @@ SYSCALL_DEFINE2(removexattr, const char return error; error = mnt_want_write(path.mnt); if (!error) { @@ -2071,7 +2074,7 @@ diff -uprN linux-2.6.27./fs/xattr.c linux-2.6.27/fs/xattr.c mnt_drop_write(path.mnt); } path_put(&path); -@@ -514,7 +521,7 @@ sys_lremovexattr(const char __user *path +@@ -510,7 +517,7 @@ SYSCALL_DEFINE2(lremovexattr, const char return error; error = mnt_want_write(path.mnt); if (!error) { @@ -2080,7 +2083,7 @@ diff -uprN linux-2.6.27./fs/xattr.c linux-2.6.27/fs/xattr.c mnt_drop_write(path.mnt); } path_put(&path); -@@ -535,7 +542,7 @@ sys_fremovexattr(int fd, const char __us +@@ -530,7 +537,7 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, c audit_inode(NULL, dentry); error = mnt_want_write(f->f_path.mnt); if 
(!error) { @@ -2089,9 +2092,9 @@ diff -uprN linux-2.6.27./fs/xattr.c linux-2.6.27/fs/xattr.c mnt_drop_write(f->f_path.mnt); } fput(f); -diff -uprN linux-2.6.27./include/linux/audit.h linux-2.6.27/include/linux/audit.h ---- linux-2.6.27./include/linux/audit.h 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/include/linux/audit.h 2008-10-29 14:28:53.309441302 +0100 +diff -uprN a/include/linux/audit.h b/include/linux/audit.h +--- a/include/linux/audit.h 2008-12-24 23:26:37.000000000 +0000 ++++ b/include/linux/audit.h 2009-02-08 13:26:38.912617629 +0000 @@ -33,7 +33,7 @@ * 1200 - 1299 messages internal to the audit daemon * 1300 - 1399 audit event messages @@ -2115,7 +2118,7 @@ diff -uprN linux-2.6.27./include/linux/audit.h linux-2.6.27/include/linux/audit. #define AUDIT_FIRST_KERN_ANOM_MSG 1700 #define AUDIT_LAST_KERN_ANOM_MSG 1799 #define AUDIT_ANOM_PROMISCUOUS 1700 /* Device changed promiscuous mode */ -@@ -545,6 +552,9 @@ extern void audit_log(struct audit_ +@@ -547,6 +554,9 @@ extern void audit_log(struct audit_ __attribute__((format(printf,4,5))); extern struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, int type); @@ -2125,10 +2128,10 @@ diff -uprN linux-2.6.27./include/linux/audit.h linux-2.6.27/include/linux/audit. extern void audit_log_format(struct audit_buffer *ab, const char *fmt, ...) 
__attribute__((format(printf,2,3))); -diff -uprN linux-2.6.27./include/linux/dcache.h linux-2.6.27/include/linux/dcache.h ---- linux-2.6.27./include/linux/dcache.h 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/include/linux/dcache.h 2008-10-29 14:28:53.279441727 +0100 -@@ -299,9 +299,12 @@ extern int d_validate(struct dentry *, s +diff -uprN a/include/linux/dcache.h b/include/linux/dcache.h +--- a/include/linux/dcache.h 2008-12-24 23:26:37.000000000 +0000 ++++ b/include/linux/dcache.h 2009-02-08 13:26:38.879284319 +0000 +@@ -300,9 +300,12 @@ extern int d_validate(struct dentry *, s /* * helper function for dentry_operations.d_dname() members */ @@ -2142,10 +2145,10 @@ diff -uprN linux-2.6.27./include/linux/dcache.h linux-2.6.27/include/linux/dcach extern char *d_path(const struct path *, char *, int); extern char *dentry_path(struct dentry *, char *, int); -diff -uprN linux-2.6.27./include/linux/fs.h linux-2.6.27/include/linux/fs.h ---- linux-2.6.27./include/linux/fs.h 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/include/linux/fs.h 2008-10-29 14:28:53.306108405 +0100 -@@ -361,6 +361,10 @@ struct iattr { +diff -uprN a/include/linux/fs.h b/include/linux/fs.h +--- a/include/linux/fs.h 2009-02-08 13:22:56.424935829 +0000 ++++ b/include/linux/fs.h 2009-02-08 13:26:38.909283473 +0000 +@@ -372,6 +372,10 @@ struct iattr { * Not an attribute, but an auxilary info for filesystems wanting to * implement an ftruncate() like method. NOTE: filesystem should * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL). 
@@ -2156,7 +2159,7 @@ diff -uprN linux-2.6.27./include/linux/fs.h linux-2.6.27/include/linux/fs.h */ struct file *ia_file; }; -@@ -1160,13 +1164,13 @@ extern void unlock_super(struct super_bl +@@ -1207,13 +1211,13 @@ extern void unlock_super(struct super_bl */ extern int vfs_permission(struct nameidata *, int); extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *); @@ -2177,8 +2180,8 @@ diff -uprN linux-2.6.27./include/linux/fs.h linux-2.6.27/include/linux/fs.h /* * VFS dentry helper functions. -@@ -1179,6 +1183,11 @@ extern void dentry_unhash(struct dentry - extern int file_permission(struct file *, int); +@@ -1240,6 +1244,11 @@ int fiemap_fill_next_extent(struct fiema + int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); /* + * VFS path helper functions. @@ -2189,7 +2192,7 @@ diff -uprN linux-2.6.27./include/linux/fs.h linux-2.6.27/include/linux/fs.h * File types * * NOTE! These match bits 12..15 of stat.st_mode -@@ -1260,6 +1269,7 @@ struct file_operations { +@@ -1308,6 +1317,7 @@ struct file_operations { ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); int (*setlease)(struct file *, long, struct file_lock **); @@ -2197,7 +2200,7 @@ diff -uprN linux-2.6.27./include/linux/fs.h linux-2.6.27/include/linux/fs.h }; struct inode_operations { -@@ -1612,8 +1622,8 @@ static inline int break_lease(struct ino +@@ -1671,8 +1681,8 @@ static inline int break_lease(struct ino /* fs/open.c */ @@ -2208,7 +2211,7 @@ diff -uprN linux-2.6.27./include/linux/fs.h linux-2.6.27/include/linux/fs.h extern long do_sys_open(int dfd, const char __user *filename, int flags, int mode); extern struct file *filp_open(const char *, int, int); -@@ -1771,7 +1781,8 @@ extern int do_remount_sb(struct super_bl +@@ -1832,7 +1842,8 @@ extern int do_remount_sb(struct super_bl #ifdef CONFIG_BLOCK extern sector_t 
bmap(struct inode *, sector_t); #endif @@ -2218,20 +2221,20 @@ diff -uprN linux-2.6.27./include/linux/fs.h linux-2.6.27/include/linux/fs.h extern int inode_permission(struct inode *, int); extern int generic_permission(struct inode *, int, int (*check_acl)(struct inode *, int)); -diff -uprN linux-2.6.27./include/linux/mount.h linux-2.6.27/include/linux/mount.h ---- linux-2.6.27./include/linux/mount.h 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/include/linux/mount.h 2008-10-29 14:28:53.279441727 +0100 -@@ -114,4 +114,6 @@ extern void mark_mounts_for_expiry(struc +diff -uprN a/include/linux/mount.h b/include/linux/mount.h +--- a/include/linux/mount.h 2008-12-24 23:26:37.000000000 +0000 ++++ b/include/linux/mount.h 2009-02-08 13:26:38.879284319 +0000 +@@ -112,4 +112,6 @@ extern void mark_mounts_for_expiry(struc extern spinlock_t vfsmount_lock; extern dev_t name_to_dev_t(char *name); +extern char *d_namespace_path(struct dentry *, struct vfsmount *, char *, int); + #endif /* _LINUX_MOUNT_H */ -diff -uprN linux-2.6.27./include/linux/nfsd/nfsd.h linux-2.6.27/include/linux/nfsd/nfsd.h ---- linux-2.6.27./include/linux/nfsd/nfsd.h 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/include/linux/nfsd/nfsd.h 2008-10-29 14:28:53.259441865 +0100 -@@ -85,7 +85,8 @@ __be32 nfsd_setattr(struct svc_rqst *, +diff -uprN a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h +--- a/include/linux/nfsd/nfsd.h 2008-12-24 23:26:37.000000000 +0000 ++++ b/include/linux/nfsd/nfsd.h 2009-02-08 13:26:38.855950336 +0000 +@@ -86,7 +86,8 @@ __be32 nfsd_setattr(struct svc_rqst *, #ifdef CONFIG_NFSD_V4 __be32 nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *, struct nfs4_acl *); @@ -2241,9 +2244,9 @@ diff -uprN linux-2.6.27./include/linux/nfsd/nfsd.h linux-2.6.27/include/linux/nf #endif /* CONFIG_NFSD_V4 */ __be32 nfsd_create(struct svc_rqst *, struct svc_fh *, char *name, int len, struct iattr *attrs, -diff -uprN linux-2.6.27./include/linux/security.h 
linux-2.6.27/include/linux/security.h ---- linux-2.6.27./include/linux/security.h 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/include/linux/security.h 2008-10-29 14:28:53.302774553 +0100 +diff -uprN a/include/linux/security.h b/include/linux/security.h +--- a/include/linux/security.h 2008-12-24 23:26:37.000000000 +0000 ++++ b/include/linux/security.h 2009-02-08 13:26:38.902622774 +0000 @@ -54,9 +54,11 @@ extern void cap_capset_set(struct task_s extern int cap_bprm_set_security(struct linux_binprm *bprm); extern void cap_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); @@ -2453,7 +2456,7 @@ diff -uprN linux-2.6.27./include/linux/security.h linux-2.6.27/include/linux/sec int (*task_create) (unsigned long clone_flags); int (*task_alloc_security) (struct task_struct *p); -@@ -1622,30 +1664,43 @@ int security_inode_alloc(struct inode *i +@@ -1618,30 +1660,43 @@ int security_inode_alloc(struct inode *i void security_inode_free(struct inode *inode); int security_inode_init_security(struct inode *inode, struct inode *dir, char **name, void **value, size_t *len); @@ -2515,7 +2518,7 @@ diff -uprN linux-2.6.27./include/linux/security.h linux-2.6.27/include/linux/sec int security_inode_need_killpriv(struct dentry *dentry); int security_inode_killpriv(struct dentry *dentry); int security_inode_getsecurity(const struct inode *inode, const char *name, void **buffer, bool alloc); -@@ -1668,6 +1723,7 @@ int security_file_send_sigiotask(struct +@@ -1664,6 +1719,7 @@ int security_file_send_sigiotask(struct struct fown_struct *fown, int sig); int security_file_receive(struct file *file); int security_dentry_open(struct file *file); @@ -2523,7 +2526,7 @@ diff -uprN linux-2.6.27./include/linux/security.h linux-2.6.27/include/linux/sec int security_task_create(unsigned long clone_flags); int security_task_alloc(struct task_struct *p); void security_task_free(struct task_struct *p); -@@ -1968,26 +2024,31 @@ static inline int security_inode_init_se +@@ -1973,26 +2029,31 
@@ static inline int security_inode_init_se static inline int security_inode_create(struct inode *dir, struct dentry *dentry, @@ -2558,7 +2561,7 @@ diff -uprN linux-2.6.27./include/linux/security.h linux-2.6.27/include/linux/sec const char *old_name) { return 0; -@@ -1995,19 +2056,22 @@ static inline int security_inode_symlink +@@ -2000,19 +2061,22 @@ static inline int security_inode_symlink static inline int security_inode_mkdir(struct inode *dir, struct dentry *dentry, @@ -2582,7 +2585,7 @@ diff -uprN linux-2.6.27./include/linux/security.h linux-2.6.27/include/linux/sec int mode, dev_t dev) { return 0; -@@ -2015,13 +2079,16 @@ static inline int security_inode_mknod(s +@@ -2020,13 +2084,16 @@ static inline int security_inode_mknod(s static inline int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, @@ -2601,7 +2604,7 @@ diff -uprN linux-2.6.27./include/linux/security.h linux-2.6.27/include/linux/sec { return 0; } -@@ -2038,7 +2105,8 @@ static inline int security_inode_permiss +@@ -2043,7 +2110,8 @@ static inline int security_inode_permiss } static inline int security_inode_setattr(struct dentry *dentry, @@ -2611,7 +2614,7 @@ diff -uprN linux-2.6.27./include/linux/security.h linux-2.6.27/include/linux/sec { return 0; } -@@ -2053,30 +2121,42 @@ static inline void security_inode_delete +@@ -2058,30 +2126,42 @@ static inline void security_inode_delete { } static inline int security_inode_setxattr(struct dentry *dentry, @@ -2661,7 +2664,7 @@ diff -uprN linux-2.6.27./include/linux/security.h linux-2.6.27/include/linux/sec } static inline int security_inode_need_killpriv(struct dentry *dentry) -@@ -2177,6 +2257,11 @@ static inline int security_dentry_open(s +@@ -2182,6 +2262,11 @@ static inline int security_dentry_open(s return 0; } @@ -2673,9 +2676,9 @@ diff -uprN linux-2.6.27./include/linux/security.h linux-2.6.27/include/linux/sec static inline int security_task_create(unsigned long clone_flags) { return 0; -diff -uprN 
linux-2.6.27./include/linux/sysctl.h linux-2.6.27/include/linux/sysctl.h ---- linux-2.6.27./include/linux/sysctl.h 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/include/linux/sysctl.h 2008-10-29 14:28:53.299441785 +0100 +diff -uprN a/include/linux/sysctl.h b/include/linux/sysctl.h +--- a/include/linux/sysctl.h 2008-12-24 23:26:37.000000000 +0000 ++++ b/include/linux/sysctl.h 2009-02-08 13:26:38.899283105 +0000 @@ -996,6 +996,8 @@ extern int proc_doulongvec_minmax(struct extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int, struct file *, void __user *, size_t *, loff_t *); @@ -2685,9 +2688,9 @@ diff -uprN linux-2.6.27./include/linux/sysctl.h linux-2.6.27/include/linux/sysct extern int do_sysctl (int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen); -diff -uprN linux-2.6.27./include/linux/xattr.h linux-2.6.27/include/linux/xattr.h ---- linux-2.6.27./include/linux/xattr.h 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/include/linux/xattr.h 2008-10-29 14:28:53.292774801 +0100 +diff -uprN a/include/linux/xattr.h b/include/linux/xattr.h +--- a/include/linux/xattr.h 2008-12-24 23:26:37.000000000 +0000 ++++ b/include/linux/xattr.h 2009-02-08 13:26:38.895950162 +0000 @@ -16,6 +16,8 @@ #ifdef __KERNEL__ @@ -2712,10 +2715,10 @@ diff -uprN linux-2.6.27./include/linux/xattr.h linux-2.6.27/include/linux/xattr. 
ssize_t generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size); ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size); -diff -uprN linux-2.6.27./ipc/mqueue.c linux-2.6.27/ipc/mqueue.c ---- linux-2.6.27./ipc/mqueue.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/ipc/mqueue.c 2008-10-29 14:28:53.232566922 +0100 -@@ -745,7 +745,7 @@ asmlinkage long sys_mq_unlink(const char +diff -uprN a/ipc/mqueue.c b/ipc/mqueue.c +--- a/ipc/mqueue.c 2009-02-08 13:22:56.428264020 +0000 ++++ b/ipc/mqueue.c 2009-02-08 13:26:38.832414683 +0000 +@@ -753,7 +753,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __ err = mnt_want_write(mqueue_mnt); if (err) goto out_err; @@ -2724,10 +2727,10 @@ diff -uprN linux-2.6.27./ipc/mqueue.c linux-2.6.27/ipc/mqueue.c mnt_drop_write(mqueue_mnt); out_err: dput(dentry); -diff -uprN linux-2.6.27./kernel/audit.c linux-2.6.27/kernel/audit.c ---- linux-2.6.27./kernel/audit.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/kernel/audit.c 2008-10-29 14:28:53.309441302 +0100 -@@ -1231,8 +1231,7 @@ static inline int audit_expand(struct au +diff -uprN a/kernel/audit.c b/kernel/audit.c +--- a/kernel/audit.c 2008-12-24 23:26:37.000000000 +0000 ++++ b/kernel/audit.c 2009-02-08 13:26:38.912617629 +0000 +@@ -1243,8 +1243,7 @@ static inline int audit_expand(struct au * will be called a second time. Currently, we assume that a printk * can't format message larger than 1024 bytes, so we don't either. 
*/ @@ -2737,17 +2740,17 @@ diff -uprN linux-2.6.27./kernel/audit.c linux-2.6.27/kernel/audit.c { int len, avail; struct sk_buff *skb; -@@ -1506,3 +1505,6 @@ EXPORT_SYMBOL(audit_log_start); +@@ -1518,3 +1517,6 @@ EXPORT_SYMBOL(audit_log_start); EXPORT_SYMBOL(audit_log_end); EXPORT_SYMBOL(audit_log_format); EXPORT_SYMBOL(audit_log); +EXPORT_SYMBOL_GPL(audit_log_vformat); +EXPORT_SYMBOL_GPL(audit_log_untrustedstring); +EXPORT_SYMBOL_GPL(audit_log_d_path); -diff -uprN linux-2.6.27./kernel/cgroup.c linux-2.6.27/kernel/cgroup.c ---- linux-2.6.27./kernel/cgroup.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/kernel/cgroup.c 2008-10-29 14:28:53.185696851 +0100 -@@ -2905,7 +2905,7 @@ int cgroup_clone(struct task_struct *tsk +diff -uprN a/kernel/cgroup.c b/kernel/cgroup.c +--- a/kernel/cgroup.c 2009-02-08 13:22:56.431602672 +0000 ++++ b/kernel/cgroup.c 2009-02-08 13:26:38.778246972 +0000 +@@ -2968,7 +2968,7 @@ int cgroup_clone(struct task_struct *tsk } /* Create the cgroup directory, which also creates the cgroup */ @@ -2756,10 +2759,10 @@ diff -uprN linux-2.6.27./kernel/cgroup.c linux-2.6.27/kernel/cgroup.c child = __d_cgrp(dentry); dput(dentry); if (ret) { -diff -uprN linux-2.6.27./kernel/sysctl.c linux-2.6.27/kernel/sysctl.c ---- linux-2.6.27./kernel/sysctl.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/kernel/sysctl.c 2008-10-29 14:28:53.299441785 +0100 -@@ -1506,6 +1506,33 @@ void register_sysctl_root(struct ctl_tab +diff -uprN a/kernel/sysctl.c b/kernel/sysctl.c +--- a/kernel/sysctl.c 2009-02-08 13:22:56.454931677 +0000 ++++ b/kernel/sysctl.c 2009-02-08 13:26:38.899283105 +0000 +@@ -1528,6 +1528,33 @@ void register_sysctl_root(struct ctl_tab spin_unlock(&sysctl_lock); } @@ -2793,10 +2796,10 @@ diff -uprN linux-2.6.27./kernel/sysctl.c linux-2.6.27/kernel/sysctl.c #ifdef CONFIG_SYSCTL_SYSCALL /* Perform the actual read/write of a sysctl table entry. 
*/ static int do_sysctl_strategy(struct ctl_table_root *root, -diff -uprN linux-2.6.27./mm/filemap.c linux-2.6.27/mm/filemap.c ---- linux-2.6.27./mm/filemap.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/mm/filemap.c 2008-10-29 14:28:53.175696536 +0100 -@@ -1760,12 +1760,12 @@ int should_remove_suid(struct dentry *de +diff -uprN a/mm/filemap.c b/mm/filemap.c +--- a/mm/filemap.c 2009-02-08 13:22:56.478269615 +0000 ++++ b/mm/filemap.c 2009-02-08 13:26:38.769075001 +0000 +@@ -1781,12 +1781,12 @@ int should_remove_suid(struct dentry *de } EXPORT_SYMBOL(should_remove_suid); @@ -2811,7 +2814,7 @@ diff -uprN linux-2.6.27./mm/filemap.c linux-2.6.27/mm/filemap.c } int file_remove_suid(struct file *file) -@@ -1780,7 +1780,7 @@ int file_remove_suid(struct file *file) +@@ -1801,7 +1801,7 @@ int file_remove_suid(struct file *file) if (killpriv) error = security_inode_killpriv(dentry); if (!error && killsuid) @@ -2820,23 +2823,39 @@ diff -uprN linux-2.6.27./mm/filemap.c linux-2.6.27/mm/filemap.c return error; } -diff -uprN linux-2.6.27./net/unix/af_unix.c linux-2.6.27/net/unix/af_unix.c ---- linux-2.6.27./net/unix/af_unix.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/net/unix/af_unix.c 2008-10-29 14:28:53.192566383 +0100 -@@ -827,7 +827,8 @@ static int unix_bind(struct socket *sock - err = mnt_want_write(nd.path.mnt); - if (err) +diff -uprN a/net/unix/af_unix.c b/net/unix/af_unix.c +--- a/net/unix/af_unix.c 2008-12-24 23:26:37.000000000 +0000 ++++ b/net/unix/af_unix.c 2009-02-08 13:26:38.789081510 +0000 +@@ -829,7 +829,8 @@ static int unix_bind(struct socket *sock goto out_mknod_dput; + } + - err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0); + err = vfs_mknod(nd.path.dentry->d_inode, dentry, nd.path.mnt, + mode, 0); mnt_drop_write(nd.path.mnt); if (err) goto out_mknod_dput; -diff -uprN linux-2.6.27./security/Kconfig linux-2.6.27/security/Kconfig ---- linux-2.6.27./security/Kconfig 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/security/Kconfig 
2008-10-29 14:28:53.322775050 +0100 -@@ -117,6 +117,7 @@ config SECURITY_DEFAULT_MMAP_MIN_ADDR +diff -uprN a/security/Kconfig b/security/Kconfig +--- a/security/Kconfig 2008-12-24 23:26:37.000000000 +0000 ++++ b/security/Kconfig 2009-02-08 13:26:38.922616652 +0000 +@@ -59,6 +59,15 @@ config SECURITYFS + + If you are unsure how to answer this question, answer N. + ++config SECURITY_DEFAULT ++ string "Default security module" ++ depends on SECURITY ++ default "" ++ help ++ This determines the security module used if the security= ++ boot parmater is not provided. If a security module is not ++ specified the first module to register will be used. ++ + config SECURITY_NETWORK + bool "Socket and Networking Security Hooks" + depends on SECURITY +@@ -125,6 +134,7 @@ config SECURITY_DEFAULT_MMAP_MIN_ADDR source security/selinux/Kconfig source security/smack/Kconfig @@ -2844,25 +2863,34 @@ diff -uprN linux-2.6.27./security/Kconfig linux-2.6.27/security/Kconfig endmenu -diff -uprN linux-2.6.27./security/Makefile linux-2.6.27/security/Makefile ---- linux-2.6.27./security/Makefile 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/security/Makefile 2008-10-29 14:28:53.322775050 +0100 -@@ -14,5 +14,6 @@ obj-$(CONFIG_SECURITY) += security.o c +diff -uprN a/security/Makefile b/security/Makefile +--- a/security/Makefile 2008-12-24 23:26:37.000000000 +0000 ++++ b/security/Makefile 2009-02-08 13:26:38.949289615 +0000 +@@ -5,6 +5,7 @@ + obj-$(CONFIG_KEYS) += keys/ + subdir-$(CONFIG_SECURITY_SELINUX) += selinux + subdir-$(CONFIG_SECURITY_SMACK) += smack ++subdir-$(CONFIG_SECURITY_APPARMOR) += apparmor + + # always enable default capabilities + obj-y += commoncap.o +@@ -15,5 +16,6 @@ obj-$(CONFIG_SECURITYFS) += inode.o # Must precede capability.o in order to stack properly. 
obj-$(CONFIG_SECURITY_SELINUX) += selinux/built-in.o obj-$(CONFIG_SECURITY_SMACK) += smack/built-in.o --obj-$(CONFIG_SECURITY_ROOTPLUG) += root_plug.o -+obj-$(CONFIG_SECURITY_APPARMOR) += commoncap.o apparmor/ -+ obj-$(CONFIG_SECURITY_ROOTPLUG) += root_plug.o ++obj-$(CONFIG_SECURITY_APPARMOR) += apparmor/built-in.o + obj-$(CONFIG_SECURITY_ROOTPLUG) += root_plug.o obj-$(CONFIG_CGROUP_DEVICE) += device_cgroup.o -diff -uprN linux-2.6.27./security/apparmor/Kconfig linux-2.6.27/security/apparmor/Kconfig ---- linux-2.6.27./security/apparmor/Kconfig 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.27/security/apparmor/Kconfig 2008-10-29 14:28:53.319441671 +0100 -@@ -0,0 +1,42 @@ +diff -uprN a/security/apparmor/Kconfig b/security/apparmor/Kconfig +--- a/security/apparmor/Kconfig 1970-01-01 00:00:00.000000000 +0000 ++++ b/security/apparmor/Kconfig 2009-02-08 13:26:38.945950214 +0000 +@@ -0,0 +1,44 @@ +config SECURITY_APPARMOR + bool "AppArmor support" + depends on SECURITY ++ depends on SECURITY_NETWORK + select AUDIT ++ default n + help + This enables the AppArmor security module. + Required userspace tools (if they are not included in your @@ -2901,9 +2929,9 @@ diff -uprN linux-2.6.27./security/apparmor/Kconfig linux-2.6.27/security/apparmo + parameters are difficult to employ. + + If you are unsure how to answer this question, answer N. 
-diff -uprN linux-2.6.27./security/apparmor/Makefile linux-2.6.27/security/apparmor/Makefile ---- linux-2.6.27./security/apparmor/Makefile 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.27/security/apparmor/Makefile 2008-10-29 14:28:53.322775050 +0100 +diff -uprN a/security/apparmor/Makefile b/security/apparmor/Makefile +--- a/security/apparmor/Makefile 1970-01-01 00:00:00.000000000 +0000 ++++ b/security/apparmor/Makefile 2009-02-08 13:26:38.925956402 +0000 @@ -0,0 +1,18 @@ +# Makefile for AppArmor Linux Security Module +# @@ -2923,10 +2951,10 @@ diff -uprN linux-2.6.27./security/apparmor/Makefile linux-2.6.27/security/apparm + $(call cmd,make-caps) +$(obj)/af_names.h : $(srctree)/include/linux/socket.h + $(call cmd,make-af) -diff -uprN linux-2.6.27./security/apparmor/apparmor.h linux-2.6.27/security/apparmor/apparmor.h ---- linux-2.6.27./security/apparmor/apparmor.h 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.27/security/apparmor/apparmor.h 2008-10-29 14:28:53.326113825 +0100 -@@ -0,0 +1,403 @@ +diff -uprN a/security/apparmor/apparmor.h b/security/apparmor/apparmor.h +--- a/security/apparmor/apparmor.h 1970-01-01 00:00:00.000000000 +0000 ++++ b/security/apparmor/apparmor.h 2009-02-08 13:26:38.945950214 +0000 +@@ -0,0 +1,405 @@ +/* + * Copyright (C) 1998-2007 Novell/SUSE + * @@ -3067,7 +3095,7 @@ diff -uprN linux-2.6.27./security/apparmor/apparmor.h linux-2.6.27/security/appa + printk(KERN_DEBUG "AppArmor: " fmt, ##args); \ + } while (0) + -+#define AA_ERROR(fmt, args...) printk(KERN_ERR "AppArmor: " fmt, ##args) ++#define AA_ERROR(fmt, args...) 
do { if (printk_ratelimit()) printk(KERN_ERR "AppArmor: " fmt, ##args); } while (0) + +/* struct aa_rlimit - rlimits settings for the profile + * @mask: which hard limits to set @@ -3143,9 +3171,9 @@ diff -uprN linux-2.6.27./security/apparmor/apparmor.h linux-2.6.27/security/appa + char **exec_table; + struct aa_dfa *file_rules; + struct { -+ int hat; -+ int complain; -+ int audit; ++ u32 hat; ++ u32 complain; ++ u32 audit; + } flags; + int isstale; + @@ -3226,6 +3254,9 @@ diff -uprN linux-2.6.27./security/apparmor/apparmor.h linux-2.6.27/security/appa + aa_lock_task_release +}; + ++/* apparmor/profiles */ ++extern struct seq_operations apparmorfs_profiles_op; ++ +/* main.c */ +extern int alloc_default_namespace(void); +extern void free_default_namespace(void); @@ -3236,7 +3267,6 @@ diff -uprN linux-2.6.27./security/apparmor/apparmor.h linux-2.6.27/security/appa +int aa_audit_reject(struct aa_profile *profile, struct aa_audit *sa); +extern int aa_audit_syscallreject(struct aa_profile *profile, gfp_t gfp, + const char *); -+extern int aa_audit(struct aa_profile *profile, struct aa_audit *); + +extern int aa_attr(struct aa_profile *profile, struct dentry *dentry, + struct vfsmount *mnt, struct iattr *iattr); @@ -3263,7 +3293,7 @@ diff -uprN linux-2.6.27./security/apparmor/apparmor.h linux-2.6.27/security/appa +extern struct aa_profile *__aa_replace_profile(struct task_struct *task, + struct aa_profile *profile); +extern struct aa_task_context *lock_task_and_profiles(struct task_struct *task, -+ struct aa_profile *profile); ++ struct aa_profile *profile); +extern void unlock_task_and_profiles(struct task_struct *task, + struct aa_task_context *cxt, + struct aa_profile *profile); @@ -3330,10 +3360,10 @@ diff -uprN linux-2.6.27./security/apparmor/apparmor.h linux-2.6.27/security/appa + unsigned int start); + +#endif /* __APPARMOR_H */ -diff -uprN linux-2.6.27./security/apparmor/apparmorfs.c linux-2.6.27/security/apparmor/apparmorfs.c ---- 
linux-2.6.27./security/apparmor/apparmorfs.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.27/security/apparmor/apparmorfs.c 2008-10-29 14:28:53.326113825 +0100 -@@ -0,0 +1,281 @@ +diff -uprN a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c +--- a/security/apparmor/apparmorfs.c 1970-01-01 00:00:00.000000000 +0000 ++++ b/security/apparmor/apparmorfs.c 2009-02-08 13:26:38.942622976 +0000 +@@ -0,0 +1,277 @@ +/* + * Copyright (C) 1998-2007 Novell/SUSE + * @@ -3349,7 +3379,7 @@ diff -uprN linux-2.6.27./security/apparmor/apparmorfs.c linux-2.6.27/security/ap +#include +#include +#include -+#include ++#include +#include + +#include "apparmor.h" @@ -3401,9 +3431,6 @@ diff -uprN linux-2.6.27./security/apparmor/apparmorfs.c linux-2.6.27/security/ap + return data; +} + -+/* apparmor/profiles */ -+extern struct seq_operations apparmorfs_profiles_op; -+ +static int aa_profiles_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &apparmorfs_profiles_op); @@ -3572,8 +3599,7 @@ diff -uprN linux-2.6.27./security/apparmor/apparmorfs.c linux-2.6.27/security/ap + return 0; + + if (apparmor_dentry) { -+ AA_ERROR("%s: AppArmor securityfs already exists\n", -+ __FUNCTION__); ++ AA_ERROR("%s: AppArmor securityfs already exists\n", __func__); + return -EEXIST; + } + @@ -3581,7 +3607,7 @@ diff -uprN linux-2.6.27./security/apparmor/apparmorfs.c linux-2.6.27/security/ap + if (IS_ERR(apparmor_dentry)) { + error = PTR_ERR(apparmor_dentry); + apparmor_dentry = NULL; -+ goto error; ++ goto error; + } + error = aafs_create("profiles", 0440, &apparmorfs_profiles_fops); + if (error) @@ -3615,9 +3641,9 @@ diff -uprN linux-2.6.27./security/apparmor/apparmorfs.c linux-2.6.27/security/ap + +fs_initcall(create_apparmorfs); + -diff -uprN linux-2.6.27./security/apparmor/inline.h linux-2.6.27/security/apparmor/inline.h ---- linux-2.6.27./security/apparmor/inline.h 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.27/security/apparmor/inline.h 2008-10-29 
14:28:53.319441671 +0100 +diff -uprN a/security/apparmor/inline.h b/security/apparmor/inline.h +--- a/security/apparmor/inline.h 1970-01-01 00:00:00.000000000 +0000 ++++ b/security/apparmor/inline.h 2009-02-08 13:26:38.945950214 +0000 @@ -0,0 +1,250 @@ +/* + * Copyright (C) 1998-2007 Novell/SUSE @@ -3642,7 +3668,7 @@ diff -uprN linux-2.6.27./security/apparmor/inline.h linux-2.6.27/security/apparm + +static inline struct aa_task_context *aa_task_context(struct task_struct *task) +{ -+ return (struct aa_task_context *) rcu_dereference(task->security); ++ return rcu_dereference(task->security); +} + +static inline struct aa_namespace *aa_get_namespace(struct aa_namespace *ns) @@ -3712,7 +3738,7 @@ diff -uprN linux-2.6.27./security/apparmor/inline.h linux-2.6.27/security/apparm +static inline struct aa_profile *aa_find_profile(struct aa_namespace *ns, + const char *name) +{ -+ struct aa_profile *profile = NULL; ++ struct aa_profile *profile; + + read_lock(&ns->lock); + profile = aa_dup_profile(__aa_find_profile(name, &ns->profiles)); @@ -3834,7 +3860,7 @@ diff -uprN linux-2.6.27./security/apparmor/inline.h linux-2.6.27/security/apparm + * gives us RCU reader safety. + */ +static inline void unlock_both_profiles(struct aa_profile *profile1, -+ struct aa_profile *profile2) ++ struct aa_profile *profile2) +{ + /* Unlock the two profiles. 
*/ + if (!profile1 || profile1 == profile2) { @@ -3869,10 +3895,10 @@ diff -uprN linux-2.6.27./security/apparmor/inline.h linux-2.6.27/security/apparm +} + +#endif /* __INLINE_H__ */ -diff -uprN linux-2.6.27./security/apparmor/list.c linux-2.6.27/security/apparmor/list.c ---- linux-2.6.27./security/apparmor/list.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.27/security/apparmor/list.c 2008-10-29 14:28:53.319441671 +0100 -@@ -0,0 +1,174 @@ +diff -uprN a/security/apparmor/list.c b/security/apparmor/list.c +--- a/security/apparmor/list.c 1970-01-01 00:00:00.000000000 +0000 ++++ b/security/apparmor/list.c 2009-02-08 13:26:38.945950214 +0000 +@@ -0,0 +1,176 @@ +/* + * Copyright (C) 1998-2007 Novell/SUSE + * @@ -3890,7 +3916,7 @@ diff -uprN linux-2.6.27./security/apparmor/list.c linux-2.6.27/security/apparmor + +/* list of profile namespaces and lock */ +LIST_HEAD(profile_ns_list); -+rwlock_t profile_ns_list_lock = RW_LOCK_UNLOCKED; ++DEFINE_RWLOCK(profile_ns_list_lock); + +/** + * __aa_find_namespace - look up a profile namespace on the namespace list @@ -3987,6 +4013,7 @@ diff -uprN linux-2.6.27./security/apparmor/list.c linux-2.6.27/security/apparmor +} + +static void *p_start(struct seq_file *f, loff_t *pos) ++ __acquires(profile_ns_list_lock) +{ + struct aa_namespace *ns; + loff_t l = *pos; @@ -4019,6 +4046,7 @@ diff -uprN linux-2.6.27./security/apparmor/list.c linux-2.6.27/security/apparmor +} + +static void p_stop(struct seq_file *f, void *p) ++ __releases(profile_ns_list_lock) +{ + struct aa_profile *profile = (struct aa_profile *) p; + @@ -4047,9 +4075,9 @@ diff -uprN linux-2.6.27./security/apparmor/list.c linux-2.6.27/security/apparmor + .stop = p_stop, + .show = seq_show_profile, +}; -diff -uprN linux-2.6.27./security/apparmor/locking.txt linux-2.6.27/security/apparmor/locking.txt ---- linux-2.6.27./security/apparmor/locking.txt 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.27/security/apparmor/locking.txt 2008-10-29 14:28:53.319441671 +0100 +diff 
-uprN a/security/apparmor/locking.txt b/security/apparmor/locking.txt +--- a/security/apparmor/locking.txt 1970-01-01 00:00:00.000000000 +0000 ++++ b/security/apparmor/locking.txt 2009-02-08 13:26:38.922616652 +0000 @@ -0,0 +1,68 @@ +Locking in AppArmor +=================== @@ -4119,10 +4147,10 @@ diff -uprN linux-2.6.27./security/apparmor/locking.txt linux-2.6.27/security/app +dead task A. The kernel should not be taking a dead task's task_lock +at the same time the task is being freed by task rcu cleanup other wise +the task would not be out of its quiescent period. -diff -uprN linux-2.6.27./security/apparmor/lsm.c linux-2.6.27/security/apparmor/lsm.c ---- linux-2.6.27./security/apparmor/lsm.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.27/security/apparmor/lsm.c 2008-10-29 14:28:53.329440939 +0100 -@@ -0,0 +1,1060 @@ +diff -uprN a/security/apparmor/lsm.c b/security/apparmor/lsm.c +--- a/security/apparmor/lsm.c 1970-01-01 00:00:00.000000000 +0000 ++++ b/security/apparmor/lsm.c 2009-02-08 13:26:38.949289615 +0000 +@@ -0,0 +1,1024 @@ +/* + * Copyright (C) 1998-2007 Novell/SUSE + * @@ -4135,7 +4163,7 @@ diff -uprN linux-2.6.27./security/apparmor/lsm.c linux-2.6.27/security/apparmor/ + */ + +#include -+#include ++#include +#include +#include +#include @@ -4149,7 +4177,7 @@ diff -uprN linux-2.6.27./security/apparmor/lsm.c linux-2.6.27/security/apparmor/ +#include "inline.h" + +/* Flag indicating whether initialization completed */ -+int apparmor_initialized = 0; ++int apparmor_initialized; + +static int param_set_aabool(const char *val, struct kernel_param *kp); +static int param_get_aabool(char *buffer, struct kernel_param *kp); @@ -4168,29 +4196,23 @@ diff -uprN linux-2.6.27./security/apparmor/lsm.c linux-2.6.27/security/apparmor/ + * Value is also togglable per profile and referenced when global value is + * enforce. 
+ */ -+int apparmor_complain = 0; ++int apparmor_complain; +module_param_named(complain, apparmor_complain, aabool, S_IRUSR | S_IWUSR); -+MODULE_PARM_DESC(apparmor_complain, "Toggle AppArmor complain mode"); + +/* Debug mode */ -+int apparmor_debug = 0; ++int apparmor_debug; +module_param_named(debug, apparmor_debug, aabool, S_IRUSR | S_IWUSR); -+MODULE_PARM_DESC(apparmor_debug, "Toggle AppArmor debug mode"); + +/* Audit mode */ -+int apparmor_audit = 0; ++int apparmor_audit; +module_param_named(audit, apparmor_audit, aabool, S_IRUSR | S_IWUSR); -+MODULE_PARM_DESC(apparmor_audit, "Toggle AppArmor audit mode"); -+ +/* Syscall logging mode */ -+int apparmor_logsyscall = 0; ++int apparmor_logsyscall; +module_param_named(logsyscall, apparmor_logsyscall, aabool, S_IRUSR | S_IWUSR); -+MODULE_PARM_DESC(apparmor_logsyscall, "Toggle AppArmor logsyscall mode"); + +/* Maximum pathname length before accesses will start getting rejected */ +unsigned int apparmor_path_max = 2 * PATH_MAX; +module_param_named(path_max, apparmor_path_max, aauint, S_IRUSR | S_IWUSR); -+MODULE_PARM_DESC(apparmor_path_max, "Maximum pathname length allowed"); + +/* Boot time disable flag */ +#ifdef CONFIG_SECURITY_APPARMOR_DISABLE @@ -4199,10 +4221,9 @@ diff -uprN linux-2.6.27./security/apparmor/lsm.c linux-2.6.27/security/apparmor/ +#define AA_ENABLED_PERMS 0400 +#endif +static int param_set_aa_enabled(const char *val, struct kernel_param *kp); -+unsigned int apparmor_enabled = CONFIG_SECURITY_APPARMOR_BOOTPARAM_VALUE; ++static unsigned int apparmor_enabled = CONFIG_SECURITY_APPARMOR_BOOTPARAM_VALUE; +module_param_call(enabled, param_set_aa_enabled, param_get_aauint, + &apparmor_enabled, AA_ENABLED_PERMS); -+MODULE_PARM_DESC(apparmor_enabled, "Enable/Disable Apparmor on boot"); + +static int __init apparmor_enabled_setup(char *str) +{ @@ -4242,7 +4263,6 @@ diff -uprN linux-2.6.27./security/apparmor/lsm.c linux-2.6.27/security/apparmor/ +/* allow run time disabling of apparmor */ +static int 
param_set_aa_enabled(const char *val, struct kernel_param *kp) +{ -+ char *endp; + unsigned long l; + + if (!apparmor_initialized) { @@ -4259,8 +4279,7 @@ diff -uprN linux-2.6.27./security/apparmor/lsm.c linux-2.6.27/security/apparmor/ + if (!val) + return -EINVAL; + -+ l = simple_strtoul(val, &endp, 0); -+ if (endp == val || l != 0) ++ if (strict_strtoul(val, 0, &l) || l != 0) + return -EINVAL; + + apparmor_enabled = 0; @@ -4268,20 +4287,6 @@ diff -uprN linux-2.6.27./security/apparmor/lsm.c linux-2.6.27/security/apparmor/ + return 0; +} + -+static int aa_reject_syscall(struct task_struct *task, gfp_t flags, -+ const char *name) -+{ -+ struct aa_profile *profile = aa_get_profile(task); -+ int error = 0; -+ -+ if (profile) { -+ error = aa_audit_syscallreject(profile, flags, name); -+ aa_put_profile(profile); -+ } -+ -+ return error; -+} -+ +static int apparmor_ptrace(struct task_struct *tracer, + struct task_struct *tracee) +{ @@ -4377,7 +4382,7 @@ diff -uprN linux-2.6.27./security/apparmor/lsm.c linux-2.6.27/security/apparmor/ + mask |= MAY_WRITE; + + error = -ENOMEM; -+ buffer = (char*)__get_free_page(GFP_KERNEL); ++ buffer = (char *)__get_free_page(GFP_KERNEL); + if (!buffer) + goto out; + name = sysctl_pathname(table, buffer, PAGE_SIZE); @@ -4410,24 +4415,13 @@ diff -uprN linux-2.6.27./security/apparmor/lsm.c linux-2.6.27/security/apparmor/ + + if (!ret && (unsigned long)bprm->security & AA_SECURE_EXEC_NEEDED) { + AA_DEBUG("%s: secureexec required for %s\n", -+ __FUNCTION__, bprm->filename); ++ __func__, bprm->filename); + ret = 1; + } + + return ret; +} + -+static int apparmor_sb_mount(char *dev_name, struct path *path, char *type, -+ unsigned long flags, void *data) -+{ -+ return aa_reject_syscall(current, GFP_KERNEL, "mount"); -+} -+ -+static int apparmor_umount(struct vfsmount *mnt, int flags) -+{ -+ return aa_reject_syscall(current, GFP_KERNEL, "umount"); -+} -+ +static int apparmor_inode_mkdir(struct inode *dir, struct dentry *dentry, + struct vfsmount 
*mnt, int mask) +{ @@ -4672,7 +4666,7 @@ diff -uprN linux-2.6.27./security/apparmor/lsm.c linux-2.6.27/security/apparmor/ +static int aa_file_permission(const char *op, struct file *file, int mask) +{ + struct aa_profile *profile; -+ struct aa_profile *file_profile = (struct aa_profile*)file->f_security; ++ struct aa_profile *file_profile = file->f_security; + int error = 0; + + if (!file_profile) @@ -4709,7 +4703,7 @@ diff -uprN linux-2.6.27./security/apparmor/lsm.c linux-2.6.27/security/apparmor/ + aa_mask_permissions(mask)); +} + -+static inline int apparmor_file_lock (struct file *file, unsigned int cmd) ++static int apparmor_file_lock(struct file *file, unsigned int cmd) +{ + int mask = AA_MAY_LOCK; + if (cmd == F_WRLCK) @@ -4730,13 +4724,13 @@ diff -uprN linux-2.6.27./security/apparmor/lsm.c linux-2.6.27/security/apparmor/ + +static void apparmor_file_free_security(struct file *file) +{ -+ struct aa_profile *file_profile = (struct aa_profile*)file->f_security; ++ struct aa_profile *file_profile = file->f_security; + + aa_put_profile(file_profile); +} + -+static inline int aa_mmap(struct file *file, const char *operation, -+ unsigned long prot, unsigned long flags) ++static int aa_mmap(struct file *file, const char *operation, ++ unsigned long prot, unsigned long flags) +{ + struct dentry *dentry; + int mask = 0; @@ -5035,7 +5029,8 @@ diff -uprN linux-2.6.27./security/apparmor/lsm.c linux-2.6.27/security/apparmor/ + return error; +} + -+struct security_operations apparmor_ops = { ++static struct security_operations apparmor_ops = { ++ .name = "apparmor", + .ptrace_may_access = apparmor_ptrace_may_access, + .ptrace_traceme = apparmor_ptrace_traceme, + .capget = cap_capget, @@ -5049,9 +5044,6 @@ diff -uprN linux-2.6.27./security/apparmor/lsm.c linux-2.6.27/security/apparmor/ + .bprm_set_security = apparmor_bprm_set_security, + .bprm_secureexec = apparmor_bprm_secureexec, + -+ .sb_mount = apparmor_sb_mount, -+ .sb_umount = apparmor_umount, -+ + .inode_mkdir = 
apparmor_inode_mkdir, + .inode_rmdir = apparmor_inode_rmdir, + .inode_create = apparmor_inode_create, @@ -5114,22 +5106,25 @@ diff -uprN linux-2.6.27./security/apparmor/lsm.c linux-2.6.27/security/apparmor/ +{ + int error; + -+ if (!apparmor_enabled) { -+ info_message("AppArmor disabled by boottime parameter\n"); ++ if (!apparmor_enabled || !security_module_enable(&apparmor_ops)) { ++ info_message("AppArmor disabled by boot time parameter\n"); + return 0; + } + -+ if ((error = create_apparmorfs())) { ++ error = create_apparmorfs(); ++ if (error) { + AA_ERROR("Unable to activate AppArmor filesystem\n"); + goto createfs_out; + } + -+ if ((error = alloc_default_namespace())){ ++ error = alloc_default_namespace(); ++ if (error) { + AA_ERROR("Unable to allocate default profile namespace\n"); + goto alloc_out; + } + -+ if ((error = register_security(&apparmor_ops))) { ++ error = register_security(&apparmor_ops); ++ if (error) { + AA_ERROR("Unable to register AppArmor\n"); + goto register_security_out; + } @@ -5147,7 +5142,7 @@ diff -uprN linux-2.6.27./security/apparmor/lsm.c linux-2.6.27/security/apparmor/ + free_default_namespace(); + +alloc_out: -+ destroy_apparmorfs(); ++ destroy_apparmorfs(); + +createfs_out: + return error; @@ -5180,13 +5175,10 @@ diff -uprN linux-2.6.27./security/apparmor/lsm.c linux-2.6.27/security/apparmor/ + info_message("AppArmor protection removed"); +} + -+MODULE_DESCRIPTION("AppArmor process confinement"); -+MODULE_AUTHOR("Novell/Immunix, http://bugs.opensuse.org"); -+MODULE_LICENSE("GPL"); -diff -uprN linux-2.6.27./security/apparmor/main.c linux-2.6.27/security/apparmor/main.c ---- linux-2.6.27./security/apparmor/main.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.27/security/apparmor/main.c 2008-10-29 14:28:53.326113825 +0100 -@@ -0,0 +1,1690 @@ +diff -uprN a/security/apparmor/main.c b/security/apparmor/main.c +--- a/security/apparmor/main.c 1970-01-01 00:00:00.000000000 +0000 ++++ b/security/apparmor/main.c 2009-02-08 
13:26:38.949289615 +0000 +@@ -0,0 +1,1692 @@ +/* + * Copyright (C) 2002-2007 Novell/SUSE + * @@ -5418,9 +5410,13 @@ diff -uprN linux-2.6.27./security/apparmor/main.c linux-2.6.27/security/apparmor + audit_log_format(ab, " protocol=%d", sa->protocol); + } + -+ audit_log_format(ab, " pid=%d", current->pid); ++ audit_log_format(ab, " pid=%d", current->pid); + + if (profile) { ++ if (!sa->parent) ++ audit_log_format(ab, " parent=%d", ++ current->real_parent->pid); ++ + audit_log_format(ab, " profile="); + audit_log_untrustedstring(ab, profile->name); + @@ -5484,7 +5480,7 @@ diff -uprN linux-2.6.27./security/apparmor/main.c linux-2.6.27/security/apparmor + * @profile: profile to check against + * @sa: audit event + */ -+int aa_audit(struct aa_profile *profile, struct aa_audit *sa) ++static int aa_audit(struct aa_profile *profile, struct aa_audit *sa) +{ + int type = AUDIT_APPARMOR_DENIED; + struct audit_context *audit_cxt; @@ -5514,7 +5510,7 @@ diff -uprN linux-2.6.27./security/apparmor/main.c linux-2.6.27/security/apparmor + } else { + int mask = AUDIT_QUIET_MASK(sa->audit_mask); + -+ if (!(sa->denied_mask & ~mask)) ++ if (!(sa->denied_mask & ~mask) && !PROFILE_COMPLAIN(profile)) + return sa->error_code; + + /* mask off perms whose denial is being silenced */ @@ -5601,7 +5597,7 @@ diff -uprN linux-2.6.27./security/apparmor/main.c linux-2.6.27/security/apparmor + * + * If the link has 'x', an exact match of all the execute flags + * must match. 
-+ */ ++ */ + denied_mask |= ~l_mode & link_mask; + + t_mode = aa_match(profile->file_rules, target, NULL); @@ -5630,8 +5626,10 @@ diff -uprN linux-2.6.27./security/apparmor/main.c linux-2.6.27/security/apparmor + (x & AA_USER_EXEC_TYPE) != (t_x & AA_USER_EXEC_TYPE)) + denied_mask = AA_USER_EXEC | (l_x & AA_USER_EXEC_TYPE); + if ((l_mode & AA_OTHER_EXEC) && -+ (x & AA_OTHER_EXEC_TYPE) != (t_x & AA_OTHER_EXEC_TYPE)) -+ denied_mask = AA_OTHER_EXEC | (l_x & AA_OTHER_EXEC_TYPE); ++ (x & AA_OTHER_EXEC_TYPE) != (t_x & AA_OTHER_EXEC_TYPE)) { ++ denied_mask = ++ AA_OTHER_EXEC | (l_x & AA_OTHER_EXEC_TYPE); ++ } + } + + return denied_mask; @@ -5703,7 +5701,8 @@ diff -uprN linux-2.6.27./security/apparmor/main.c linux-2.6.27/security/apparmor + sprintf(name, "%s//%s", n1, n2); + return name; +} -+static inline void aa_put_name_buffer(char *buffer) ++ ++static void aa_put_name_buffer(char *buffer) +{ + kfree(buffer); +} @@ -6051,7 +6050,7 @@ diff -uprN linux-2.6.27./security/apparmor/main.c linux-2.6.27/security/apparmor + /* this is some debugging code to flush out the network hooks that + that are called in interrupt context */ + if (in_interrupt()) { -+ printk("AppArmor Debug: Hook being called from interrupt context\n"); ++ printk(KERN_WARNING "AppArmor Debug: Hook being called from interrupt context\n"); + dump_stack(); + return 0; + } @@ -6196,10 +6195,6 @@ diff -uprN linux-2.6.27./security/apparmor/main.c linux-2.6.27/security/apparmor + + unlock_profile(profile); + -+ if (APPARMOR_COMPLAIN(child_cxt) && -+ profile == profile->ns->null_complain_profile) { -+ aa_audit_hint(profile, &sa); -+ } + aa_put_profile(profile); + } else + aa_free_task_context(child_cxt); @@ -6208,7 +6203,7 @@ diff -uprN linux-2.6.27./security/apparmor/main.c linux-2.6.27/security/apparmor +} + +static struct aa_profile * -+aa_register_find(struct aa_profile *profile, const char* ns_name, ++aa_register_find(struct aa_profile *profile, const char *ns_name, + const char *name, int mandatory, int 
complain, + struct aa_audit *sa) +{ @@ -6242,7 +6237,7 @@ diff -uprN linux-2.6.27./security/apparmor/main.c linux-2.6.27/security/apparmor + + if (new_profile) { + AA_DEBUG("%s: setting profile %s\n", -+ __FUNCTION__, new_profile->name); ++ __func__, new_profile->name); + } else if (mandatory && profile) { + sa->info = "mandatory profile missing"; + sa->denied_mask = sa->request_mask; /* shifted MAY_EXEC */ @@ -6261,8 +6256,7 @@ diff -uprN linux-2.6.27./security/apparmor/main.c linux-2.6.27/security/apparmor + * is unconfined, pix, nix. + */ + AA_DEBUG("%s: No profile found for exec image '%s'\n", -+ __FUNCTION__, -+ name); ++ __func__, name); + } + if (ns_ref) + aa_put_namespace(ns); @@ -6347,7 +6341,7 @@ diff -uprN linux-2.6.27./security/apparmor/main.c linux-2.6.27/security/apparmor + int exec_mode, complain = 0, shift; + struct aa_audit sa; + -+ AA_DEBUG("%s\n", __FUNCTION__); ++ AA_DEBUG("%s\n", __func__); + + profile = aa_get_profile(current); + @@ -6455,7 +6449,7 @@ diff -uprN linux-2.6.27./security/apparmor/main.c linux-2.6.27/security/apparmor + unsigned long bprm_flags; + + bprm_flags = AA_SECURE_EXEC_NEEDED; -+ bprm->security = (void*) ++ bprm->security = (void *) + ((unsigned long)bprm->security | bprm_flags); + } + @@ -6877,10 +6871,10 @@ diff -uprN linux-2.6.27./security/apparmor/main.c linux-2.6.27/security/apparmor + } + rcu_assign_pointer(task->security, new_cxt); +} -diff -uprN linux-2.6.27./security/apparmor/match.c linux-2.6.27/security/apparmor/match.c ---- linux-2.6.27./security/apparmor/match.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.27/security/apparmor/match.c 2008-10-29 14:28:53.316113441 +0100 -@@ -0,0 +1,364 @@ +diff -uprN a/security/apparmor/match.c b/security/apparmor/match.c +--- a/security/apparmor/match.c 1970-01-01 00:00:00.000000000 +0000 ++++ b/security/apparmor/match.c 2009-02-08 13:26:38.949289615 +0000 +@@ -0,0 +1,363 @@ +/* + * Copyright (C) 2007 Novell/SUSE + * @@ -6965,7 +6959,7 @@ diff -uprN 
linux-2.6.27./security/apparmor/match.c linux-2.6.27/security/apparmo + if (!table) + goto fail; + -+ switch(table->td_id) { ++ switch (table->td_id) { + case YYTD_ID_ACCEPT: + case YYTD_ID_ACCEPT2: + case YYTD_ID_BASE: @@ -6998,10 +6992,8 @@ diff -uprN linux-2.6.27./security/apparmor/match.c linux-2.6.27/security/apparmo + +fail: + for (i = 0; i < ARRAY_SIZE(dfa->tables); i++) { -+ if (dfa->tables[i]) { -+ kfree(dfa->tables[i]); -+ dfa->tables[i] = NULL; -+ } ++ kfree(dfa->tables[i]); ++ dfa->tables[i] = NULL; + } + return error; +} @@ -7111,8 +7103,9 @@ diff -uprN linux-2.6.27./security/apparmor/match.c linux-2.6.27/security/apparmo + * but that would require traversing the string twice and be slightly + * slower. + */ -+unsigned int aa_dfa_next_state_len(struct aa_dfa *dfa, unsigned int start, -+ const char *str, int len) ++static unsigned int aa_dfa_next_state_len(struct aa_dfa *dfa, ++ unsigned int start, ++ const char *str, int len) +{ + u16 *def = DEFAULT_TABLE(dfa); + u32 *base = BASE_TABLE(dfa); @@ -7245,9 +7238,9 @@ diff -uprN linux-2.6.27./security/apparmor/match.c linux-2.6.27/security/apparmo + return 0; +} + -diff -uprN linux-2.6.27./security/apparmor/match.h linux-2.6.27/security/apparmor/match.h ---- linux-2.6.27./security/apparmor/match.h 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.27/security/apparmor/match.h 2008-10-29 14:28:53.316113441 +0100 +diff -uprN a/security/apparmor/match.h b/security/apparmor/match.h +--- a/security/apparmor/match.h 1970-01-01 00:00:00.000000000 +0000 ++++ b/security/apparmor/match.h 2009-02-08 13:26:38.945950214 +0000 @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2007 Novell/SUSE @@ -7312,7 +7305,7 @@ diff -uprN linux-2.6.27./security/apparmor/match.h linux-2.6.27/security/apparmo +#define CHECK_TABLE(DFA) ((u16 *)((DFA)->tables[YYTD_ID_CHK - 1]->td_data)) +#define EQUIV_TABLE(DFA) ((u8 *)((DFA)->tables[YYTD_ID_EC - 1]->td_data)) +#define ACCEPT_TABLE(DFA) ((u32 *)((DFA)->tables[YYTD_ID_ACCEPT - 1]->td_data)) -+#define 
ACCEPT_TABLE2(DFA) ((u32 *)((DFA)->tables[YYTD_ID_ACCEPT2 -1]->td_data)) ++#define ACCEPT_TABLE2(DFA) ((u32 *)((DFA)->tables[YYTD_ID_ACCEPT2 - 1]->td_data)) + +struct aa_dfa { + struct table_header *tables[YYTD_ID_NXT]; @@ -7336,10 +7329,10 @@ diff -uprN linux-2.6.27./security/apparmor/match.h linux-2.6.27/security/apparmo +} + +#endif /* __MATCH_H */ -diff -uprN linux-2.6.27./security/apparmor/module_interface.c linux-2.6.27/security/apparmor/module_interface.c ---- linux-2.6.27./security/apparmor/module_interface.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.27/security/apparmor/module_interface.c 2008-10-29 14:28:53.329440939 +0100 -@@ -0,0 +1,967 @@ +diff -uprN a/security/apparmor/module_interface.c b/security/apparmor/module_interface.c +--- a/security/apparmor/module_interface.c 1970-01-01 00:00:00.000000000 +0000 ++++ b/security/apparmor/module_interface.c 2009-02-08 13:26:38.949289615 +0000 +@@ -0,0 +1,956 @@ +/* + * Copyright (C) 1998-2007 Novell/SUSE + * @@ -7403,7 +7396,7 @@ diff -uprN linux-2.6.27./security/apparmor/module_interface.c linux-2.6.27/secur + char *ns_name; +}; + -+static inline int aa_inbounds(struct aa_ext *e, size_t size) ++static int aa_inbounds(struct aa_ext *e, size_t size) +{ + return (size <= e->end - e->pos); +} @@ -7436,7 +7429,7 @@ diff -uprN linux-2.6.27./security/apparmor/module_interface.c linux-2.6.27/secur + return 0; +} + -+static inline int aa_is_X(struct aa_ext *e, enum aa_code code) ++static int aa_is_X(struct aa_ext *e, enum aa_code code) +{ + if (!aa_inbounds(e, 1)) + return 0; @@ -7468,7 +7461,7 @@ diff -uprN linux-2.6.27./security/apparmor/module_interface.c linux-2.6.27/secur + * AA_NAME tag value is a u16. + */ + if (aa_is_X(e, AA_NAME)) { -+ char *tag; ++ char *tag = NULL; + size_t size = aa_is_u16_chunk(e, &tag); + /* if a name is specified it must match. 
otherwise skip tag */ + if (name && (!size || strcmp(name, tag))) @@ -7561,7 +7554,7 @@ diff -uprN linux-2.6.27./security/apparmor/module_interface.c linux-2.6.27/secur + size = le32_to_cpu(get_unaligned((u32 *)e->pos)); + e->pos += sizeof(u32); + if (aa_inbounds(e, (size_t) size)) { -+ * blob = e->pos; ++ *blob = e->pos; + e->pos += size; + return size; + } @@ -7579,8 +7572,8 @@ diff -uprN linux-2.6.27./security/apparmor/module_interface.c linux-2.6.27/secur + *string = NULL; + if (aa_is_nameX(e, AA_STRING, name) && + (size = aa_is_u16_chunk(e, &src_str))) { -+ char *str; -+ if (!(str = kmalloc(size, GFP_KERNEL))) ++ char *str = kmalloc(size, GFP_KERNEL); ++ if (!str) + goto fail; + memcpy(str, src_str, size); + *string = str; @@ -7711,7 +7704,7 @@ diff -uprN linux-2.6.27./security/apparmor/module_interface.c linux-2.6.27/secur +static struct aa_profile *aa_unpack_profile(struct aa_ext *e, + struct aa_audit *sa) +{ -+ struct aa_profile *profile = NULL; ++ struct aa_profile *profile; + size_t size = 0; + int i, error = -EPROTO; + @@ -7807,8 +7800,7 @@ diff -uprN linux-2.6.27./security/apparmor/module_interface.c linux-2.6.27/secur + sa->info = "failed to unpack profile"; + aa_audit_status(NULL, sa); + -+ if (profile) -+ free_aa_profile(profile); ++ free_aa_profile(profile); + + return ERR_PTR(error); +} @@ -7837,9 +7829,8 @@ diff -uprN linux-2.6.27./security/apparmor/module_interface.c linux-2.6.27/secur + } + + /* read the namespace if present */ -+ if (!aa_is_dynstring(e, &e->ns_name, "namespace")) { ++ if (!aa_is_dynstring(e, &e->ns_name, "namespace")) + e->ns_name = NULL; -+ } + + return 0; +} @@ -7851,7 +7842,7 @@ diff -uprN linux-2.6.27./security/apparmor/module_interface.c linux-2.6.27/secur + */ +ssize_t aa_add_profile(void *data, size_t size) +{ -+ struct aa_profile *profile = NULL; ++ struct aa_profile *profile; + struct aa_namespace *ns = NULL; + struct aa_ext e = { + .start = data, @@ -7928,17 +7919,14 @@ diff -uprN 
linux-2.6.27./security/apparmor/module_interface.c linux-2.6.27/secur + * @new_cxt: new aa_task_context to do replacement with + * @new_profile: new profile + */ -+static inline void task_replace(struct task_struct *task, ++static void task_replace(struct task_struct *task, + struct aa_task_context *new_cxt, + struct aa_profile *new_profile) +{ + struct aa_task_context *cxt = aa_task_context(task); + -+ AA_DEBUG("%s: replacing profile for task %d " -+ "profile=%s (%p)\n", -+ __FUNCTION__, -+ cxt->task->pid, -+ cxt->profile->name, cxt->profile); ++ AA_DEBUG("%s: replacing profile for task %d profile=%s (%p)\n", ++ __func__, cxt->task->pid, cxt->profile->name, cxt->profile); + + aa_change_task_context(task, new_cxt, new_profile, cxt->cookie, + cxt->previous_profile); @@ -8139,9 +8127,7 @@ diff -uprN linux-2.6.27./security/apparmor/module_interface.c linux-2.6.27/secur + */ +void free_aa_namespace_kref(struct kref *kref) +{ -+ struct aa_namespace *ns=container_of(kref, struct aa_namespace, count); -+ -+ free_aa_namespace(ns); ++ free_aa_namespace(container_of(kref, struct aa_namespace, count)); +} + +/** @@ -8154,7 +8140,7 @@ diff -uprN linux-2.6.27./security/apparmor/module_interface.c linux-2.6.27/secur + struct aa_namespace *ns; + + ns = kzalloc(sizeof(*ns), GFP_KERNEL); -+ AA_DEBUG("%s(%p)\n", __FUNCTION__, ns); ++ AA_DEBUG("%s(%p)\n", __func__, ns); + if (ns) { + ns->name = name; + INIT_LIST_HEAD(&ns->list); @@ -8196,7 +8182,7 @@ diff -uprN linux-2.6.27./security/apparmor/module_interface.c linux-2.6.27/secur + */ +void free_aa_namespace(struct aa_namespace *ns) +{ -+ AA_DEBUG("%s(%p)\n", __FUNCTION__, ns); ++ AA_DEBUG("%s(%p)\n", __func__, ns); + + if (!ns) + return; @@ -8205,15 +8191,12 @@ diff -uprN linux-2.6.27./security/apparmor/module_interface.c linux-2.6.27/secur + if (!list_empty(&ns->profiles)) { + AA_ERROR("%s: internal error, " + "namespace '%s' still contains profiles\n", -+ __FUNCTION__, -+ ns->name); ++ __func__, ns->name); + BUG(); + } + if 
(!list_empty(&ns->list)) { -+ AA_ERROR("%s: internal error, " -+ "namespace '%s' still on list\n", -+ __FUNCTION__, -+ ns->name); ++ AA_ERROR("%s: internal error, namespace '%s' still on list\n", ++ __func__, ns->name); + BUG(); + } + /* null_complain_profile doesn't contribute to ns ref counting */ @@ -8229,7 +8212,7 @@ diff -uprN linux-2.6.27./security/apparmor/module_interface.c linux-2.6.27/secur + */ +void free_aa_profile_kref(struct kref *kref) +{ -+ struct aa_profile *p=container_of(kref, struct aa_profile, count); ++ struct aa_profile *p = container_of(kref, struct aa_profile, count); + + free_aa_profile(p); +} @@ -8243,7 +8226,7 @@ diff -uprN linux-2.6.27./security/apparmor/module_interface.c linux-2.6.27/secur + struct aa_profile *profile; + + profile = kzalloc(sizeof(*profile), GFP_KERNEL); -+ AA_DEBUG("%s(%p)\n", __FUNCTION__, profile); ++ AA_DEBUG("%s(%p)\n", __func__, profile); + if (profile) { + INIT_LIST_HEAD(&profile->list); + kref_init(&profile->count); @@ -8265,7 +8248,7 @@ diff -uprN linux-2.6.27./security/apparmor/module_interface.c linux-2.6.27/secur + */ +void free_aa_profile(struct aa_profile *profile) +{ -+ AA_DEBUG("%s(%p)\n", __FUNCTION__, profile); ++ AA_DEBUG("%s(%p)\n", __func__, profile); + + if (!profile) + return; @@ -8274,8 +8257,7 @@ diff -uprN linux-2.6.27./security/apparmor/module_interface.c linux-2.6.27/secur + if (!list_empty(&profile->list)) { + AA_ERROR("%s: internal error, " + "profile '%s' still on global list\n", -+ __FUNCTION__, -+ profile->name); ++ __func__, profile->name); + BUG(); + } + aa_put_namespace(profile->ns); @@ -8283,7 +8265,7 @@ diff -uprN linux-2.6.27./security/apparmor/module_interface.c linux-2.6.27/secur + aa_match_free(profile->file_rules); + + if (profile->name) { -+ AA_DEBUG("%s: %s\n", __FUNCTION__, profile->name); ++ AA_DEBUG("%s: %s\n", __func__, profile->name); + kfree(profile->name); + } + @@ -8307,10 +8289,10 @@ diff -uprN linux-2.6.27./security/apparmor/module_interface.c linux-2.6.27/secur + 
task_unlock(task); + } +} -diff -uprN linux-2.6.27./security/apparmor/procattr.c linux-2.6.27/security/apparmor/procattr.c ---- linux-2.6.27./security/apparmor/procattr.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.27/security/apparmor/procattr.c 2008-10-29 14:28:53.319441671 +0100 -@@ -0,0 +1,195 @@ +diff -uprN a/security/apparmor/procattr.c b/security/apparmor/procattr.c +--- a/security/apparmor/procattr.c 1970-01-01 00:00:00.000000000 +0000 ++++ b/security/apparmor/procattr.c 2009-02-08 13:26:38.945950214 +0000 +@@ -0,0 +1,194 @@ +/* + * Copyright (C) 1998-2007 Novell/SUSE + * @@ -8401,7 +8383,7 @@ diff -uprN linux-2.6.27./security/apparmor/procattr.c linux-2.6.27/security/appa + } + + AA_DEBUG("%s: Magic 0x%llx Hat '%s'\n", -+ __FUNCTION__, cookie, hat ? hat : NULL); ++ __func__, cookie, hat ? hat : NULL); + + return aa_change_hat(hat, cookie); +} @@ -8434,8 +8416,7 @@ diff -uprN linux-2.6.27./security/apparmor/procattr.c linux-2.6.27/security/appa + sa.gfp_mask = GFP_KERNEL; + sa.task = task->pid; + -+ AA_DEBUG("%s: current %d\n", -+ __FUNCTION__, current->pid); ++ AA_DEBUG("%s: current %d\n", __func__, current->pid); + + name = args; + if (args[0] != '/') { @@ -8506,9 +8487,9 @@ diff -uprN linux-2.6.27./security/apparmor/procattr.c linux-2.6.27/security/appa + aa_put_profile(new_profile); + return 0; +} -diff -uprN linux-2.6.27./security/capability.c linux-2.6.27/security/capability.c ---- linux-2.6.27./security/capability.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/security/capability.c 2008-10-29 14:28:53.302774553 +0100 +diff -uprN a/security/capability.c b/security/capability.c +--- a/security/capability.c 2008-12-24 23:26:37.000000000 +0000 ++++ b/security/capability.c 2009-02-08 13:26:38.902622774 +0000 @@ -155,52 +155,56 @@ static int cap_inode_init_security(struc } @@ -8630,9 +8611,9 @@ diff -uprN linux-2.6.27./security/capability.c linux-2.6.27/security/capability. 
set_to_cap_if_null(ops, task_create); set_to_cap_if_null(ops, task_alloc_security); set_to_cap_if_null(ops, task_free_security); -diff -uprN linux-2.6.27./security/commoncap.c linux-2.6.27/security/commoncap.c ---- linux-2.6.27./security/commoncap.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/security/commoncap.c 2008-10-29 14:28:53.296107952 +0100 +diff -uprN a/security/commoncap.c b/security/commoncap.c +--- a/security/commoncap.c 2008-12-24 23:26:37.000000000 +0000 ++++ b/security/commoncap.c 2009-02-08 13:26:38.895950162 +0000 @@ -411,8 +411,9 @@ int cap_bprm_secureexec (struct linux_bi current->egid != current->gid); } @@ -8655,10 +8636,19 @@ diff -uprN linux-2.6.27./security/commoncap.c linux-2.6.27/security/commoncap.c { if (!strcmp(name, XATTR_NAME_CAPS)) { if (!capable(CAP_SETFCAP)) -diff -uprN linux-2.6.27./security/security.c linux-2.6.27/security/security.c ---- linux-2.6.27./security/security.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/security/security.c 2008-10-29 14:28:53.332780504 +0100 -@@ -358,72 +358,81 @@ int security_inode_init_security(struct +diff -uprN a/security/security.c b/security/security.c +--- a/security/security.c 2008-12-24 23:26:37.000000000 +0000 ++++ b/security/security.c 2009-02-08 13:26:38.932616900 +0000 +@@ -18,7 +18,7 @@ + #include + + /* Boot-time LSM user choice */ +-static __initdata char chosen_lsm[SECURITY_NAME_MAX + 1]; ++static __initdata char chosen_lsm[SECURITY_NAME_MAX + 1] = CONFIG_SECURITY_DEFAULT; + + /* things that live in capability.c */ + extern struct security_operations default_security_ops; +@@ -367,72 +367,81 @@ int security_inode_init_security(struct } EXPORT_SYMBOL(security_inode_init_security); @@ -8760,7 +8750,7 @@ diff -uprN linux-2.6.27./security/security.c linux-2.6.27/security/security.c } int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd) -@@ -440,11 +449,12 @@ int security_inode_permission(struct ino +@@ -449,11 +458,12 @@ int 
security_inode_permission(struct ino return security_ops->inode_permission(inode, mask); } @@ -8775,7 +8765,7 @@ diff -uprN linux-2.6.27./security/security.c linux-2.6.27/security/security.c } EXPORT_SYMBOL_GPL(security_inode_setattr); -@@ -462,41 +472,48 @@ void security_inode_delete(struct inode +@@ -471,41 +481,48 @@ void security_inode_delete(struct inode security_ops->inode_delete(inode); } @@ -8836,7 +8826,7 @@ diff -uprN linux-2.6.27./security/security.c linux-2.6.27/security/security.c } int security_inode_need_killpriv(struct dentry *dentry) -@@ -599,6 +616,15 @@ int security_dentry_open(struct file *fi +@@ -608,6 +625,15 @@ int security_dentry_open(struct file *fi return security_ops->dentry_open(file); } @@ -8852,10 +8842,10 @@ diff -uprN linux-2.6.27./security/security.c linux-2.6.27/security/security.c int security_task_create(unsigned long clone_flags) { return security_ops->task_create(clone_flags); -diff -uprN linux-2.6.27./security/selinux/hooks.c linux-2.6.27/security/selinux/hooks.c ---- linux-2.6.27./security/selinux/hooks.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/security/selinux/hooks.c 2008-10-29 14:28:53.299441785 +0100 -@@ -1811,40 +1811,16 @@ static int selinux_capable(struct task_s +diff -uprN a/security/selinux/hooks.c b/security/selinux/hooks.c +--- a/security/selinux/hooks.c 2008-12-24 23:26:37.000000000 +0000 ++++ b/security/selinux/hooks.c 2009-02-08 13:26:38.902622774 +0000 +@@ -1814,40 +1814,16 @@ static int selinux_capable(struct task_s static int selinux_sysctl_get_sid(ctl_table *table, u16 tclass, u32 *sid) { @@ -8901,7 +8891,7 @@ diff -uprN linux-2.6.27./security/selinux/hooks.c linux-2.6.27/security/selinux/ free_page((unsigned long)buffer); out: return rc; -@@ -2566,64 +2542,79 @@ static int selinux_inode_init_security(s +@@ -2564,64 +2540,79 @@ static int selinux_inode_init_security(s return 0; } @@ -8994,7 +8984,7 @@ diff -uprN linux-2.6.27./security/selinux/hooks.c linux-2.6.27/security/selinux/ { return 
dentry_has_perm(current, NULL, dentry, FILE__READ); } -@@ -2655,11 +2646,12 @@ static int selinux_inode_permission(stru +@@ -2653,11 +2644,12 @@ static int selinux_inode_permission(stru open_file_mask_to_av(inode->i_mode, mask), NULL); } @@ -9009,7 +8999,7 @@ diff -uprN linux-2.6.27./security/selinux/hooks.c linux-2.6.27/security/selinux/ if (rc) return rc; -@@ -2697,8 +2689,9 @@ static int selinux_inode_setotherxattr(s +@@ -2695,8 +2687,9 @@ static int selinux_inode_setotherxattr(s return dentry_has_perm(current, NULL, dentry, FILE__SETATTR); } @@ -9021,7 +9011,7 @@ diff -uprN linux-2.6.27./security/selinux/hooks.c linux-2.6.27/security/selinux/ { struct task_security_struct *tsec = current->security; struct inode *inode = dentry->d_inode; -@@ -2752,7 +2745,8 @@ static int selinux_inode_setxattr(struct +@@ -2750,7 +2743,8 @@ static int selinux_inode_setxattr(struct &ad); } @@ -9031,7 +9021,7 @@ diff -uprN linux-2.6.27./security/selinux/hooks.c linux-2.6.27/security/selinux/ const void *value, size_t size, int flags) { -@@ -2778,17 +2772,21 @@ static void selinux_inode_post_setxattr( +@@ -2776,17 +2770,21 @@ static void selinux_inode_post_setxattr( return; } @@ -9056,9 +9046,9 @@ diff -uprN linux-2.6.27./security/selinux/hooks.c linux-2.6.27/security/selinux/ { if (strcmp(name, XATTR_NAME_SELINUX)) return selinux_inode_setotherxattr(dentry, name); -diff -uprN linux-2.6.27./security/smack/smack_lsm.c linux-2.6.27/security/smack/smack_lsm.c ---- linux-2.6.27./security/smack/smack_lsm.c 2008-10-10 00:13:53.000000000 +0200 -+++ linux-2.6.27/security/smack/smack_lsm.c 2008-10-29 14:28:53.296107952 +0100 +diff -uprN a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c +--- a/security/smack/smack_lsm.c 2008-12-24 23:26:37.000000000 +0000 ++++ b/security/smack/smack_lsm.c 2009-02-08 13:26:38.899283105 +0000 @@ -432,8 +432,9 @@ static int smack_inode_init_security(str * * Returns 0 if access is permitted, an error code otherwise @@ -9228,3 +9218,21 @@ diff -uprN 
linux-2.6.27./security/smack/smack_lsm.c linux-2.6.27/security/smack/ if (rc == 0) rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE); +--- + security/apparmor/main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/security/apparmor/main.c ++++ b/security/apparmor/main.c +@@ -503,10 +503,10 @@ static char *aa_get_name(struct dentry * + *buffer = buf; + return name; + } ++ kfree(buf); + if (PTR_ERR(name) != -ENAMETOOLONG) + return name; + +- kfree(buf); + size <<= 1; + if (size > apparmor_path_max) + return ERR_PTR(-ENAMETOOLONG); diff --git a/kernel-apparmor.patch b/kernel-apparmor.patch new file mode 100644 index 00000000..049b8588 --- /dev/null +++ b/kernel-apparmor.patch @@ -0,0 +1,9063 @@ +diff -uprN e/fs/afs/dir.c f/fs/afs/dir.c +--- e/fs/afs/dir.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/fs/afs/dir.c 2008-05-28 20:29:29.410207000 +0000 +@@ -45,6 +45,7 @@ const struct file_operations afs_dir_fil + .release = afs_release, + .readdir = afs_readdir, + .lock = afs_lock, ++ .fsetattr = afs_fsetattr, + }; + + const struct inode_operations afs_dir_inode_operations = { +diff -uprN e/fs/afs/file.c f/fs/afs/file.c +--- e/fs/afs/file.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/fs/afs/file.c 2008-05-28 20:29:29.410207000 +0000 +@@ -36,6 +36,7 @@ const struct file_operations afs_file_op + .fsync = afs_fsync, + .lock = afs_lock, + .flock = afs_flock, ++ .fsetattr = afs_fsetattr, + }; + + const struct inode_operations afs_file_inode_operations = { +diff -uprN e/fs/afs/inode.c f/fs/afs/inode.c +--- e/fs/afs/inode.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/fs/afs/inode.c 2008-05-28 20:29:29.410207000 +0000 +@@ -358,7 +358,8 @@ void afs_clear_inode(struct inode *inode + /* + * set the attributes of an inode + */ +-int afs_setattr(struct dentry *dentry, struct iattr *attr) ++static int afs_do_setattr(struct dentry *dentry, struct iattr *attr, ++ struct file *file) + { + struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode); + struct key *key; +@@ -380,8 
+381,8 @@ int afs_setattr(struct dentry *dentry, s + afs_writeback_all(vnode); + } + +- if (attr->ia_valid & ATTR_FILE) { +- key = attr->ia_file->private_data; ++ if (file) { ++ key = file->private_data; + } else { + key = afs_request_key(vnode->volume->cell); + if (IS_ERR(key)) { +@@ -391,10 +392,20 @@ int afs_setattr(struct dentry *dentry, s + } + + ret = afs_vnode_setattr(vnode, key, attr); +- if (!(attr->ia_valid & ATTR_FILE)) ++ if (!file) + key_put(key); + + error: + _leave(" = %d", ret); + return ret; + } ++ ++int afs_setattr(struct dentry *dentry, struct iattr *attr) ++{ ++ return afs_do_setattr(dentry, attr, NULL); ++} ++ ++int afs_fsetattr(struct file *file, struct iattr *attr) ++{ ++ return afs_do_setattr(file->f_path.dentry, attr, file); ++} +diff -uprN e/fs/afs/internal.h f/fs/afs/internal.h +--- e/fs/afs/internal.h 2008-04-17 02:49:44.000000000 +0000 ++++ f/fs/afs/internal.h 2008-05-28 20:29:29.410207000 +0000 +@@ -550,6 +550,7 @@ extern void afs_zap_data(struct afs_vnod + extern int afs_validate(struct afs_vnode *, struct key *); + extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *); + extern int afs_setattr(struct dentry *, struct iattr *); ++extern int afs_fsetattr(struct file *, struct iattr *); + extern void afs_clear_inode(struct inode *); + + /* +diff -uprN e/fs/attr.c f/fs/attr.c +--- e/fs/attr.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/fs/attr.c 2008-05-28 20:29:29.410207000 +0000 +@@ -100,7 +100,8 @@ int inode_setattr(struct inode * inode, + } + EXPORT_SYMBOL(inode_setattr); + +-int notify_change(struct dentry * dentry, struct iattr * attr) ++int fnotify_change(struct dentry *dentry, struct vfsmount *mnt, ++ struct iattr *attr, struct file *file) + { + struct inode *inode = dentry->d_inode; + mode_t mode = inode->i_mode; +@@ -158,13 +159,17 @@ int notify_change(struct dentry * dentry + down_write(&dentry->d_inode->i_alloc_sem); + + if (inode->i_op && inode->i_op->setattr) { +- error = security_inode_setattr(dentry, 
attr); +- if (!error) +- error = inode->i_op->setattr(dentry, attr); ++ error = security_inode_setattr(dentry, mnt, attr); ++ if (!error) { ++ if (file && file->f_op && file->f_op->fsetattr) ++ error = file->f_op->fsetattr(file, attr); ++ else ++ error = inode->i_op->setattr(dentry, attr); ++ } + } else { + error = inode_change_ok(inode, attr); + if (!error) +- error = security_inode_setattr(dentry, attr); ++ error = security_inode_setattr(dentry, mnt, attr); + if (!error) { + if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || + (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) +@@ -182,5 +187,12 @@ int notify_change(struct dentry * dentry + + return error; + } ++EXPORT_SYMBOL_GPL(fnotify_change); ++ ++int notify_change(struct dentry *dentry, struct vfsmount *mnt, ++ struct iattr *attr) ++{ ++ return fnotify_change(dentry, mnt, attr, NULL); ++} + + EXPORT_SYMBOL(notify_change); +diff -uprN e/fs/dcache.c f/fs/dcache.c +--- e/fs/dcache.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/fs/dcache.c 2008-05-28 20:29:29.410207000 +0000 +@@ -1747,86 +1747,118 @@ shouldnt_be_hashed: + } + + /** +- * d_path - return the path of a dentry ++ * __d_path - return the path of a dentry + * @dentry: dentry to report + * @vfsmnt: vfsmnt to which the dentry belongs + * @root: root dentry + * @rootmnt: vfsmnt to which the root dentry belongs + * @buffer: buffer to return value in + * @buflen: buffer length ++ * @fail_deleted: what to return for deleted files ++ * @disconnect: don't return a path starting with / when disconnected + * +- * Convert a dentry into an ASCII path name. If the entry has been deleted ++ * Convert a dentry into an ASCII path name. If the entry has been deleted, ++ * then if @fail_deleted is true, ERR_PTR(-ENOENT) is returned. Otherwise, + * the string " (deleted)" is appended. Note that this is ambiguous. + * + * Returns the buffer or an error code if the path was too long. 
++ * If @dentry is not connected to @root, the path returned will be relative ++ * (i.e., it will not start with a slash). + * +- * "buflen" should be positive. Caller holds the dcache_lock. ++ * Returns the buffer or an error code. + */ +-static char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt, +- struct path *root, char *buffer, int buflen) ++char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt, ++ struct path *root, char *buffer, int buflen, ++ int fail_deleted, int disconnect) + { +- char * end = buffer+buflen; +- char * retval; +- int namelen; ++ int namelen, vfsmount_locked = 0; ++ const unsigned char *name; ++ ++ if (buflen < 2) ++ return ERR_PTR(-ENAMETOOLONG); ++ buffer += --buflen; ++ *buffer = '\0'; + +- *--end = '\0'; +- buflen--; ++ spin_lock(&dcache_lock); + if (!IS_ROOT(dentry) && d_unhashed(dentry)) { +- buflen -= 10; +- end -= 10; +- if (buflen < 0) ++ if (fail_deleted) { ++ buffer = ERR_PTR(-ENOENT); ++ goto out; ++ } ++ if (buflen < 10) + goto Elong; +- memcpy(end, " (deleted)", 10); ++ buflen -= 10; ++ buffer -= 10; ++ memcpy(buffer, " (deleted)", 10); + } + +- if (buflen < 1) +- goto Elong; +- /* Get '/' right */ +- retval = end-1; +- *retval = '/'; +- +- for (;;) { ++ while (dentry != root->dentry || vfsmnt != root->mnt) { + struct dentry * parent; + +- if (dentry == root->dentry && vfsmnt == root->mnt) +- break; + if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { +- /* Global root? 
*/ +- spin_lock(&vfsmount_lock); +- if (vfsmnt->mnt_parent == vfsmnt) { +- spin_unlock(&vfsmount_lock); +- goto global_root; ++ if (!vfsmount_locked) { ++ spin_lock(&vfsmount_lock); ++ vfsmount_locked = 1; + } ++ if (vfsmnt->mnt_parent == vfsmnt) ++ goto global_root; + dentry = vfsmnt->mnt_mountpoint; + vfsmnt = vfsmnt->mnt_parent; +- spin_unlock(&vfsmount_lock); + continue; + } + parent = dentry->d_parent; + prefetch(parent); + namelen = dentry->d_name.len; +- buflen -= namelen + 1; +- if (buflen < 0) ++ if (buflen < namelen + 1) + goto Elong; +- end -= namelen; +- memcpy(end, dentry->d_name.name, namelen); +- *--end = '/'; +- retval = end; ++ buflen -= namelen + 1; ++ buffer -= namelen; ++ memcpy(buffer, dentry->d_name.name, namelen); ++ *--buffer = '/'; + dentry = parent; + } ++ /* Get '/' right. */ ++ if (*buffer != '/') ++ *--buffer = '/'; + +- return retval; ++out: ++ if (vfsmount_locked) ++ spin_unlock(&vfsmount_lock); ++ spin_unlock(&dcache_lock); ++ return buffer; + + global_root: ++ /* ++ * We went past the (vfsmount, dentry) we were looking for and have ++ * either hit a root dentry, a lazily unmounted dentry, an ++ * unconnected dentry, or the file is on a pseudo filesystem. ++ */ + namelen = dentry->d_name.len; +- buflen -= namelen; +- if (buflen < 0) ++ name = dentry->d_name.name; ++ ++ /* ++ * If this is a root dentry, then overwrite the slash. This ++ * will also DTRT with pseudo filesystems which have root ++ * dentries named "foo:". 
++ */ ++ if (IS_ROOT(dentry)) { ++ buffer++; ++ buflen++; ++ } ++ if (disconnect && *name == '/') { ++ /* Make sure we won't return a pathname starting with '/' */ ++ name++; ++ namelen--; ++ } ++ if (buflen < namelen) + goto Elong; +- retval -= namelen-1; /* hit the slash */ +- memcpy(retval, dentry->d_name.name, namelen); +- return retval; ++ buffer -= namelen; ++ memcpy(buffer, dentry->d_name.name, namelen); ++ goto out; ++ + Elong: +- return ERR_PTR(-ENAMETOOLONG); ++ buffer = ERR_PTR(-ENAMETOOLONG); ++ goto out; + } + + /** +@@ -1861,9 +1893,7 @@ char *d_path(struct path *path, char *bu + root = current->fs->root; + path_get(¤t->fs->root); + read_unlock(¤t->fs->lock); +- spin_lock(&dcache_lock); +- res = __d_path(path->dentry, path->mnt, &root, buf, buflen); +- spin_unlock(&dcache_lock); ++ res = __d_path(path->dentry, path->mnt, &root, buf, buflen, 0, 0); + path_put(&root); + return res; + } +@@ -1909,9 +1939,9 @@ char *dynamic_dname(struct dentry *dentr + */ + asmlinkage long sys_getcwd(char __user *buf, unsigned long size) + { +- int error; ++ int error, len; + struct path pwd, root; +- char *page = (char *) __get_free_page(GFP_USER); ++ char *page = (char *) __get_free_page(GFP_USER), *cwd; + + if (!page) + return -ENOMEM; +@@ -1923,29 +1953,18 @@ asmlinkage long sys_getcwd(char __user * + path_get(¤t->fs->root); + read_unlock(¤t->fs->lock); + +- error = -ENOENT; +- /* Has the current directory has been unlinked? 
*/ +- spin_lock(&dcache_lock); +- if (pwd.dentry->d_parent == pwd.dentry || !d_unhashed(pwd.dentry)) { +- unsigned long len; +- char * cwd; +- +- cwd = __d_path(pwd.dentry, pwd.mnt, &root, page, PAGE_SIZE); +- spin_unlock(&dcache_lock); +- +- error = PTR_ERR(cwd); +- if (IS_ERR(cwd)) +- goto out; +- +- error = -ERANGE; +- len = PAGE_SIZE + page - cwd; +- if (len <= size) { +- error = len; +- if (copy_to_user(buf, cwd, len)) +- error = -EFAULT; +- } +- } else +- spin_unlock(&dcache_lock); ++ cwd = __d_path(pwd.dentry, pwd.mnt, &root, page, PAGE_SIZE, 1, 0); ++ error = PTR_ERR(cwd); ++ if (IS_ERR(cwd)) ++ goto out; ++ ++ error = -ERANGE; ++ len = PAGE_SIZE + page - cwd; ++ if (len <= size) { ++ error = len; ++ if (copy_to_user(buf, cwd, len)) ++ error = -EFAULT; ++ } + + out: + path_put(&pwd); +diff -uprN e/fs/ecryptfs/inode.c f/fs/ecryptfs/inode.c +--- e/fs/ecryptfs/inode.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/fs/ecryptfs/inode.c 2008-05-28 20:29:28.910241000 +0000 +@@ -388,19 +388,24 @@ static int ecryptfs_link(struct dentry * + struct dentry *new_dentry) + { + struct dentry *lower_old_dentry; ++ struct vfsmount *lower_old_mnt; + struct dentry *lower_new_dentry; ++ struct vfsmount *lower_new_mnt; + struct dentry *lower_dir_dentry; + u64 file_size_save; + int rc; + + file_size_save = i_size_read(old_dentry->d_inode); + lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); ++ lower_old_mnt = ecryptfs_dentry_to_lower_mnt(old_dentry); + lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); ++ lower_new_mnt = ecryptfs_dentry_to_lower_mnt(new_dentry); + dget(lower_old_dentry); + dget(lower_new_dentry); + lower_dir_dentry = lock_parent(lower_new_dentry); +- rc = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode, +- lower_new_dentry, NULL); ++ rc = vfs_link(lower_old_dentry, lower_old_mnt, ++ lower_dir_dentry->d_inode, lower_new_dentry, ++ lower_new_mnt, NULL); + if (rc || !lower_new_dentry->d_inode) + goto out_lock; + rc = 
ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb, 0); +@@ -425,10 +430,11 @@ static int ecryptfs_unlink(struct inode + int rc = 0; + struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); ++ struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); + struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir); + struct dentry *lower_dir_dentry; + + lower_dir_dentry = lock_parent(lower_dentry); +- rc = vfs_unlink(lower_dir_inode, lower_dentry, NULL); ++ rc = vfs_unlink(lower_dir_inode, lower_dentry, NULL, lower_mnt); + if (rc) { + printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc); + goto out_unlock; +@@ -448,6 +454,7 @@ static int ecryptfs_symlink(struct inode + { + int rc; + struct dentry *lower_dentry; ++ struct vfsmount *lower_mnt; + struct dentry *lower_dir_dentry; + umode_t mode; + char *encoded_symname; +@@ -456,6 +463,7 @@ static int ecryptfs_symlink(struct inode + + lower_dentry = ecryptfs_dentry_to_lower(dentry); + dget(lower_dentry); ++ lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); + lower_dir_dentry = lock_parent(lower_dentry); + mode = S_IALLUGO; + encoded_symlen = ecryptfs_encode_filename(crypt_stat, symname, +@@ -465,7 +473,7 @@ static int ecryptfs_symlink(struct inode + rc = encoded_symlen; + goto out_lock; + } +- rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry, ++ rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry, lower_mnt, + encoded_symname, mode, NULL); + kfree(encoded_symname); + if (rc || !lower_dentry->d_inode) +@@ -487,11 +495,14 @@ static int ecryptfs_mkdir(struct inode * + { + int rc; + struct dentry *lower_dentry; ++ struct vfsmount *lower_mnt; + struct dentry *lower_dir_dentry; + + lower_dentry = ecryptfs_dentry_to_lower(dentry); ++ lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); + lower_dir_dentry = lock_parent(lower_dentry); +- rc = vfs_mkdir(lower_dir_dentry->d_inode, lower_dentry, mode, NULL); ++ rc = vfs_mkdir(lower_dir_dentry->d_inode, lower_dentry, lower_mnt, ++ mode, NULL); + 
if (rc || !lower_dentry->d_inode) + goto out; + rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0); +@@ -510,14 +521,16 @@ out: + static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry) + { + struct dentry *lower_dentry; ++ struct vfsmount *lower_mnt; + struct dentry *lower_dir_dentry; + int rc; + + lower_dentry = ecryptfs_dentry_to_lower(dentry); ++ lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); + dget(dentry); + lower_dir_dentry = lock_parent(lower_dentry); + dget(lower_dentry); +- rc = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry, NULL); ++ rc = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry, NULL, lower_mnt); + dput(lower_dentry); + if (!rc) + d_delete(lower_dentry); +@@ -535,11 +548,14 @@ ecryptfs_mknod(struct inode *dir, struct + { + int rc; + struct dentry *lower_dentry; ++ struct vfsmount *lower_mnt; + struct dentry *lower_dir_dentry; + + lower_dentry = ecryptfs_dentry_to_lower(dentry); ++ lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); + lower_dir_dentry = lock_parent(lower_dentry); +- rc = vfs_mknod(lower_dir_dentry->d_inode, lower_dentry, mode, dev, NULL); ++ rc = vfs_mknod(lower_dir_dentry->d_inode, lower_dentry, lower_mnt, mode, ++ dev, NULL); + if (rc || !lower_dentry->d_inode) + goto out; + rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0); +@@ -560,19 +576,24 @@ ecryptfs_rename(struct inode *old_dir, s + { + int rc; + struct dentry *lower_old_dentry; ++ struct vfsmount *lower_old_mnt; + struct dentry *lower_new_dentry; ++ struct vfsmount *lower_new_mnt; + struct dentry *lower_old_dir_dentry; + struct dentry *lower_new_dir_dentry; + + lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); ++ lower_old_mnt = ecryptfs_dentry_to_lower_mnt(old_dentry); + lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); ++ lower_new_mnt = ecryptfs_dentry_to_lower_mnt(new_dentry); + dget(lower_old_dentry); + dget(lower_new_dentry); + lower_old_dir_dentry = dget_parent(lower_old_dentry); + lower_new_dir_dentry = 
dget_parent(lower_new_dentry); + lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); + rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry, +- lower_new_dir_dentry->d_inode, lower_new_dentry); ++ lower_old_mnt, lower_new_dir_dentry->d_inode, ++ lower_new_dentry, lower_new_mnt); + if (rc) + goto out_lock; + fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode, NULL); +@@ -848,6 +869,7 @@ static int ecryptfs_setattr(struct dentr + { + int rc = 0; + struct dentry *lower_dentry; ++ struct vfsmount *lower_mnt; + struct inode *inode; + struct inode *lower_inode; + struct ecryptfs_crypt_stat *crypt_stat; +@@ -858,6 +880,7 @@ static int ecryptfs_setattr(struct dentr + inode = dentry->d_inode; + lower_inode = ecryptfs_inode_to_lower(inode); + lower_dentry = ecryptfs_dentry_to_lower(dentry); ++ lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); + mutex_lock(&crypt_stat->cs_mutex); + if (S_ISDIR(dentry->d_inode->i_mode)) + crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); +@@ -927,7 +927,7 @@ + ia->ia_valid &= ~ATTR_MODE; + + mutex_lock(&lower_dentry->d_inode->i_mutex); +- rc = notify_change(lower_dentry, ia); ++ rc = notify_change(lower_dentry, lower_mnt, ia); + mutex_unlock(&lower_dentry->d_inode->i_mutex); + out: + fsstack_copy_attr_all(inode, lower_inode); +diff -uprN e/fs/exec.c f/fs/exec.c +--- e/fs/exec.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/fs/exec.c 2008-05-28 20:29:28.910241000 +0000 +@@ -1777,7 +1777,8 @@ int do_coredump(long signr, int exit_cod + goto close_fail; + if (!file->f_op->write) + goto close_fail; +- if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0) ++ if (!ispipe && ++ do_truncate(file->f_path.dentry, file->f_path.mnt, 0, 0, file) != 0) + goto close_fail; + + retval = binfmt->core_dump(signr, regs, file, core_limit); +diff -uprN e/fs/fat/file.c f/fs/fat/file.c +--- e/fs/fat/file.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/fs/fat/file.c 2008-05-28 20:29:28.910241000 +0000 +@@ -92,7 +92,7 @@ int 
fat_generic_ioctl(struct inode *inod + } + + /* This MUST be done before doing anything irreversible... */ +- err = notify_change(filp->f_path.dentry, &ia); ++ err = notify_change(filp->f_path.dentry, filp->f_path.mnt, &ia); + if (err) + goto up; + +diff -uprN e/fs/fuse/dir.c f/fs/fuse/dir.c +--- e/fs/fuse/dir.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/fs/fuse/dir.c 2008-05-28 20:29:29.410207000 +0000 +@@ -1064,21 +1064,22 @@ static int fuse_dir_fsync(struct file *f + return file ? fuse_fsync_common(file, de, datasync, 1) : 0; + } + +-static bool update_mtime(unsigned ivalid) ++static bool update_mtime(unsigned ivalid, bool have_file) + { + /* Always update if mtime is explicitly set */ + if (ivalid & ATTR_MTIME_SET) + return true; + + /* If it's an open(O_TRUNC) or an ftruncate(), don't update */ +- if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE))) ++ if ((ivalid & ATTR_SIZE) && ((ivalid & ATTR_OPEN) || have_file)) + return false; + + /* In all other cases update */ + return true; + } + +-static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg) ++static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg, ++ bool have_file) + { + unsigned ivalid = iattr->ia_valid; + +@@ -1097,7 +1098,7 @@ static void iattr_to_fattr(struct iattr + if (!(ivalid & ATTR_ATIME_SET)) + arg->valid |= FATTR_ATIME_NOW; + } +- if ((ivalid & ATTR_MTIME) && update_mtime(ivalid)) { ++ if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, have_file)) { + arg->valid |= FATTR_MTIME; + arg->mtime = iattr->ia_mtime.tv_sec; + arg->mtimensec = iattr->ia_mtime.tv_nsec; +@@ -1114,8 +1115,8 @@ static void iattr_to_fattr(struct iattr + * vmtruncate() doesn't allow for this case, so do the rlimit checking + * and the actual truncation by hand. 
+ */ +-static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, +- struct file *file) ++int fuse_do_setattr(struct dentry *entry, struct iattr *attr, ++ struct file *file) + { + struct inode *inode = entry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); +@@ -1153,7 +1154,7 @@ static int fuse_do_setattr(struct dentry + + memset(&inarg, 0, sizeof(inarg)); + memset(&outarg, 0, sizeof(outarg)); +- iattr_to_fattr(attr, &inarg); ++ iattr_to_fattr(attr, &inarg, file != NULL); + if (file) { + struct fuse_file *ff = file->private_data; + inarg.valid |= FATTR_FH; +@@ -1195,10 +1196,7 @@ static int fuse_do_setattr(struct dentry + + static int fuse_setattr(struct dentry *entry, struct iattr *attr) + { +- if (attr->ia_valid & ATTR_FILE) +- return fuse_do_setattr(entry, attr, attr->ia_file); +- else +- return fuse_do_setattr(entry, attr, NULL); ++ return fuse_do_setattr(entry, attr, NULL); + } + + static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry, +diff -uprN e/fs/fuse/file.c f/fs/fuse/file.c +--- e/fs/fuse/file.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/fs/fuse/file.c 2008-05-28 20:29:29.410207000 +0000 +@@ -909,6 +909,11 @@ static sector_t fuse_bmap(struct address + return err ? 
0 : outarg.block; + } + ++static int fuse_fsetattr(struct file *file, struct iattr *attr) ++{ ++ return fuse_do_setattr(file->f_path.dentry, attr, file); ++} ++ + static const struct file_operations fuse_file_operations = { + .llseek = generic_file_llseek, + .read = do_sync_read, +@@ -922,6 +927,7 @@ static const struct file_operations fuse + .fsync = fuse_fsync, + .lock = fuse_file_lock, + .flock = fuse_file_flock, ++ .fsetattr = fuse_fsetattr, + .splice_read = generic_file_splice_read, + }; + +@@ -935,6 +941,7 @@ static const struct file_operations fuse + .fsync = fuse_fsync, + .lock = fuse_file_lock, + .flock = fuse_file_flock, ++ .fsetattr = fuse_fsetattr, + /* no mmap and splice_read */ + }; + +diff -uprN e/fs/fuse/fuse_i.h f/fs/fuse/fuse_i.h +--- e/fs/fuse/fuse_i.h 2008-04-17 02:49:44.000000000 +0000 ++++ f/fs/fuse/fuse_i.h 2008-05-28 20:29:29.410207000 +0000 +@@ -509,6 +509,10 @@ void fuse_change_attributes(struct inode + */ + int fuse_dev_init(void); + ++ ++int fuse_do_setattr(struct dentry *entry, struct iattr *attr, ++ struct file *file); ++ + /** + * Cleanup the client device + */ +diff -uprN e/fs/hpfs/namei.c f/fs/hpfs/namei.c +--- e/fs/hpfs/namei.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/fs/hpfs/namei.c 2008-05-28 20:29:28.910241000 +0000 +@@ -426,7 +426,7 @@ again: + /*printk("HPFS: truncating file before delete.\n");*/ + newattrs.ia_size = 0; + newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; +- err = notify_change(dentry, &newattrs); ++ err = notify_change(dentry, NULL, &newattrs); + put_write_access(inode); + if (!err) + goto again; +diff -uprN e/fs/namei.c f/fs/namei.c +--- e/fs/namei.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/fs/namei.c 2008-05-28 20:29:29.410207000 +0000 +@@ -313,7 +313,12 @@ int vfs_permission(struct nameidata *nd, + */ + int file_permission(struct file *file, int mask) + { +- return permission(file->f_path.dentry->d_inode, mask, NULL); ++ struct nameidata nd; ++ ++ nd.path = file->f_path; ++ nd.flags = LOOKUP_ACCESS; ++ ++ 
return permission(nd.path.dentry->d_inode, mask, &nd); + } + + /* +@@ -1150,24 +1155,21 @@ static int do_path_lookup(int dfd, const + path_get(&fs->pwd); + read_unlock(&fs->lock); + } else { +- struct dentry *dentry; +- + file = fget_light(dfd, &fput_needed); + retval = -EBADF; + if (!file) + goto out_fail; + +- dentry = file->f_path.dentry; ++ nd->path = file->f_path; + + retval = -ENOTDIR; +- if (!S_ISDIR(dentry->d_inode->i_mode)) ++ if (!S_ISDIR(nd->path.dentry->d_inode->i_mode)) + goto fput_fail; + + retval = file_permission(file, MAY_EXEC); + if (retval) + goto fput_fail; + +- nd->path = file->f_path; + path_get(&file->f_path); + + fput_light(file, fput_needed); +@@ -1511,6 +1513,8 @@ static inline int may_create(struct inod + return -EEXIST; + if (IS_DEADDIR(dir)) + return -ENOENT; ++ if (nd) ++ nd->flags |= LOOKUP_CONTINUE; + return permission(dir,MAY_WRITE | MAY_EXEC, nd); + } + +@@ -1586,7 +1590,7 @@ int vfs_create(struct inode *dir, struct + return -EACCES; /* shouldn't it be ENOSYS? */ + mode &= S_IALLUGO; + mode |= S_IFREG; +- error = security_inode_create(dir, dentry, mode); ++ error = security_inode_create(dir, dentry, nd ? 
nd->path.mnt : NULL, mode); + if (error) + return error; + DQUOT_INIT(dir); +@@ -1663,7 +1667,7 @@ int may_open(struct nameidata *nd, int a + if (!error) { + DQUOT_INIT(inode); + +- error = do_truncate(dentry, 0, ++ error = do_truncate(dentry, nd->path.mnt, 0, + ATTR_MTIME|ATTR_CTIME|ATTR_OPEN, + NULL); + } +@@ -1921,8 +1925,8 @@ fail: + } + EXPORT_SYMBOL_GPL(lookup_create); + +-int vfs_mknod(struct inode *dir, struct dentry *dentry, +- int mode, dev_t dev, struct nameidata *nd) ++int vfs_mknod(struct inode *dir, struct dentry *dentry, struct vfsmount *mnt, ++ int mode, dev_t dev, struct nameidata *nd) + { + int error = may_create(dir, dentry, NULL); + +@@ -1934,7 +1939,7 @@ int vfs_mknod(struct inode *dir, struct + if (!dir->i_op || !dir->i_op->mknod) + return -EPERM; + +- error = security_inode_mknod(dir, dentry, mode, dev); ++ error = security_inode_mknod(dir, dentry, mnt, mode, dev); + if (error) + return error; + +@@ -1973,12 +1978,12 @@ asmlinkage long sys_mknodat(int dfd, con + error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd); + break; + case S_IFCHR: case S_IFBLK: +- error = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, +- new_decode_dev(dev), &nd); ++ error = vfs_mknod(nd.path.dentry->d_inode, dentry, ++ nd.path.mnt, mode, new_decode_dev(dev), &nd); + break; + case S_IFIFO: case S_IFSOCK: +- error = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, +- 0, &nd); ++ error = vfs_mknod(nd.path.dentry->d_inode, dentry, ++ nd.path.mnt, mode, 0, &nd); + break; + case S_IFDIR: + error = -EPERM; +@@ -2000,8 +2006,8 @@ asmlinkage long sys_mknod(const char __u + return sys_mknodat(AT_FDCWD, filename, mode, dev); + } + +-int vfs_mkdir(struct inode *dir, struct dentry *dentry, +- int mode, struct nameidata *nd) ++int vfs_mkdir(struct inode *dir, struct dentry *dentry, struct vfsmount *mnt, ++ int mode, struct nameidata *nd) + { + int error = may_create(dir, dentry, nd); + +@@ -2011,7 +2018,7 @@ int vfs_mkdir(struct inode *dir, struct + return -EPERM; + + 
mode &= (S_IRWXUGO|S_ISVTX); +- error = security_inode_mkdir(dir, dentry, mode); ++ error = security_inode_mkdir(dir, dentry, mnt, mode); + if (error) + return error; + +@@ -2044,7 +2051,7 @@ asmlinkage long sys_mkdirat(int dfd, con + + if (!IS_POSIXACL(nd.path.dentry->d_inode)) + mode &= ~current->fs->umask; +- error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode, &nd); ++ error = vfs_mkdir(nd.path.dentry->d_inode, dentry, nd.path.mnt, mode, &nd); + dput(dentry); + out_unlock: + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); +@@ -2087,8 +2094,8 @@ void dentry_unhash(struct dentry *dentry + spin_unlock(&dcache_lock); + } + +-int vfs_rmdir(struct inode *dir, struct dentry *dentry, +- struct nameidata *nd) ++int vfs_rmdir(struct inode *dir, struct dentry *dentry, ++ struct nameidata *nd, struct vfsmount *mnt) + { + int error = may_delete(dir, dentry, 1); + +@@ -2097,6 +2104,10 @@ int vfs_rmdir(struct inode *dir, struct + if (!dir->i_op || !dir->i_op->rmdir) + return -EPERM; + ++ error = security_inode_rmdir(dir, dentry, mnt); ++ if (error) ++ return error; ++ + DQUOT_INIT(dir); + + mutex_lock(&dentry->d_inode->i_mutex); +@@ -2104,12 +2115,9 @@ int vfs_rmdir(struct inode *dir, struct + if (d_mountpoint(dentry)) + error = -EBUSY; + else { +- error = security_inode_rmdir(dir, dentry); +- if (!error) { +- error = dir->i_op->rmdir(dir, dentry); +- if (!error) +- dentry->d_inode->i_flags |= S_DEAD; +- } ++ error = dir->i_op->rmdir(dir, dentry); ++ if (!error) ++ dentry->d_inode->i_flags |= S_DEAD; + } + mutex_unlock(&dentry->d_inode->i_mutex); + if (!error) { +@@ -2151,7 +2159,7 @@ static long do_rmdir(int dfd, const char + error = PTR_ERR(dentry); + if (IS_ERR(dentry)) + goto exit2; +- error = vfs_rmdir(nd.path.dentry->d_inode, dentry, &nd); ++ error = vfs_rmdir(nd.path.dentry->d_inode, dentry, &nd, nd.path.mnt); + dput(dentry); + exit2: + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); +@@ -2167,8 +2175,8 @@ asmlinkage long sys_rmdir(const char __u + return 
do_rmdir(AT_FDCWD, pathname); + } + +-int vfs_unlink(struct inode *dir, struct dentry *dentry, +- struct nameidata *nd) ++int vfs_unlink(struct inode *dir, struct dentry *dentry, ++ struct nameidata *nd, struct vfsmount *mnt) + { + int error = may_delete(dir, dentry, 0, nd); + +@@ -2183,7 +2191,7 @@ int vfs_unlink(struct inode *dir, struct + if (d_mountpoint(dentry)) + error = -EBUSY; + else { +- error = security_inode_unlink(dir, dentry); ++ error = security_inode_unlink(dir, dentry, mnt); + if (!error) + error = dir->i_op->unlink(dir, dentry); + } +@@ -2232,7 +2240,7 @@ static long do_unlinkat(int dfd, const c + inode = dentry->d_inode; + if (inode) + atomic_inc(&inode->i_count); +- error = vfs_unlink(nd.path.dentry->d_inode, dentry, &nd); ++ error = vfs_unlink(nd.path.dentry->d_inode, dentry, &nd, nd.path.mnt); + exit2: + dput(dentry); + } +@@ -2267,7 +2275,7 @@ asmlinkage long sys_unlink(const char __ + return do_unlinkat(AT_FDCWD, pathname); + } + +-int vfs_symlink(struct inode *dir, struct dentry *dentry, ++int vfs_symlink(struct inode *dir, struct dentry *dentry, struct vfsmount *mnt, + const char *oldname, int mode, struct nameidata *nd) + const char *oldname, int mode, struct nameidata *nd) + { +@@ -2277,7 +2286,7 @@ int vfs_symlink(struct inode *dir, struc + if (!dir->i_op || !dir->i_op->symlink) + return -EPERM; + +- error = security_inode_symlink(dir, dentry, oldname); ++ error = security_inode_symlink(dir, dentry, mnt, oldname); + if (error) + return error; + +@@ -2313,8 +2322,8 @@ asmlinkage long sys_symlinkat(const char + if (IS_ERR(dentry)) + goto out_unlock; + +- error = vfs_symlink(nd.path.dentry->d_inode, dentry, from, +- S_IALLUGO, &nd); ++ error = vfs_symlink(nd.path.dentry->d_inode, dentry, nd.path.mnt, from, ++ S_IALLUGO, &nd); + dput(dentry); + out_unlock: + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); +@@ -2330,8 +2340,8 @@ asmlinkage long sys_symlink(const char _ + return sys_symlinkat(oldname, AT_FDCWD, newname); + } + +-int 
vfs_link(struct dentry *old_dentry, struct inode *dir, +- struct dentry *new_dentry, struct nameidata *nd) ++int vfs_link(struct dentry *old_dentry, struct vfsmount *old_mnt, struct inode *dir, ++ struct dentry *new_dentry, struct vfsmount *new_mnt, struct nameidata *nd) + { + struct inode *inode = old_dentry->d_inode; + int error; +@@ -2355,7 +2365,8 @@ int vfs_link(struct dentry *old_dentry, + if (S_ISDIR(old_dentry->d_inode->i_mode)) + return -EPERM; + +- error = security_inode_link(old_dentry, dir, new_dentry); ++ error = security_inode_link(old_dentry, old_mnt, dir, new_dentry, ++ new_mnt); + if (error) + return error; + +@@ -2408,8 +2419,8 @@ asmlinkage long sys_linkat(int olddfd, c + goto out_unlock_dput; + } + +- error = vfs_link(old_nd.path.dentry, nd.path.dentry->d_inode, +- new_dentry, &nd); ++ error = vfs_link(old_nd.path.dentry, old_nd.path.mnt, nd.path.dentry->d_inode, ++ new_dentry, nd.path.mnt, &nd); + out_unlock_dput: + dput(new_dentry); + out_unlock: +@@ -2460,7 +2472,8 @@ asmlinkage long sys_link(const char __us + * locking]. 
+ */ + static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, +- struct inode *new_dir, struct dentry *new_dentry) ++ struct vfsmount *old_mnt, struct inode *new_dir, ++ struct dentry *new_dentry, struct vfsmount *new_mnt) + { + int error = 0; + struct inode *target; +@@ -2475,7 +2488,8 @@ static int vfs_rename_dir(struct inode * + return error; + } + +- error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); ++ error = security_inode_rename(old_dir, old_dentry, old_mnt, ++ new_dir, new_dentry, new_mnt); + if (error) + return error; + +@@ -2503,12 +2517,14 @@ static int vfs_rename_dir(struct inode * + } + + static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, +- struct inode *new_dir, struct dentry *new_dentry) ++ struct vfsmount *old_mnt, struct inode *new_dir, ++ struct dentry *new_dentry, struct vfsmount *new_mnt) + { + struct inode *target; + int error; + +- error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); ++ error = security_inode_rename(old_dir, old_dentry, old_mnt, ++ new_dir, new_dentry, new_mnt); + if (error) + return error; + +@@ -2531,7 +2547,8 @@ static int vfs_rename_other(struct inode + } + + int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, +- struct inode *new_dir, struct dentry *new_dentry) ++ struct vfsmount *old_mnt, struct inode *new_dir, ++ struct dentry *new_dentry, struct vfsmount *new_mnt) + { + int error; + int is_dir = S_ISDIR(old_dentry->d_inode->i_mode); +@@ -2560,9 +2577,11 @@ int vfs_rename(struct inode *old_dir, st + old_name = fsnotify_oldname_init(old_dentry->d_name.name); + + if (is_dir) +- error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); ++ error = vfs_rename_dir(old_dir, old_dentry, old_mnt, ++ new_dir, new_dentry, new_mnt); + else +- error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); ++ error = vfs_rename_other(old_dir, old_dentry, old_mnt, ++ new_dir, new_dentry, new_mnt); + if (!error) { + const 
char *new_name = old_dentry->d_name.name; + fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir, +@@ -2634,8 +2653,8 @@ static int do_rename(int olddfd, const c + if (new_dentry == trap) + goto exit5; + +- error = vfs_rename(old_dir->d_inode, old_dentry, +- new_dir->d_inode, new_dentry); ++ error = vfs_rename(old_dir->d_inode, old_dentry, oldnd.path.mnt, ++ new_dir->d_inode, new_dentry, newnd.path.mnt); + exit5: + dput(new_dentry); + exit4: +diff -uprN e/fs/namespace.c f/fs/namespace.c +--- e/fs/namespace.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/fs/namespace.c 2008-05-28 20:29:29.410207000 +0000 +@@ -1852,3 +1852,30 @@ void __put_mnt_ns(struct mnt_namespace * + release_mounts(&umount_list); + kfree(ns); + } ++ ++char *d_namespace_path(struct dentry *dentry, struct vfsmount *vfsmnt, ++ char *buf, int buflen) ++{ ++ struct path root, ns_root = { }; ++ char *res; ++ ++ read_lock(&current->fs->lock); ++ root = current->fs->root; ++ path_get(&current->fs->root); ++ read_unlock(&current->fs->lock); ++ spin_lock(&vfsmount_lock); ++ if (root.mnt) ++ ns_root.mnt = mntget(root.mnt->mnt_ns->root); ++ if (ns_root.mnt) ++ ns_root.dentry = dget(ns_root.mnt->mnt_root); ++ spin_unlock(&vfsmount_lock); ++ res = __d_path(dentry, vfsmnt, &ns_root, buf, buflen, 1, 1); ++ path_put(&root); ++ path_put(&ns_root); ++ ++ /* Prevent empty path for lazily unmounted filesystems.
*/ ++ if (!IS_ERR(res) && *res == '\0') ++ *--res = '.'; ++ return res; ++} ++EXPORT_SYMBOL(d_namespace_path); +diff -uprN e/fs/nfsd/nfs4recover.c f/fs/nfsd/nfs4recover.c +--- e/fs/nfsd/nfs4recover.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/fs/nfsd/nfs4recover.c 2008-05-28 20:29:28.910241000 +0000 +@@ -154,7 +154,8 @@ nfsd4_create_clid_dir(struct nfs4_client + dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n"); + goto out_put; + } +- status = vfs_mkdir(rec_dir.path.dentry->d_inode, dentry, S_IRWXU, NULL); ++ status = vfs_mkdir(rec_dir.path.dentry->d_inode, dentry, ++ rec_dir.path.mnt, S_IRWXU, NULL); + out_put: + dput(dentry); + out_unlock: +@@ -258,7 +259,7 @@ nfsd4_remove_clid_file(struct dentry *di + return -EINVAL; + } + mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); +- status = vfs_unlink(dir->d_inode, dentry, NULL); ++ status = vfs_unlink(dir->d_inode, dentry, NULL, rec_dir.path.mnt); + mutex_unlock(&dir->d_inode->i_mutex); + return status; + } +@@ -273,7 +274,7 @@ nfsd4_clear_clid_dir(struct dentry *dir, + * a kernel from the future.... 
*/ + nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file); + mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); +- status = vfs_rmdir(dir->d_inode, dentry, NULL); ++ status = vfs_rmdir(dir->d_inode, dentry, NULL, rec_dir.path.mnt); + mutex_unlock(&dir->d_inode->i_mutex); + return status; + } +diff -uprN e/fs/nfsd/nfs4xdr.c f/fs/nfsd/nfs4xdr.c +--- e/fs/nfsd/nfs4xdr.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/fs/nfsd/nfs4xdr.c 2008-05-28 20:29:28.910241000 +0000 +@@ -1501,7 +1501,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s + } + if (bmval0 & (FATTR4_WORD0_ACL | FATTR4_WORD0_ACLSUPPORT + | FATTR4_WORD0_SUPPORTED_ATTRS)) { +- err = nfsd4_get_nfs4_acl(rqstp, dentry, &acl); ++ err = nfsd4_get_nfs4_acl(rqstp, dentry, exp->ex_path.mnt, &acl); + aclsupport = (err == 0); + if (bmval0 & FATTR4_WORD0_ACL) { + if (err == -EOPNOTSUPP) +diff -uprN e/fs/nfsd/vfs.c f/fs/nfsd/vfs.c +--- e/fs/nfsd/vfs.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/fs/nfsd/vfs.c 2008-05-28 20:29:29.410207000 +0000 +@@ -388,7 +388,7 @@ nfsd_setattr(struct svc_rqst *rqstp, str + err = nfserr_notsync; + if (!check_guard || guardtime == inode->i_ctime.tv_sec) { + fh_lock(fhp); +- host_err = notify_change(dentry, iap); ++ host_err = notify_change(dentry, fhp->fh_export->ex_path.mnt, iap); + err = nfserrno(host_err); + fh_unlock(fhp); + } +@@ -408,11 +408,12 @@ out_nfserr: + #if defined(CONFIG_NFSD_V2_ACL) || \ + defined(CONFIG_NFSD_V3_ACL) || \ + defined(CONFIG_NFSD_V4) +-static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf) ++static ssize_t nfsd_getxattr(struct dentry *dentry, struct vfsmount *mnt, ++ char *key, void **buf) + { + ssize_t buflen; + +- buflen = vfs_getxattr(dentry, key, NULL, 0); ++ buflen = vfs_getxattr(dentry, mnt, key, NULL, 0, NULL); + if (buflen <= 0) + return buflen; + +@@ -420,13 +421,14 @@ static ssize_t nfsd_getxattr(struct dent + if (!*buf) + return -ENOMEM; + +- return vfs_getxattr(dentry, key, *buf, buflen); ++ return vfs_getxattr(dentry, mnt, key, 
*buf, buflen, NULL); + } + #endif + + #if defined(CONFIG_NFSD_V4) + static int +-set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key) ++set_nfsv4_acl_one(struct dentry *dentry, struct vfsmount *mnt, ++ struct posix_acl *pacl, char *key) + { + int len; + size_t buflen; +@@ -445,7 +447,7 @@ set_nfsv4_acl_one(struct dentry *dentry, + goto out; + } + +- error = vfs_setxattr(dentry, key, buf, len, 0); ++ error = vfs_setxattr(dentry, mnt, key, buf, len, 0, NULL); + out: + kfree(buf); + return error; +@@ -458,6 +460,7 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqst + __be32 error; + int host_error; + struct dentry *dentry; ++ struct vfsmount *mnt; + struct inode *inode; + struct posix_acl *pacl = NULL, *dpacl = NULL; + unsigned int flags = 0; +@@ -468,6 +471,7 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqst + return error; + + dentry = fhp->fh_dentry; ++ mnt = fhp->fh_export->ex_path.mnt; + inode = dentry->d_inode; + if (S_ISDIR(inode->i_mode)) + flags = NFS4_ACL_DIR; +@@ -478,12 +482,14 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqst + } else if (host_error < 0) + goto out_nfserr; + +- host_error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS); ++ host_error = set_nfsv4_acl_one(dentry, mnt, pacl, ++ POSIX_ACL_XATTR_ACCESS); + if (host_error < 0) + goto out_release; + + if (S_ISDIR(inode->i_mode)) +- host_error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT); ++ host_error = set_nfsv4_acl_one(dentry, mnt, dpacl, ++ POSIX_ACL_XATTR_DEFAULT); + + out_release: + posix_acl_release(pacl); +@@ -496,13 +502,13 @@ out_nfserr: + } + + static struct posix_acl * +-_get_posix_acl(struct dentry *dentry, char *key) ++_get_posix_acl(struct dentry *dentry, struct vfsmount *mnt, char *key) + { + void *buf = NULL; + struct posix_acl *pacl = NULL; + int buflen; + +- buflen = nfsd_getxattr(dentry, key, &buf); ++ buflen = nfsd_getxattr(dentry, mnt, key, &buf); + if (!buflen) + buflen = -ENODATA; + if (buflen <= 0) +@@ -514,14 +520,15 @@ _get_posix_acl(struct 
dentry *dentry, ch + } + + int +-nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_acl **acl) ++nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, ++ struct vfsmount *mnt, struct nfs4_acl **acl) + { + struct inode *inode = dentry->d_inode; + int error = 0; + struct posix_acl *pacl = NULL, *dpacl = NULL; + unsigned int flags = 0; + +- pacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_ACCESS); ++ pacl = _get_posix_acl(dentry, mnt, POSIX_ACL_XATTR_ACCESS); + if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA) + pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); + if (IS_ERR(pacl)) { +@@ -531,7 +538,7 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqst + } + + if (S_ISDIR(inode->i_mode)) { +- dpacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_DEFAULT); ++ dpacl = _get_posix_acl(dentry, mnt, POSIX_ACL_XATTR_DEFAULT); + if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA) + dpacl = NULL; + else if (IS_ERR(dpacl)) { +@@ -944,13 +951,13 @@ out: + return err; + } + +-static void kill_suid(struct dentry *dentry) ++static void kill_suid(struct dentry *dentry, struct vfsmount *mnt) + { + struct iattr ia; + ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; + + mutex_lock(&dentry->d_inode->i_mutex); +- notify_change(dentry, &ia); ++ notify_change(dentry, mnt, &ia); + mutex_unlock(&dentry->d_inode->i_mutex); + } + +@@ -1009,7 +1016,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, s + + /* clear setuid/setgid flag after write */ + if (host_err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID))) +- kill_suid(dentry); ++ kill_suid(dentry, exp->ex_path.mnt); + + if (host_err >= 0 && stable) { + static ino_t last_ino; +@@ -1186,6 +1193,7 @@ nfsd_create(struct svc_rqst *rqstp, stru + int type, dev_t rdev, struct svc_fh *resfhp) + { + struct dentry *dentry, *dchild = NULL; ++ struct svc_export *exp; + struct inode *dirp; + __be32 err; + __be32 err2; +@@ -1203,6 +1211,7 @@ nfsd_create(struct svc_rqst *rqstp, stru + goto out; + + dentry = fhp->fh_dentry; ++ exp = 
fhp->fh_export; + dirp = dentry->d_inode; + + err = nfserr_notdir; +@@ -1219,7 +1228,7 @@ nfsd_create(struct svc_rqst *rqstp, stru + host_err = PTR_ERR(dchild); + if (IS_ERR(dchild)) + goto out_nfserr; +- err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); ++ err = fh_compose(resfhp, exp, dchild, fhp); + if (err) + goto out; + } else { +@@ -1258,13 +1267,14 @@ nfsd_create(struct svc_rqst *rqstp, stru + host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); + break; + case S_IFDIR: +- host_err = vfs_mkdir(dirp, dchild, iap->ia_mode, NULL); ++ host_err = vfs_mkdir(dirp, dchild, exp->ex_path.mnt, iap->ia_mode, NULL); + break; + case S_IFCHR: + case S_IFBLK: + case S_IFIFO: + case S_IFSOCK: +- host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev, NULL); ++ host_err = vfs_mknod(dirp, dchild, exp->ex_path.mnt, ++ iap->ia_mode, rdev, NULL); + break; + default: + printk("nfsd: bad file type %o in nfsd_create\n", type); +@@ -1273,7 +1283,7 @@ nfsd_create(struct svc_rqst *rqstp, stru + if (host_err < 0) + goto out_nfserr; + +- if (EX_ISSYNC(fhp->fh_export)) { ++ if (EX_ISSYNC(exp)) { + err = nfserrno(nfsd_sync_dir(dentry)); + write_inode_now(dchild->d_inode, 1); + } +@@ -1496,6 +1506,7 @@ nfsd_symlink(struct svc_rqst *rqstp, str + struct iattr *iap) + { + struct dentry *dentry, *dnew; ++ struct svc_export *exp; + __be32 err, cerr; + int host_err; + umode_t mode; +@@ -1522,6 +1533,7 @@ nfsd_symlink(struct svc_rqst *rqstp, str + if (iap && (iap->ia_valid & ATTR_MODE)) + mode = iap->ia_mode & S_IALLUGO; + ++ exp = fhp->fh_export; + if (unlikely(path[plen] != 0)) { + char *path_alloced = kmalloc(plen+1, GFP_KERNEL); + if (path_alloced == NULL) +@@ -1529,21 +1541,21 @@ nfsd_symlink(struct svc_rqst *rqstp, str + strncpy(path_alloced, path, plen); + path_alloced[plen] = 0; + host_err = vfs_symlink(dentry->d_inode, dnew, +- path_alloced, mode, NULL); ++ exp->ex_path.mnt, path_alloced, mode, NULL); + kfree(path_alloced); + } + } else +- host_err = vfs_symlink(dentry->d_inode, 
dnew, +- path, mode, NULL); ++ host_err = vfs_symlink(dentry->d_inode, dnew, exp->ex_path.mnt, ++ path, mode, NULL); + + if (!host_err) { +- if (EX_ISSYNC(fhp->fh_export)) ++ if (EX_ISSYNC(exp)) + host_err = nfsd_sync_dir(dentry); + } + err = nfserrno(host_err); + fh_unlock(fhp); + +- cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); ++ cerr = fh_compose(resfhp, exp, dnew, fhp); + dput(dnew); + if (err==0) err = cerr; + out: +@@ -1592,7 +1606,8 @@ nfsd_link(struct svc_rqst *rqstp, struct + dold = tfhp->fh_dentry; + dest = dold->d_inode; + +- host_err = vfs_link(dold, dirp, dnew, NULL); ++ host_err = vfs_link(dold, tfhp->fh_export->ex_path.mnt, dirp, ++ dnew, ffhp->fh_export->ex_path.mnt, NULL); + if (!host_err) { + if (EX_ISSYNC(ffhp->fh_export)) { + err = nfserrno(nfsd_sync_dir(ddir)); +@@ -1685,7 +1700,8 @@ nfsd_rename(struct svc_rqst *rqstp, stru + host_err = -EPERM; + } else + #endif +- host_err = vfs_rename(fdir, odentry, tdir, ndentry); ++ host_err = vfs_rename(fdir, odentry, ffhp->fh_export->ex_path.mnt, ++ tdir, ndentry, tfhp->fh_export->ex_path.mnt); + if (!host_err && EX_ISSYNC(tfhp->fh_export)) { + host_err = nfsd_sync_dir(tdentry); + if (!host_err) +@@ -1721,6 +1737,7 @@ nfsd_unlink(struct svc_rqst *rqstp, stru + char *fname, int flen) + { + struct dentry *dentry, *rdentry; ++ struct svc_export *exp; + struct inode *dirp; + __be32 err; + int host_err; +@@ -1735,6 +1752,7 @@ nfsd_unlink(struct svc_rqst *rqstp, stru + fh_lock_nested(fhp, I_MUTEX_PARENT); + dentry = fhp->fh_dentry; + dirp = dentry->d_inode; ++ exp = fhp->fh_export; + + rdentry = lookup_one_len(fname, dentry, flen); + host_err = PTR_ERR(rdentry); +@@ -1752,21 +1770,21 @@ nfsd_unlink(struct svc_rqst *rqstp, stru + + if (type != S_IFDIR) { /* It's UNLINK */ + #ifdef MSNFS +- if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && ++ if ((exp->ex_flags & NFSEXP_MSNFS) && + (atomic_read(&rdentry->d_count) > 1)) { + host_err = -EPERM; + } else + #endif +- host_err = vfs_unlink(dirp, rdentry, NULL); 
++ host_err = vfs_unlink(dirp, rdentry, NULL, exp->ex_path.mnt); + } else { /* It's RMDIR */ +- host_err = vfs_rmdir(dirp, rdentry, NULL); ++ host_err = vfs_rmdir(dirp, rdentry, NULL, exp->ex_path.mnt); + } + + dput(rdentry); + + if (host_err) + goto out_nfserr; +- if (EX_ISSYNC(fhp->fh_export)) ++ if (EX_ISSYNC(exp)) + host_err = nfsd_sync_dir(dentry); + + out_nfserr: +@@ -1995,7 +2013,8 @@ nfsd_get_posix_acl(struct svc_fh *fhp, i + return ERR_PTR(-EOPNOTSUPP); + } + +- size = nfsd_getxattr(fhp->fh_dentry, name, &value); ++ size = nfsd_getxattr(fhp->fh_dentry, fhp->fh_export->ex_path.mnt, name, ++ &value); + if (size < 0) + return ERR_PTR(size); + +@@ -2007,6 +2026,7 @@ nfsd_get_posix_acl(struct svc_fh *fhp, i + int + nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl) + { ++ struct vfsmount *mnt; + struct inode *inode = fhp->fh_dentry->d_inode; + char *name; + void *value = NULL; +@@ -2039,13 +2059,16 @@ nfsd_set_posix_acl(struct svc_fh *fhp, i + } else + size = 0; + ++ mnt = fhp->fh_export->ex_path.mnt; + if (size) +- error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0); ++ error = vfs_setxattr(fhp->fh_dentry, mnt, name, value, size, 0, ++ NULL); + else { + if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT) + error = 0; + else { +- error = vfs_removexattr(fhp->fh_dentry, name); ++ error = vfs_removexattr(fhp->fh_dentry, mnt, name, ++ NULL); + if (error == -ENODATA) + error = 0; + } +diff -uprN e/fs/ntfs/file.c f/fs/ntfs/file.c +--- e/fs/ntfs/file.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/fs/ntfs/file.c 2008-05-28 20:29:28.910241000 +0000 +@@ -2118,7 +2118,7 @@ static ssize_t ntfs_file_aio_write_noloc + goto out; + if (!count) + goto out; +- err = remove_suid(file->f_path.dentry); ++ err = remove_suid(&file->f_path); + if (err) + goto out; + file_update_time(file); +diff -uprN e/fs/open.c f/fs/open.c +--- e/fs/open.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/fs/open.c 2008-05-28 20:29:29.410207000 +0000 +@@ -194,8 +194,8 @@ 
out: + return error; + } + +-int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, +- struct file *filp) ++int do_truncate(struct dentry *dentry, struct vfsmount *mnt, loff_t length, ++ unsigned int time_attrs, struct file *filp) + { + int err; + struct iattr newattrs; +@@ -206,16 +206,15 @@ int do_truncate(struct dentry *dentry, l + + newattrs.ia_size = length; + newattrs.ia_valid = ATTR_SIZE | time_attrs; +- if (filp) { +- newattrs.ia_file = filp; ++ ++ if (filp) + newattrs.ia_valid |= ATTR_FILE; +- } + + /* Remove suid/sgid on truncate too */ + newattrs.ia_valid |= should_remove_suid(dentry); + + mutex_lock(&dentry->d_inode->i_mutex); +- err = notify_change(dentry, &newattrs); ++ err = fnotify_change(dentry, mnt, &newattrs, filp); + mutex_unlock(&dentry->d_inode->i_mutex); + return err; + } +@@ -271,7 +270,7 @@ static long do_sys_truncate(const char _ + error = locks_verify_truncate(inode, NULL, length); + if (!error) { + DQUOT_INIT(inode); +- error = do_truncate(nd.path.dentry, length, 0, NULL); ++ error = do_truncate(nd.path.dentry, nd.path.mnt, length, 0, NULL); + } + + put_write_and_out: +@@ -324,7 +323,8 @@ static long do_sys_ftruncate(unsigned in + + error = locks_verify_truncate(inode, file, length); + if (!error) +- error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); ++ error = do_truncate(dentry, file->f_path.mnt, length, ++ ATTR_MTIME|ATTR_CTIME, file); + out_putf: + fput(file); + out: +@@ -500,8 +500,8 @@ out: + + asmlinkage long sys_fchdir(unsigned int fd) + { ++ struct nameidata nd = { .flags = 0 }; + struct file *file; +- struct inode *inode; + int error; + + error = -EBADF; +@@ -509,12 +509,11 @@ asmlinkage long sys_fchdir(unsigned int + if (!file) + goto out; + +- inode = file->f_path.dentry->d_inode; +- + error = -ENOTDIR; +- if (!S_ISDIR(inode->i_mode)) ++ if (!S_ISDIR(file->f_path.dentry->d_inode->i_mode)) + goto out_putf; + ++ nd.path = file->f_path; + error = file_permission(file, MAY_EXEC); + if 
(!error) + set_fs_pwd(current->fs, &file->f_path); +@@ -577,8 +576,8 @@ asmlinkage long sys_fchmod(unsigned int + if (mode == (mode_t) -1) + mode = inode->i_mode; + newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); +- newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; +- err = notify_change(dentry, &newattrs); ++ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME | ATTR_FILE; ++ err = fnotify_change(dentry, file->f_path.mnt, &newattrs, file); + mutex_unlock(&inode->i_mutex); + + out_putf: +@@ -613,7 +612,7 @@ asmlinkage long sys_fchmodat(int dfd, co + mode = inode->i_mode; + newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; +- error = notify_change(nd.path.dentry, &newattrs); ++ error = notify_change(nd.path.dentry, nd.path.mnt, &newattrs); + mutex_unlock(&inode->i_mutex); + + dput_and_out: +@@ -627,7 +626,7 @@ asmlinkage long sys_chmod(const char __u + } + + static int chown_common(struct dentry *dentry, struct vfsmount *mnt, +- uid_t user, gid_t group) ++ uid_t user, gid_t group, struct file *file) + { + struct inode * inode; + int error; +@@ -666,8 +666,11 @@ static int chown_common(struct dentry * + if (!S_ISDIR(inode->i_mode)) + newattrs.ia_valid |= + ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; ++ if (file) ++ newattrs.ia_valid |= ATTR_FILE; ++ + mutex_lock(&inode->i_mutex); +- error = notify_change(dentry, &newattrs); ++ error = fnotify_change(dentry, mnt, &newattrs, file); + mutex_unlock(&inode->i_mutex); + out: + return error; +@@ -671,7 +686,7 @@ asmlinkage long sys_chown(const char __u + error = cow_check_and_break(&nd); + if (!error) + #endif +- error = chown_common(nd.path.dentry, nd.path.mnt, user, group); ++ error = chown_common(nd.path.dentry, nd.path.mnt, user, group, NULL); + path_put(&nd.path); + out: + return error; +@@ -691,7 +710,7 @@ asmlinkage long sys_fchownat(int dfd, co + error = __user_walk_fd(dfd, filename, follow, &nd); + if (!error) + #endif +- error = 
chown_common(nd.path.dentry, nd.path.mnt, user, group); ++ error = chown_common(nd.path.dentry, nd.path.mnt, user, group, NULL); + path_put(&nd.path); + out: + return error; +@@ -705,7 +708,7 @@ asmlinkage long sys_lchown(const char __ + error = cow_check_and_break(&nd); + if (!error) + #endif +- error = chown_common(nd.path.dentry, nd.path.mnt, user, group); ++ error = chown_common(nd.path.dentry, nd.path.mnt, user, group, NULL); + path_put(&nd.path); + out: + return error; +@@ -724,7 +727,7 @@ asmlinkage long sys_fchown(unsigned int + + dentry = file->f_path.dentry; + audit_inode(NULL, dentry); +- error = chown_common(dentry, file->f_vfsmnt, user, group); ++ error = chown_common(dentry, file->f_path.mnt, user, group, file); + fput(file); + out: + return error; +diff -uprN e/fs/reiserfs/xattr.c f/fs/reiserfs/xattr.c +--- e/fs/reiserfs/xattr.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/fs/reiserfs/xattr.c 2008-05-28 20:29:28.910241000 +0000 +@@ -460,7 +460,7 @@ reiserfs_xattr_set(struct inode *inode, + newattrs.ia_size = buffer_size; + newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; + mutex_lock_nested(&xinode->i_mutex, I_MUTEX_XATTR); +- err = notify_change(dentry, &newattrs); ++ err = notify_change(dentry, NULL, &newattrs); + if (err) + goto out_filp; + +@@ -747,7 +747,7 @@ int reiserfs_delete_xattrs(struct inode + if (dir->d_inode->i_nlink <= 2) { + root = get_xa_root(inode->i_sb, XATTR_REPLACE); + reiserfs_write_lock_xattrs(inode->i_sb); +- err = vfs_rmdir(root->d_inode, dir, NULL); ++ err = vfs_rmdir(root->d_inode, dir, NULL, NULL); + reiserfs_write_unlock_xattrs(inode->i_sb); + dput(root); + } else { +@@ -791,7 +791,7 @@ reiserfs_chown_xattrs_filler(void *buf, + } + + if (!S_ISDIR(xafile->d_inode->i_mode)) +- err = notify_change(xafile, attrs); ++ err = notify_change(xafile, NULL, attrs); + dput(xafile); + + return err; +@@ -835,7 +835,7 @@ int reiserfs_chown_xattrs(struct inode * + goto out_dir; + } + +- err = notify_change(dir, attrs); ++ err = 
notify_change(dir, NULL, attrs); + unlock_kernel(); + + out_dir: +diff -uprN e/fs/splice.c f/fs/splice.c +--- e/fs/splice.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/fs/splice.c 2008-05-28 20:29:28.910241000 +0000 +@@ -762,7 +762,7 @@ generic_file_splice_write_nolock(struct + ssize_t ret; + int err; + +- err = remove_suid(out->f_path.dentry); ++ err = remove_suid(&out->f_path); + if (unlikely(err)) + return err; + +@@ -822,7 +822,7 @@ generic_file_splice_write(struct pipe_in + if (killpriv) + err = security_inode_killpriv(out->f_path.dentry); + if (!err && killsuid) +- err = __remove_suid(out->f_path.dentry, killsuid); ++ err = __remove_suid(&out->f_path, killsuid); + mutex_unlock(&inode->i_mutex); + if (err) + return err; +diff -uprN e/fs/stat.c f/fs/stat.c +--- e/fs/stat.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/fs/stat.c 2008-05-28 20:29:28.910241000 +0000 +@@ -306,7 +306,7 @@ asmlinkage long sys_readlinkat(int dfd, + + error = -EINVAL; + if (inode->i_op && inode->i_op->readlink) { +- error = security_inode_readlink(nd.path.dentry); ++ error = security_inode_readlink(nd.path.dentry, nd.path.mnt); + if (!error) { + touch_atime(nd.path.mnt, nd.path.dentry); + error = inode->i_op->readlink(nd.path.dentry, +diff -uprN e/fs/sysfs/file.c f/fs/sysfs/file.c +--- e/fs/sysfs/file.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/fs/sysfs/file.c 2008-05-28 20:29:28.910241000 +0000 +@@ -579,7 +579,7 @@ int sysfs_chmod_file(struct kobject *kob + + newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; +- rc = notify_change(victim, &newattrs); ++ rc = notify_change(victim, NULL, &newattrs); + + if (rc == 0) { + mutex_lock(&sysfs_mutex); +diff -uprN e/fs/utimes.c f/fs/utimes.c +--- e/fs/utimes.c 2008-05-28 20:32:27.897940261 +0000 ++++ f/fs/utimes.c 2008-05-28 20:29:29.410207000 +0000 +@@ -60,7 +60,7 @@ long do_utimes(int dfd, char __user *fil + { + int error; + struct nameidata nd; +- struct dentry *dentry; ++ struct 
path path; + struct inode *inode; + struct iattr newattrs; + struct file *f = NULL; +@@ -83,19 +83,19 @@ long do_utimes(int dfd, char __user *fil + f = fget(dfd); + if (!f) + goto out; +- dentry = f->f_path.dentry; ++ path = f->f_path; + } else { + error = __user_walk_fd(dfd, filename, (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW, &nd); + if (error) + goto out; + + error = cow_check_and_break(&nd); + if (error) + goto dput_and_out; +- dentry = nd.path.dentry; ++ path = nd.path; + } + +- inode = dentry->d_inode; ++ inode = path.dentry->d_inode; + + error = -EROFS; + if (IS_RDONLY(inode)) +@@ -145,7 +145,7 @@ long do_utimes(int dfd, char __user *fil + } + } + mutex_lock(&inode->i_mutex); +- error = notify_change(dentry, &newattrs); ++ error = fnotify_change(path.dentry, path.mnt, &newattrs, f); + mutex_unlock(&inode->i_mutex); + dput_and_out: + if (f) +diff -uprN e/fs/xattr.c f/fs/xattr.c +--- e/fs/xattr.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/fs/xattr.c 2008-05-28 20:29:29.410207000 +0000 +@@ -68,8 +68,8 @@ xattr_permission(struct inode *inode, co + } + + int +-vfs_setxattr(struct dentry *dentry, char *name, void *value, +- size_t size, int flags) ++vfs_setxattr(struct dentry *dentry, struct vfsmount *mnt, char *name, ++ void *value, size_t size, int flags, struct file *file) + { + struct inode *inode = dentry->d_inode; + int error; +@@ -79,7 +79,7 @@ vfs_setxattr(struct dentry *dentry, char + return error; + + mutex_lock(&inode->i_mutex); +- error = security_inode_setxattr(dentry, name, value, size, flags); ++ error = security_inode_setxattr(dentry, mnt, name, value, size, flags, file); + if (error) + goto out; + error = -EOPNOTSUPP; +@@ -87,7 +87,7 @@ vfs_setxattr(struct dentry *dentry, char + error = inode->i_op->setxattr(dentry, name, value, size, flags); + if (!error) { + fsnotify_xattr(dentry); +- security_inode_post_setxattr(dentry, name, value, ++ security_inode_post_setxattr(dentry, mnt, name, value, + size, flags); + } + } else if (!strncmp(name, 
XATTR_SECURITY_PREFIX, +@@ -132,7 +132,8 @@ out_noalloc: + EXPORT_SYMBOL_GPL(xattr_getsecurity); + + ssize_t +-vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size) ++vfs_getxattr(struct dentry *dentry, struct vfsmount *mnt, char *name, ++ void *value, size_t size, struct file *file) + { + struct inode *inode = dentry->d_inode; + int error; +@@ -141,7 +142,7 @@ vfs_getxattr(struct dentry *dentry, char + if (error) + return error; + +- error = security_inode_getxattr(dentry, name); ++ error = security_inode_getxattr(dentry, mnt, name, file); + if (error) + return error; + +@@ -168,18 +169,20 @@ nolsm: + EXPORT_SYMBOL_GPL(vfs_getxattr); + + ssize_t +-vfs_listxattr(struct dentry *d, char *list, size_t size) ++vfs_listxattr(struct dentry *dentry, struct vfsmount *mnt, char *list, ++ size_t size, struct file *file) + { ++ struct inode *inode = dentry->d_inode; + ssize_t error; + +- error = security_inode_listxattr(d); ++ error = security_inode_listxattr(dentry, mnt, file); + if (error) + return error; + error = -EOPNOTSUPP; +- if (d->d_inode->i_op && d->d_inode->i_op->listxattr) { +- error = d->d_inode->i_op->listxattr(d, list, size); +- } else { +- error = security_inode_listsecurity(d->d_inode, list, size); ++ if (inode->i_op && inode->i_op->listxattr) ++ error = inode->i_op->listxattr(dentry, list, size); ++ else { ++ error = security_inode_listsecurity(inode, list, size); + if (size && error > size) + error = -ERANGE; + } +@@ -188,7 +191,8 @@ vfs_listxattr(struct dentry *d, char *li + EXPORT_SYMBOL_GPL(vfs_listxattr); + + int +-vfs_removexattr(struct dentry *dentry, char *name) ++vfs_removexattr(struct dentry *dentry, struct vfsmount *mnt, char *name, ++ struct file *file) + { + struct inode *inode = dentry->d_inode; + int error; +@@ -200,7 +204,7 @@ vfs_removexattr(struct dentry *dentry, c + if (error) + return error; + +- error = security_inode_removexattr(dentry, name); ++ error = security_inode_removexattr(dentry, mnt, name, file); + if 
(error) + return error; + +@@ -219,7 +223,7 @@ EXPORT_SYMBOL_GPL(vfs_removexattr); + */ + static long + setxattr(struct dentry *d, char __user *name, void __user *value, +- size_t size, int flags, struct vfsmount *mnt) ++ size_t size, int flags, struct vfsmount *mnt, struct file *file) + { + int error; + void *kvalue = NULL; +@@ -247,7 +251,7 @@ setxattr(struct dentry *d, char __user * + } + } + +- error = vfs_setxattr(d, kname, kvalue, size, flags); ++ error = vfs_setxattr(d, mnt, kname, kvalue, size, flags, file); + kfree(kvalue); + return error; + } +@@ -262,7 +266,7 @@ sys_setxattr(char __user *path, char __u + error = user_path_walk(path, &nd); + if (error) + return error; +- error = setxattr(nd.path.dentry, name, value, size, flags, nd.path.mnt); ++ error = setxattr(nd.path.dentry, name, value, size, flags, nd.path.mnt, NULL); + path_put(&nd.path); + return error; + } +@@ -277,7 +281,7 @@ sys_lsetxattr(char __user *path, char __ + error = user_path_walk_link(path, &nd); + if (error) + return error; +- error = setxattr(nd.path.dentry, name, value, size, flags, nd.path.mnt); ++ error = setxattr(nd.path.dentry, name, value, size, flags, nd.path.mnt, NULL); + path_put(&nd.path); + return error; + } +@@ -295,7 +299,7 @@ sys_fsetxattr(int fd, char __user *name, + return error; + dentry = f->f_path.dentry; + audit_inode(NULL, dentry); +- error = setxattr(dentry, name, value, size, flags, f->f_vfsmnt); ++ error = setxattr(dentry, name, value, size, flags, f->f_vfsmnt, f); + fput(f); + return error; + } +@@ -304,7 +308,8 @@ sys_fsetxattr(int fd, char __user *name, + * Extended attribute GET operations + */ + static ssize_t +-getxattr(struct dentry *d, char __user *name, void __user *value, size_t size) ++getxattr(struct dentry *dentry, struct vfsmount *mnt, char __user *name, ++ void __user *value, size_t size, struct file *file) + { + ssize_t error; + void *kvalue = NULL; +@@ -324,7 +329,7 @@ getxattr(struct dentry *d, char __user * + return -ENOMEM; + } + +- error = 
vfs_getxattr(d, kname, kvalue, size); ++ error = vfs_getxattr(dentry, mnt, kname, kvalue, size, file); + if (error > 0) { + if (size && copy_to_user(value, kvalue, error)) + error = -EFAULT; +@@ -347,7 +352,7 @@ sys_getxattr(char __user *path, char __u + error = user_path_walk(path, &nd); + if (error) + return error; +- error = getxattr(nd.path.dentry, name, value, size); ++ error = getxattr(nd.path.dentry, nd.path.mnt, name, value, size, NULL); + path_put(&nd.path); + return error; + } +@@ -362,7 +367,7 @@ sys_lgetxattr(char __user *path, char __ + error = user_path_walk_link(path, &nd); + if (error) + return error; +- error = getxattr(nd.path.dentry, name, value, size); ++ error = getxattr(nd.path.dentry, nd.path.mnt, name, value, size, NULL); + path_put(&nd.path); + return error; + } +@@ -377,7 +382,7 @@ sys_fgetxattr(int fd, char __user *name, + if (!f) + return error; + audit_inode(NULL, f->f_path.dentry); +- error = getxattr(f->f_path.dentry, name, value, size); ++ error = getxattr(f->f_path.dentry, f->f_path.mnt, name, value, size, f); + fput(f); + return error; + } +@@ -386,7 +391,8 @@ sys_fgetxattr(int fd, char __user *name, + * Extended attribute LIST operations + */ + static ssize_t +-listxattr(struct dentry *d, char __user *list, size_t size) ++listxattr(struct dentry *dentry, struct vfsmount *mnt, char __user *list, ++ size_t size, struct file *file) + { + ssize_t error; + char *klist = NULL; +@@ -399,7 +405,7 @@ listxattr(struct dentry *d, char __user + return -ENOMEM; + } + +- error = vfs_listxattr(d, klist, size); ++ error = vfs_listxattr(dentry, mnt, klist, size, file); + if (error > 0) { + if (size && copy_to_user(list, klist, error)) + error = -EFAULT; +@@ -421,7 +427,7 @@ sys_listxattr(char __user *path, char __ + error = user_path_walk(path, &nd); + if (error) + return error; +- error = listxattr(nd.path.dentry, list, size); ++ error = listxattr(nd.path.dentry, nd.path.mnt, list, size, NULL); + path_put(&nd.path); + return error; + } +@@ -435,7 
+441,7 @@ sys_llistxattr(char __user *path, char _ + error = user_path_walk_link(path, &nd); + if (error) + return error; +- error = listxattr(nd.path.dentry, list, size); ++ error = listxattr(nd.path.dentry, nd.path.mnt, list, size, NULL); + path_put(&nd.path); + return error; + } +@@ -450,7 +456,7 @@ sys_flistxattr(int fd, char __user *list + if (!f) + return error; + audit_inode(NULL, f->f_path.dentry); +- error = listxattr(f->f_path.dentry, list, size); ++ error = listxattr(f->f_path.dentry, f->f_path.mnt, list, size, f); + fput(f); + return error; + } +@@ -459,7 +465,8 @@ sys_flistxattr(int fd, char __user *list + * Extended attribute REMOVE operations + */ + static long +-removexattr(struct dentry *d, char __user *name, struct vfsmount *mnt) ++removexattr(struct dentry *dentry, char __user *name, struct vfsmount *mnt, ++ struct file *file) + { + int error; + char kname[XATTR_NAME_MAX + 1]; +@@ -470,7 +477,7 @@ removexattr(struct dentry *d, char __use + if (error < 0) + return error; + +- return vfs_removexattr(d, kname); ++ return vfs_removexattr(dentry, mnt, kname, file); + } + + asmlinkage long +@@ -482,7 +489,7 @@ sys_removexattr(char __user *path, char + error = user_path_walk(path, &nd); + if (error) + return error; +- error = removexattr(nd.path.dentry, name, nd.path.mnt); ++ error = removexattr(nd.path.dentry, name, nd.path.mnt, NULL); + path_put(&nd.path); + return error; + } +@@ -496,7 +503,7 @@ sys_lremovexattr(char __user *path, char + error = user_path_walk_link(path, &nd); + if (error) + return error; +- error = removexattr(nd.path.dentry, name, nd.path.mnt); ++ error = removexattr(nd.path.dentry, name, nd.path.mnt, NULL); + path_put(&nd.path); + return error; + } +@@ -513,7 +520,7 @@ sys_fremovexattr(int fd, char __user *na + return error; + dentry = f->f_path.dentry; + audit_inode(NULL, dentry); +- error = removexattr(dentry, name, f->f_vfsmnt); ++ error = removexattr(dentry, name, f->f_path.mnt, f); + fput(f); + return error; + } +diff -uprN 
e/fs/xfs/linux-2.6/xfs_lrw.c f/fs/xfs/linux-2.6/xfs_lrw.c +--- e/fs/xfs/linux-2.6/xfs_lrw.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/fs/xfs/linux-2.6/xfs_lrw.c 2008-05-28 20:29:28.910241000 +0000 +@@ -716,7 +716,7 @@ start: + !capable(CAP_FSETID)) { + error = xfs_write_clear_setuid(xip); + if (likely(!error)) +- error = -remove_suid(file->f_path.dentry); ++ error = -remove_suid(&file->f_path); + if (unlikely(error)) { + goto out_unlock_internal; + } +diff -uprN e/include/linux/audit.h f/include/linux/audit.h +--- e/include/linux/audit.h 2008-04-17 02:49:44.000000000 +0000 ++++ f/include/linux/audit.h 2008-05-28 20:29:29.410207000 +0000 +@@ -33,7 +33,7 @@ + * 1200 - 1299 messages internal to the audit daemon + * 1300 - 1399 audit event messages + * 1400 - 1499 SE Linux use +- * 1500 - 1599 kernel LSPP events ++ * 1500 - 1599 AppArmor use + * 1600 - 1699 kernel crypto events + * 1700 - 1799 kernel anomaly records + * 1800 - 1999 future kernel use (maybe integrity labels and related events) +@@ -119,6 +119,13 @@ + #define AUDIT_MAC_UNLBL_STCADD 1416 /* NetLabel: add a static label */ + #define AUDIT_MAC_UNLBL_STCDEL 1417 /* NetLabel: del a static label */ + ++#define AUDIT_APPARMOR_AUDIT 1501 /* AppArmor audited grants */ ++#define AUDIT_APPARMOR_ALLOWED 1502 /* Allowed Access for learning */ ++#define AUDIT_APPARMOR_DENIED 1503 ++#define AUDIT_APPARMOR_HINT 1504 /* Process Tracking information */ ++#define AUDIT_APPARMOR_STATUS 1505 /* Changes in config */ ++#define AUDIT_APPARMOR_ERROR 1506 /* Internal AppArmor Errors */ ++ + #define AUDIT_FIRST_KERN_ANOM_MSG 1700 + #define AUDIT_LAST_KERN_ANOM_MSG 1799 + #define AUDIT_ANOM_PROMISCUOUS 1700 /* Device changed promiscuous mode */ +@@ -518,6 +525,9 @@ extern void audit_log(struct audit_ + __attribute__((format(printf,4,5))); + + extern struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, int type); ++extern void audit_log_vformat(struct audit_buffer *ab, ++ const char *fmt, va_list args) ++ 
__attribute__((format(printf,2,0))); + extern void audit_log_format(struct audit_buffer *ab, + const char *fmt, ...) + __attribute__((format(printf,2,3))); +diff -uprN e/include/linux/dcache.h f/include/linux/dcache.h +--- e/include/linux/dcache.h 2008-04-17 02:49:44.000000000 +0000 ++++ f/include/linux/dcache.h 2008-05-28 20:29:29.410207000 +0000 +@@ -300,7 +300,8 @@ extern int d_validate(struct dentry *, s + * helper function for dentry_operations.d_dname() members + */ + extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...); +- ++extern char *__d_path(struct dentry *, struct vfsmount *, struct path *, ++ char *, int, int, int); + extern char *d_path(struct path *, char *, int); + + /* Allocation counts.. */ +diff -uprN e/include/linux/fs.h f/include/linux/fs.h +--- e/include/linux/fs.h 2008-04-17 02:49:44.000000000 +0000 ++++ f/include/linux/fs.h 2008-05-28 20:29:29.410207000 +0000 +@@ -353,13 +353,6 @@ struct iattr { + struct timespec ia_atime; + struct timespec ia_mtime; + struct timespec ia_ctime; +- +- /* +- * Not an attribute, but an auxilary info for filesystems wanting to +- * implement an ftruncate() like method. NOTE: filesystem should +- * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL). 
+- */ +- struct file *ia_file; + }; + + /* +@@ -1076,13 +1069,13 @@ extern void unlock_super(struct super_bl + */ + extern int vfs_permission(struct nameidata *, int); + extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *); +-extern int vfs_mkdir(struct inode *, struct dentry *, int, struct nameidata *); +-extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t, struct nameidata *); +-extern int vfs_symlink(struct inode *, struct dentry *, const char *, int, struct nameidata *); +-extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct nameidata *); +-extern int vfs_rmdir(struct inode *, struct dentry *, struct nameidata *); +-extern int vfs_unlink(struct inode *, struct dentry *, struct nameidata *); +-extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); ++extern int vfs_mkdir(struct inode *, struct dentry *, struct vfsmount *, int, struct nameidata *); ++extern int vfs_mknod(struct inode *, struct dentry *, struct vfsmount *, int, dev_t, struct nameidata *); ++extern int vfs_symlink(struct inode *, struct dentry *, struct vfsmount *, const char *, int, struct nameidata *); ++extern int vfs_link(struct dentry *, struct vfsmount *, struct inode *, struct dentry *, struct vfsmount *, struct nameidata *); ++extern int vfs_rmdir(struct inode *, struct dentry *, struct nameidata *, struct vfsmount *); ++extern int vfs_unlink(struct inode *, struct dentry *, struct nameidata *, struct vfsmount *); ++extern int vfs_rename(struct inode *, struct dentry *, struct vfsmount *, struct inode *, struct dentry *, struct vfsmount *); + + /* + * VFS dentry helper functions. 
+@@ -1196,6 +1189,7 @@ struct file_operations { + ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); + ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); + int (*setlease)(struct file *, long, struct file_lock **); ++ int (*fsetattr)(struct file *, struct iattr *); + }; + + struct inode_operations { +@@ -1559,8 +1553,8 @@ static inline int break_lease(struct ino + + /* fs/open.c */ + +-extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, +- struct file *filp); ++extern int do_truncate(struct dentry *, struct vfsmount *, loff_t start, ++ unsigned int time_attrs, struct file *filp); + extern long do_sys_open(int dfd, const char __user *filename, int flags, + int mode); + extern struct file *filp_open(const char *, int, int); +@@ -1714,7 +1708,8 @@ extern int do_remount_sb(struct super_bl + #ifdef CONFIG_BLOCK + extern sector_t bmap(struct inode *, sector_t); + #endif +-extern int notify_change(struct dentry *, struct iattr *); ++extern int notify_change(struct dentry *, struct vfsmount *, struct iattr *); ++extern int fnotify_change(struct dentry *, struct vfsmount *, struct iattr *, struct file *); + extern int permission(struct inode *, int, struct nameidata *); + extern int generic_permission(struct inode *, int, + int (*check_acl)(struct inode *, int)); +@@ -1776,9 +1771,9 @@ extern void iget_failed(struct inode *); + extern void clear_inode(struct inode *); + extern void destroy_inode(struct inode *); + extern struct inode *new_inode(struct super_block *); +-extern int __remove_suid(struct dentry *, int); ++extern int __remove_suid(struct path *, int); + extern int should_remove_suid(struct dentry *); +-extern int remove_suid(struct dentry *); ++extern int remove_suid(struct path *); + + extern void __insert_inode_hash(struct inode *, unsigned long hashval); + extern void remove_inode_hash(struct inode *); +diff -uprN e/include/linux/mount.h 
f/include/linux/mount.h +--- e/include/linux/mount.h 2008-04-17 02:49:44.000000000 +0000 ++++ f/include/linux/mount.h 2008-05-28 20:29:29.410207000 +0000 +@@ -103,5 +103,7 @@ extern void mark_mounts_for_expiry(struc + extern spinlock_t vfsmount_lock; + extern dev_t name_to_dev_t(char *name); + ++extern char *d_namespace_path(struct dentry *, struct vfsmount *, char *, int); ++ + #endif + #endif /* _LINUX_MOUNT_H */ +diff -uprN e/include/linux/nfsd/nfsd.h f/include/linux/nfsd/nfsd.h +--- e/include/linux/nfsd/nfsd.h 2008-04-17 02:49:44.000000000 +0000 ++++ f/include/linux/nfsd/nfsd.h 2008-05-28 20:29:28.910241000 +0000 +@@ -78,7 +78,8 @@ __be32 nfsd_setattr(struct svc_rqst *, + #ifdef CONFIG_NFSD_V4 + __be32 nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *, + struct nfs4_acl *); +-int nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **); ++int nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, ++ struct vfsmount *mnt, struct nfs4_acl **); + #endif /* CONFIG_NFSD_V4 */ + __be32 nfsd_create(struct svc_rqst *, struct svc_fh *, + char *name, int len, struct iattr *attrs, +diff -uprN e/include/linux/security.h f/include/linux/security.h +--- e/include/linux/security.h 2008-04-17 02:49:44.000000000 +0000 ++++ f/include/linux/security.h 2008-05-28 20:29:29.410207000 +0000 +@@ -51,8 +51,8 @@ extern void cap_capset_set (struct task_ + extern int cap_bprm_set_security (struct linux_binprm *bprm); + extern void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe); + extern int cap_bprm_secureexec(struct linux_binprm *bprm); +-extern int cap_inode_setxattr(struct dentry *dentry, char *name, void *value, size_t size, int flags); +-extern int cap_inode_removexattr(struct dentry *dentry, char *name); ++extern int cap_inode_setxattr(struct dentry *dentry, struct vfsmount *mnt, char *name, void *value, size_t size, int flags, struct file *file); ++extern int cap_inode_removexattr(struct dentry *dentry, struct vfsmount *mnt, char *name, struct 
file *file); + extern int cap_inode_need_killpriv(struct dentry *dentry); + extern int cap_inode_killpriv(struct dentry *dentry); + extern int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, int flags); +@@ -330,23 +330,28 @@ static inline void security_free_mnt_opt + * Check permission to create a regular file. + * @dir contains inode structure of the parent of the new file. + * @dentry contains the dentry structure for the file to be created. ++ * @mnt is the vfsmount corresponding to @dentry (may be NULL). + * @mode contains the file mode of the file to be created. + * Return 0 if permission is granted. + * @inode_link: + * Check permission before creating a new hard link to a file. + * @old_dentry contains the dentry structure for an existing link to the file. ++ * @old_mnt is the vfsmount corresponding to @old_dentry (may be NULL). + * @dir contains the inode structure of the parent directory of the new link. + * @new_dentry contains the dentry structure for the new link. ++ * @new_mnt is the vfsmount corresponding to @new_dentry (may be NULL). + * Return 0 if permission is granted. + * @inode_unlink: + * Check the permission to remove a hard link to a file. + * @dir contains the inode structure of parent directory of the file. + * @dentry contains the dentry structure for file to be unlinked. ++ * @mnt is the vfsmount corresponding to @dentry (may be NULL). + * Return 0 if permission is granted. + * @inode_symlink: + * Check the permission to create a symbolic link to a file. + * @dir contains the inode structure of parent directory of the symbolic link. + * @dentry contains the dentry structure of the symbolic link. ++ * @mnt is the vfsmount corresponding to @dentry (may be NULL). + * @old_name contains the pathname of file. + * Return 0 if permission is granted. + * @inode_mkdir: +@@ -354,12 +359,14 @@ static inline void security_free_mnt_opt + * associated with inode strcture @dir. 
+ * @dir containst the inode structure of parent of the directory to be created. + * @dentry contains the dentry structure of new directory. ++ * @mnt is the vfsmount corresponding to @dentry (may be NULL). + * @mode contains the mode of new directory. + * Return 0 if permission is granted. + * @inode_rmdir: + * Check the permission to remove a directory. + * @dir contains the inode structure of parent of the directory to be removed. + * @dentry contains the dentry structure of directory to be removed. ++ * @mnt is the vfsmount corresponding to @dentry (may be NULL). + * Return 0 if permission is granted. + * @inode_mknod: + * Check permissions when creating a special file (or a socket or a fifo +@@ -368,6 +375,7 @@ static inline void security_free_mnt_opt + * and not this hook. + * @dir contains the inode structure of parent of the new file. + * @dentry contains the dentry structure of the new file. ++ * @mnt is the vfsmount corresponding to @dentry (may be NULL). + * @mode contains the mode of the new file. + * @dev contains the device number. + * Return 0 if permission is granted. +@@ -375,12 +383,15 @@ static inline void security_free_mnt_opt + * Check for permission to rename a file or directory. + * @old_dir contains the inode structure for parent of the old link. + * @old_dentry contains the dentry structure of the old link. ++ * @old_mnt is the vfsmount corresponding to @old_dentry (may be NULL). + * @new_dir contains the inode structure for parent of the new link. + * @new_dentry contains the dentry structure of the new link. ++ * @new_mnt is the vfsmount corresponding to @new_dentry (may be NULL). + * Return 0 if permission is granted. + * @inode_readlink: + * Check the permission to read the symbolic link. + * @dentry contains the dentry structure for the file link. ++ * @mnt is the vfsmount corresponding to @dentry (may be NULL). + * Return 0 if permission is granted. 
+ * @inode_follow_link: + * Check permission to follow a symbolic link when looking up a pathname. +@@ -404,6 +415,7 @@ static inline void security_free_mnt_opt + * file attributes change (such as when a file is truncated, chown/chmod + * operations, transferring disk quotas, etc). + * @dentry contains the dentry structure for the file. ++ * @mnt is the vfsmount corresponding to @dentry (may be NULL). + * @attr is the iattr structure containing the new file attributes. + * Return 0 if permission is granted. + * @inode_getattr: +@@ -419,18 +431,18 @@ static inline void security_free_mnt_opt + * inode. + * @inode_setxattr: + * Check permission before setting the extended attributes +- * @value identified by @name for @dentry. ++ * @value identified by @name for @dentry and @mnt. + * Return 0 if permission is granted. + * @inode_post_setxattr: + * Update inode security field after successful setxattr operation. +- * @value identified by @name for @dentry. ++ * @value identified by @name for @dentry and @mnt. + * @inode_getxattr: + * Check permission before obtaining the extended attributes +- * identified by @name for @dentry. ++ * identified by @name for @dentry and @mnt. + * Return 0 if permission is granted. + * @inode_listxattr: + * Check permission before obtaining the list of extended attribute +- * names for @dentry. ++ * names for @dentry and @mnt. + * Return 0 if permission is granted. 
+ * @inode_removexattr: + * Check permission before removing the extended attribute +@@ -1286,32 +1298,45 @@ struct security_operations { + void (*inode_free_security) (struct inode *inode); + int (*inode_init_security) (struct inode *inode, struct inode *dir, + char **name, void **value, size_t *len); +- int (*inode_create) (struct inode *dir, +- struct dentry *dentry, int mode); +- int (*inode_link) (struct dentry *old_dentry, +- struct inode *dir, struct dentry *new_dentry); +- int (*inode_unlink) (struct inode *dir, struct dentry *dentry); +- int (*inode_symlink) (struct inode *dir, +- struct dentry *dentry, const char *old_name); +- int (*inode_mkdir) (struct inode *dir, struct dentry *dentry, int mode); +- int (*inode_rmdir) (struct inode *dir, struct dentry *dentry); ++ int (*inode_create) (struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, int mode); ++ int (*inode_link) (struct dentry *old_dentry, struct vfsmount *old_mnt, ++ struct inode *dir, struct dentry *new_dentry, ++ struct vfsmount *new_mnt); ++ int (*inode_unlink) (struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt); ++ int (*inode_symlink) (struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, const char *old_name); ++ int (*inode_mkdir) (struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, int mode); ++ int (*inode_rmdir) (struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt); + int (*inode_mknod) (struct inode *dir, struct dentry *dentry, +- int mode, dev_t dev); ++ struct vfsmount *mnt, int mode, dev_t dev); + int (*inode_rename) (struct inode *old_dir, struct dentry *old_dentry, +- struct inode *new_dir, struct dentry *new_dentry); +- int (*inode_readlink) (struct dentry *dentry); ++ struct vfsmount *old_mnt, ++ struct inode *new_dir, struct dentry *new_dentry, ++ struct vfsmount *new_mnt); ++ int (*inode_readlink) (struct dentry *dentry, struct vfsmount *mnt); + int (*inode_follow_link) (struct dentry *dentry, struct 
nameidata *nd); + int (*inode_permission) (struct inode *inode, int mask, struct nameidata *nd); +- int (*inode_setattr) (struct dentry *dentry, struct iattr *attr); ++ int (*inode_setattr) (struct dentry *dentry, struct vfsmount *mnt, ++ struct iattr *attr); + int (*inode_getattr) (struct vfsmount *mnt, struct dentry *dentry); + void (*inode_delete) (struct inode *inode); +- int (*inode_setxattr) (struct dentry *dentry, char *name, void *value, +- size_t size, int flags); +- void (*inode_post_setxattr) (struct dentry *dentry, char *name, void *value, ++ int (*inode_setxattr) (struct dentry *dentry, struct vfsmount *mnt, ++ char *name, void *value, size_t size, int flags, ++ struct file *file); ++ void (*inode_post_setxattr) (struct dentry *dentry, ++ struct vfsmount *mnt, ++ char *name, void *value, + size_t size, int flags); +- int (*inode_getxattr) (struct dentry *dentry, char *name); +- int (*inode_listxattr) (struct dentry *dentry); +- int (*inode_removexattr) (struct dentry *dentry, char *name); ++ int (*inode_getxattr) (struct dentry *dentry, struct vfsmount *mnt, ++ char *name, struct file *file); ++ int (*inode_listxattr) (struct dentry *dentry, struct vfsmount *mnt, ++ struct file *file); ++ int (*inode_removexattr) (struct dentry *dentry, struct vfsmount *mnt, ++ char *name, struct file *file); + int (*inode_need_killpriv) (struct dentry *dentry); + int (*inode_killpriv) (struct dentry *dentry); + int (*inode_getsecurity)(const struct inode *inode, const char *name, void **buffer, bool alloc); +@@ -1549,30 +1574,43 @@ int security_inode_alloc(struct inode *i + void security_inode_free(struct inode *inode); + int security_inode_init_security(struct inode *inode, struct inode *dir, + char **name, void **value, size_t *len); +-int security_inode_create(struct inode *dir, struct dentry *dentry, int mode); +-int security_inode_link(struct dentry *old_dentry, struct inode *dir, +- struct dentry *new_dentry); +-int security_inode_unlink(struct inode *dir, 
struct dentry *dentry); ++int security_inode_create(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, int mode); ++int security_inode_link(struct dentry *old_dentry, struct vfsmount *old_mnt, ++ struct inode *dir, struct dentry *new_dentry, ++ struct vfsmount *new_mnt); ++int security_inode_unlink(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt); + int security_inode_symlink(struct inode *dir, struct dentry *dentry, +- const char *old_name); +-int security_inode_mkdir(struct inode *dir, struct dentry *dentry, int mode); +-int security_inode_rmdir(struct inode *dir, struct dentry *dentry); +-int security_inode_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev); ++ struct vfsmount *mnt, const char *old_name); ++int security_inode_mkdir(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, int mode); ++int security_inode_rmdir(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt); ++int security_inode_mknod(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, int mode, dev_t dev); + int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, +- struct inode *new_dir, struct dentry *new_dentry); +-int security_inode_readlink(struct dentry *dentry); ++ struct vfsmount *old_mnt, struct inode *new_dir, ++ struct dentry *new_dentry, struct vfsmount *new_mnt); ++int security_inode_readlink(struct dentry *dentry, struct vfsmount *mnt); + int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd); + int security_inode_permission(struct inode *inode, int mask, struct nameidata *nd); +-int security_inode_setattr(struct dentry *dentry, struct iattr *attr); ++int security_inode_setattr(struct dentry *dentry, struct vfsmount *mnt, ++ struct iattr *attr); + int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry); + void security_inode_delete(struct inode *inode); +-int security_inode_setxattr(struct dentry *dentry, char *name, +- void *value, 
size_t size, int flags); +-void security_inode_post_setxattr(struct dentry *dentry, char *name, +- void *value, size_t size, int flags); +-int security_inode_getxattr(struct dentry *dentry, char *name); +-int security_inode_listxattr(struct dentry *dentry); +-int security_inode_removexattr(struct dentry *dentry, char *name); ++int security_inode_setxattr(struct dentry *dentry, struct vfsmount *mnt, ++ char *name, void *value, size_t size, int flags, ++ struct file *file); ++void security_inode_post_setxattr(struct dentry *dentry, struct vfsmount *mnt, ++ char *name, void *value, size_t size, ++ int flags); ++int security_inode_getxattr(struct dentry *dentry, struct vfsmount *mnt, ++ char *name, struct file *file); ++int security_inode_listxattr(struct dentry *dentry, struct vfsmount *mnt, ++ struct file *file); ++int security_inode_removexattr(struct dentry *dentry, struct vfsmount *mnt, ++ char *name, struct file *file); + int security_inode_need_killpriv(struct dentry *dentry); + int security_inode_killpriv(struct dentry *dentry); + int security_inode_getsecurity(const struct inode *inode, const char *name, void **buffer, bool alloc); +@@ -1887,26 +1925,31 @@ static inline int security_inode_init_se + + static inline int security_inode_create (struct inode *dir, + struct dentry *dentry, ++ struct vfsmount *mnt, + int mode) + { + return 0; + } + + static inline int security_inode_link (struct dentry *old_dentry, ++ struct vfsmount *old_mnt, + struct inode *dir, +- struct dentry *new_dentry) ++ struct dentry *new_dentry, ++ struct vfsmount *new_mnt) + { + return 0; + } + + static inline int security_inode_unlink (struct inode *dir, +- struct dentry *dentry) ++ struct dentry *dentry, ++ struct vfsmount *mnt) + { + return 0; + } + + static inline int security_inode_symlink (struct inode *dir, + struct dentry *dentry, ++ struct vfsmount *mnt, + const char *old_name) + { + return 0; +@@ -1914,19 +1957,22 @@ static inline int security_inode_symlink + + static inline int 
security_inode_mkdir (struct inode *dir, + struct dentry *dentry, ++ struct vfsmount *mnt, + int mode) + { + return 0; + } + + static inline int security_inode_rmdir (struct inode *dir, +- struct dentry *dentry) ++ struct dentry *dentry, ++ struct vfsmount *mnt) + { + return 0; + } + + static inline int security_inode_mknod (struct inode *dir, + struct dentry *dentry, ++ struct vfsmount *mnt, + int mode, dev_t dev) + { + return 0; +@@ -1934,13 +1980,16 @@ static inline int security_inode_mknod ( + + static inline int security_inode_rename (struct inode *old_dir, + struct dentry *old_dentry, ++ struct vfsmount *old_mnt, + struct inode *new_dir, +- struct dentry *new_dentry) ++ struct dentry *new_dentry, ++ struct vfsmount *new_mnt) + { + return 0; + } + +-static inline int security_inode_readlink (struct dentry *dentry) ++static inline int security_inode_readlink(struct dentry *dentry, ++ struct vfsmount *mnt) + { + return 0; + } +@@ -1958,6 +2007,7 @@ static inline int security_inode_permiss + } + + static inline int security_inode_setattr (struct dentry *dentry, ++ struct vfsmount *mnt, + struct iattr *attr) + { + return 0; +@@ -1972,29 +2022,40 @@ static inline int security_inode_getattr + static inline void security_inode_delete (struct inode *inode) + { } + +-static inline int security_inode_setxattr (struct dentry *dentry, char *name, +- void *value, size_t size, int flags) +-{ +- return cap_inode_setxattr(dentry, name, value, size, flags); ++static inline int security_inode_setxattr (struct dentry *dentry, ++ struct vfsmount *mnt, char *name, ++ void *value, size_t size, int flags, ++ struct file *file) ++{ ++ return cap_inode_setxattr(dentry, mnt, name, value, size, flags, file); + } + +-static inline void security_inode_post_setxattr (struct dentry *dentry, char *name, +- void *value, size_t size, int flags) ++static inline void security_inode_post_setxattr (struct dentry *dentry, ++ struct vfsmount *mnt, ++ char *name, ++ void *value, size_t size, ++ int 
flags) + { } + +-static inline int security_inode_getxattr (struct dentry *dentry, char *name) ++static inline int security_inode_getxattr (struct dentry *dentry, ++ struct vfsmount *mnt, char *name, ++ struct file *file) + { + return 0; + } + +-static inline int security_inode_listxattr (struct dentry *dentry) ++static inline int security_inode_listxattr (struct dentry *dentry, ++ struct vfsmount *mnt, ++ struct file *file) + { + return 0; + } + +-static inline int security_inode_removexattr (struct dentry *dentry, char *name) ++static inline int security_inode_removexattr (struct dentry *dentry, ++ struct vfsmount *mnt, char *name, ++ struct file *file) + { +- return cap_inode_removexattr(dentry, name); ++ return cap_inode_removexattr(dentry, mnt, name, file); + } + + static inline int security_inode_need_killpriv(struct dentry *dentry) +diff -uprN e/include/linux/sysctl.h f/include/linux/sysctl.h +--- e/include/linux/sysctl.h 2008-04-17 02:49:44.000000000 +0000 ++++ f/include/linux/sysctl.h 2008-05-28 20:29:29.410207000 +0000 +@@ -977,6 +977,8 @@ extern int proc_doulongvec_minmax(struct + extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int, + struct file *, void __user *, size_t *, loff_t *); + ++extern char *sysctl_pathname(ctl_table *, char *, int); ++ + extern int do_sysctl (int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen); +diff -uprN e/include/linux/xattr.h f/include/linux/xattr.h +--- e/include/linux/xattr.h 2008-04-17 02:49:44.000000000 +0000 ++++ f/include/linux/xattr.h 2008-05-28 20:29:29.410207000 +0000 +@@ -47,10 +47,13 @@ struct xattr_handler { + }; + + ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t); +-ssize_t vfs_getxattr(struct dentry *, char *, void *, size_t); +-ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size); +-int vfs_setxattr(struct dentry *, char *, void *, size_t, int); +-int vfs_removexattr(struct dentry *, char 
*); ++ssize_t vfs_getxattr(struct dentry *, struct vfsmount *, char *, void *, ++ size_t, struct file *); ++ssize_t vfs_listxattr(struct dentry *d, struct vfsmount *, char *list, ++ size_t size, struct file *); ++int vfs_setxattr(struct dentry *, struct vfsmount *, char *, void *, size_t, ++ int, struct file *); ++int vfs_removexattr(struct dentry *, struct vfsmount *, char *, struct file *); + + ssize_t generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size); + ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size); +diff -uprN e/ipc/mqueue.c f/ipc/mqueue.c +--- e/ipc/mqueue.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/ipc/mqueue.c 2008-05-28 20:29:28.910241000 +0000 +@@ -743,7 +743,7 @@ asmlinkage long sys_mq_unlink(const char + if (inode) + atomic_inc(&inode->i_count); + +- err = vfs_unlink(dentry->d_parent->d_inode, dentry, NULL); ++ err = vfs_unlink(dentry->d_parent->d_inode, dentry, NULL, mqueue_mnt); + out_err: + dput(dentry); + +diff -uprN e/kernel/audit.c f/kernel/audit.c +--- e/kernel/audit.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/kernel/audit.c 2008-05-28 20:29:29.410207000 +0000 +@@ -1136,8 +1136,7 @@ static inline int audit_expand(struct au + * will be called a second time. Currently, we assume that a printk + * can't format message larger than 1024 bytes, so we don't either. 
*/ +-static void audit_log_vformat(struct audit_buffer *ab, const char *fmt, +- va_list args) ++void audit_log_vformat(struct audit_buffer *ab, const char *fmt, va_list args) + { + int len, avail; + struct sk_buff *skb; +@@ -1407,3 +1406,6 @@ EXPORT_SYMBOL(audit_log_start); + EXPORT_SYMBOL(audit_log_end); + EXPORT_SYMBOL(audit_log_format); + EXPORT_SYMBOL(audit_log); ++EXPORT_SYMBOL_GPL(audit_log_vformat); ++EXPORT_SYMBOL_GPL(audit_log_untrustedstring); ++EXPORT_SYMBOL_GPL(audit_log_d_path); +diff -uprN e/kernel/cgroup.c f/kernel/cgroup.c +--- e/kernel/cgroup.c 2008-05-28 20:32:27.897940261 +0000 ++++ f/kernel/cgroup.c 2008-05-28 20:29:28.910241000 +0000 +@@ -2833,7 +2833,7 @@ int cgroup_clone(struct task_struct *tsk + } + + /* Create the cgroup directory, which also creates the cgroup */ +- ret = vfs_mkdir(inode, dentry, S_IFDIR | 0755, NULL); ++ ret = vfs_mkdir(inode, dentry, NULL, S_IFDIR | 0755, NULL); + child = __d_cgrp(dentry); + dput(dentry); + if (ret) { +diff -uprN e/kernel/sysctl.c f/kernel/sysctl.c +--- e/kernel/sysctl.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/kernel/sysctl.c 2008-05-28 20:29:29.410207000 +0000 +@@ -1440,6 +1440,33 @@ void register_sysctl_root(struct ctl_tab + spin_unlock(&sysctl_lock); + } + ++char *sysctl_pathname(struct ctl_table *table, char *buffer, int buflen) ++{ ++ if (buflen < 1) ++ return NULL; ++ buffer += --buflen; /* last byte: the path is assembled back to front */ ++ *buffer = '\0'; ++ ++ while (table) { ++ int namelen = strlen(table->procname); ++ ++ if (buflen < namelen + 1) /* need room for '/' plus the name */ ++ return NULL; ++ buflen -= namelen + 1; ++ buffer -= namelen; ++ memcpy(buffer, table->procname, namelen); ++ *--buffer = '/'; ++ table = table->parent; /* each parent's name is prepended in turn */ ++ } ++ if (buflen < 4) ++ return NULL; ++ buffer -= 4; ++ memcpy(buffer, "/sys", 4); /* fixed prefix, copied without a NUL */ ++ ++ return buffer; /* start of the string inside the caller's buffer */ ++} ++EXPORT_SYMBOL_GPL(sysctl_pathname); ++ + #ifdef CONFIG_SYSCTL_SYSCALL + int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen) +diff -uprN e/mm/filemap.c
f/mm/filemap.c +--- e/mm/filemap.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/mm/filemap.c 2008-05-28 20:29:28.910241000 +0000 +@@ -1653,26 +1653,26 @@ int should_remove_suid(struct dentry *de + } + EXPORT_SYMBOL(should_remove_suid); + +-int __remove_suid(struct dentry *dentry, int kill) ++int __remove_suid(struct path *path, int kill) + { + struct iattr newattrs; + + newattrs.ia_valid = ATTR_FORCE | kill; +- return notify_change(dentry, &newattrs); ++ return notify_change(path->dentry, path->mnt, &newattrs); + } + +-int remove_suid(struct dentry *dentry) ++int remove_suid(struct path *path) + { +- int killsuid = should_remove_suid(dentry); +- int killpriv = security_inode_need_killpriv(dentry); ++ int killsuid = should_remove_suid(path->dentry); ++ int killpriv = security_inode_need_killpriv(path->dentry); + int error = 0; + + if (killpriv < 0) + return killpriv; + if (killpriv) +- error = security_inode_killpriv(dentry); ++ error = security_inode_killpriv(path->dentry); + if (!error && killsuid) +- error = __remove_suid(dentry, killsuid); ++ error = __remove_suid(path, killsuid); + + return error; + } +@@ -2387,7 +2387,7 @@ __generic_file_aio_write_nolock(struct k + if (count == 0) + goto out; + +- err = remove_suid(file->f_path.dentry); ++ err = remove_suid(&file->f_path); + if (err) + goto out; + +diff -uprN e/mm/filemap_xip.c f/mm/filemap_xip.c +--- e/mm/filemap_xip.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/mm/filemap_xip.c 2008-05-28 20:29:28.910241000 +0000 +@@ -380,7 +380,7 @@ xip_file_write(struct file *filp, const + if (count == 0) + goto out_backing; + +- ret = remove_suid(filp->f_path.dentry); ++ ret = remove_suid(&filp->f_path); + if (ret) + goto out_backing; + +diff -uprN e/mm/tiny-shmem.c f/mm/tiny-shmem.c +--- e/mm/tiny-shmem.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/mm/tiny-shmem.c 2008-05-28 20:29:28.910241000 +0000 +@@ -80,7 +80,7 @@ struct file *shmem_file_setup(char *name + inode->i_nlink = 0; /* It is unlinked */ + + /* notify everyone 
as to the change of file size */ +- error = do_truncate(dentry, size, 0, file); ++ error = do_truncate(dentry, file->f_path.mnt, size, 0, file); + if (error < 0) + goto close_file; + +diff -uprN e/net/unix/af_unix.c f/net/unix/af_unix.c +--- e/net/unix/af_unix.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/net/unix/af_unix.c 2008-05-28 20:29:28.910241000 +0000 +@@ -819,7 +819,8 @@ static int unix_bind(struct socket *sock + */ + mode = S_IFSOCK | + (SOCK_INODE(sock)->i_mode & ~current->fs->umask); +- err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0, NULL); ++ err = vfs_mknod(nd.path.dentry->d_inode, dentry, nd.path.mnt, ++ mode, 0, NULL); + if (err) + goto out_mknod_dput; + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); +diff -uprN e/security/Kconfig f/security/Kconfig +--- e/security/Kconfig 2008-04-17 02:49:44.000000000 +0000 ++++ f/security/Kconfig 2008-05-28 20:29:29.410207000 +0000 +@@ -124,6 +124,7 @@ config SECURITY_DEFAULT_MMAP_MIN_ADDR + + source security/selinux/Kconfig + source security/smack/Kconfig ++source security/apparmor/Kconfig + + endmenu + +diff -uprN e/security/Makefile f/security/Makefile +--- e/security/Makefile 2008-04-17 02:49:44.000000000 +0000 ++++ f/security/Makefile 2008-05-28 20:29:29.410207000 +0000 +@@ -16,5 +16,6 @@ obj-$(CONFIG_SECURITY) += security.o d + # Must precede capability.o in order to stack properly. 
+ obj-$(CONFIG_SECURITY_SELINUX) += selinux/built-in.o + obj-$(CONFIG_SECURITY_SMACK) += commoncap.o smack/built-in.o ++obj-$(CONFIG_SECURITY_APPARMOR) += commoncap.o apparmor/ + obj-$(CONFIG_SECURITY_CAPABILITIES) += commoncap.o capability.o + obj-$(CONFIG_SECURITY_ROOTPLUG) += commoncap.o root_plug.o +diff -uprN e/security/apparmor/Kconfig f/security/apparmor/Kconfig +--- e/security/apparmor/Kconfig 1970-01-01 00:00:00.000000000 +0000 ++++ f/security/apparmor/Kconfig 2008-05-28 20:29:29.410207000 +0000 +@@ -0,0 +1,42 @@ ++config SECURITY_APPARMOR ++ bool "AppArmor support" ++ depends on SECURITY ++ select AUDIT ++ help ++ This enables the AppArmor security module. ++ Required userspace tools (if they are not included in your ++ distribution) and further information may be found at ++ ++ ++ If you are unsure how to answer this question, answer N. ++ ++config SECURITY_APPARMOR_BOOTPARAM_VALUE ++ int "AppArmor boot parameter default value" ++ depends on SECURITY_APPARMOR ++ range 0 1 ++ default 1 ++ help ++ This option sets the default value for the kernel parameter ++ 'apparmor', which allows AppArmor to be enabled or disabled ++ at boot. If this option is set to 0 (zero), the AppArmor ++ kernel parameter will default to 0, disabling AppArmor at ++ bootup. If this option is set to 1 (one), the AppArmor ++ kernel parameter will default to 1, enabling AppArmor at ++ bootup. ++ ++ If you are unsure how to answer this question, answer 1. ++ ++config SECURITY_APPARMOR_DISABLE ++ bool "AppArmor runtime disable" ++ depends on SECURITY_APPARMOR ++ default n ++ help ++ This option enables writing to a apparmorfs node 'disable', which ++ allows AppArmor to be disabled at runtime prior to the policy load. ++ AppArmor will then remain disabled until the next boot. ++ This option is similar to the apparmor.enabled=0 boot parameter, ++ but is to support runtime disabling of AppArmor, e.g. 
from ++ /sbin/init, for portability across platforms where boot ++ parameters are difficult to employ. ++ ++ If you are unsure how to answer this question, answer N. +diff -uprN e/security/apparmor/Makefile f/security/apparmor/Makefile +--- e/security/apparmor/Makefile 1970-01-01 00:00:00.000000000 +0000 ++++ f/security/apparmor/Makefile 2008-05-28 20:29:29.410207000 +0000 +@@ -0,0 +1,18 @@ ++# Makefile for AppArmor Linux Security Module ++# ++obj-$(CONFIG_SECURITY_APPARMOR) += apparmor.o ++ ++apparmor-y := main.o list.o procattr.o lsm.o apparmorfs.o \ ++ module_interface.o match.o ++ ++quiet_cmd_make-caps = GEN $@ ++cmd_make-caps = sed -n -e "/CAP_FS_MASK/d" -e "s/^\#define[ \\t]\\+CAP_\\([A-Z0-9_]\\+\\)[ \\t]\\+\\([0-9]\\+\\)\$$/[\\2] = \"\\1\",/p" $< | tr A-Z a-z > $@ ++ ++quiet_cmd_make-af = GEN $@ ++cmd_make-af = sed -n -e "/AF_MAX/d" -e "/AF_LOCAL/d" -e "s/^\#define[ \\t]\\+AF_\\([A-Z0-9_]\\+\\)[ \\t]\\+\\([0-9]\\+\\)\\(.*\\)\$$/[\\2] = \"\\1\",/p" $< | tr A-Z a-z > $@ ++ ++$(obj)/main.o : $(obj)/capability_names.h $(obj)/af_names.h ++$(obj)/capability_names.h : $(srctree)/include/linux/capability.h ++ $(call cmd,make-caps) ++$(obj)/af_names.h : $(srctree)/include/linux/socket.h ++ $(call cmd,make-af) +diff -uprN e/security/apparmor/apparmor.h f/security/apparmor/apparmor.h +--- e/security/apparmor/apparmor.h 1970-01-01 00:00:00.000000000 +0000 ++++ f/security/apparmor/apparmor.h 2008-05-28 20:29:29.410207000 +0000 +@@ -0,0 +1,403 @@ ++/* ++ * Copyright (C) 1998-2007 Novell/SUSE ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation, version 2 of the ++ * License. 
++ * ++ * AppArmor internal prototypes ++ */ ++ ++#ifndef __APPARMOR_H ++#define __APPARMOR_H ++ ++#include <linux/sched.h> ++#include <linux/fs.h> ++#include <linux/binfmts.h> ++#include <linux/rcupdate.h> ++#include <linux/resource.h> ++#include <linux/socket.h> ++#include <net/sock.h> ++ ++/* ++ * We use MAY_READ, MAY_WRITE, MAY_EXEC, MAY_APPEND and the following flags ++ * for profile permissions ++ */ ++#define AA_MAY_LINK 0x0010 ++#define AA_MAY_LOCK 0x0020 ++#define AA_EXEC_MMAP 0x0040 ++#define AA_MAY_MOUNT 0x0080 /* no direct audit mapping */ ++#define AA_EXEC_UNSAFE 0x0100 ++#define AA_EXEC_INHERIT 0x0200 ++#define AA_EXEC_MOD_0 0x0400 ++#define AA_EXEC_MOD_1 0x0800 ++#define AA_EXEC_MOD_2 0x1000 ++#define AA_EXEC_MOD_3 0x2000 ++ ++#define AA_BASE_PERMS (MAY_READ | MAY_WRITE | MAY_EXEC | \ ++ MAY_APPEND | AA_MAY_LINK | \ ++ AA_MAY_LOCK | AA_EXEC_MMAP | \ ++ AA_MAY_MOUNT | AA_EXEC_UNSAFE | \ ++ AA_EXEC_INHERIT | AA_EXEC_MOD_0 | \ ++ AA_EXEC_MOD_1 | AA_EXEC_MOD_2 | \ ++ AA_EXEC_MOD_3) ++ ++#define AA_EXEC_MODIFIERS (AA_EXEC_MOD_0 | AA_EXEC_MOD_1 | \ ++ AA_EXEC_MOD_2 | AA_EXEC_MOD_3) ++ ++#define AA_EXEC_TYPE (AA_EXEC_UNSAFE | AA_EXEC_INHERIT | \ ++ AA_EXEC_MODIFIERS) ++ ++#define AA_EXEC_UNCONFINED AA_EXEC_MOD_0 ++#define AA_EXEC_PROFILE AA_EXEC_MOD_1 ++#define AA_EXEC_CHILD (AA_EXEC_MOD_0 | AA_EXEC_MOD_1) ++/* remaining exec modes are index into profile name table */ ++#define AA_EXEC_INDEX(mode) (((mode) & AA_EXEC_MODIFIERS) >> 10) ++ ++#define AA_USER_SHIFT 0 ++#define AA_OTHER_SHIFT 14 ++ ++#define AA_USER_PERMS (AA_BASE_PERMS << AA_USER_SHIFT) ++#define AA_OTHER_PERMS (AA_BASE_PERMS << AA_OTHER_SHIFT) ++ ++#define AA_FILE_PERMS (AA_USER_PERMS | AA_OTHER_PERMS) ++ ++#define AA_LINK_BITS ((AA_MAY_LINK << AA_USER_SHIFT) | \ ++ (AA_MAY_LINK << AA_OTHER_SHIFT)) ++ ++#define AA_USER_EXEC (MAY_EXEC << AA_USER_SHIFT) ++#define AA_OTHER_EXEC (MAY_EXEC << AA_OTHER_SHIFT) ++ ++#define AA_USER_EXEC_TYPE (AA_EXEC_TYPE << AA_USER_SHIFT) ++#define AA_OTHER_EXEC_TYPE (AA_EXEC_TYPE << AA_OTHER_SHIFT) ++ ++#define AA_EXEC_BITS (AA_USER_EXEC | AA_OTHER_EXEC) ++ ++#define
ALL_AA_EXEC_UNSAFE ((AA_EXEC_UNSAFE << AA_USER_SHIFT) | \ ++ (AA_EXEC_UNSAFE << AA_OTHER_SHIFT)) ++ ++#define ALL_AA_EXEC_TYPE (AA_USER_EXEC_TYPE | AA_OTHER_EXEC_TYPE) ++ ++/* overloaded permissions for link pairs */ ++#define AA_LINK_SUBSET_TEST 0x0020 ++ ++#define AA_USER_PTRACE 0x10000000 ++#define AA_OTHER_PTRACE 0x20000000 ++#define AA_PTRACE_PERMS (AA_USER_PTRACE | AA_OTHER_PTRACE) ++ ++/* shared permissions that are not duplicated in user::other */ ++#define AA_CHANGE_HAT 0x40000000 ++#define AA_CHANGE_PROFILE 0x80000000 ++ ++#define AA_SHARED_PERMS (AA_CHANGE_HAT | AA_CHANGE_PROFILE) ++ ++#define AA_VALID_PERM_MASK (AA_FILE_PERMS | AA_PTRACE_PERMS | \ ++ AA_SHARED_PERMS) ++ ++/* audit bits for the second accept field */ ++#define AUDIT_FILE_MASK 0x1fc07f ++#define AUDIT_QUIET_MASK(mask) (((mask) >> 7) & AUDIT_FILE_MASK) ++#define AA_VALID_PERM2_MASK 0x0fffffff ++ ++#define AA_SECURE_EXEC_NEEDED 1 ++ ++/* Control parameters (0 or 1), settable thru module/boot flags or ++ * via /sys/kernel/security/apparmor/control */ ++extern int apparmor_complain; ++extern int apparmor_debug; ++extern int apparmor_audit; ++extern int apparmor_logsyscall; ++extern unsigned int apparmor_path_max; ++ ++#define PROFILE_COMPLAIN(_profile) \ ++ (apparmor_complain == 1 || ((_profile) && (_profile)->flags.complain)) ++ ++#define APPARMOR_COMPLAIN(_cxt) \ ++ (apparmor_complain == 1 || \ ++ ((_cxt) && (_cxt)->profile && (_cxt)->profile->flags.complain)) ++ ++#define PROFILE_AUDIT(_profile) \ ++ (apparmor_audit == 1 || ((_profile) && (_profile)->flags.audit)) ++ ++#define APPARMOR_AUDIT(_cxt) \ ++ (apparmor_audit == 1 || \ ++ ((_cxt) && (_cxt)->profile && (_cxt)->profile->flags.audit)) ++ ++#define PROFILE_IS_HAT(_profile) \ ++ ((_profile) && (_profile)->flags.hat) ++ ++/* ++ * DEBUG remains global (no per profile flag) since it is mostly used in sysctl ++ * which is not related to profile accesses. ++ */ ++ ++#define AA_DEBUG(fmt, args...)
\ ++ do { \ ++ if (apparmor_debug) \ ++ printk(KERN_DEBUG "AppArmor: " fmt, ##args); \ ++ } while (0) ++ ++#define AA_ERROR(fmt, args...) do { if (printk_ratelimit()) printk(KERN_ERR "AppArmor: " fmt, ##args); } while (0) ++ ++/* struct aa_rlimit - rlimits settings for the profile ++ * @mask: which hard limits to set ++ * @limits: rlimit values that override task limits ++ * ++ * AppArmor rlimits are used to set confined task rlimits. Only the ++ * limits specified in @mask will be controlled by apparmor. ++ */ ++struct aa_rlimit { ++ unsigned int mask; ++ struct rlimit limits[RLIM_NLIMITS]; ++}; ++ ++struct aa_profile; ++ ++/* struct aa_namespace - namespace for a set of profiles ++ * @name: the name of the namespace ++ * @list: list the namespace is on ++ * @profiles: list of profile in the namespace ++ * @profile_count: the number of profiles in the namespace ++ * @null_complain_profile: special profile used for learning in this namespace ++ * @count: reference count on the namespace ++ * @lock: lock for adding/removing profile to the namespace ++ */ ++struct aa_namespace { ++ char *name; ++ struct list_head list; ++ struct list_head profiles; ++ int profile_count; ++ struct aa_profile *null_complain_profile; ++ ++ struct kref count; ++ rwlock_t lock; ++}; ++ ++/* struct aa_profile - basic confinement data ++ * @name: the profiles name ++ * @list: list this profile is on ++ * @ns: namespace the profile is in ++ * @file_rules: dfa containing the profiles file rules ++ * @flags: flags controlling profile behavior ++ * @isstale: flag indicating if profile is stale ++ * @set_caps: capabilities that are being set ++ * @capabilities: capabilities mask ++ * @audit_caps: caps that are to be audited ++ * @quiet_caps: caps that should not be audited ++ * @capabilities: capabilities granted by the process ++ * @rlimits: rlimits for the profile ++ * @task_count: how many tasks the profile is attached to ++ * @count: reference count of the profile ++ * @task_contexts: list 
of tasks confined by profile ++ * @lock: lock for the task_contexts list ++ * @network_families: basic network permissions ++ * @audit_network: which network permissions to force audit ++ * @quiet_network: which network permissions to quiet rejects ++ * ++ * The AppArmor profile contains the basic confinement data. Each profile ++ * has a name, and all nonstale profile are in a profile namespace. ++ * ++ * The task_contexts list and the isstale flag are protected by the ++ * profile lock. ++ * ++ * If a task context is moved between two profiles, we first need to grab ++ * both profile locks. lock_both_profiles() does that in a deadlock-safe ++ * way. ++ */ ++struct aa_profile { ++ char *name; ++ struct list_head list; ++ struct aa_namespace *ns; ++ ++ int exec_table_size; ++ char **exec_table; ++ struct aa_dfa *file_rules; ++ struct { ++ int hat; ++ int complain; ++ int audit; ++ } flags; ++ int isstale; ++ ++ kernel_cap_t set_caps; ++ kernel_cap_t capabilities; ++ kernel_cap_t audit_caps; ++ kernel_cap_t quiet_caps; ++ ++ struct aa_rlimit rlimits; ++ unsigned int task_count; ++ ++ struct kref count; ++ struct list_head task_contexts; ++ spinlock_t lock; ++ unsigned long int_flags; ++ u16 network_families[AF_MAX]; ++ u16 audit_network[AF_MAX]; ++ u16 quiet_network[AF_MAX]; ++}; ++ ++extern struct list_head profile_ns_list; ++extern rwlock_t profile_ns_list_lock; ++extern struct mutex aa_interface_lock; ++ ++/** ++ * struct aa_task_context - primary label for confined tasks ++ * @profile: the current profile ++ * @previous_profile: profile the task may return to ++ * @cookie: magic value the task must know for returning to @previous_profile ++ * @list: list this aa_task_context is on ++ * @task: task that the aa_task_context confines ++ * @rcu: rcu head used when freeing the aa_task_context ++ * @caps_logged: caps that have previously generated log entries ++ * ++ * Contains the task's current profile (which could change due to ++ * change_hat). 
Plus the hat_magic needed during change_hat. ++ */ ++struct aa_task_context { ++ struct aa_profile *profile; ++ struct aa_profile *previous_profile; ++ u64 cookie; ++ struct list_head list; ++ struct task_struct *task; ++ struct rcu_head rcu; ++ kernel_cap_t caps_logged; ++}; ++ ++extern struct aa_namespace *default_namespace; ++ ++/* aa_audit - AppArmor auditing structure ++ * Structure is populated by access control code and passed to aa_audit which ++ * provides for a single point of logging. ++ */ ++ ++struct aa_audit { ++ const char *operation; ++ gfp_t gfp_mask; ++ const char *info; ++ const char *name; ++ const char *name2; ++ const char *name3; ++ int request_mask, denied_mask, audit_mask; ++ int rlimit; ++ struct iattr *iattr; ++ pid_t task, parent; ++ int family, type, protocol; ++ int error_code; ++}; ++ ++/* Flags for the permission check functions */ ++#define AA_CHECK_FD 1 /* coming from a file descriptor */ ++#define AA_CHECK_DIR 2 /* file type is directory */ ++ ++/* lock subtypes so lockdep does not raise false dependencies */ ++enum aa_lock_class { ++ aa_lock_normal, ++ aa_lock_nested, ++ aa_lock_task_release ++}; ++ ++/* main.c */ ++extern int alloc_default_namespace(void); ++extern void free_default_namespace(void); ++extern int aa_audit_message(struct aa_profile *profile, struct aa_audit *sa, ++ int type); ++void aa_audit_hint(struct aa_profile *profile, struct aa_audit *sa); ++void aa_audit_status(struct aa_profile *profile, struct aa_audit *sa); ++int aa_audit_reject(struct aa_profile *profile, struct aa_audit *sa); ++extern int aa_audit_syscallreject(struct aa_profile *profile, gfp_t gfp, ++ const char *); ++extern int aa_audit(struct aa_profile *profile, struct aa_audit *); ++ ++extern int aa_attr(struct aa_profile *profile, struct dentry *dentry, ++ struct vfsmount *mnt, struct iattr *iattr); ++extern int aa_perm_xattr(struct aa_profile *profile, const char *operation, ++ struct dentry *dentry, struct vfsmount *mnt, ++ int mask, int 
check); ++extern int aa_capability(struct aa_task_context *cxt, int cap); ++extern int aa_perm(struct aa_profile *profile, const char *operation, ++ struct dentry *dentry, struct vfsmount *mnt, int mask, ++ int check); ++extern int aa_perm_dir(struct aa_profile *profile, const char *operation, ++ struct dentry *dentry, struct vfsmount *mnt, ++ int mask); ++extern int aa_perm_path(struct aa_profile *, const char *operation, ++ const char *name, int mask, uid_t uid); ++extern int aa_link(struct aa_profile *profile, ++ struct dentry *link, struct vfsmount *link_mnt, ++ struct dentry *target, struct vfsmount *target_mnt); ++extern int aa_clone(struct task_struct *task); ++extern int aa_register(struct linux_binprm *bprm); ++extern void aa_release(struct task_struct *task); ++extern int aa_change_hat(const char *id, u64 hat_magic); ++extern int aa_change_profile(const char *ns_name, const char *name); ++extern struct aa_profile *__aa_replace_profile(struct task_struct *task, ++ struct aa_profile *profile); ++extern struct aa_task_context *lock_task_and_profiles(struct task_struct *task, ++ struct aa_profile *profile); ++extern void unlock_task_and_profiles(struct task_struct *task, ++ struct aa_task_context *cxt, ++ struct aa_profile *profile); ++extern void aa_change_task_context(struct task_struct *task, ++ struct aa_task_context *new_cxt, ++ struct aa_profile *profile, u64 cookie, ++ struct aa_profile *previous_profile); ++extern int aa_may_ptrace(struct aa_task_context *cxt, ++ struct aa_profile *tracee); ++extern int aa_net_perm(struct aa_profile *profile, char *operation, ++ int family, int type, int protocol); ++extern int aa_revalidate_sk(struct sock *sk, char *operation); ++extern int aa_task_setrlimit(struct aa_profile *profile, unsigned int resource, ++ struct rlimit *new_rlim); ++extern void aa_set_rlimits(struct task_struct *task, struct aa_profile *profile); ++ ++ ++/* lsm.c */ ++extern int apparmor_initialized; ++extern void info_message(const char *str, 
const char *name); ++extern void apparmor_disable(void); ++ ++/* list.c */ ++extern struct aa_namespace *__aa_find_namespace(const char *name, ++ struct list_head *list); ++extern struct aa_profile *__aa_find_profile(const char *name, ++ struct list_head *list); ++extern void aa_profile_ns_list_release(void); ++ ++/* module_interface.c */ ++extern ssize_t aa_add_profile(void *, size_t); ++extern ssize_t aa_replace_profile(void *, size_t); ++extern ssize_t aa_remove_profile(char *, size_t); ++extern struct aa_namespace *alloc_aa_namespace(char *name); ++extern void free_aa_namespace(struct aa_namespace *ns); ++extern void free_aa_namespace_kref(struct kref *kref); ++extern struct aa_profile *alloc_aa_profile(void); ++extern void free_aa_profile(struct aa_profile *profile); ++extern void free_aa_profile_kref(struct kref *kref); ++extern void aa_unconfine_tasks(struct aa_profile *profile); ++ ++/* procattr.c */ ++extern int aa_getprocattr(struct aa_profile *profile, char **string, ++ unsigned *len); ++extern int aa_setprocattr_changehat(char *args); ++extern int aa_setprocattr_changeprofile(char *args); ++extern int aa_setprocattr_setprofile(struct task_struct *task, char *args); ++ ++/* apparmorfs.c */ ++extern int create_apparmorfs(void); ++extern void destroy_apparmorfs(void); ++ ++/* match.c */ ++extern struct aa_dfa *aa_match_alloc(void); ++extern void aa_match_free(struct aa_dfa *dfa); ++extern int unpack_dfa(struct aa_dfa *dfa, void *blob, size_t size); ++extern int verify_dfa(struct aa_dfa *dfa); ++extern unsigned int aa_dfa_match(struct aa_dfa *dfa, const char *str, int *); ++extern unsigned int aa_dfa_next_state(struct aa_dfa *dfa, unsigned int start, ++ const char *str); ++extern unsigned int aa_match_state(struct aa_dfa *dfa, unsigned int start, ++ const char *str, unsigned int *final); ++extern unsigned int aa_dfa_null_transition(struct aa_dfa *dfa, ++ unsigned int start); ++ ++#endif /* __APPARMOR_H */ +diff -uprN e/security/apparmor/apparmorfs.c 
f/security/apparmor/apparmorfs.c +--- e/security/apparmor/apparmorfs.c 1970-01-01 00:00:00.000000000 +0000 ++++ f/security/apparmor/apparmorfs.c 2008-05-28 20:29:29.410207000 +0000 +@@ -0,0 +1,279 @@ ++/* ++ * Copyright (C) 1998-2007 Novell/SUSE ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation, version 2 of the ++ * ++ * AppArmor filesystem (part of securityfs) ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include "apparmor.h" ++#include "inline.h" ++ ++static char *aa_simple_write_to_buffer(const char __user *userbuf, ++ size_t alloc_size, size_t copy_size, ++ loff_t *pos, const char *operation) ++{ ++ struct aa_profile *profile; ++ char *data; ++ ++ if (*pos != 0) { ++ /* only writes from pos 0, that is complete writes */ ++ data = ERR_PTR(-ESPIPE); ++ goto out; ++ } ++ ++ /* ++ * Don't allow confined processes to load/replace/remove profiles. ++ * No sane person would add rules allowing this to a profile ++ * but we enforce the restriction anyways. 
++ */ ++ profile = aa_get_profile(current); ++ if (profile) { ++ struct aa_audit sa; ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = operation; ++ sa.gfp_mask = GFP_KERNEL; ++ sa.error_code = -EACCES; ++ data = ERR_PTR(aa_audit_reject(profile, &sa)); ++ aa_put_profile(profile); ++ goto out; ++ } ++ ++ data = vmalloc(alloc_size); ++ if (data == NULL) { ++ data = ERR_PTR(-ENOMEM); ++ goto out; ++ } ++ ++ if (copy_from_user(data, userbuf, copy_size)) { ++ vfree(data); ++ data = ERR_PTR(-EFAULT); ++ goto out; ++ } ++ ++out: ++ return data; ++} ++ ++/* apparmor/profiles */ ++extern struct seq_operations apparmorfs_profiles_op; ++ ++static int aa_profiles_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &apparmorfs_profiles_op); ++} ++ ++ ++static int aa_profiles_release(struct inode *inode, struct file *file) ++{ ++ return seq_release(inode, file); ++} ++ ++static struct file_operations apparmorfs_profiles_fops = { ++ .open = aa_profiles_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = aa_profiles_release, ++}; ++ ++/* apparmor/matching */ ++static ssize_t aa_matching_read(struct file *file, char __user *buf, ++ size_t size, loff_t *ppos) ++{ ++ const char *matching = "pattern=aadfa audit perms=rwxamlk/ user::other"; ++ ++ return simple_read_from_buffer(buf, size, ppos, matching, ++ strlen(matching)); ++} ++ ++static struct file_operations apparmorfs_matching_fops = { ++ .read = aa_matching_read, ++}; ++ ++/* apparmor/features */ ++static ssize_t aa_features_read(struct file *file, char __user *buf, ++ size_t size, loff_t *ppos) ++{ ++ const char *features = "file=3.0 capability=2.0 network=1.0 " ++ "change_hat=1.4 change_profile=1.0 " ++ "aanamespaces=1.0 rlimit=1.0"; ++ ++ return simple_read_from_buffer(buf, size, ppos, features, ++ strlen(features)); ++} ++ ++static struct file_operations apparmorfs_features_fops = { ++ .read = aa_features_read, ++}; ++ ++/* apparmor/.load */ ++static ssize_t aa_profile_load(struct file *f, 
const char __user *buf, ++ size_t size, loff_t *pos) ++{ ++ char *data; ++ ssize_t error; ++ ++ data = aa_simple_write_to_buffer(buf, size, size, pos, "profile_load"); ++ ++ error = PTR_ERR(data); ++ if (!IS_ERR(data)) { ++ error = aa_add_profile(data, size); ++ vfree(data); ++ } ++ ++ return error; ++} ++ ++ ++static struct file_operations apparmorfs_profile_load = { ++ .write = aa_profile_load ++}; ++ ++/* apparmor/.replace */ ++static ssize_t aa_profile_replace(struct file *f, const char __user *buf, ++ size_t size, loff_t *pos) ++{ ++ char *data; ++ ssize_t error; ++ ++ data = aa_simple_write_to_buffer(buf, size, size, pos, ++ "profile_replace"); ++ ++ error = PTR_ERR(data); ++ if (!IS_ERR(data)) { ++ error = aa_replace_profile(data, size); ++ vfree(data); ++ } ++ ++ return error; ++} ++ ++ ++static struct file_operations apparmorfs_profile_replace = { ++ .write = aa_profile_replace ++}; ++ ++/* apparmor/.remove */ ++static ssize_t aa_profile_remove(struct file *f, const char __user *buf, ++ size_t size, loff_t *pos) ++{ ++ char *data; ++ ssize_t error; ++ ++ /* ++ * aa_remove_profile needs a null terminated string so 1 extra ++ * byte is allocated and the copied data is null terminated. 
++ */ ++ data = aa_simple_write_to_buffer(buf, size + 1, size, pos, ++ "profile_remove"); ++ ++ error = PTR_ERR(data); ++ if (!IS_ERR(data)) { ++ data[size] = 0; ++ error = aa_remove_profile(data, size); ++ vfree(data); ++ } ++ ++ return error; ++} ++ ++static struct file_operations apparmorfs_profile_remove = { ++ .write = aa_profile_remove ++}; ++ ++static struct dentry *apparmor_dentry; ++ ++static void aafs_remove(const char *name) ++{ ++ struct dentry *dentry; ++ ++ dentry = lookup_one_len(name, apparmor_dentry, strlen(name)); ++ if (!IS_ERR(dentry)) { ++ securityfs_remove(dentry); ++ dput(dentry); ++ } ++} ++ ++static int aafs_create(const char *name, int mask, struct file_operations *fops) ++{ ++ struct dentry *dentry; ++ ++ dentry = securityfs_create_file(name, S_IFREG | mask, apparmor_dentry, ++ NULL, fops); ++ ++ return IS_ERR(dentry) ? PTR_ERR(dentry) : 0; ++} ++ ++void destroy_apparmorfs(void) ++{ ++ if (apparmor_dentry) { ++ aafs_remove(".remove"); ++ aafs_remove(".replace"); ++ aafs_remove(".load"); ++ aafs_remove("matching"); ++ aafs_remove("features"); ++ aafs_remove("profiles"); ++ securityfs_remove(apparmor_dentry); ++ apparmor_dentry = NULL; ++ } ++} ++ ++int create_apparmorfs(void) ++{ ++ int error; ++ ++ if (!apparmor_initialized) ++ return 0; ++ ++ if (apparmor_dentry) { ++ AA_ERROR("%s: AppArmor securityfs already exists\n", ++ __FUNCTION__); ++ return -EEXIST; ++ } ++ ++ apparmor_dentry = securityfs_create_dir("apparmor", NULL); ++ if (IS_ERR(apparmor_dentry)) { ++ error = PTR_ERR(apparmor_dentry); ++ apparmor_dentry = NULL; ++ goto error; ++ } ++ error = aafs_create("profiles", 0440, &apparmorfs_profiles_fops); ++ if (error) ++ goto error; ++ error = aafs_create("matching", 0444, &apparmorfs_matching_fops); ++ if (error) ++ goto error; ++ error = aafs_create("features", 0444, &apparmorfs_features_fops); ++ if (error) ++ goto error; ++ error = aafs_create(".load", 0640, &apparmorfs_profile_load); ++ if (error) ++ goto error; ++ error = 
aafs_create(".replace", 0640, &apparmorfs_profile_replace); ++ if (error) ++ goto error; ++ error = aafs_create(".remove", 0640, &apparmorfs_profile_remove); ++ if (error) ++ goto error; ++ ++ /* Report that AppArmor fs is enabled */ ++ info_message("AppArmor Filesystem Enabled", ""); ++ return 0; ++ ++error: ++ destroy_apparmorfs(); ++ AA_ERROR("Error creating AppArmor securityfs\n"); ++ apparmor_disable(); ++ return error; ++} ++ ++fs_initcall(create_apparmorfs); ++ +diff -uprN e/security/apparmor/inline.h f/security/apparmor/inline.h +--- e/security/apparmor/inline.h 1970-01-01 00:00:00.000000000 +0000 ++++ f/security/apparmor/inline.h 2008-05-28 20:29:29.410207000 +0000 +@@ -0,0 +1,250 @@ ++/* ++ * Copyright (C) 1998-2007 Novell/SUSE ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation, version 2 of the ++ * License. ++ */ ++ ++#ifndef __INLINE_H ++#define __INLINE_H ++ ++#include ++ ++#include "match.h" ++ ++static inline int mediated_filesystem(struct inode *inode) ++{ ++ return !(inode->i_sb->s_flags & MS_NOUSER); ++} ++ ++static inline struct aa_task_context *aa_task_context(struct task_struct *task) ++{ ++ return (struct aa_task_context *) rcu_dereference(task->security); ++} ++ ++static inline struct aa_namespace *aa_get_namespace(struct aa_namespace *ns) ++{ ++ if (ns) ++ kref_get(&(ns->count)); ++ ++ return ns; ++} ++ ++static inline void aa_put_namespace(struct aa_namespace *ns) ++{ ++ if (ns) ++ kref_put(&ns->count, free_aa_namespace_kref); ++} ++ ++ ++static inline struct aa_namespace *aa_find_namespace(const char *name) ++{ ++ struct aa_namespace *ns = NULL; ++ ++ read_lock(&profile_ns_list_lock); ++ ns = aa_get_namespace(__aa_find_namespace(name, &profile_ns_list)); ++ read_unlock(&profile_ns_list_lock); ++ ++ return ns; ++} ++ ++/** ++ * aa_dup_profile - increment refcount on profile @p ++ * @p: profile ++ */ 
++static inline struct aa_profile *aa_dup_profile(struct aa_profile *p) ++{ ++ if (p) ++ kref_get(&(p->count)); ++ ++ return p; ++} ++ ++/** ++ * aa_put_profile - decrement refcount on profile @p ++ * @p: profile ++ */ ++static inline void aa_put_profile(struct aa_profile *p) ++{ ++ if (p) ++ kref_put(&p->count, free_aa_profile_kref); ++} ++ ++static inline struct aa_profile *aa_get_profile(struct task_struct *task) ++{ ++ struct aa_task_context *cxt; ++ struct aa_profile *profile = NULL; ++ ++ rcu_read_lock(); ++ cxt = aa_task_context(task); ++ if (cxt) { ++ profile = cxt->profile; ++ aa_dup_profile(profile); ++ } ++ rcu_read_unlock(); ++ ++ return profile; ++} ++ ++static inline struct aa_profile *aa_find_profile(struct aa_namespace *ns, ++ const char *name) ++{ ++ struct aa_profile *profile = NULL; ++ ++ read_lock(&ns->lock); ++ profile = aa_dup_profile(__aa_find_profile(name, &ns->profiles)); ++ read_unlock(&ns->lock); ++ ++ return profile; ++} ++ ++static inline struct aa_task_context *aa_alloc_task_context(gfp_t flags) ++{ ++ struct aa_task_context *cxt; ++ ++ cxt = kzalloc(sizeof(*cxt), flags); ++ if (cxt) { ++ INIT_LIST_HEAD(&cxt->list); ++ INIT_RCU_HEAD(&cxt->rcu); ++ } ++ ++ return cxt; ++} ++ ++static inline void aa_free_task_context(struct aa_task_context *cxt) ++{ ++ if (cxt) { ++ aa_put_profile(cxt->profile); ++ aa_put_profile(cxt->previous_profile); ++ kfree(cxt); ++ } ++} ++ ++/** ++ * lock_profile - lock a profile ++ * @profile: the profile to lock ++ * ++ * While the profile is locked, local interrupts are disabled. This also ++ * gives us RCU reader safety. ++ */ ++static inline void lock_profile_nested(struct aa_profile *profile, ++ enum aa_lock_class lock_class) ++{ ++ /* ++ * Lock the profile. ++ * ++ * Need to disable interrupts here because this lock is used in ++ * the task_free_security hook, which may run in RCU context. 
++ */ ++ if (profile) ++ spin_lock_irqsave_nested(&profile->lock, profile->int_flags, ++ lock_class); ++} ++ ++static inline void lock_profile(struct aa_profile *profile) ++{ ++ lock_profile_nested(profile, aa_lock_normal); ++} ++ ++/** ++ * unlock_profile - unlock a profile ++ * @profile: the profile to unlock ++ */ ++static inline void unlock_profile(struct aa_profile *profile) ++{ ++ /* Unlock the profile and restore the saved interrupt state. */ ++ if (profile) ++ spin_unlock_irqrestore(&profile->lock, profile->int_flags); ++} ++ ++/** ++ * lock_both_profiles - lock two profiles in a deadlock-free way ++ * @profile1: profile to lock (may be NULL) ++ * @profile2: profile to lock (may be NULL) ++ * ++ * The order in which profiles are passed into lock_both_profiles() / ++ * unlock_both_profiles() does not matter. ++ * While the profile is locked, local interrupts are disabled. This also ++ * gives us RCU reader safety. ++ */ ++static inline void lock_both_profiles(struct aa_profile *profile1, ++ struct aa_profile *profile2) ++{ ++ /* ++ * Lock the two profiles, higher address first. ++ * ++ * We need to disable interrupts because the profile locks are ++ * used in the task_free_security hook, which may run in RCU ++ * context. ++ * ++ * Do not nest spin_lock_irqsave()/spin_unlock_irqrestore(): ++ * interrupts only need to be turned off once. ++ */ ++ if (!profile1 || profile1 == profile2) { ++ if (profile2) ++ spin_lock_irqsave_nested(&profile2->lock, ++ profile2->int_flags, ++ aa_lock_normal); ++ } else if (profile1 > profile2) { ++ /* profile1 cannot be NULL here. */ ++ spin_lock_irqsave_nested(&profile1->lock, profile1->int_flags, ++ aa_lock_normal); ++ if (profile2) ++ spin_lock_nested(&profile2->lock, aa_lock_nested); ++ ++ } else { ++ /* profile2 cannot be NULL here.
*/ ++ spin_lock_irqsave_nested(&profile2->lock, profile2->int_flags, ++ aa_lock_normal); ++ spin_lock_nested(&profile1->lock, aa_lock_nested); ++ } ++} ++ ++/** ++ * unlock_both_profiles - unlock two profiles in a deadlock-free way ++ * @profile1: profile to unlock (may be NULL) ++ * @profile2: profile to unlock (may be NULL) ++ * ++ * The order in which profiles are passed into lock_both_profiles() / ++ * unlock_both_profiles() does not matter. ++ * While the profile is locked, local interrupts are disabled. This also ++ * gives us RCU reader safety. ++ */ ++static inline void unlock_both_profiles(struct aa_profile *profile1, ++ struct aa_profile *profile2) ++{ ++ /* Unlock the two profiles. */ ++ if (!profile1 || profile1 == profile2) { ++ if (profile2) ++ spin_unlock_irqrestore(&profile2->lock, ++ profile2->int_flags); ++ } else if (profile1 > profile2) { ++ /* profile1 cannot be NULL here. */ ++ if (profile2) ++ spin_unlock(&profile2->lock); ++ spin_unlock_irqrestore(&profile1->lock, profile1->int_flags); ++ } else { ++ /* profile2 cannot be NULL here. 
*/ ++ spin_unlock(&profile1->lock); ++ spin_unlock_irqrestore(&profile2->lock, profile2->int_flags); ++ } ++} ++ ++static inline unsigned int aa_match(struct aa_dfa *dfa, const char *pathname, ++ int *audit_mask) ++{ ++ if (dfa) ++ return aa_dfa_match(dfa, pathname, audit_mask); ++ if (audit_mask) ++ *audit_mask = 0; ++ return 0; ++} ++ ++static inline int dfa_audit_mask(struct aa_dfa *dfa, unsigned int state) ++{ ++ return ACCEPT_TABLE2(dfa)[state]; ++} ++ ++#endif /* __INLINE_H */ +diff -uprN e/security/apparmor/list.c f/security/apparmor/list.c +--- e/security/apparmor/list.c 1970-01-01 00:00:00.000000000 +0000 ++++ f/security/apparmor/list.c 2008-05-28 20:29:29.410207000 +0000 +@@ -0,0 +1,172 @@ ++/* ++ * Copyright (C) 1998-2007 Novell/SUSE ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation, version 2 of the ++ * License. ++ * ++ * AppArmor Profile List Management ++ */ ++ ++#include ++#include "apparmor.h" ++#include "inline.h" ++ ++/* list of profile namespaces and the rwlock protecting it */ ++LIST_HEAD(profile_ns_list); ++DEFINE_RWLOCK(profile_ns_list_lock); ++ ++/** ++ * __aa_find_namespace - look up a profile namespace on the namespace list ++ * @name: name of namespace to find ++ * @head: list to search ++ * ++ * Returns a pointer to the namespace on the list, or NULL if no namespace ++ * called @name exists. The caller must hold the profile_ns_list_lock.
++ */ ++struct aa_namespace *__aa_find_namespace(const char *name, ++ struct list_head *head) ++{ ++ struct aa_namespace *ns; ++ ++ list_for_each_entry(ns, head, list) { ++ if (!strcmp(ns->name, name)) ++ return ns; ++ } ++ ++ return NULL; ++} ++ ++/** ++ * __aa_find_profile - look up a profile on the profile list ++ * @name: name of profile to find ++ * @head: list to search ++ * ++ * Returns a pointer to the profile on the list, or NULL if no profile ++ * called @name exists. The caller must hold the namespace's ns->lock. ++ */ ++struct aa_profile *__aa_find_profile(const char *name, struct list_head *head) ++{ ++ struct aa_profile *profile; ++ ++ list_for_each_entry(profile, head, list) { ++ if (!strcmp(profile->name, name)) ++ return profile; ++ } ++ ++ return NULL; ++} ++ ++static void aa_profile_list_release(struct list_head *head) ++{ ++ struct aa_profile *profile, *tmp; ++ list_for_each_entry_safe(profile, tmp, head, list) { ++ /* Mark stale, unconfine its tasks and drop it from the list. */ ++ lock_profile(profile); ++ profile->isstale = 1; ++ aa_unconfine_tasks(profile); ++ list_del_init(&profile->list); ++ unlock_profile(profile); ++ aa_put_profile(profile); ++ } ++} ++ ++/** ++ * aa_profile_ns_list_release - Remove all namespaces and their profiles ++ */ ++void aa_profile_ns_list_release(void) ++{ ++ struct aa_namespace *ns, *tmp; ++ ++ /* Remove and release all the profiles on namespace profile lists.
*/ ++ write_lock(&profile_ns_list_lock); ++ list_for_each_entry_safe(ns, tmp, &profile_ns_list, list) { ++ write_lock(&ns->lock); ++ aa_profile_list_release(&ns->profiles); ++ list_del_init(&ns->list); ++ write_unlock(&ns->lock); ++ aa_put_namespace(ns); ++ } ++ write_unlock(&profile_ns_list_lock); ++} ++ ++static struct aa_profile *next_profile(struct aa_profile *profile) ++{ ++ struct aa_profile *next = profile; ++ struct aa_namespace *ns; ++ ++ list_for_each_entry_continue(next, &profile->ns->profiles, list) ++ return next; ++ ++ ns = profile->ns; ++ read_unlock(&ns->lock); ++ list_for_each_entry_continue(ns, &profile_ns_list, list) { ++ read_lock(&ns->lock); ++ list_for_each_entry(profile, &ns->profiles, list) ++ return profile; ++ read_unlock(&ns->lock); ++ } ++ return NULL; ++} ++ ++static void *p_start(struct seq_file *f, loff_t *pos) ++{ ++ struct aa_namespace *ns; ++ loff_t l = *pos; ++ ++ read_lock(&profile_ns_list_lock); ++ if (!list_empty(&profile_ns_list)) { ++ struct aa_profile *profile = NULL; ++ ns = list_first_entry(&profile_ns_list, typeof(*ns), list); ++ read_lock(&ns->lock); ++ if (!list_empty(&ns->profiles)) ++ profile = list_first_entry(&ns->profiles, ++ typeof(*profile), list); ++ else ++ read_unlock(&ns->lock); ++ for ( ; profile && l > 0; l--) ++ profile = next_profile(profile); ++ return profile; ++ } ++ return NULL; ++} ++ ++static void *p_next(struct seq_file *f, void *p, loff_t *pos) ++{ ++ struct aa_profile *profile = (struct aa_profile *) p; ++ ++ (*pos)++; ++ profile = next_profile(profile); ++ ++ return profile; ++} ++ ++static void p_stop(struct seq_file *f, void *p) ++{ ++ struct aa_profile *profile = (struct aa_profile *) p; ++ ++ if (profile) ++ read_unlock(&profile->ns->lock); ++ read_unlock(&profile_ns_list_lock); ++} ++ ++static int seq_show_profile(struct seq_file *f, void *p) ++{ ++ struct aa_profile *profile = (struct aa_profile *)p; ++ if (profile->ns == default_namespace) ++ seq_printf(f, "%s (%s)\n", profile->name, ++ 
PROFILE_COMPLAIN(profile) ? "complain" : "enforce"); ++ else ++ seq_printf(f, ":%s:%s (%s)\n", profile->ns->name, profile->name, ++ PROFILE_COMPLAIN(profile) ? "complain" : "enforce"); ++ return 0; ++} ++ ++/* Used in apparmorfs.c */ ++struct seq_operations apparmorfs_profiles_op = { ++ .start = p_start, ++ .next = p_next, ++ .stop = p_stop, ++ .show = seq_show_profile, ++}; +diff -uprN e/security/apparmor/locking.txt f/security/apparmor/locking.txt +--- e/security/apparmor/locking.txt 1970-01-01 00:00:00.000000000 +0000 ++++ f/security/apparmor/locking.txt 2008-05-28 20:29:29.410207000 +0000 +@@ -0,0 +1,68 @@ ++Locking in AppArmor ++=================== ++ ++Lock hierarchy: ++ ++ aa_interface_lock ++ profile_list_lock ++ aa_profile->lock ++ task_lock() ++ ++ ++Which lock protects what? ++ ++ /-----------------------+-------------------------------\ ++ | Variable | Lock | ++ >-----------------------+-------------------------------< ++ | profile_list | profile_list_lock | ++ +-----------------------+-------------------------------+ ++ | aa_profile | (reference count) | ++ +-----------------------+-------------------------------+ ++ | aa_profile-> | aa_profile->lock | ++ | isstale, | | ++ | task_contexts | | ++ +-----------------------+-------------------------------+ ++ | task_struct->security | read: RCU | ++ | | write: task_lock() | ++ +-----------------------+-------------------------------+ ++ | aa_profile->sub | handle on the profile (list | ++ | | is never modified) | ++ \-----------------------+-------------------------------/ ++ ++(Obviously, the list_heads embedded in data structures are always ++protected with the lock that also protects the list.) ++ ++When moving a task context from one profile to another, we grab both ++profile locks with lock_both_profiles(). This ensures that both locks ++are always taken in the same order, and so we won't deadlock. 
++ ++Since task_struct->security is RCU protected, the aa_task_context it ++references is only guaranteed to exist for the rcu cycle. Where ++aa_task_context->profile is needed in blocking operations the ++profile's reference count is incremented and the profile reference ++is used. ++ ++Profiles on profile_list are never stale: when a profile becomes stale, ++it is removed from profile_list at the same time (under profile_list_lock ++and aa_profile->lock). ++ ++The aa_interface_lock is taken whenever user-space modifies the profile ++list, and can sleep. This ensures that profile loading/replacement/removal ++won't race with itself. We release the profile_list_lock as soon as ++possible to avoid stalling exec during profile loading/replacement/removal. ++ ++AppArmor uses lock subtyping to avoid false positives from lockdep. The ++profile lock is often taken nested, but it is guaranteed to be in a lock ++safe order and not the same lock when done, so it is safe. ++ ++A third lock type (aa_lock_task_release) is given to the profile lock ++when it is taken in soft irq context during task release (aa_release). ++This is to avoid a false positive between the task lock and the profile ++lock. In task context the profile lock wraps the task lock with irqs ++off, but the kernel takes the task lock with irqs enabled. This won't ++result in a deadlock because for a deadlock to occur the kernel must ++take dead task A's lock (irqs on), the rcu callback hook freeing ++dead task A must be run and AppArmor must be changing the profile on ++dead task A. The kernel should not be taking a dead task's task_lock ++at the same time the task is being freed by task rcu cleanup, otherwise ++the task would not be out of its quiescent period.
+diff -uprN e/security/apparmor/lsm.c f/security/apparmor/lsm.c +--- e/security/apparmor/lsm.c 1970-01-01 00:00:00.000000000 +0000 ++++ f/security/apparmor/lsm.c 2008-05-28 20:29:29.410207000 +0000 +@@ -0,0 +1,1108 @@ ++/* ++ * Copyright (C) 1998-2007 Novell/SUSE ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation, version 2 of the ++ * License. ++ * ++ * AppArmor LSM interface ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "apparmor.h" ++#include "inline.h" ++ ++/* Flag indicating whether initialization completed */ ++int apparmor_initialized = 0; ++ ++/* point to the apparmor module */ ++struct module *aa_module = NULL; ++ ++/* secondary ops if apparmor is stacked */ ++static struct security_operations *aa_secondary_ops = NULL; ++static DEFINE_MUTEX(aa_secondary_lock); ++ ++#define AA_SECONDARY(FN, ARGS...) \ ++ ({ \ ++ struct security_operations *__f1; \ ++ __f1 = rcu_dereference(aa_secondary_ops); \ ++ (unlikely(__f1) && __f1->FN) ? __f1->FN(ARGS) : 0; \ ++ }) ++ ++static int param_set_aabool(const char *val, struct kernel_param *kp); ++static int param_get_aabool(char *buffer, struct kernel_param *kp); ++#define param_check_aabool(name, p) __param_check(name, p, int) ++ ++static int param_set_aauint(const char *val, struct kernel_param *kp); ++static int param_get_aauint(char *buffer, struct kernel_param *kp); ++#define param_check_aauint(name, p) __param_check(name, p, unsigned int) ++ ++/* Flag values, also controllable via /sys/module/apparmor/parameters ++ * We define special types as we want to do additional mediation. ++ * ++ * Complain mode -- in complain mode access failures result in auditing only ++ * and task is allowed access. audit events are processed by userspace to ++ * generate policy. Default is 'enforce' (0).
++ * Value is also togglable per profile and referenced when global value is ++ * enforce. ++ */ ++int apparmor_complain = 0; ++module_param_named(complain, apparmor_complain, aabool, S_IRUSR | S_IWUSR); ++MODULE_PARM_DESC(complain, "Toggle AppArmor complain mode"); ++ ++/* Debug mode */ ++int apparmor_debug = 0; ++module_param_named(debug, apparmor_debug, aabool, S_IRUSR | S_IWUSR); ++MODULE_PARM_DESC(debug, "Toggle AppArmor debug mode"); ++ ++/* Audit mode */ ++int apparmor_audit = 0; ++module_param_named(audit, apparmor_audit, aabool, S_IRUSR | S_IWUSR); ++MODULE_PARM_DESC(audit, "Toggle AppArmor audit mode"); ++ ++/* Syscall logging mode */ ++int apparmor_logsyscall = 0; ++module_param_named(logsyscall, apparmor_logsyscall, aabool, S_IRUSR | S_IWUSR); ++MODULE_PARM_DESC(logsyscall, "Toggle AppArmor logsyscall mode"); ++ ++/* Maximum pathname length before accesses will start getting rejected */ ++unsigned int apparmor_path_max = 2 * PATH_MAX; ++module_param_named(path_max, apparmor_path_max, aauint, S_IRUSR | S_IWUSR); ++MODULE_PARM_DESC(path_max, "Maximum pathname length allowed"); ++ ++/* Boot time disable flag */ ++#ifdef CONFIG_SECURITY_APPARMOR_DISABLE ++#define AA_ENABLED_PERMS 0600 ++#else ++#define AA_ENABLED_PERMS 0400 ++#endif ++static int param_set_aa_enabled(const char *val, struct kernel_param *kp); ++unsigned int apparmor_enabled = CONFIG_SECURITY_APPARMOR_BOOTPARAM_VALUE; ++module_param_call(enabled, param_set_aa_enabled, param_get_aauint, ++ &apparmor_enabled, AA_ENABLED_PERMS); ++MODULE_PARM_DESC(enabled, "Enable/Disable Apparmor on boot"); ++ ++static int __init apparmor_enabled_setup(char *str) ++{ ++ apparmor_enabled = simple_strtol(str, NULL, 0); ++ return 1; ++} ++__setup("apparmor=", apparmor_enabled_setup); ++ ++static int param_set_aabool(const char *val, struct kernel_param *kp) ++{ ++ if (aa_task_context(current)) ++ return -EPERM; ++ return param_set_bool(val, kp); ++} ++ ++static
int param_get_aabool(char *buffer, struct kernel_param *kp) ++{ ++ if (aa_task_context(current)) ++ return -EPERM; ++ return param_get_bool(buffer, kp); ++} ++ ++static int param_set_aauint(const char *val, struct kernel_param *kp) ++{ ++ if (aa_task_context(current)) ++ return -EPERM; ++ return param_set_uint(val, kp); ++} ++ ++static int param_get_aauint(char *buffer, struct kernel_param *kp) ++{ ++ if (aa_task_context(current)) ++ return -EPERM; ++ return param_get_uint(buffer, kp); ++} ++ ++/* allow run time disabling of apparmor */ ++static int param_set_aa_enabled(const char *val, struct kernel_param *kp) ++{ ++ char *endp; ++ unsigned long l; ++ ++ if (!apparmor_initialized) { ++ apparmor_enabled = 0; ++ return 0; ++ } ++ ++ if (aa_task_context(current)) ++ return -EPERM; ++ ++ if (!apparmor_enabled) ++ return -EINVAL; ++ ++ if (!val) ++ return -EINVAL; ++ ++ l = simple_strtoul(val, &endp, 0); ++ if (endp == val || l != 0) ++ return -EINVAL; ++ ++ apparmor_enabled = 0; ++ apparmor_disable(); ++ return 0; ++} ++ ++static int aa_reject_syscall(struct task_struct *task, gfp_t flags, ++ const char *name) ++{ ++ struct aa_profile *profile = aa_get_profile(task); ++ int error = 0; ++ ++ if (profile) { ++ error = aa_audit_syscallreject(profile, flags, name); ++ aa_put_profile(profile); ++ } ++ ++ return error; ++} ++ ++static int apparmor_ptrace(struct task_struct *parent, ++ struct task_struct *child) ++{ ++ struct aa_task_context *cxt; ++ int error = 0; ++ ++ /* ++ * parent can ptrace child when ++ * - parent is unconfined ++ * - parent & child are in the same namespace && ++ * - parent is in complain mode ++ * - parent and child are confined by the same profile ++ * - parent profile has CAP_SYS_PTRACE ++ */ ++ ++ rcu_read_lock(); ++ cxt = aa_task_context(parent); ++ if (cxt) { ++ if (parent->nsproxy != child->nsproxy) { ++ struct aa_audit sa; ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = "ptrace"; ++ sa.gfp_mask = GFP_ATOMIC; ++ sa.parent = parent->pid; ++ 
sa.task = child->pid; ++ sa.info = "different namespaces"; ++ aa_audit_reject(cxt->profile, &sa); ++ error = -EPERM; ++ } else { ++ struct aa_task_context *child_cxt = ++ aa_task_context(child); ++ ++ error = aa_may_ptrace(cxt, child_cxt ? ++ child_cxt->profile : NULL); ++ if (PROFILE_COMPLAIN(cxt->profile)) { ++ struct aa_audit sa; ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = "ptrace"; ++ sa.gfp_mask = GFP_ATOMIC; ++ sa.parent = parent->pid; ++ sa.task = child->pid; ++ aa_audit_hint(cxt->profile, &sa); ++ } ++ } ++ } ++ rcu_read_unlock(); ++ ++ return error; ++} ++ ++static int apparmor_capable(struct task_struct *task, int cap) ++{ ++ int error; ++ struct aa_task_context *cxt; ++ ++ /* cap_capable returns 0 on success, else -EPERM */ ++ error = cap_capable(task, cap); ++ ++ rcu_read_lock(); ++ cxt = aa_task_context(task); ++ if (cxt && (!error || cap_raised(cxt->profile->set_caps, cap))) ++ error = aa_capability(cxt, cap); ++ rcu_read_unlock(); ++ ++ return error; ++} ++ ++static int apparmor_sysctl(struct ctl_table *table, int op) ++{ ++ struct aa_profile *profile = aa_get_profile(current); ++ int error = 0; ++ ++ if (profile) { ++ char *buffer, *name; ++ int mask; ++ ++ mask = 0; ++ if (op & 4) ++ mask |= MAY_READ; ++ if (op & 2) ++ mask |= MAY_WRITE; ++ ++ error = -ENOMEM; ++ buffer = (char*)__get_free_page(GFP_KERNEL); ++ if (!buffer) ++ goto out; ++ name = sysctl_pathname(table, buffer, PAGE_SIZE); ++ if (name && name - buffer >= 5) { ++ name -= 5; ++ memcpy(name, "/proc", 5); ++ error = aa_perm_path(profile, "sysctl", name, mask, 0); ++ } ++ free_page((unsigned long)buffer); ++ } ++ ++out: ++ aa_put_profile(profile); ++ return error; ++} ++ ++static int apparmor_bprm_set_security(struct linux_binprm *bprm) ++{ ++ /* handle capability bits with setuid, etc */ ++ cap_bprm_set_security(bprm); ++ /* already set based on script name */ ++ if (bprm->sh_bang) ++ return 0; ++ return aa_register(bprm); ++} ++ ++static int apparmor_bprm_secureexec(struct 
linux_binprm *bprm) ++{ ++ int ret = cap_bprm_secureexec(bprm); ++ ++ if (!ret && (unsigned long)bprm->security & AA_SECURE_EXEC_NEEDED) { ++ AA_DEBUG("%s: secureexec required for %s\n", ++ __FUNCTION__, bprm->filename); ++ ret = 1; ++ } ++ ++ return ret; ++} ++ ++static int apparmor_sb_mount(char *dev_name, struct nameidata *nd, char *type, ++ unsigned long flags, void *data) ++{ ++ return aa_reject_syscall(current, GFP_KERNEL, "mount"); ++} ++ ++static int apparmor_umount(struct vfsmount *mnt, int flags) ++{ ++ return aa_reject_syscall(current, GFP_KERNEL, "umount"); ++} ++ ++static int apparmor_inode_mkdir(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, int mask) ++{ ++ struct aa_profile *profile; ++ int error = 0; ++ ++ if (!mnt || !mediated_filesystem(dir)) ++ goto out; ++ ++ profile = aa_get_profile(current); ++ ++ if (profile) ++ error = aa_perm_dir(profile, "inode_mkdir", dentry, mnt, ++ MAY_WRITE); ++ ++ aa_put_profile(profile); ++ ++out: ++ return error; ++} ++ ++static int apparmor_inode_rmdir(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt) ++{ ++ struct aa_profile *profile; ++ int error = 0; ++ ++ if (!mnt || !mediated_filesystem(dir)) ++ goto out; ++ ++ profile = aa_get_profile(current); ++ ++ if (profile) ++ error = aa_perm_dir(profile, "inode_rmdir", dentry, mnt, ++ MAY_WRITE); ++ ++ aa_put_profile(profile); ++ ++out: ++ return error; ++} ++ ++static int aa_permission(const char *operation, struct inode *inode, ++ struct dentry *dentry, struct vfsmount *mnt, ++ int mask, int check) ++{ ++ int error = 0; ++ ++ if (mnt && mediated_filesystem(inode)) { ++ struct aa_profile *profile; ++ ++ profile = aa_get_profile(current); ++ if (profile) ++ error = aa_perm(profile, operation, dentry, mnt, mask, ++ check); ++ aa_put_profile(profile); ++ } ++ return error; ++} ++ ++static inline int aa_mask_permissions(int mask) ++{ ++ if (mask & MAY_APPEND) ++ mask &= (MAY_READ | MAY_APPEND | MAY_EXEC); ++ else ++ mask &= (MAY_READ 
| MAY_WRITE | MAY_EXEC); ++ return mask; ++} ++ ++static int apparmor_inode_create(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, int mask) ++{ ++ return aa_permission("inode_create", dir, dentry, mnt, MAY_APPEND, 0); ++} ++ ++static int apparmor_inode_link(struct dentry *old_dentry, ++ struct vfsmount *old_mnt, struct inode *dir, ++ struct dentry *new_dentry, ++ struct vfsmount *new_mnt) ++{ ++ int error = 0; ++ struct aa_profile *profile; ++ ++ if (!old_mnt || !new_mnt || !mediated_filesystem(dir)) ++ goto out; ++ ++ profile = aa_get_profile(current); ++ ++ if (profile) ++ error = aa_link(profile, new_dentry, new_mnt, ++ old_dentry, old_mnt); ++ ++ aa_put_profile(profile); ++ ++out: ++ return error; ++} ++ ++static int apparmor_inode_unlink(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt) ++{ ++ int check = 0; ++ ++ if (S_ISDIR(dentry->d_inode->i_mode)) ++ check |= AA_CHECK_DIR; ++ return aa_permission("inode_unlink", dir, dentry, mnt, MAY_WRITE, ++ check); ++} ++ ++static int apparmor_inode_symlink(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, const char *old_name) ++{ ++ return aa_permission("inode_symlink", dir, dentry, mnt, MAY_WRITE, 0); ++} ++ ++static int apparmor_inode_mknod(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, int mode, dev_t dev) ++{ ++ return aa_permission("inode_mknod", dir, dentry, mnt, MAY_WRITE, 0); ++} ++ ++static int apparmor_inode_rename(struct inode *old_dir, ++ struct dentry *old_dentry, ++ struct vfsmount *old_mnt, ++ struct inode *new_dir, ++ struct dentry *new_dentry, ++ struct vfsmount *new_mnt) ++{ ++ struct aa_profile *profile; ++ int error = 0; ++ ++ if ((!old_mnt && !new_mnt) || !mediated_filesystem(old_dir)) ++ goto out; ++ ++ profile = aa_get_profile(current); ++ ++ if (profile) { ++ struct inode *inode = old_dentry->d_inode; ++ int check = 0; ++ ++ if (inode && S_ISDIR(inode->i_mode)) ++ check |= AA_CHECK_DIR; ++ if (old_mnt) ++ error = 
aa_perm(profile, "inode_rename", old_dentry, ++ old_mnt, MAY_READ | MAY_WRITE, check); ++ ++ if (!error && new_mnt) { ++ error = aa_perm(profile, "inode_rename", new_dentry, ++ new_mnt, MAY_WRITE, check); ++ } ++ } ++ ++ aa_put_profile(profile); ++ ++out: ++ return error; ++} ++ ++static int apparmor_inode_permission(struct inode *inode, int mask, ++ struct nameidata *nd) ++{ ++ int check = 0, error = 0; ++ ++ if (!nd || nd->flags & (LOOKUP_PARENT | LOOKUP_CONTINUE)) ++ goto out; ++ mask = aa_mask_permissions(mask); ++ if (S_ISDIR(inode->i_mode)) { ++ check |= AA_CHECK_DIR; ++ /* allow traverse accesses to directories */ ++ mask &= ~MAY_EXEC; ++ } ++ error = aa_permission("inode_permission", inode, nd->path.dentry, ++ nd->path.mnt, ++ mask, check); ++ ++out: ++ if (!error) ++ error = AA_SECONDARY(inode_permission, inode, mask, nd); ++ ++ return error; ++} ++ ++static int apparmor_inode_setattr(struct dentry *dentry, struct vfsmount *mnt, ++ struct iattr *iattr) ++{ ++ int error = 0; ++ ++ if (!mnt) ++ goto out; ++ ++ if (mediated_filesystem(dentry->d_inode)) { ++ struct aa_profile *profile; ++ ++ profile = aa_get_profile(current); ++ /* ++ * Mediate any attempt to change attributes of a file ++ * (chmod, chown, chgrp, etc) ++ */ ++ if (profile) ++ error = aa_attr(profile, dentry, mnt, iattr); ++ ++ aa_put_profile(profile); ++ } ++ ++out: ++ return error; ++} ++ ++static int aa_xattr_permission(struct dentry *dentry, struct vfsmount *mnt, ++ const char *operation, int mask, ++ struct file *file) ++{ ++ int error = 0; ++ ++ if (mnt && mediated_filesystem(dentry->d_inode)) { ++ struct aa_profile *profile = aa_get_profile(current); ++ int check = file ? 
AA_CHECK_FD : 0; ++ ++ if (profile) ++ error = aa_perm_xattr(profile, operation, dentry, mnt, ++ mask, check); ++ aa_put_profile(profile); ++ } ++ ++ return error; ++} ++ ++static int apparmor_inode_setxattr(struct dentry *dentry, struct vfsmount *mnt, ++ char *name, void *value, size_t size, ++ int flags, struct file *file) ++{ ++ return aa_xattr_permission(dentry, mnt, "xattr set", MAY_WRITE, file); ++} ++ ++static int apparmor_inode_getxattr(struct dentry *dentry, struct vfsmount *mnt, ++ char *name, struct file *file) ++{ ++ return aa_xattr_permission(dentry, mnt, "xattr get", MAY_READ, file); ++} ++ ++static int apparmor_inode_listxattr(struct dentry *dentry, struct vfsmount *mnt, ++ struct file *file) ++{ ++ return aa_xattr_permission(dentry, mnt, "xattr list", MAY_READ, file); ++} ++ ++static int apparmor_inode_removexattr(struct dentry *dentry, ++ struct vfsmount *mnt, char *name, ++ struct file *file) ++{ ++ return aa_xattr_permission(dentry, mnt, "xattr remove", MAY_WRITE, ++ file); ++} ++ ++static int aa_file_permission(const char *op, struct file *file, int mask) ++{ ++ struct aa_profile *profile; ++ struct aa_profile *file_profile = (struct aa_profile*)file->f_security; ++ int error = 0; ++ ++ if (!file_profile) ++ goto out; ++ ++ /* ++ * If this file was opened under a different profile, we ++ * revalidate the access against the current profile. ++ */ ++ profile = aa_get_profile(current); ++ if (profile && (file_profile != profile || mask & AA_MAY_LOCK)) { ++ struct dentry *dentry = file->f_dentry; ++ struct vfsmount *mnt = file->f_vfsmnt; ++ struct inode *inode = dentry->d_inode; ++ int check = AA_CHECK_FD; ++ ++ /* ++ * FIXME: We should remember which profiles we revalidated ++ * against. 
++ */ ++ if (S_ISDIR(inode->i_mode)) ++ check |= AA_CHECK_DIR; ++ error = aa_permission(op, inode, dentry, mnt, mask, check); ++ } ++ aa_put_profile(profile); ++ ++out: ++ return error; ++} ++ ++static int apparmor_file_permission(struct file *file, int mask) ++{ ++ return aa_file_permission("file_permission", file, ++ aa_mask_permissions(mask)); ++} ++ ++static inline int apparmor_file_lock (struct file *file, unsigned int cmd) ++{ ++ int mask = AA_MAY_LOCK; ++ if (cmd == F_WRLCK) ++ mask |= MAY_WRITE; ++ return aa_file_permission("file_lock", file, mask); ++} ++ ++static int apparmor_file_alloc_security(struct file *file) ++{ ++ struct aa_profile *profile; ++ ++ profile = aa_get_profile(current); ++ if (profile) ++ file->f_security = profile; ++ ++ return 0; ++} ++ ++static void apparmor_file_free_security(struct file *file) ++{ ++ struct aa_profile *file_profile = (struct aa_profile*)file->f_security; ++ ++ aa_put_profile(file_profile); ++} ++ ++static inline int aa_mmap(struct file *file, const char *operation, ++ unsigned long prot, unsigned long flags) ++{ ++ struct dentry *dentry; ++ int mask = 0; ++ ++ if (!file || !file->f_security) ++ return 0; ++ ++ if (prot & PROT_READ) ++ mask |= MAY_READ; ++ /* Private mappings don't require write perms since they don't ++ * write back to the files */ ++ if ((prot & PROT_WRITE) && !(flags & MAP_PRIVATE)) ++ mask |= MAY_WRITE; ++ if (prot & PROT_EXEC) ++ mask |= AA_EXEC_MMAP; ++ ++ dentry = file->f_dentry; ++ return aa_permission(operation, dentry->d_inode, dentry, ++ file->f_vfsmnt, mask, AA_CHECK_FD); ++} ++ ++static int apparmor_file_mmap(struct file *file, unsigned long reqprot, ++ unsigned long prot, unsigned long flags, ++ unsigned long addr, unsigned long addr_only) ++{ ++ if ((addr < mmap_min_addr) && !capable(CAP_SYS_RAWIO)) { ++ struct aa_profile *profile = aa_get_profile(current); ++ if (profile) ++ /* future control check here */ ++ return -EACCES; ++ else ++ return -EACCES; ++ aa_put_profile(profile); ++ } 
++ ++ return aa_mmap(file, "file_mmap", prot, flags); ++} ++ ++static int apparmor_file_mprotect(struct vm_area_struct *vma, ++ unsigned long reqprot, unsigned long prot) ++{ ++ return aa_mmap(vma->vm_file, "file_mprotect", prot, ++ !(vma->vm_flags & VM_SHARED) ? MAP_PRIVATE : 0); ++} ++ ++static int apparmor_task_alloc_security(struct task_struct *task) ++{ ++ return aa_clone(task); ++} ++ ++/* ++ * Called from IRQ context from RCU callback. ++ */ ++static void apparmor_task_free_security(struct task_struct *task) ++{ ++ aa_release(task); ++} ++ ++static int apparmor_socket_create(int family, int type, int protocol, int kern) ++{ ++ struct aa_profile *profile; ++ int error = 0; ++ ++ if (kern) ++ return 0; ++ ++ profile = aa_get_profile(current); ++ if (profile) ++ error = aa_net_perm(profile, "socket_create", family, ++ type, protocol); ++ aa_put_profile(profile); ++ ++ return error; ++} ++ ++static int apparmor_socket_post_create(struct socket *sock, int family, ++ int type, int protocol, int kern) ++{ ++ struct sock *sk = sock->sk; ++ ++ if (kern) ++ return 0; ++ ++ return aa_revalidate_sk(sk, "socket_post_create"); ++} ++ ++static int apparmor_socket_bind(struct socket *sock, ++ struct sockaddr *address, int addrlen) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(sk, "socket_bind"); ++} ++ ++static int apparmor_socket_connect(struct socket *sock, ++ struct sockaddr *address, int addrlen) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(sk, "socket_connect"); ++} ++ ++static int apparmor_socket_listen(struct socket *sock, int backlog) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(sk, "socket_listen"); ++} ++ ++static int apparmor_socket_accept(struct socket *sock, struct socket *newsock) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(sk, "socket_accept"); ++} ++ ++static int apparmor_socket_sendmsg(struct socket *sock, ++ struct msghdr *msg, int size) ++{ ++ struct sock *sk = sock->sk; ++ ++ 
return aa_revalidate_sk(sk, "socket_sendmsg"); ++} ++ ++static int apparmor_socket_recvmsg(struct socket *sock, ++ struct msghdr *msg, int size, int flags) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(sk, "socket_recvmsg"); ++} ++ ++static int apparmor_socket_getsockname(struct socket *sock) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(sk, "socket_getsockname"); ++} ++ ++static int apparmor_socket_getpeername(struct socket *sock) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(sk, "socket_getpeername"); ++} ++ ++static int apparmor_socket_getsockopt(struct socket *sock, int level, ++ int optname) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(sk, "socket_getsockopt"); ++} ++ ++static int apparmor_socket_setsockopt(struct socket *sock, int level, ++ int optname) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(sk, "socket_setsockopt"); ++} ++ ++static int apparmor_socket_shutdown(struct socket *sock, int how) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(sk, "socket_shutdown"); ++} ++ ++static int apparmor_getprocattr(struct task_struct *task, char *name, ++ char **value) ++{ ++ unsigned len; ++ int error; ++ struct aa_profile *profile; ++ ++ /* AppArmor only supports the "current" process attribute */ ++ if (strcmp(name, "current") != 0) ++ return -EINVAL; ++ ++ /* must be task querying itself or admin */ ++ if (current != task && !capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ ++ profile = aa_get_profile(task); ++ error = aa_getprocattr(profile, value, &len); ++ aa_put_profile(profile); ++ if (!error) ++ error = len; ++ ++ return error; ++} ++ ++static int apparmor_setprocattr(struct task_struct *task, char *name, ++ void *value, size_t size) ++{ ++ char *command, *args; ++ int error; ++ ++ if (strcmp(name, "current") != 0 || size == 0 || size >= PAGE_SIZE) ++ return -EINVAL; ++ args = value; ++ args[size] = '\0'; ++ args = strstrip(args); ++ command = 
strsep(&args, " "); ++ if (!args) ++ return -EINVAL; ++ while (isspace(*args)) ++ args++; ++ if (!*args) ++ return -EINVAL; ++ ++ if (strcmp(command, "changehat") == 0) { ++ if (current != task) ++ return -EACCES; ++ error = aa_setprocattr_changehat(args); ++ } else if (strcmp(command, "changeprofile") == 0) { ++ if (current != task) ++ return -EACCES; ++ error = aa_setprocattr_changeprofile(args); ++ } else if (strcmp(command, "setprofile") == 0) { ++ struct aa_profile *profile; ++ ++ /* Only an unconfined process with admin capabilities ++ * may change the profile of another task. ++ */ ++ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EACCES; ++ ++ profile = aa_get_profile(current); ++ if (profile) { ++ struct aa_audit sa; ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = "profile_set"; ++ sa.gfp_mask = GFP_KERNEL; ++ sa.task = task->pid; ++ sa.info = "from confined process"; ++ aa_audit_reject(profile, &sa); ++ aa_put_profile(profile); ++ return -EACCES; ++ } ++ error = aa_setprocattr_setprofile(task, args); ++ } else { ++ struct aa_audit sa; ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = "setprocattr"; ++ sa.gfp_mask = GFP_KERNEL; ++ sa.info = "invalid command"; ++ sa.name = command; ++ sa.task = task->pid; ++ aa_audit_reject(NULL, &sa); ++ return -EINVAL; ++ } ++ ++ if (!error) ++ error = size; ++ return error; ++} ++ ++static int apparmor_task_setrlimit(unsigned int resource, ++ struct rlimit *new_rlim) ++{ ++ struct aa_profile *profile; ++ int error = 0; ++ ++ profile = aa_get_profile(current); ++ if (profile) { ++ error = aa_task_setrlimit(profile, resource, new_rlim); ++ } ++ aa_put_profile(profile); ++ ++ return error; ++} ++ ++int apparmor_register_subsecurity(const char *name, ++ struct security_operations *ops) ++{ ++ int error = 0; ++ ++ if (mutex_lock_interruptible(&aa_secondary_lock)) ++ return -ERESTARTSYS; ++ ++ /* allow dazuko and capability to stack. 
The stacking with ++ * capability is not needed since apparmor already composes ++ * capability using common cap. ++ */ ++ if (!aa_secondary_ops && (strcmp(name, "dazuko") == 0 || ++ strcmp(name, "capability") == 0)){ ++ /* The apparmor module needs to be pinned while a secondary is ++ * registered ++ */ ++ if (try_module_get(aa_module)) { ++ aa_secondary_ops = ops; ++ info_message("Registered secondary security module", ++ name); ++ } else { ++ error = -EINVAL; ++ } ++ } else { ++ info_message("Unable to register %s as a secondary security " ++ "module", name); ++ error = -EPERM; ++ } ++ mutex_unlock(&aa_secondary_lock); ++ return error; ++} ++ ++int apparmor_unregister_subsecurity(const char *name, ++ struct security_operations *ops) ++{ ++ int error = 0; ++ ++ if (mutex_lock_interruptible(&aa_secondary_lock)) ++ return -ERESTARTSYS; ++ ++ if (aa_secondary_ops && aa_secondary_ops == ops) { ++ rcu_assign_pointer(aa_secondary_ops, NULL); ++ synchronize_rcu(); ++ module_put(aa_module); ++ info_message("Unregistered secondary security module", name); ++ } else { ++ info_message("Unable to unregister secondary security module", ++ name); ++ error = -EPERM; ++ } ++ mutex_unlock(&aa_secondary_lock); ++ return error; ++} ++ ++struct security_operations apparmor_ops = { ++ .name = "apparmor", ++ .ptrace = apparmor_ptrace, ++ .capget = cap_capget, ++ .capset_check = cap_capset_check, ++ .capset_set = cap_capset_set, ++ .sysctl = apparmor_sysctl, ++ .capable = apparmor_capable, ++ .syslog = cap_syslog, ++ ++ .bprm_apply_creds = cap_bprm_apply_creds, ++ .bprm_set_security = apparmor_bprm_set_security, ++ .bprm_secureexec = apparmor_bprm_secureexec, ++ ++ .sb_mount = apparmor_sb_mount, ++ .sb_umount = apparmor_umount, ++ ++ .inode_mkdir = apparmor_inode_mkdir, ++ .inode_rmdir = apparmor_inode_rmdir, ++ .inode_create = apparmor_inode_create, ++ .inode_link = apparmor_inode_link, ++ .inode_unlink = apparmor_inode_unlink, ++ .inode_symlink = apparmor_inode_symlink, ++ 
.inode_mknod = apparmor_inode_mknod, ++ .inode_rename = apparmor_inode_rename, ++ .inode_permission = apparmor_inode_permission, ++ .inode_setattr = apparmor_inode_setattr, ++ .inode_setxattr = apparmor_inode_setxattr, ++ .inode_getxattr = apparmor_inode_getxattr, ++ .inode_listxattr = apparmor_inode_listxattr, ++ .inode_removexattr = apparmor_inode_removexattr, ++ .file_permission = apparmor_file_permission, ++ .file_alloc_security = apparmor_file_alloc_security, ++ .file_free_security = apparmor_file_free_security, ++ .file_mmap = apparmor_file_mmap, ++ .file_mprotect = apparmor_file_mprotect, ++ .file_lock = apparmor_file_lock, ++ ++ .task_alloc_security = apparmor_task_alloc_security, ++ .task_free_security = apparmor_task_free_security, ++ .task_post_setuid = cap_task_post_setuid, ++ .task_reparent_to_init = cap_task_reparent_to_init, ++ .task_setrlimit = apparmor_task_setrlimit, ++ ++ .getprocattr = apparmor_getprocattr, ++ .setprocattr = apparmor_setprocattr, ++ ++ .register_security = apparmor_register_subsecurity, ++ ++ .socket_create = apparmor_socket_create, ++ .socket_post_create = apparmor_socket_post_create, ++ .socket_bind = apparmor_socket_bind, ++ .socket_connect = apparmor_socket_connect, ++ .socket_listen = apparmor_socket_listen, ++ .socket_accept = apparmor_socket_accept, ++ .socket_sendmsg = apparmor_socket_sendmsg, ++ .socket_recvmsg = apparmor_socket_recvmsg, ++ .socket_getsockname = apparmor_socket_getsockname, ++ .socket_getpeername = apparmor_socket_getpeername, ++ .socket_getsockopt = apparmor_socket_getsockopt, ++ .socket_setsockopt = apparmor_socket_setsockopt, ++ .socket_shutdown = apparmor_socket_shutdown, ++}; ++ ++void info_message(const char *str, const char *name) ++{ ++ struct aa_audit sa; ++ memset(&sa, 0, sizeof(sa)); ++ sa.gfp_mask = GFP_KERNEL; ++ sa.info = str; ++ sa.name = name; ++ printk(KERN_INFO "AppArmor: %s %s\n", str, name); ++ if (audit_enabled) ++ aa_audit_message(NULL, &sa, AUDIT_APPARMOR_STATUS); ++} ++ ++static 
int __init apparmor_init(void) ++{ ++ int error; ++ ++ if (!apparmor_enabled) { ++ info_message("AppArmor disabled by boottime parameter", ""); ++ return 0; ++ } ++ ++ if ((error = create_apparmorfs())) { ++ AA_ERROR("Unable to activate AppArmor filesystem\n"); ++ goto createfs_out; ++ } ++ ++ if ((error = alloc_default_namespace())){ ++ AA_ERROR("Unable to allocate default profile namespace\n"); ++ goto alloc_out; ++ } ++ ++ if ((error = register_security(&apparmor_ops))) { ++ AA_ERROR("Unable to register AppArmor\n"); ++ goto register_security_out; ++ } ++ ++ /* Report that AppArmor successfully initialized */ ++ apparmor_initialized = 1; ++ if (apparmor_complain) ++ info_message("AppArmor initialized: complainmode enabled", ++ NULL); ++ else ++ info_message("AppArmor initialized", NULL); ++ ++ return error; ++ ++register_security_out: ++ free_default_namespace(); ++ ++alloc_out: ++ destroy_apparmorfs(); ++ ++createfs_out: ++ return error; ++ ++} ++ ++security_initcall(apparmor_init); ++ ++void apparmor_disable(void) ++{ ++ /* Remove and release all the profiles on the profile list. */ ++ mutex_lock(&aa_interface_lock); ++ aa_profile_ns_list_release(); ++ ++ /* FIXME: cleanup profiles references on files */ ++ free_default_namespace(); ++ ++ /* ++ * Delay for an rcu cycle to make sure that all active task ++ * context readers have finished, and all profiles have been ++ * freed by their rcu callbacks. 
++ */ ++ synchronize_rcu(); ++ ++ destroy_apparmorfs(); ++ mutex_unlock(&aa_interface_lock); ++ ++ apparmor_initialized = 0; ++ ++ info_message("AppArmor protection removed", NULL); ++} ++ ++MODULE_DESCRIPTION("AppArmor process confinement"); ++MODULE_AUTHOR("Novell/Immunix, http://bugs.opensuse.org"); ++MODULE_LICENSE("GPL"); +diff -uprN e/security/apparmor/main.c f/security/apparmor/main.c +--- e/security/apparmor/main.c 1970-01-01 00:00:00.000000000 +0000 ++++ f/security/apparmor/main.c 2008-05-28 20:29:29.410207000 +0000 +@@ -0,0 +1,1689 @@ ++/* ++ * Copyright (C) 2002-2007 Novell/SUSE ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation, version 2 of the ++ * License. ++ * ++ * AppArmor Core ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "apparmor.h" ++ ++#include "inline.h" ++ ++/* ++ * Table of capability names: we generate it from capabilities.h. 
++ */ ++static const char *capability_names[] = { ++#include "capability_names.h" ++}; ++ ++struct aa_namespace *default_namespace; ++ ++static int aa_inode_mode(struct inode *inode) ++{ ++ /* if the inode doesn't exist the user is creating it */ ++ if (!inode || current->fsuid == inode->i_uid) ++ return AA_USER_SHIFT; ++ return AA_OTHER_SHIFT; ++} ++ ++int alloc_default_namespace(void) ++{ ++ struct aa_namespace *ns; ++ char *name = kstrdup("default", GFP_KERNEL); ++ if (!name) ++ return -ENOMEM; ++ ns = alloc_aa_namespace(name); ++ if (!ns) { ++ kfree(name); ++ return -ENOMEM; ++ } ++ ++ write_lock(&profile_ns_list_lock); ++ default_namespace = ns; ++ aa_get_namespace(ns); ++ list_add(&ns->list, &profile_ns_list); ++ write_unlock(&profile_ns_list_lock); ++ ++ return 0; ++} ++ ++void free_default_namespace(void) ++{ ++ write_lock(&profile_ns_list_lock); ++ list_del_init(&default_namespace->list); ++ write_unlock(&profile_ns_list_lock); ++ aa_put_namespace(default_namespace); ++ default_namespace = NULL; ++} ++ ++static void aa_audit_file_sub_mask(struct audit_buffer *ab, char *buffer, ++ int mask) ++{ ++ const char unsafex[] = "upcn"; ++ const char safex[] = "UPCN"; ++ char *m = buffer; ++ ++ if (mask & AA_EXEC_MMAP) ++ *m++ = 'm'; ++ if (mask & MAY_READ) ++ *m++ = 'r'; ++ if (mask & MAY_WRITE) ++ *m++ = 'w'; ++ else if (mask & MAY_APPEND) ++ *m++ = 'a'; ++ if (mask & MAY_EXEC) { ++ int index = AA_EXEC_INDEX(mask); ++ /* all indexes > 4 are also named transitions */ ++ if (index > 4) ++ index = 4; ++ if (index > 0) { ++ if (mask & AA_EXEC_UNSAFE) ++ *m++ = unsafex[index - 1]; ++ else ++ *m++ = safex[index - 1]; ++ } ++ if (mask & AA_EXEC_INHERIT) ++ *m++ = 'i'; ++ *m++ = 'x'; ++ } ++ if (mask & AA_MAY_LINK) ++ *m++ = 'l'; ++ if (mask & AA_MAY_LOCK) ++ *m++ = 'k'; ++ *m++ = '\0'; ++} ++ ++static void aa_audit_file_mask(struct audit_buffer *ab, const char *name, ++ int mask) ++{ ++ char user[10], other[10]; ++ ++ aa_audit_file_sub_mask(ab, user, ++ (mask & 
AA_USER_PERMS) >> AA_USER_SHIFT); ++ aa_audit_file_sub_mask(ab, other, ++ (mask & AA_OTHER_PERMS) >> AA_OTHER_SHIFT); ++ ++ audit_log_format(ab, " %s=\"%s::%s\"", name, user, other); ++} ++ ++static const char *address_families[] = { ++#include "af_names.h" ++}; ++ ++static const char *sock_types[] = { ++ "unknown(0)", ++ "stream", ++ "dgram", ++ "raw", ++ "rdm", ++ "seqpacket", ++ "dccp", ++ "unknown(7)", ++ "unknown(8)", ++ "unknown(9)", ++ "packet", ++}; ++ ++/** ++ * aa_audit - Log an audit event to the audit subsystem ++ * @profile: profile to check against ++ * @sa: audit event ++ * @audit_cxt: audit context to log message to ++ * @type: audit event number ++ */ ++static int aa_audit_base(struct aa_profile *profile, struct aa_audit *sa, ++ struct audit_context *audit_cxt, int type) ++{ ++ struct audit_buffer *ab = NULL; ++ ++ ab = audit_log_start(audit_cxt, sa->gfp_mask, type); ++ ++ if (!ab) { ++ AA_ERROR("Unable to log event (%d) to audit subsys\n", ++ type); ++ /* don't fail operations in complain mode even if logging ++ * fails */ ++ return type == AUDIT_APPARMOR_ALLOWED ? 0 : -ENOMEM; ++ } ++ ++ if (sa->operation) ++ audit_log_format(ab, "operation=\"%s\"", sa->operation); ++ ++ if (sa->info) { ++ audit_log_format(ab, " info=\"%s\"", sa->info); ++ if (sa->error_code) ++ audit_log_format(ab, " error=%d", sa->error_code); ++ } ++ ++ if (sa->request_mask) ++ aa_audit_file_mask(ab, "requested_mask", sa->request_mask); ++ ++ if (sa->denied_mask) ++ aa_audit_file_mask(ab, "denied_mask", sa->denied_mask); ++ ++ if (sa->request_mask) ++ audit_log_format(ab, " fsuid=%d", current->fsuid); ++ ++ if (sa->rlimit) ++ audit_log_format(ab, " rlimit=%d", sa->rlimit - 1); ++ ++ if (sa->iattr) { ++ struct iattr *iattr = sa->iattr; ++ ++ audit_log_format(ab, " attribute=\"%s%s%s%s%s%s%s\"", ++ iattr->ia_valid & ATTR_MODE ? "mode," : "", ++ iattr->ia_valid & ATTR_UID ? "uid," : "", ++ iattr->ia_valid & ATTR_GID ? "gid," : "", ++ iattr->ia_valid & ATTR_SIZE ? 
"size," : "", ++ iattr->ia_valid & (ATTR_ATIME | ATTR_ATIME_SET) ? ++ "atime," : "", ++ iattr->ia_valid & (ATTR_MTIME | ATTR_MTIME_SET) ? ++ "mtime," : "", ++ iattr->ia_valid & ATTR_CTIME ? "ctime," : ""); ++ } ++ ++ if (sa->task) ++ audit_log_format(ab, " task=%d", sa->task); ++ ++ if (sa->parent) ++ audit_log_format(ab, " parent=%d", sa->parent); ++ ++ if (sa->name) { ++ audit_log_format(ab, " name="); ++ audit_log_untrustedstring(ab, sa->name); ++ } ++ ++ if (sa->name2) { ++ audit_log_format(ab, " name2="); ++ audit_log_untrustedstring(ab, sa->name2); ++ } ++ ++ if (sa->family || sa->type) { ++ if (address_families[sa->family]) ++ audit_log_format(ab, " family=\"%s\"", ++ address_families[sa->family]); ++ else ++ audit_log_format(ab, " family=\"unknown(%d)\"", ++ sa->family); ++ ++ if (sock_types[sa->type]) ++ audit_log_format(ab, " sock_type=\"%s\"", ++ sock_types[sa->type]); ++ else ++ audit_log_format(ab, " sock_type=\"unknown(%d)\"", ++ sa->type); ++ ++ audit_log_format(ab, " protocol=%d", sa->protocol); ++ } ++ ++ audit_log_format(ab, " pid=%d", current->pid); ++ ++ if (profile) { ++ audit_log_format(ab, " profile="); ++ audit_log_untrustedstring(ab, profile->name); ++ ++ if (profile->ns != default_namespace) { ++ audit_log_format(ab, " namespace="); ++ audit_log_untrustedstring(ab, profile->ns->name); ++ } ++ } ++ ++ audit_log_end(ab); ++ ++ return type == AUDIT_APPARMOR_ALLOWED ? 
0 : sa->error_code; ++} ++ ++/** ++ * aa_audit_syscallreject - Log a syscall rejection to the audit subsystem ++ * @profile: profile to check against ++ * @gfp: memory allocation flags ++ * @msg: string describing syscall being rejected ++ */ ++int aa_audit_syscallreject(struct aa_profile *profile, gfp_t gfp, ++ const char *msg) ++{ ++ struct aa_audit sa; ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = "syscall"; ++ sa.name = msg; ++ sa.gfp_mask = gfp; ++ sa.error_code = -EPERM; ++ ++ return aa_audit_base(profile, &sa, current->audit_context, ++ AUDIT_APPARMOR_DENIED); ++} ++ ++int aa_audit_message(struct aa_profile *profile, struct aa_audit *sa, ++ int type) ++{ ++ struct audit_context *audit_cxt; ++ ++ audit_cxt = apparmor_logsyscall ? current->audit_context : NULL; ++ return aa_audit_base(profile, sa, audit_cxt, type); ++} ++ ++void aa_audit_hint(struct aa_profile *profile, struct aa_audit *sa) ++{ ++ aa_audit_message(profile, sa, AUDIT_APPARMOR_HINT); ++} ++ ++void aa_audit_status(struct aa_profile *profile, struct aa_audit *sa) ++{ ++ aa_audit_message(profile, sa, AUDIT_APPARMOR_STATUS); ++} ++ ++int aa_audit_reject(struct aa_profile *profile, struct aa_audit *sa) ++{ ++ return aa_audit_message(profile, sa, AUDIT_APPARMOR_DENIED); ++} ++ ++/** ++ * aa_audit - Log an audit event to the audit subsystem ++ * @profile: profile to check against ++ * @sa: audit event ++ */ ++int aa_audit(struct aa_profile *profile, struct aa_audit *sa) ++{ ++ int type = AUDIT_APPARMOR_DENIED; ++ struct audit_context *audit_cxt; ++ ++ if (likely(!sa->error_code)) ++ type = AUDIT_APPARMOR_AUDIT; ++ else if (PROFILE_COMPLAIN(profile)) ++ type = AUDIT_APPARMOR_ALLOWED; ++ ++ audit_cxt = apparmor_logsyscall ? 
current->audit_context : NULL; ++ return aa_audit_base(profile, sa, audit_cxt, type); ++} ++ ++static int aa_audit_file(struct aa_profile *profile, struct aa_audit *sa) ++{ ++ if (likely(!sa->error_code)) { ++ int mask = sa->audit_mask & AUDIT_FILE_MASK; ++ ++ if (unlikely(PROFILE_AUDIT(profile))) ++ mask |= AUDIT_FILE_MASK; ++ ++ if (likely(!(sa->request_mask & mask))) ++ return 0; ++ ++ /* mask off perms that are not being force audited */ ++ sa->request_mask &= mask | ALL_AA_EXEC_TYPE; ++ } else { ++ int mask = AUDIT_QUIET_MASK(sa->audit_mask); ++ ++ if (!(sa->denied_mask & ~mask)) ++ return sa->error_code; ++ ++ /* mask off perms whose denial is being silenced */ ++ sa->denied_mask &= (~mask) | ALL_AA_EXEC_TYPE; ++ } ++ ++ return aa_audit(profile, sa); ++} ++ ++static int aa_audit_caps(struct aa_profile *profile, struct aa_audit *sa, ++ int cap) ++{ ++ if (likely(!sa->error_code)) { ++ if (likely(!PROFILE_AUDIT(profile) && ++ !cap_raised(profile->audit_caps, cap))) ++ return 0; ++ } ++ ++ /* quieting of capabilities is handled the caps_logged cache */ ++ return aa_audit(profile, sa); ++} ++ ++/** ++ * aa_file_denied - check for @mask access on a file ++ * @profile: profile to check against ++ * @name: pathname of file ++ * @mask: permission mask requested for file ++ * @audit_mask: return audit mask for the match ++ * ++ * Return %0 on success, or else the permissions in @mask that the ++ * profile denies. 
++ */
++static int aa_file_denied(struct aa_profile *profile, const char *name,
++			  int mask, int *audit_mask)
++{
++	return (mask & ~aa_match(profile->file_rules, name, audit_mask));
++}
++
++/**
++ * aa_link_denied - check for permission to link a file
++ * @profile: profile to check against
++ * @link: pathname of link being created
++ * @target: pathname of target to be linked to
++ * @target_mode: UGO shift for target inode
++ * @request_mask: the permissions subset valid only if link succeeds
++ * @audit_mask: return the audit_mask for the link permission
++ *
++ * Return %0 on success, or else the permissions that the profile denies.
++ */
++static int aa_link_denied(struct aa_profile *profile, const char *link,
++			  const char *target, int target_mode,
++			  int *request_mask, int *audit_mask)
++{
++	unsigned int state;
++	int l_mode, t_mode, l_x, t_x, denied_mask = 0;
++	int link_mask = AA_MAY_LINK << target_mode;
++
++	*request_mask = link_mask;
++
++	l_mode = aa_match_state(profile->file_rules, DFA_START, link, &state);
++
++	if (l_mode & link_mask) {
++		int mode;
++		/* test to see if target can be paired with link */
++		state = aa_dfa_null_transition(profile->file_rules, state);
++		mode = aa_match_state(profile->file_rules, state, target,
++				      &state);
++
++		if (!(mode & link_mask))
++			denied_mask |= link_mask;
++
++		*audit_mask = dfa_audit_mask(profile->file_rules, state);
++
++		/* return if link subset test is not required */
++		if (!(mode & (AA_LINK_SUBSET_TEST << target_mode)))
++			return denied_mask;
++	}
++
++	/* Do link perm subset test requiring permission on link are a
++	 * subset of the permissions on target.
++	 * If a subset test is required a permission subset test of the
++	 * perms for the link are done against the user::other of the
++	 * target's 'r', 'w', 'x', 'a', 'k', and 'm' permissions.
++	 *
++	 * If the link has 'x', an exact match of all the execute flags
++	 * must match.
++	 */
++	denied_mask |= ~l_mode & link_mask;
++
++	t_mode = aa_match(profile->file_rules, target, NULL);
++
++	l_x = l_mode & (ALL_AA_EXEC_TYPE | AA_EXEC_BITS);
++	t_x = t_mode & (ALL_AA_EXEC_TYPE | AA_EXEC_BITS);
++
++	/* For actual subset test ignore valid-profile-transition flags,
++	 * and link bits
++	 */
++	l_mode &= AA_FILE_PERMS & ~AA_LINK_BITS;
++	t_mode &= AA_FILE_PERMS & ~AA_LINK_BITS;
++
++	*request_mask = l_mode | link_mask;
++
++	if (l_mode) {
++		int x = l_x | (t_x & ALL_AA_EXEC_UNSAFE);
++		denied_mask |= l_mode & ~t_mode;
++		/* mask off x modes not used by link */
++
++		/* handle exec subset
++		 * - link safe exec issubset of unsafe exec
++		 * - no link x perm is subset of target having x perm
++		 *
++		 * NOTE(review): the two assignments below overwrite (rather
++		 * than OR into) the denials accumulated so far; the result is
++		 * still non-zero, but confirm that dropping the earlier bits
++		 * from the reported mask is intended.
++		 */
++		if ((l_mode & AA_USER_EXEC) &&
++		    (x & AA_USER_EXEC_TYPE) != (t_x & AA_USER_EXEC_TYPE))
++			denied_mask = AA_USER_EXEC | (l_x & AA_USER_EXEC_TYPE);
++		if ((l_mode & AA_OTHER_EXEC) &&
++		    (x & AA_OTHER_EXEC_TYPE) != (t_x & AA_OTHER_EXEC_TYPE))
++			denied_mask = AA_OTHER_EXEC | (l_x & AA_OTHER_EXEC_TYPE);
++	}
++
++	return denied_mask;
++}
++
++/**
++ * aa_get_name - compute the pathname of a file
++ * @dentry: dentry of the file
++ * @mnt: vfsmount of the file
++ * @buffer: buffer that aa_get_name() allocated
++ * @check: AA_CHECK_DIR is set if the file is a directory
++ *
++ * Returns a pointer to the beginning of the pathname (which usually differs
++ * from the beginning of the buffer), or an error code.
++ *
++ * On success *@buffer owns the allocation and must be released with
++ * aa_put_name_buffer().  On failure *@buffer is left untouched and nothing
++ * remains allocated.
++ *
++ * We need @check to indicate whether the file is a directory or not because
++ * the file may not yet exist, and so we cannot check the inode's file type.
++ */
++static char *aa_get_name(struct dentry *dentry, struct vfsmount *mnt,
++			 char **buffer, int check)
++{
++	char *name;
++	int is_dir, size = 256;
++
++	is_dir = (check & AA_CHECK_DIR) ? 1 : 0;
++
++	for (;;) {
++		char *buf = kmalloc(size, GFP_KERNEL);
++		if (!buf)
++			return ERR_PTR(-ENOMEM);
++
++		/* reserve one byte so a trailing "/" can be appended */
++		name = d_namespace_path(dentry, mnt, buf, size - is_dir);
++		if (!IS_ERR(name)) {
++			if (name[0] != '/') {
++				/*
++				 * This dentry is not connected to the
++				 * namespace root -- reject access.
++				 */
++				kfree(buf);
++				return ERR_PTR(-ENOENT);
++			}
++			if (is_dir && name[1] != '\0') {
++				/*
++				 * Append "/" to the pathname.  The root
++				 * directory is a special case; it already
++				 * ends in slash.
++				 */
++				buf[size - 2] = '/';
++				buf[size - 1] = '\0';
++			}
++
++			*buffer = buf;
++			return name;
++		}
++
++		/*
++		 * Every remaining path is a failure for this buffer: free it
++		 * before deciding whether to retry, so that errors other than
++		 * -ENAMETOOLONG do not leak the allocation.  @name is an
++		 * ERR_PTR here and does not point into @buf.
++		 */
++		kfree(buf);
++		if (PTR_ERR(name) != -ENAMETOOLONG)
++			return name;
++
++		size <<= 1;
++		if (size > apparmor_path_max)
++			return ERR_PTR(-ENAMETOOLONG);
++	}
++}
++
++/*
++ * new_compound_name - allocate the string "@n1//@n2"
++ *
++ * Returns the kmalloc()ed name, or NULL if the allocation failed.  The
++ * "+ 3" covers the two-character "//" separator and the terminating NUL.
++ */
++static char *new_compound_name(const char *n1, const char *n2)
++{
++	char *name = kmalloc(strlen(n1) + strlen(n2) + 3, GFP_KERNEL);
++	if (name)
++		sprintf(name, "%s//%s", n1, n2);
++	return name;
++}
++
++/* aa_put_name_buffer - free a name buffer; a NULL @buffer is passed by callers */
++static inline void aa_put_name_buffer(char *buffer)
++{
++	kfree(buffer);
++}
++
++/**
++ * aa_perm_dentry - check if @profile allows the request in @sa for a file
++ * @profile: profile to check against
++ * @dentry: dentry of the file
++ * @mnt: vfsmount of the file
++ * @sa: audit context; @sa->request_mask holds the requested permissions
++ *      and is shifted here by aa_inode_mode() of the inode
++ * @check: kind of check to perform
++ *
++ * Returns 0 upon success, or else an error code.
++ *
++ * @check indicates the file type, and whether the file was accessed through
++ * an open file descriptor (AA_CHECK_FD) or not.
++ */
++static int aa_perm_dentry(struct aa_profile *profile, struct dentry *dentry,
++			  struct vfsmount *mnt, struct aa_audit *sa, int check)
++{
++	int error;
++	char *buffer = NULL;
++
++	sa->name = aa_get_name(dentry, mnt, &buffer, check);
++	sa->request_mask <<= aa_inode_mode(dentry->d_inode);
++	if (IS_ERR(sa->name)) {
++		/*
++		 * deleted files are given a pass on permission checks when
++		 * accessed through a file descriptor.
++		 */
++		if (PTR_ERR(sa->name) == -ENOENT && (check & AA_CHECK_FD))
++			sa->denied_mask = 0;
++		else {
++			sa->denied_mask = sa->request_mask;
++			sa->error_code = PTR_ERR(sa->name);
++			if (sa->error_code == -ENOENT)
++				sa->info = "Failed name resolution - object not a valid entry";
++			else if (sa->error_code == -ENAMETOOLONG)
++				sa->info = "Failed name resolution - name too long";
++			else
++				sa->info = "Failed name resolution";
++		}
++		sa->name = NULL;
++	} else
++		sa->denied_mask = aa_file_denied(profile, sa->name,
++						 sa->request_mask,
++						 &sa->audit_mask);
++
++	/* callers preload error_code; clear it when nothing was denied */
++	if (!sa->denied_mask)
++		sa->error_code = 0;
++
++	error = aa_audit_file(profile, sa);
++	aa_put_name_buffer(buffer);
++
++	return error;
++}
++
++/**
++ * aa_attr - check if attribute change is allowed
++ * @profile: profile to check against
++ * @dentry: dentry of the file to check
++ * @mnt: vfsmount of the file to check
++ * @iattr: attribute changes requested
++ *
++ * The change is checked as a MAY_WRITE request on the file.
++ * Returns 0 on success, or else an error code.
++ */
++int aa_attr(struct aa_profile *profile, struct dentry *dentry,
++	    struct vfsmount *mnt, struct iattr *iattr)
++{
++	struct inode *inode = dentry->d_inode;
++	int error, check;
++	struct aa_audit sa;
++
++	memset(&sa, 0, sizeof(sa));
++	sa.operation = "setattr";
++	sa.gfp_mask = GFP_KERNEL;
++	sa.iattr = iattr;
++	sa.request_mask = MAY_WRITE;
++	sa.error_code = -EACCES;
++
++	check = 0;
++	if (inode && S_ISDIR(inode->i_mode))
++		check |= AA_CHECK_DIR;
++	if (iattr->ia_valid & ATTR_FILE)
++		check |= AA_CHECK_FD;
++
++	error = aa_perm_dentry(profile, dentry, mnt, &sa, check);
++
++	return error;
++}
++
++/**
++ * aa_perm_xattr - check if xattr attribute change is allowed
++ * @profile: profile to check against
++ * @operation: xattr operation being done
++ * @dentry: dentry of the file to check
++ * @mnt: vfsmount of the file to check
++ * @mask: access mode requested
++ * @check: kind of check to perform
++ *
++ * Returns 0 on success, or else an error code.
++ */
++int aa_perm_xattr(struct aa_profile *profile, const char *operation,
++		  struct dentry *dentry, struct vfsmount *mnt, int mask,
++		  int check)
++{
++	struct inode *inode = dentry->d_inode;
++	int error;
++	struct aa_audit sa;
++
++	memset(&sa, 0, sizeof(sa));
++	sa.operation = operation;
++	sa.gfp_mask = GFP_KERNEL;
++	sa.request_mask = mask;
++	sa.error_code = -EACCES;
++
++	if (inode && S_ISDIR(inode->i_mode))
++		check |= AA_CHECK_DIR;
++
++	error = aa_perm_dentry(profile, dentry, mnt, &sa, check);
++
++	return error;
++}
++
++/**
++ * aa_perm - basic apparmor permissions check
++ * @profile: profile to check against
++ * @operation: name of the operation, recorded in the audit record
++ * @dentry: dentry of the file to check
++ * @mnt: vfsmount of the file to check
++ * @mask: access mode requested
++ * @check: kind of check to perform
++ *
++ * Determine if access @mask for the file is authorized by @profile.
++ * An empty @mask is always allowed.
++ * Returns 0 on success, or else an error code.
++ */
++int aa_perm(struct aa_profile *profile, const char *operation,
++	    struct dentry *dentry, struct vfsmount *mnt, int mask, int check)
++{
++	struct aa_audit sa;
++	int error = 0;
++
++	if (mask == 0)
++		goto out;
++
++	memset(&sa, 0, sizeof(sa));
++	sa.operation = operation;
++	sa.gfp_mask = GFP_KERNEL;
++	sa.request_mask = mask;
++	sa.error_code = -EACCES;
++
++	error = aa_perm_dentry(profile, dentry, mnt, &sa, check);
++
++out:
++	return error;
++}
++
++/**
++ * aa_perm_dir
++ * @profile: profile to check against
++ * @dentry: dentry of directory to check
++ * @mnt: vfsmount of directory to check
++ * @operation: directory operation being performed
++ * @mask: access mode requested
++ *
++ * Determine if directory operation (make/remove) for dentry is authorized
++ * by @profile.
++ * Returns 0 on success, or else an error code.
++ */
++int aa_perm_dir(struct aa_profile *profile, const char *operation,
++		struct dentry *dentry, struct vfsmount *mnt, int mask)
++{
++	struct aa_audit sa;
++
++	memset(&sa, 0, sizeof(sa));
++	sa.operation = operation;
++	sa.gfp_mask = GFP_KERNEL;
++	sa.request_mask = mask;
++	sa.error_code = -EACCES;
++
++	return aa_perm_dentry(profile, dentry, mnt, &sa, AA_CHECK_DIR);
++}
++
++/*
++ * aa_perm_path - check @mask against @profile for an already resolved @name
++ *
++ * @uid selects which permission set is tested: the user (owner) bits when it
++ * equals current->fsuid, the "other" bits otherwise.
++ * Returns 0 on success, or else an error code.
++ */
++int aa_perm_path(struct aa_profile *profile, const char *operation,
++		 const char *name, int mask, uid_t uid)
++{
++	struct aa_audit sa;
++
++	memset(&sa, 0, sizeof(sa));
++	sa.operation = operation;
++	sa.gfp_mask = GFP_KERNEL;
++	/* overwritten just below with the shifted mask */
++	sa.request_mask = mask;
++	sa.name = name;
++	if (current->fsuid == uid)
++		sa.request_mask = mask << AA_USER_SHIFT;
++	else
++		sa.request_mask = mask << AA_OTHER_SHIFT;
++
++	sa.denied_mask = aa_file_denied(profile, name, sa.request_mask,
++					&sa.audit_mask) ;
++	sa.error_code = sa.denied_mask ? -EACCES : 0;
++
++	return aa_audit_file(profile, &sa);
++}
++
++/**
++ * aa_capability - test permission to use capability
++ * @cxt: aa_task_context with profile to check against
++ * @cap: capability to be tested
++ *
++ * Look up capability in profile capability set.
++ * Returns 0 on success, or else an error code.
++ */
++int aa_capability(struct aa_task_context *cxt, int cap)
++{
++	int error = cap_raised(cxt->profile->capabilities, cap) ? 0 : -EPERM;
++	struct aa_audit sa;
++
++	/* test if cap has already been logged */
++	if (cap_raised(cxt->caps_logged, cap)) {
++		/* already reported once: skip auditing, complain mode allows */
++		if (PROFILE_COMPLAIN(cxt->profile))
++			error = 0;
++		return error;
++	} else
++		/* don't worry about rcu replacement of the cxt here.
++		 * caps_logged is a cache to reduce the occurrence of
++		 * duplicate messages in the log.  The worst that can
++		 * happen is duplicate capability messages shows up in
++		 * the audit log
++		 */
++		cap_raise(cxt->caps_logged, cap);
++
++	memset(&sa, 0, sizeof(sa));
++	sa.operation = "capable";
++	sa.gfp_mask = GFP_ATOMIC;
++	sa.name = capability_names[cap];
++	sa.error_code = error;
++
++	error = aa_audit_caps(cxt->profile, &sa, cap);
++
++	return error;
++}
++
++/* must be used inside rcu_read_lock or task_lock */
++/* An unconfined tracer (!cxt) or one sharing the tracee's profile is allowed */
++int aa_may_ptrace(struct aa_task_context *cxt, struct aa_profile *tracee)
++{
++	if (!cxt || cxt->profile == tracee)
++		return 0;
++	return aa_capability(cxt, CAP_SYS_PTRACE);
++}
++
++/**
++ * aa_link - hard link check
++ * @profile: profile to check against
++ * @link: dentry of link being created
++ * @link_mnt: vfsmount of link being created
++ * @target: dentry of link target
++ * @target_mnt: vfsmount of link target
++ *
++ * Returns 0 on success, or else an error code.
++ */
++int aa_link(struct aa_profile *profile,
++	    struct dentry *link, struct vfsmount *link_mnt,
++	    struct dentry *target, struct vfsmount *target_mnt)
++{
++	int error;
++	struct aa_audit sa;
++	char *buffer = NULL, *buffer2 = NULL;
++
++	memset(&sa, 0, sizeof(sa));
++	sa.operation = "inode_link";
++	sa.gfp_mask = GFP_KERNEL;
++	sa.name = aa_get_name(link, link_mnt, &buffer, 0);
++	sa.name2 = aa_get_name(target, target_mnt, &buffer2, 0);
++
++	/* if both lookups fail, the target's error code is the one kept */
++	if (IS_ERR(sa.name)) {
++		sa.error_code = PTR_ERR(sa.name);
++		sa.name = NULL;
++	}
++	if (IS_ERR(sa.name2)) {
++		sa.error_code = PTR_ERR(sa.name2);
++		sa.name2 = NULL;
++	}
++
++	if (sa.name && sa.name2) {
++		sa.denied_mask = aa_link_denied(profile, sa.name, sa.name2,
++						aa_inode_mode(target->d_inode),
++						&sa.request_mask,
++						&sa.audit_mask);
++		sa.error_code = sa.denied_mask ? -EACCES : 0;
++	}
++
++	error = aa_audit_file(profile, &sa);
++
++	aa_put_name_buffer(buffer);
++	aa_put_name_buffer(buffer2);
++
++	return error;
++}
++
++/*
++ * aa_net_perm - check a socket operation against the profile's network rules
++ *
++ * Returns -EINVAL for an out-of-range @family or @type, 0 when the operation
++ * is allowed (AF_UNIX and AF_NETLINK are always allowed here), or else an
++ * error code.
++ */
++int aa_net_perm(struct aa_profile *profile, char *operation,
++		int family, int type, int protocol)
++{
++	struct aa_audit sa;
++	int error = 0;
++	u16 family_mask, audit_mask, quiet_mask;
++
++	if ((family < 0) || (family >= AF_MAX))
++		return -EINVAL;
++
++	if ((type < 0) || (type >= SOCK_MAX))
++		return -EINVAL;
++
++	/* unix domain and netlink sockets are handled by ipc */
++	if (family == AF_UNIX || family == AF_NETLINK)
++		return 0;
++
++	family_mask = profile->network_families[family];
++	audit_mask = profile->audit_network[family];
++	quiet_mask = profile->quiet_network[family];
++
++	/* each per-family mask has one bit per socket type */
++	error = (family_mask & (1 << type)) ? 0 : -EACCES;
++
++	memset(&sa, 0, sizeof(sa));
++	sa.operation = operation;
++	sa.gfp_mask = GFP_KERNEL;
++	sa.family = family;
++	sa.type = type;
++	sa.protocol = protocol;
++	sa.error_code = error;
++
++	if (likely(!error)) {
++		/* NOTE(review): this tests every allowed type against
++		 * audit_mask, whereas the quiet test below uses only the
++		 * requested (1 << type) -- confirm the asymmetry is intended.
++		 */
++		if (!PROFILE_AUDIT(profile) && !(family_mask & audit_mask))
++			return 0;
++	} else if (!((1 << type) & ~quiet_mask)) {
++		/* denial of this socket type is quieted: fail without audit */
++		return error;
++	}
++
++	error = aa_audit(profile, &sa);
++
++	return error;
++}
++
++/*
++ * aa_revalidate_sk - recheck @sk's family/type/protocol against the profile
++ * of the current task.  Returns 0 when the task has no profile or when
++ * called from interrupt context, or else the aa_net_perm() result.
++ */
++int aa_revalidate_sk(struct sock *sk, char *operation)
++{
++	struct aa_profile *profile;
++	int error = 0;
++
++	/* this is some debugging code to flush out the network hooks
++	   that are called in interrupt context */
++	if (in_interrupt()) {
++		printk("AppArmor Debug: Hook being called from interrupt context\n");
++		dump_stack();
++		return 0;
++	}
++
++	profile = aa_get_profile(current);
++	if (profile)
++		error = aa_net_perm(profile, operation,
++				    sk->sk_family, sk->sk_type,
++				    sk->sk_protocol);
++	aa_put_profile(profile);
++
++	return error;
++}
++/**
++ * aa_task_setrlimit - test permission to set an rlimit
++ * @profile - profile confining the task
++ * @resource - the resource being set
++ * @new_rlim - the new resource limit
++ *
++ * Control
raising the processes hard limit.
++ */
++int aa_task_setrlimit(struct aa_profile *profile, unsigned int resource,
++		      struct rlimit *new_rlim)
++{
++	struct aa_audit sa;
++
++	/* resources the profile does not limit are never rejected here */
++	if (!(profile->rlimits.mask & (1 << resource)))
++		return 0;
++
++	/* staying at or below the profile's hard limit is always fine */
++	if (new_rlim->rlim_max <= profile->rlimits.limits[resource].rlim_max)
++		return 0;
++
++	/* attempt to raise the hard limit past the profile's: audit it */
++	memset(&sa, 0, sizeof(sa));
++	sa.operation = "setrlimit";
++	sa.gfp_mask = GFP_KERNEL;
++	sa.rlimit = resource + 1;
++	sa.error_code = -EACCES;
++
++	return aa_audit(profile, &sa);
++}
++
++/*
++ * aa_rlimit_nproc - check the per-profile task count limit
++ *
++ * Returns -EAGAIN when @profile limits RLIMIT_NPROC and its task_count has
++ * reached that limit, 0 otherwise (including for a NULL @profile).
++ */
++static int aa_rlimit_nproc(struct aa_profile *profile)
++{
++	if (!profile)
++		return 0;
++	if (!(profile->rlimits.mask & (1 << RLIMIT_NPROC)))
++		return 0;
++	if (profile->task_count < profile->rlimits.limits[RLIMIT_NPROC].rlim_max)
++		return 0;
++	return -EAGAIN;
++}
++
++/*
++ * aa_set_rlimits - lower @task's hard limits to those set by @profile
++ *
++ * Only limits that the profile sets and that are below the task's current
++ * hard limit are applied; the soft limit is lowered with the hard limit
++ * when it would otherwise exceed it.
++ */
++void aa_set_rlimits(struct task_struct *task, struct aa_profile *profile)
++{
++	int i;
++
++	if (!profile || !profile->rlimits.mask)
++		return;
++
++	task_lock(task->group_leader);
++	for (i = 0; i < RLIM_NLIMITS; i++) {
++		struct rlimit *rlim = task->signal->rlim + i;
++
++		/* check to see if NPROC which is per profile and handled
++		 * in clone/exec or whether this is a limit to be set
++		 * can't set cpu limit either right now
++		 */
++		if (i == RLIMIT_NPROC || i == RLIMIT_CPU)
++			continue;
++
++		if (!(profile->rlimits.mask & (1 << i)))
++			continue;
++		if (profile->rlimits.limits[i].rlim_max >= rlim->rlim_max)
++			continue;
++
++		rlim->rlim_max = profile->rlimits.limits[i].rlim_max;
++		/* soft limit should not exceed hard limit */
++		if (rlim->rlim_cur > rlim->rlim_max)
++			rlim->rlim_cur = rlim->rlim_max;
++	}
++	task_unlock(task->group_leader);
++}
++
++/*******************************
++ * Global task related functions
++ *******************************/
++
++/**
++ * aa_clone - initialize the task context for a new task
++ * @child: task that is being created
++ *
++ * Returns 0 on
success, or else an error code.
++ */
++int aa_clone(struct task_struct *child)
++{
++	struct aa_audit sa;
++	struct aa_task_context *cxt, *child_cxt;
++	struct aa_profile *profile;
++
++	if (!aa_task_context(current))
++		return 0;
++	child_cxt = aa_alloc_task_context(GFP_KERNEL);
++	if (!child_cxt)
++		return -ENOMEM;
++
++	memset(&sa, 0, sizeof(sa));
++	sa.operation = "clone";
++	sa.task = child->pid;
++	sa.gfp_mask = GFP_KERNEL;
++
++repeat:
++	profile = aa_get_profile(current);
++	if (profile) {
++		lock_profile(profile);
++		cxt = aa_task_context(current);
++		if (unlikely(profile->isstale || !cxt ||
++			     cxt->profile != profile)) {
++			/**
++			 * Race with profile replacement or removal, or with
++			 * task context removal.
++			 */
++			unlock_profile(profile);
++			aa_put_profile(profile);
++			goto repeat;
++		}
++
++		if (aa_rlimit_nproc(profile)) {
++			sa.info = "rlimit nproc limit exceeded";
++			unlock_profile(profile);
++			aa_audit_reject(profile, &sa);
++			aa_put_profile(profile);
++			/* child_cxt was never attached to @child: free it */
++			aa_free_task_context(child_cxt);
++			return -EAGAIN;
++		}
++
++		/* No need to grab the child's task lock here. */
++		aa_change_task_context(child, child_cxt, profile,
++				       cxt->cookie, cxt->previous_profile);
++
++		unlock_profile(profile);
++
++		if (APPARMOR_COMPLAIN(child_cxt) &&
++		    profile == profile->ns->null_complain_profile) {
++			aa_audit_hint(profile, &sa);
++		}
++		aa_put_profile(profile);
++	} else
++		aa_free_task_context(child_cxt);
++
++	return 0;
++}
++
++/*
++ * aa_register_find - look up the profile to attach on exec
++ * @profile: current profile, or NULL if the task is unconfined
++ * @ns_name: namespace to search, or NULL to use @profile's namespace
++ *           (default_namespace when @profile is NULL)
++ * @name: name of the profile to find
++ * @mandatory: non-zero if a missing profile is an error
++ * @complain: non-zero if @profile is in complain mode
++ * @sa: audit record, filled in on failure
++ *
++ * Returns the profile found, NULL if there is none and that is acceptable,
++ * or an ERR_PTR.  In complain mode a missing mandatory profile yields the
++ * namespace's null_complain_profile instead of an error.
++ */
++static struct aa_profile *
++aa_register_find(struct aa_profile *profile, const char* ns_name,
++		 const char *name, int mandatory, int complain,
++		 struct aa_audit *sa)
++{
++	struct aa_namespace *ns;
++	struct aa_profile *new_profile;
++	int ns_ref = 0;
++
++	if (profile)
++		ns = profile->ns;
++	else
++		ns = default_namespace;
++
++	if (ns_name) {
++		/* locate the profile namespace */
++		ns = aa_find_namespace(ns_name);
++		if (!ns) {
++			if (mandatory) {
++				sa->info = "profile namespace not found";
++				sa->denied_mask = sa->request_mask;
++				sa->error_code = -ENOENT;
++				return ERR_PTR(-ENOENT);
++			} else {
++				return NULL;
++			}
++		}
++		/* remember to drop the reference taken by the lookup */
++		ns_ref++;
++	}
++
++	/* Locate new profile */
++	new_profile = aa_find_profile(ns, name);
++
++	if (new_profile) {
++		AA_DEBUG("%s: setting profile %s\n",
++			 __FUNCTION__, new_profile->name);
++	} else if (mandatory && profile) {
++		sa->info = "mandatory profile missing";
++		sa->denied_mask = sa->request_mask;	/* shifted MAY_EXEC */
++		if (complain) {
++			aa_audit_hint(profile, sa);
++			new_profile =
++			    aa_dup_profile(profile->ns->null_complain_profile);
++		} else {
++			sa->error_code = -EACCES;
++			if (ns_ref)
++				aa_put_namespace(ns);
++			return ERR_PTR(-EACCES);
++		}
++	} else {
++		/* Only way we can get into this code is if task
++		 * is unconfined, pix, nix.
++		 */
++		AA_DEBUG("%s: No profile found for exec image '%s'\n",
++			 __FUNCTION__,
++			 name);
++	}
++	if (ns_ref)
++		aa_put_namespace(ns);
++	return new_profile;
++}
++
++/*
++ * aa_x_to_profile - map an exec mode to the profile to transition to
++ * @profile: current profile
++ * @filename: pathname of the image being executed
++ * @xmode: exec permission bits, already shifted down by the caller
++ * @sa: audit record
++ * @child: returns the compound "profile//filename" name allocated for
++ *         AA_EXEC_CHILD, to be freed by the caller; NULL otherwise
++ *
++ * Returns the new profile, NULL when no profile applies, or an ERR_PTR
++ * carrying the result of auditing the failure.
++ */
++static struct aa_profile *
++aa_x_to_profile(struct aa_profile *profile, const char *filename, int xmode,
++		struct aa_audit *sa, char **child)
++{
++	struct aa_profile *new_profile = NULL;
++	int ix = xmode & AA_EXEC_INHERIT;
++	int complain = PROFILE_COMPLAIN(profile);
++	int index;
++
++	*child = NULL;
++	switch (xmode & AA_EXEC_MODIFIERS) {
++	case 0:
++		/* only valid with ix flag */
++		ix = 1;
++		break;
++	case AA_EXEC_UNCONFINED:
++		/* only valid without ix flag */
++		ix = 0;
++		break;
++	case AA_EXEC_PROFILE:
++		new_profile = aa_register_find(profile, NULL, filename, !ix,
++					       complain, sa);
++		break;
++	case AA_EXEC_CHILD:
++		*child = new_compound_name(profile->name, filename);
++		sa->name2 = *child;
++		if (!*child) {
++			sa->info = "Failed name resolution - exec failed";
++			sa->error_code = -ENOMEM;
++			new_profile = ERR_PTR(-ENOMEM);
++		} else {
++			new_profile = aa_register_find(profile, NULL, *child,
++						       !ix, complain, sa);
++		}
++		break;
++	default:
++		/* all other indexes are named transitions */
++		index = AA_EXEC_INDEX(xmode);
++		/*
++		 * Table slot is index - 4; valid slots are
++		 * 0 .. exec_table_size - 1, so a slot equal to the table
++		 * size is already out of bounds.
++		 */
++		if (index - 4 >= profile->exec_table_size) {
++			sa->info = "invalid named transition - exec failed";
++			sa->error_code = -EACCES;
++			new_profile = ERR_PTR(-EACCES);
++		} else {
++			char *ns_name = NULL;
++			char *name = profile->exec_table[index - 4];
++			/* a leading ':' marks "<ns>\0<name>" in the entry */
++			if (*name == ':') {
++				ns_name = name + 1;
++				name = ns_name + strlen(ns_name) + 1;
++			}
++			sa->name2 = name;
++			sa->name3 = ns_name;
++			new_profile =
++				aa_register_find(profile, ns_name, name,
++						 !ix, complain, sa);
++		}
++	}
++	if (IS_ERR(new_profile))
++		/* all these failures must be audited - no quieting */
++		return ERR_PTR(aa_audit_reject(profile, sa));
++	return new_profile;
++}
++
++/**
++ * aa_register - register a new program
++ * @bprm: binprm of program being registered
++ *
++ * Try to register a new
program during execve().  This should give the
++ * new program a valid aa_task_context if confined.
++ *
++ * Returns 0 on success, or else an error code.
++ */
++int aa_register(struct linux_binprm *bprm)
++{
++	const char *filename;
++	char *buffer = NULL, *child = NULL;
++	struct file *filp = bprm->file;
++	struct aa_profile *profile, *old_profile, *new_profile = NULL;
++	int exec_mode, complain = 0, shift;
++	struct aa_audit sa;
++
++	AA_DEBUG("%s\n", __FUNCTION__);
++
++	/* reference held until cleanup, or until dropped explicitly below */
++	profile = aa_get_profile(current);
++
++	shift = aa_inode_mode(filp->f_dentry->d_inode);
++	memset(&sa, 0, sizeof(sa));
++	sa.operation = "exec";
++	sa.gfp_mask = GFP_KERNEL;
++	sa.request_mask = MAY_EXEC << shift;
++
++	filename = aa_get_name(filp->f_dentry, filp->f_vfsmnt, &buffer, 0);
++	if (IS_ERR(filename)) {
++		if (profile) {
++			sa.info = "Failed name resolution - exec failed";
++			sa.error_code = PTR_ERR(filename);
++			aa_audit_file(profile, &sa);
++			/* do not leak the reference taken above */
++			aa_put_profile(profile);
++			return sa.error_code;
++		} else
++			return 0;
++	}
++	sa.name = filename;
++
++	exec_mode = AA_EXEC_UNSAFE << shift;
++
++repeat:
++	if (profile) {
++		complain = PROFILE_COMPLAIN(profile);
++
++		/* Confined task, determine what mode inherit, unconfined or
++		 * mandatory to load new profile
++		 */
++		exec_mode = aa_match(profile->file_rules, filename,
++				     &sa.audit_mask);
++
++
++		if (exec_mode & sa.request_mask) {
++			int xm = exec_mode >> shift;
++			new_profile = aa_x_to_profile(profile, filename,
++						      xm, &sa, &child);
++
++			if (!new_profile && (xm & AA_EXEC_INHERIT))
++				/* (p|c|n|)ix - don't change profile */
++				goto cleanup;
++			/* error case caught below */
++
++		} else if (sa.request_mask & AUDIT_QUIET_MASK(sa.audit_mask)) {
++			/* quiet failed exit */
++			new_profile = ERR_PTR(-EACCES);
++		} else if (complain) {
++			/* There was no entry in calling profile
++			 * describing mode to execute image in.
++			 * Drop into null-profile (disabling secure exec).
++			 */
++			new_profile =
++			    aa_dup_profile(profile->ns->null_complain_profile);
++			exec_mode |= AA_EXEC_UNSAFE << shift;
++		} else {
++			sa.denied_mask = sa.request_mask;
++			sa.error_code = -EACCES;
++			new_profile = ERR_PTR(aa_audit_file(profile, &sa));
++		}
++	} else {
++		/* Unconfined task, load profile if it exists */
++		new_profile = aa_register_find(NULL, NULL, filename, 0, 0, &sa);
++		if (new_profile == NULL)
++			goto cleanup;
++	}
++
++	if (IS_ERR(new_profile))
++		goto cleanup;
++
++	old_profile = __aa_replace_profile(current, new_profile);
++	if (IS_ERR(old_profile)) {
++		aa_put_profile(new_profile);
++		if (PTR_ERR(old_profile) == -ESTALE) {
++			/*
++			 * Retry against the task's current profile.  Drop
++			 * what the stale pass allocated first:
++			 * aa_x_to_profile() resets @child on entry, so the
++			 * old buffer would otherwise be lost.
++			 */
++			if (sa.name2 == child)
++				sa.name2 = NULL;
++			aa_put_name_buffer(child);
++			child = NULL;
++			aa_put_profile(profile);
++			profile = aa_get_profile(current);
++			goto repeat;
++		}
++		/* @profile is still referenced here; cleanup releases it */
++		if (PTR_ERR(old_profile) == -EPERM) {
++			sa.denied_mask = sa.request_mask;
++			sa.info = "unable to set profile due to ptrace";
++			sa.task = current->parent->pid;
++			aa_audit_reject(profile, &sa);
++		}
++		if (PTR_ERR(old_profile) == -EAGAIN) {
++			sa.info = "rlimit nproc limit exceeded";
++			aa_audit_reject(profile, &sa);
++		}
++		new_profile = old_profile;
++		goto cleanup;
++	}
++	aa_put_profile(old_profile);
++	aa_put_profile(profile);
++	/* already released: keep cleanup from dropping it a second time */
++	profile = NULL;
++
++	/* Handle confined exec.
++	 * Can be at this point for the following reasons:
++	 * 1. unconfined switching to confined
++	 * 2. confined switching to different confinement
++	 * 3. confined switching to unconfined
++	 *
++	 * Cases 2 and 3 are marked as requiring secure exec
++	 * (unless policy specified "unsafe exec")
++	 */
++	if (!(exec_mode & (AA_EXEC_UNSAFE << shift))) {
++		unsigned long bprm_flags;
++
++		bprm_flags = AA_SECURE_EXEC_NEEDED;
++		bprm->security = (void*)
++			((unsigned long)bprm->security | bprm_flags);
++	}
++
++	if (complain && new_profile &&
++	    new_profile == new_profile->ns->null_complain_profile) {
++		sa.request_mask = 0;
++		sa.name = NULL;
++		sa.info = "set profile";
++		aa_audit_hint(new_profile, &sa);
++	}
++
++cleanup:
++	/* every exit from here on drops the reference taken at entry */
++	aa_put_profile(profile);
++	aa_put_name_buffer(child);
++	aa_put_name_buffer(buffer);
++	if (IS_ERR(new_profile))
++		return PTR_ERR(new_profile);
++	aa_put_profile(new_profile);
++	return 0;
++}
++
++/**
++ * aa_release - release a task context
++ * @task: task being released
++ *
++ * This is called after a task has exited and the parent has reaped it.
++ */
++void aa_release(struct task_struct *task)
++{
++	struct aa_task_context *cxt;
++	struct aa_profile *profile;
++	/*
++	 * While the task context is still on a profile's task context
++	 * list, another process could replace the profile under us,
++	 * leaving us with a locked profile that is no longer attached
++	 * to this task.  So after locking the profile, we check that
++	 * the profile is still attached.  The profile lock is
++	 * sufficient to prevent the replacement race so we do not lock
++	 * the task.
++	 *
++	 * Use lock subtyping to avoid lockdep reporting a false irq
++	 * possible inversion between the task_lock and profile_lock
++	 *
++	 * We also avoid taking the task_lock here because lock_dep
++	 * would report another false {softirq-on-W} potential irq_lock
++	 * inversion.
++	 *
++	 * If the task does not have a profile attached we are safe;
++	 * nothing can race with us at this point.
++	 */
++
++repeat:
++	profile = aa_get_profile(task);
++	if (profile) {
++		lock_profile_nested(profile, aa_lock_task_release);
++		cxt = aa_task_context(task);
++		if (unlikely(!cxt || cxt->profile != profile)) {
++			unlock_profile(profile);
++			aa_put_profile(profile);
++			goto repeat;
++		}
++		aa_change_task_context(task, NULL, NULL, 0, NULL);
++		unlock_profile(profile);
++		aa_put_profile(profile);
++	}
++}
++
++/*
++ * do_change_profile - switch the current task to the profile @name in @ns
++ * @expected: profile the caller saw on the task; -ESTALE is returned if the
++ *            task is no longer confined by it
++ * @ns: namespace to look @name up in
++ * @name: name of the profile to change to
++ * @cookie: magic value for hat changes; must match the stored cookie when
++ *          the task already has a previous_profile
++ * @restore: non-zero when returning to the previous profile
++ * @hat: non-zero if the target must be a hat
++ * @sa: audit record
++ *
++ * Returns 0 on success, or else an error code.  A cookie mismatch is
++ * audited and the current task is sent SIGKILL.
++ */
++static int do_change_profile(struct aa_profile *expected,
++			     struct aa_namespace *ns, const char *name,
++			     u64 cookie, int restore, int hat,
++			     struct aa_audit *sa)
++{
++	struct aa_profile *new_profile = NULL, *old_profile = NULL,
++		*previous_profile = NULL;
++	struct aa_task_context *new_cxt, *cxt;
++	int error = 0;
++
++	sa->name = name;
++
++	new_cxt = aa_alloc_task_context(GFP_KERNEL);
++	if (!new_cxt)
++		return -ENOMEM;
++
++	new_profile = aa_find_profile(ns, name);
++	if (!new_profile && !restore) {
++		if (!PROFILE_COMPLAIN(expected)) {
++			aa_free_task_context(new_cxt);
++			return -ENOENT;
++		}
++		new_profile = aa_dup_profile(ns->null_complain_profile);
++	} else if (new_profile && hat && !PROFILE_IS_HAT(new_profile)) {
++		/* NOTE(review): error is still 0 here, so asking for a hat
++		 * that is not a hat reports success without changing
++		 * anything -- confirm this is intended.
++		 */
++		aa_free_task_context(new_cxt);
++		aa_put_profile(new_profile);
++		return error;
++	}
++
++	cxt = lock_task_and_profiles(current, new_profile);
++	if (!cxt) {
++		error = -EPERM;
++		goto out;
++	}
++	old_profile = cxt->profile;
++
++	if (cxt->profile != expected || (new_profile && new_profile->isstale)) {
++		error = -ESTALE;
++		goto out;
++	}
++
++	if (cxt->previous_profile) {
++		if (cxt->cookie != cookie) {
++			error = -EACCES;
++			sa->info = "killing process";
++			aa_audit_reject(cxt->profile, sa);
++			/* terminate process */
++			(void)send_sig_info(SIGKILL, NULL, current);
++			goto out;
++		}
++
++		if (!restore)
++			previous_profile = cxt->previous_profile;
++	} else
++		previous_profile = cxt->profile;
++
++	if ((current->ptrace & PT_PTRACED) && aa_may_ptrace(cxt, new_profile)) {
++		error = -EACCES;
++		goto out;
++	}
++
++	if ((error = aa_rlimit_nproc(new_profile))) {
++		sa->info = "rlimit nproc limit exceeded";
++		aa_audit_reject(cxt->profile, sa);
++		goto out;
++	}
++
++	if (new_profile == ns->null_complain_profile)
++		aa_audit_hint(cxt->profile, sa);
++
++	if (APPARMOR_AUDIT(cxt))
++		aa_audit_message(cxt->profile, sa, AUDIT_APPARMOR_AUDIT);
++
++	if (!restore && cookie)
++		aa_change_task_context(current, new_cxt, new_profile, cookie,
++				       previous_profile);
++	else
++		/* either return to previous_profile, or a permanent change */
++		aa_change_task_context(current, new_cxt, new_profile, 0, NULL);
++
++out:
++	/* new_cxt is only kept if it was installed on the task above */
++	if (aa_task_context(current) != new_cxt)
++		aa_free_task_context(new_cxt);
++	task_unlock(current);
++	unlock_both_profiles(old_profile, new_profile);
++	aa_put_profile(new_profile);
++	return error;
++}
++
++/**
++ * aa_change_profile - perform a one-way profile transition
++ * @ns_name: name of the profile namespace to change to
++ * @name: name of profile to change to
++ * Change to new profile @name.  Unlike with hats, there is no way
++ * to change back.
++ *
++ * Returns %0 on success, error otherwise.
++ */
++int aa_change_profile(const char *ns_name, const char *name)
++{
++	struct aa_task_context *cxt;
++	struct aa_profile *profile = NULL;
++	struct aa_namespace *ns = NULL;
++	struct aa_audit sa;
++	unsigned int state;
++	int error = -EINVAL;
++
++	if (!name)
++		return -EINVAL;
++
++	memset(&sa, 0, sizeof(sa));
++	sa.gfp_mask = GFP_ATOMIC;
++	sa.operation = "change_profile";
++
++repeat:
++	/*
++	 * The reference from a previous pass was dropped before the retry;
++	 * forget the pointer so an unconfined task is not checked against a
++	 * profile that has already been released.
++	 */
++	profile = NULL;
++	task_lock(current);
++	cxt = aa_task_context(current);
++	if (cxt)
++		profile = aa_dup_profile(cxt->profile);
++	task_unlock(current);
++
++	/* explicit namespace, else the task's, else the default one */
++	if (ns_name)
++		ns = aa_find_namespace(ns_name);
++	else if (profile)
++		ns = aa_get_namespace(profile->ns);
++	else
++		ns = aa_get_namespace(default_namespace);
++
++	if (!ns) {
++		aa_put_profile(profile);
++		return -ENOENT;
++	}
++
++	/*
++	 * Unconfined and complain-mode tasks may always change; otherwise a
++	 * rule granting AA_CHANGE_PROFILE for @name is required.
++	 */
++	if (!profile || PROFILE_COMPLAIN(profile) ||
++	    (ns == profile->ns &&
++	     (aa_match(profile->file_rules, name, NULL) & AA_CHANGE_PROFILE)))
++		error = do_change_profile(profile, ns, name, 0, 0, 0, &sa);
++	else {
++		/* check for a rule with a namespace prepended */
++		aa_match_state(profile->file_rules, DFA_START, ns->name,
++			       &state);
++		state = aa_dfa_null_transition(profile->file_rules, state);
++		if ((aa_match_state(profile->file_rules, state, name, NULL) &
++		      AA_CHANGE_PROFILE))
++			error = do_change_profile(profile, ns, name, 0, 0, 0,
++						  &sa);
++		else
++			/* no permission to transition to profile @name */
++			error = -EACCES;
++	}
++
++	aa_put_namespace(ns);
++	aa_put_profile(profile);
++	/* the task's profile changed underneath us: look it up again */
++	if (error == -ESTALE)
++		goto repeat;
++
++	return error;
++}
++
++/**
++ * aa_change_hat - change hat to/from subprofile
++ * @hat_name: hat to change to
++ * @cookie: magic value to validate the hat change
++ *
++ * Change to new @hat_name, and store the @hat_magic in the current task
++ * context.  If the new @hat_name is %NULL and the @cookie matches that
++ * stored in the current task context and is not 0, return to the top level
++ * profile.
++ * Returns %0 on success, error otherwise.
++ */
++int aa_change_hat(const char *hat_name, u64 cookie)
++{
++	struct aa_task_context *cxt;
++	struct aa_profile *profile, *previous_profile;
++	struct aa_audit sa;
++	int error;
++
++	memset(&sa, 0, sizeof(sa));
++	sa.gfp_mask = GFP_ATOMIC;
++	sa.operation = "change_hat";
++
++repeat:
++	/*
++	 * Forget the outcome of any earlier pass.  A restore request that
++	 * finds no saved profile assigns nothing to @error, so a left-over
++	 * -ESTALE would otherwise send us back here forever.
++	 */
++	error = 0;
++
++	task_lock(current);
++	cxt = aa_task_context(current);
++	if (!cxt) {
++		/* a task without a context has no hat to change */
++		task_unlock(current);
++		return -EPERM;
++	}
++	/* hold our own references so the task lock can be dropped */
++	profile = aa_dup_profile(cxt->profile);
++	previous_profile = aa_dup_profile(cxt->previous_profile);
++	task_unlock(current);
++
++	if (hat_name) {
++		char *name, *profile_name;
++
++		/*
++		 * The compound name is built from the saved profile when one
++		 * exists, and from the current profile otherwise.
++		 */
++		if (previous_profile)
++			profile_name = previous_profile->name;
++		else
++			profile_name = profile->name;
++
++		name = new_compound_name(profile_name, hat_name);
++		if (!name) {
++			error = -ENOMEM;
++			goto out;
++		}
++		error = do_change_profile(profile, profile->ns, name, cookie,
++					  0, 1, &sa);
++		aa_put_name_buffer(name);
++	} else if (previous_profile) {
++		/* no hat name: go back to the saved profile */
++		error = do_change_profile(profile, profile->ns,
++					  previous_profile->name, cookie, 1, 0,
++					  &sa);
++	}
++	/* else ignore restores when there is no saved profile */
++
++out:
++	aa_put_profile(previous_profile);
++	aa_put_profile(profile);
++	if (error == -ESTALE)
++		goto repeat;
++
++	return error;
++}
++
++/**
++ * __aa_replace_profile - replace a task's profile
++ * @task: task to switch the profile of
++ * @profile: profile to switch to
++ *
++ * Returns a handle to the previous profile upon success, or else an
++ * error code.
++ */
++struct aa_profile *__aa_replace_profile(struct task_struct *task,
++					struct aa_profile *profile)
++{
++	struct aa_task_context *cxt, *new_cxt = NULL;
++	struct aa_profile *old_profile = NULL;
++
++	if (profile) {	/* the new context is allocated before any lock is taken */
++		new_cxt = aa_alloc_task_context(GFP_KERNEL);
++		if (!new_cxt)
++			return ERR_PTR(-ENOMEM);
++	}
++
++	cxt = lock_task_and_profiles(task, profile);	/* returns with task_lock(task) held */
++	if (unlikely(profile && profile->isstale)) {
++		old_profile = ERR_PTR(-ESTALE);
++		goto error;
++	}
++
++	if ((current->ptrace & PT_PTRACED) && aa_may_ptrace(cxt, profile)) {	/* NOTE(review): tests current, not @task - confirm intended */
++		old_profile = ERR_PTR(-EPERM);
++		goto error;
++	}
++
++	if (aa_rlimit_nproc(profile)) {
++		old_profile = ERR_PTR(-EAGAIN);
++		goto error;
++	}
++
++	if (cxt)	/* take the reference that is handed back to the caller */
++		old_profile = aa_dup_profile(cxt->profile);
++	aa_change_task_context(task, new_cxt, profile, 0, NULL);	/* cookie 0, no saved profile */
++
++	task_unlock(task);
++	aa_set_rlimits(task, profile);
++	unlock_both_profiles(profile, old_profile);
++	return old_profile;
++
++error:	/* old_profile holds the ERR_PTR to return; the unused context is freed */
++	task_unlock(task);
++	unlock_both_profiles(profile, cxt ? cxt->profile : NULL);
++	aa_free_task_context(new_cxt);
++	return old_profile;
++}
++
++/**
++ * lock_task_and_profiles - lock the task and confining profiles and @profile
++ * @task: task to lock
++ * @profile: extra profile to lock in addition to the current profile
++ *
++ * Handle the spinning on locking to make sure the task context and
++ * profile are consistent once all locks are acquired.
++ *
++ * Returns the aa_task_context currently confining the task. The task lock
++ * will be held whether or not the task is confined.
++ */
++struct aa_task_context *
++lock_task_and_profiles(struct task_struct *task, struct aa_profile *profile)
++{
++	struct aa_task_context *cxt;
++	struct aa_profile *old_profile = NULL;
++
++	rcu_read_lock();
++repeat:
++	/* sample the context first: its profile decides what gets locked */
++	cxt = aa_task_context(task);
++	if (cxt)
++		old_profile = cxt->profile;
++
++	lock_both_profiles(profile, old_profile);
++	task_lock(task);
++
++	/* check for race with profile transition, replacement or removal */
++	if (unlikely(cxt != aa_task_context(task))) {
++		/* the context changed while we were locking: start over */
++		task_unlock(task);
++		unlock_both_profiles(profile, old_profile);
++		old_profile = NULL;
++		goto repeat;
++	}
++	rcu_read_unlock();
++	return cxt;
++}
++
++/* RCU callback: free the task context that embeds @head */
++static void free_aa_task_context_rcu_callback(struct rcu_head *head)
++{
++	struct aa_task_context *cxt;
++
++	cxt = container_of(head, struct aa_task_context, rcu);
++	aa_free_task_context(cxt);
++}
++
++/**
++ * aa_change_task_context - switch a task to use a new context and profile
++ * @task: task that is having its task context changed
++ * @new_cxt: new task context to use after the switch (may be %NULL)
++ * @profile: new profile to use after the switch; only used when @new_cxt
++ *           is not %NULL
++ * @cookie: magic value to switch to
++ * @previous_profile: profile the task can return to
++ *
++ * The old context, if any, is unlinked from its profile and freed through
++ * call_rcu(); @new_cxt is then published in @task->security.
++ */
++void aa_change_task_context(struct task_struct *task,
++			    struct aa_task_context *new_cxt,
++			    struct aa_profile *profile, u64 cookie,
++			    struct aa_profile *previous_profile)
++{
++	struct aa_task_context *old_cxt = aa_task_context(task);
++
++	if (old_cxt) {
++		list_del_init(&old_cxt->list);
++		old_cxt->profile->task_count--;
++		call_rcu(&old_cxt->rcu, free_aa_task_context_rcu_callback);
++	}
++	if (new_cxt) {
++		/* set the caps_logged cache to the quiet_caps mask
++		 * this has the effect of quieting caps that are not
++		 * supposed to be logged
++		 */
++		new_cxt->caps_logged = profile->quiet_caps;
++		new_cxt->cookie = cookie;
++		new_cxt->task = task;
++		new_cxt->profile = aa_dup_profile(profile);
++		profile->task_count++;
++		new_cxt->previous_profile = aa_dup_profile(previous_profile);
++		list_move(&new_cxt->list, &profile->task_contexts);
++	}
++	rcu_assign_pointer(task->security, new_cxt);
++}
+diff -uprN e/security/apparmor/match.c f/security/apparmor/match.c
+--- e/security/apparmor/match.c	1970-01-01 00:00:00.000000000 +0000
++++ f/security/apparmor/match.c	2008-05-28 20:29:29.410207000 +0000
+@@ -0,0 +1,364 @@
++/*
++ * Copyright (C) 2007 Novell/SUSE
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License as
++ * published by the Free Software Foundation, version 2 of the
++ * License.
++ *
++ * Regular expression transition table matching
++ */
++
++#include
++#include
++#include
++#include "apparmor.h"
++#include "match.h"
++#include "inline.h"
++
++/**
++ * unpack_table - convert one serialized table to host byte order
++ * @blob: start of the serialized table (header first)
++ * @bsize: number of bytes available at @blob
++ *
++ * Returns a kmalloc()ed copy of the table, or %NULL when the data is
++ * truncated, uses an unknown element width, or memory is exhausted.
++ */
++static struct table_header *unpack_table(void *blob, size_t bsize)
++{
++	struct table_header *table = NULL;
++	struct table_header th;
++	size_t tsize;
++
++	if (bsize < sizeof(struct table_header))
++		goto out;
++
++	th.td_id = be16_to_cpu(*(u16 *) (blob));
++	th.td_flags = be16_to_cpu(*(u16 *) (blob + 2));
++	/* td_hilen is not read from the stream; do not copy stack garbage */
++	th.td_hilen = 0;
++	th.td_lolen = be32_to_cpu(*(u32 *) (blob + 8));
++	blob += sizeof(struct table_header);
++
++	/* td_flags doubles as the element width in bytes: 1, 2 or 4 */
++	if (!(th.td_flags == YYTD_DATA16 || th.td_flags == YYTD_DATA32 ||
++	      th.td_flags == YYTD_DATA8))
++		goto out;
++
++	/*
++	 * More entries than bytes available can never fit.  Rejecting that
++	 * here also keeps len * el_size in table_size() from wrapping where
++	 * size_t is 32 bits wide.
++	 */
++	if (th.td_lolen > bsize / th.td_flags)
++		goto out;
++
++	/* tsize covers header plus data, rounded up to a multiple of 8 */
++	tsize = table_size(th.td_lolen, th.td_flags);
++	if (bsize < tsize)
++		goto out;
++
++	table = kmalloc(tsize, GFP_KERNEL);
++	if (table) {
++		*table = th;
++		if (th.td_flags == YYTD_DATA8)
++			UNPACK_ARRAY(table->td_data, blob, th.td_lolen,
++				     u8, byte_to_byte);
++		else if (th.td_flags == YYTD_DATA16)
++			UNPACK_ARRAY(table->td_data, blob, th.td_lolen,
++				     u16, be16_to_cpu);
++		else
++			UNPACK_ARRAY(table->td_data, blob, th.td_lolen,
++				     u32, be32_to_cpu);
++	}
++
++out:
++	return table;
++}
++
++/**
++ * unpack_dfa - unpack a serialized table set into @dfa
++ * @dfa: dfa whose table slots are filled in
++ * @blob: serialized table set, starting with its table_set_header
++ * @size: number of bytes at @blob
++ *
++ * Returns 0 on success.  On failure every table held by @dfa is freed
++ * and an error is returned: -ENOMEM for a bad table set header, -EPROTO
++ * for anything wrong with the tables that follow it.
++ */
++int unpack_dfa(struct aa_dfa *dfa, void *blob, size_t size)
++{
++	int hsize, i;
++	int error = -ENOMEM;
++
++	/* get dfa table set header */
++	if (size < sizeof(struct table_set_header))
++		goto fail;
++
++	if (ntohl(*(u32 *)blob) != YYTH_MAGIC)
++		goto fail;
++
++	hsize = ntohl(*(u32 *)(blob + 4));
++	if (size < hsize)
++		goto fail;
++
++	blob += hsize;
++	size -= hsize;
++
++	error = -EPROTO;
++	while (size > 0) {
++		struct table_header *table;
++		u16 want_flags;
++
++		table = unpack_table(blob, size);
++		if (!table)
++			goto fail;
++
++		/* each table id is tied to one element width */
++		switch (table->td_id) {
++		case YYTD_ID_ACCEPT:
++		case YYTD_ID_ACCEPT2:
++		case YYTD_ID_BASE:
++			want_flags = YYTD_DATA32;
++			break;
++		case YYTD_ID_DEF:
++		case YYTD_ID_NXT:
++		case YYTD_ID_CHK:
++			want_flags = YYTD_DATA16;
++			break;
++		case YYTD_ID_EC:
++			want_flags = YYTD_DATA8;
++			break;
++		default:
++			kfree(table);
++			goto fail;
++		}
++
++		/*
++		 * Refuse a wrong width, and refuse a second table with the
++		 * same id: storing it would overwrite, and so leak, the
++		 * first one.
++		 */
++		if (table->td_flags != want_flags ||
++		    dfa->tables[table->td_id - 1]) {
++			kfree(table);
++			goto fail;
++		}
++		dfa->tables[table->td_id - 1] = table;
++
++		blob += table_size(table->td_lolen, table->td_flags);
++		size -= table_size(table->td_lolen, table->td_flags);
++	}
++
++	return 0;
++
++fail:
++	for (i = 0; i < ARRAY_SIZE(dfa->tables); i++) {
++		if (dfa->tables[i]) {
++			kfree(dfa->tables[i]);
++			dfa->tables[i] = NULL;
++		}
++	}
++	return error;
++}
++
++/**
++ * verify_dfa - verify that all the transitions and states in the dfa tables
++ * are in bounds.
++ * @dfa: dfa to test
++ *
++ * assumes dfa has gone through the verification done by unpacking
++ *
++ * Returns 0 when the tables are consistent, -EPROTO otherwise.
++ */
++int verify_dfa(struct aa_dfa *dfa)
++{
++	size_t i, state_count, trans_count;
++	int error = -EPROTO;
++
++	/* check that required tables exist */
++	if (!(dfa->tables[YYTD_ID_ACCEPT - 1] &&
++	      dfa->tables[YYTD_ID_ACCEPT2 - 1] &&
++	      dfa->tables[YYTD_ID_DEF - 1] &&
++	      dfa->tables[YYTD_ID_BASE - 1] &&
++	      dfa->tables[YYTD_ID_NXT - 1] &&
++	      dfa->tables[YYTD_ID_CHK - 1]))
++		goto out;
++
++	/* accept.size == default.size == base.size */
++	state_count = dfa->tables[YYTD_ID_BASE - 1]->td_lolen;
++	if (!(state_count == dfa->tables[YYTD_ID_DEF - 1]->td_lolen &&
++	      state_count == dfa->tables[YYTD_ID_ACCEPT - 1]->td_lolen &&
++	      state_count == dfa->tables[YYTD_ID_ACCEPT2 - 1]->td_lolen))
++		goto out;
++
++	/* next.size == chk.size */
++	trans_count = dfa->tables[YYTD_ID_NXT - 1]->td_lolen;
++	if (trans_count != dfa->tables[YYTD_ID_CHK - 1]->td_lolen)
++		goto out;
++
++	/* if equivalence classes then its table size must be 256 */
++	if (dfa->tables[YYTD_ID_EC - 1] &&
++	    dfa->tables[YYTD_ID_EC - 1]->td_lolen != 256)
++		goto out;
++
++	for (i = 0; i < state_count; i++) {
++		if (DEFAULT_TABLE(dfa)[i] >= state_count)
++			goto out;
++		/*
++		 * The matchers index next/check with base[state] + c, where
++		 * c is a byte value or equivalence class (0..255).  The
++		 * largest such index must therefore lie inside the tables.
++		 */
++		if ((u64) BASE_TABLE(dfa)[i] + 255 >= trans_count)
++			goto out;
++	}
++
++	for (i = 0; i < trans_count ; i++) {
++		if (NEXT_TABLE(dfa)[i] >= state_count)
++			goto out;
++		if (CHECK_TABLE(dfa)[i] >= state_count)
++			goto out;
++	}
++
++	/* verify accept permissions */
++	for (i = 0; i < state_count; i++) {
++		int mode = ACCEPT_TABLE(dfa)[i];
++
++		if (mode & ~AA_VALID_PERM_MASK)
++			goto out;
++		if (ACCEPT_TABLE2(dfa)[i] & ~AA_VALID_PERM2_MASK)
++			goto out;
++
++		/* if any exec modifier is set MAY_EXEC must be set */
++		if ((mode & AA_USER_EXEC_TYPE) && !(mode & AA_USER_EXEC))
++			goto out;
++		if ((mode & AA_OTHER_EXEC_TYPE) && !(mode & AA_OTHER_EXEC))
++			goto out;
++	}
++
++	error = 0;
++out:
++	return error;
++}
++
++/* allocate a zeroed dfa; returns NULL when out of memory */
++struct aa_dfa *aa_match_alloc(void)
++{
++	return kzalloc(sizeof(struct aa_dfa), GFP_KERNEL);
++}
++
++/* free @dfa together with all of its tables; a NULL @dfa is accepted */
++void aa_match_free(struct aa_dfa *dfa)
++{
++	if (dfa) {
++		int i;
++
++		for (i = 0; i < ARRAY_SIZE(dfa->tables); i++)
++			kfree(dfa->tables[i]);
++	}
++	kfree(dfa);
++}
++
++/**
++ * aa_dfa_next_state_len - traverse @dfa to find state @str stops at
++ * @dfa: the dfa to match @str against
++ * @start: the state of the dfa to start matching in
++ * @str: the string of bytes to match against the dfa
++ * @len: length of the string of bytes to match
++ *
++ * aa_dfa_next_state_len will match @len bytes of @str against the dfa and
++ * return the state it finished matching in. The final state can be used to
++ * look up the accepting label, or as the start state of a continuing match.
++ * State 0 is never left: starting there returns 0 at once.
++ *
++ * aa_dfa_next_state could be implemented using this function by doing
++ * return aa_dfa_next_state_len(dfa, start, str, strlen(str));
++ * but that would require traversing the string twice and be slightly
++ * slower.
++ */
++unsigned int aa_dfa_next_state_len(struct aa_dfa *dfa, unsigned int start,
++				   const char *str, int len)
++{
++	u16 *def = DEFAULT_TABLE(dfa);
++	u32 *base = BASE_TABLE(dfa);
++	u16 *next = NEXT_TABLE(dfa);
++	u16 *check = CHECK_TABLE(dfa);
++	unsigned int state = start, pos;
++
++	if (state == 0)
++		return 0;
++
++	/*
++	 * For every input byte: follow next[] when check[] confirms that the
++	 * slot belongs to the current state, else take the default state.
++	 */
++	if (dfa->tables[YYTD_ID_EC - 1]) {
++		/* input bytes are first mapped to equivalence classes */
++		u8 *equiv = EQUIV_TABLE(dfa);
++		for (; len; len--) {
++			pos = base[state] + equiv[(u8)*str++];
++			if (check[pos] == state)
++				state = next[pos];
++			else
++				state = def[state];
++		}
++	} else {
++		for (; len; len--) {
++			pos = base[state] + (u8)*str++;
++			if (check[pos] == state)
++				state = next[pos];
++			else
++				state = def[state];
++		}
++	}
++	return state;
++}
++
++/**
++ * aa_dfa_next_state - traverse @dfa to find state @str stops at
++ * @dfa: the dfa to match @str against
++ * @start: the state of the dfa to start matching in
++ * @str: the null terminated string of bytes to match against the dfa
++ *
++ * 
aa_dfa_next_state will match @str against the dfa and return the state it
++ * finished matching in. The final state can be used to look up the accepting
++ * label, or as the start state of a continuing match.
++ */
++unsigned int aa_dfa_next_state(struct aa_dfa *dfa, unsigned int start,
++			       const char *str)
++{
++	u16 *def = DEFAULT_TABLE(dfa);
++	u32 *base = BASE_TABLE(dfa);
++	u16 *next = NEXT_TABLE(dfa);
++	u16 *check = CHECK_TABLE(dfa);
++	unsigned int cur = start;
++	const char *p;
++
++	/* state 0 is never left */
++	if (!cur)
++		return 0;
++
++	/*
++	 * Consume @str up to its terminating NUL.  A transition is taken from
++	 * next[] when check[] says the slot belongs to the current state;
++	 * otherwise the state's default is used.
++	 */
++	if (dfa->tables[YYTD_ID_EC - 1]) {
++		/* bytes are translated to equivalence classes first */
++		u8 *equiv = EQUIV_TABLE(dfa);
++
++		for (p = str; *p; p++) {
++			unsigned int idx = base[cur] + equiv[(u8)*p];
++
++			cur = (check[idx] == cur) ? next[idx] : def[cur];
++		}
++	} else {
++		for (p = str; *p; p++) {
++			unsigned int idx = base[cur] + (u8)*p;
++
++			cur = (check[idx] == cur) ? next[idx] : def[cur];
++		}
++	}
++	return cur;
++}
++
++/**
++ * aa_dfa_null_transition - step to next state after null character
++ * @dfa: the dfa to match against
++ * @start: the state of the dfa to start matching in
++ *
++ * aa_dfa_null_transition transitions to the next state after a null
++ * character which is not used in standard matching and is only
++ * used to separate pairs.
++ */
++unsigned int aa_dfa_null_transition(struct aa_dfa *dfa, unsigned int start)
++{
++	unsigned int after_nul;
++
++	/* length 1 feeds exactly the terminating NUL of the empty string */
++	after_nul = aa_dfa_next_state_len(dfa, start, "", 1);
++	return after_nul;
++}
++
++/**
++ * aa_dfa_match - find accept perm for @str in @dfa
++ * @dfa: the dfa to match @str against
++ * @str: the string to match against the dfa
++ * @audit_mask: the audit_mask for the final state
++ *
++ * aa_dfa_match will match @str and return the accept perms for the
++ * final state.
++ */
++unsigned int aa_dfa_match(struct aa_dfa *dfa, const char *str, int *audit_mask)
++{
++	int state;
++
++	/* always start from DFA_START and run over the whole string */
++	state = aa_dfa_next_state(dfa, DFA_START, str);
++	if (audit_mask != NULL)
++		*audit_mask = dfa_audit_mask(dfa, state);
++
++	return ACCEPT_TABLE(dfa)[state];
++}
++
++/**
++ * aa_match_state - find accept perm and state for @str in @dfa
++ * @dfa: the dfa to match @str against (may be %NULL)
++ * @start: the state to start the match from
++ * @str: the string to match against the dfa
++ * @final: the state that the match finished in (may be %NULL)
++ *
++ * aa_match_state will match @str and return the accept perms, and @final
++ * state, the match occurred in. Without a @dfa both are 0.
++ */
++unsigned int aa_match_state(struct aa_dfa *dfa, unsigned int start,
++			    const char *str, unsigned int *final)
++{
++	unsigned int end_state;
++
++	/* nothing to match against: no permissions, state 0 */
++	if (!dfa) {
++		if (final)
++			*final = 0;
++		return 0;
++	}
++
++	end_state = aa_dfa_next_state(dfa, start, str);
++	if (final)
++		*final = end_state;
++	return ACCEPT_TABLE(dfa)[end_state];
++}
++
+diff -uprN e/security/apparmor/match.h f/security/apparmor/match.h
+--- e/security/apparmor/match.h	1970-01-01 00:00:00.000000000 +0000
++++ f/security/apparmor/match.h	2008-05-28 20:29:29.410207000 +0000
+@@ -0,0 +1,87 @@
++/*
++ * Copyright (C) 2007 Novell/SUSE
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License as
++ * published by the Free Software Foundation, version 2 of the
++ * License.
++ *
++ * AppArmor submodule (match) prototypes
++ */
++
++#ifndef __MATCH_H
++#define __MATCH_H
++
++#define DFA_START 1
++
++/**
++ * The format used for transition tables is based on the GNU flex table
++ * file format (--tables-file option; see Table File Format in the flex
++ * info pages and the flex sources for documentation). The magic number
++ * used in the header is 0x1B5E783D instead of 0xF13C57B1 though, because
++ * the YY_ID_CHK (check) and YY_ID_DEF (default) tables are used
++ * slightly differently (see the apparmor-parser package).
++ */
++
++#define YYTH_MAGIC	0x1B5E783D
++
++/* header at the start of a serialized table set */
++struct table_set_header {
++	u32		th_magic;	/* YYTH_MAGIC */
++	u32		th_hsize;
++	u32		th_ssize;
++	u16		th_flags;
++	char		th_version[];
++};
++
++/* table ids; a table with id N is kept in aa_dfa.tables[N - 1] */
++#define	YYTD_ID_ACCEPT	1
++#define YYTD_ID_BASE	2
++#define YYTD_ID_CHK	3
++#define YYTD_ID_DEF	4
++#define YYTD_ID_EC	5
++#define YYTD_ID_META	6
++#define YYTD_ID_ACCEPT2 7
++#define YYTD_ID_NXT	8
++
++
++/* td_flags values; each equals the size in bytes of one table element */
++#define YYTD_DATA8	1
++#define YYTD_DATA16	2
++#define YYTD_DATA32	4
++
++/* header in front of every table; td_lolen is the number of elements */
++struct table_header {
++	u16		td_id;
++	u16		td_flags;
++	u32		td_hilen;
++	u32		td_lolen;
++	char		td_data[];
++};
++
++/* typed views of the table data; the table must be present in (DFA) */
++#define DEFAULT_TABLE(DFA) ((u16 *)((DFA)->tables[YYTD_ID_DEF - 1]->td_data))
++#define BASE_TABLE(DFA) ((u32 *)((DFA)->tables[YYTD_ID_BASE - 1]->td_data))
++#define NEXT_TABLE(DFA) ((u16 *)((DFA)->tables[YYTD_ID_NXT - 1]->td_data))
++#define CHECK_TABLE(DFA) ((u16 *)((DFA)->tables[YYTD_ID_CHK - 1]->td_data))
++#define EQUIV_TABLE(DFA) ((u8 *)((DFA)->tables[YYTD_ID_EC - 1]->td_data))
++#define ACCEPT_TABLE(DFA) ((u32 *)((DFA)->tables[YYTD_ID_ACCEPT - 1]->td_data))
++#define ACCEPT_TABLE2(DFA) ((u32 *)((DFA)->tables[YYTD_ID_ACCEPT2 -1]->td_data))
++
++struct aa_dfa {
++	struct table_header *tables[YYTD_ID_NXT];
++};
++
++#define byte_to_byte(X) (X)
++
++/*
++ * Copy LEN elements of TYPE from BLOB to TABLE, passing each one through
++ * NTOHX.  The pointer and length arguments are parenthesized so that
++ * expressions can be passed safely; they are still evaluated more than
++ * once, so they must not have side effects.
++ */
++#define UNPACK_ARRAY(TABLE, BLOB, LEN, TYPE, NTOHX) \
++	do { \
++		typeof(LEN) __i; \
++		TYPE *__t = (TYPE *) (TABLE); \
++		TYPE *__b = (TYPE *) (BLOB); \
++		for (__i = 0; __i < (LEN); __i++) { \
++			__t[__i] = NTOHX(__b[__i]); \
++		} \
++	} while (0)
++
++/*
++ * Bytes taken by a table of @len elements of @el_size bytes each,
++ * header included, rounded up to a multiple of 8.
++ */
++static inline size_t table_size(size_t len, size_t el_size)
++{
++	return ALIGN(sizeof(struct table_header) + len * el_size, 8);
++}
++
++#endif /* __MATCH_H */
+diff -uprN e/security/apparmor/module_interface.c f/security/apparmor/module_interface.c
+--- e/security/apparmor/module_interface.c	1970-01-01 00:00:00.000000000 +0000
++++ f/security/apparmor/module_interface.c	2008-05-28 20:29:29.410207000 +0000
+@@ -0,0 +1,966 @@
++/*
++ * Copyright (C) 1998-2007 Novell/SUSE
++ *
++ * This program is 
free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License as
++ * published by the Free Software Foundation, version 2 of the
++ * License.
++ *
++ * AppArmor userspace policy interface
++ */
++
++#include
++
++#include "apparmor.h"
++#include "inline.h"
++
++/*
++ * This mutex is used to synchronize profile adds, replacements, and
++ * removals: we only allow one of these operations at a time.
++ * We do not use the profile list lock here in order to avoid blocking
++ * exec during those operations. (Exec involves a profile list lookup
++ * for named-profile transitions.)
++ */
++DEFINE_MUTEX(aa_interface_lock);
++
++/*
++ * The AppArmor interface treats data as a type byte followed by the
++ * actual data. The interface has the notion of a named entry
++ * which has a name (AA_NAME typecode followed by name string) followed by
++ * the entry's typecode and data. Named types allow for optional
++ * elements and extensions to be added and tested for without breaking
++ * backwards compatibility.
++ */
++
++enum aa_code {
++	AA_U8,
++	AA_U16,		/* followed by a little-endian u16 */
++	AA_U32,		/* followed by a little-endian u32 */
++	AA_U64,		/* followed by a little-endian u64 */
++	AA_NAME,	/* same as string except it is the item's name */
++	AA_STRING,	/* followed by a u16 length and that many bytes */
++	AA_BLOB,	/* followed by a u32 length and that many bytes */
++	AA_STRUCT,
++	AA_STRUCTEND,
++	AA_LIST,
++	AA_LISTEND,
++	AA_ARRAY,	/* followed by a u16 element count */
++	AA_ARRAYEND,
++};
++
++/*
++ * aa_ext is the read head of the buffer containing the serialized profile.
++ * The data is copied into a kernel buffer in apparmorfs and then handed off
++ * to the unpack routines.
++ */
++struct aa_ext {
++	void *start;
++	void *end;
++	void *pos;	/* pointer to current position in the buffer */
++	u32 version;
++	char *ns_name;
++};
++
++/* true when at least @size bytes remain between the read head and the end */
++static inline int aa_inbounds(struct aa_ext *e, size_t size)
++{
++	return (size <= e->end - e->pos);
++}
++
++/**
++ * aa_is_u16_chunk - test and do bounds checking for a u16 size based chunk
++ * @e: serialized data read head
++ * @chunk: start address for chunk of data
++ *
++ * A chunk is a little-endian u16 length followed by that many bytes.
++ *
++ * Returns the size of the chunk found, with the read head at the end of
++ * the chunk and @chunk pointing at its first byte.  Returns 0 with the
++ * read head unmoved and @chunk untouched when the chunk is out of bounds;
++ * a zero-length chunk also returns 0 but is consumed.
++ */
++static size_t aa_is_u16_chunk(struct aa_ext *e, char **chunk)
++{
++	void *pos = e->pos;
++	size_t size = 0;
++
++	if (!aa_inbounds(e, sizeof(u16)))
++		goto fail;
++	size = le16_to_cpu(get_unaligned((u16 *)e->pos));
++	e->pos += sizeof(u16);
++	if (!aa_inbounds(e, size))
++		goto fail;
++	*chunk = e->pos;
++	e->pos += size;
++	return size;
++
++fail:
++	e->pos = pos;
++	return 0;
++}
++
++/* consume one type byte if it equals @code; returns 1 if it did, else 0 */
++static inline int aa_is_X(struct aa_ext *e, enum aa_code code)
++{
++	if (!aa_inbounds(e, 1))
++		return 0;
++	if (*(u8 *) e->pos != code)
++		return 0;
++	e->pos++;
++	return 1;
++}
++
++/**
++ * aa_is_nameX - check if the next element is of type X with a name of @name
++ * @e: serialized data extent information
++ * @code: type code
++ * @name: name to match to the serialized element.
++ *
++ * check that the next serialized data element is of type X and has a tag
++ * name @name. If @name is specified then there must be a matching
++ * name element in the stream. If @name is NULL any name element will be
++ * skipped and only the typecode will be tested.
++ * returns 1 on success (both type code and name tests match) and the read
++ * head is advanced past the headers
++ * returns %0 if either match fails, the read head does not move
++ */
++static int aa_is_nameX(struct aa_ext *e, enum aa_code code, const char *name)
++{
++	void *pos = e->pos;
++	/*
++	 * Check for presence of a tagname, and if present name size
++	 * AA_NAME tag value is a u16.
++	 */
++	if (aa_is_X(e, AA_NAME)) {
++		char *tag;
++		size_t size = aa_is_u16_chunk(e, &tag);
++		/*
++		 * if a name is specified it must match. otherwise skip tag.
++		 * The tag lives in the input buffer, so make sure it is
++		 * NUL terminated inside its chunk before strcmp() walks it.
++		 */
++		if (name && (!size || tag[size - 1] != '\0' ||
++			     strcmp(name, tag)))
++			goto fail;
++	} else if (name) {
++		/* if a name is specified and there is no name tag fail */
++		goto fail;
++	}
++
++	/* now check if type code matches */
++	if (aa_is_X(e, code))
++		return 1;
++
++fail:
++	e->pos = pos;
++	return 0;
++}
++
++/*
++ * aa_is_u16/u32/u64 - read a (possibly named) little-endian integer
++ * @data receives the value in host order and may be NULL to skip it.
++ * Each returns 1 on success, or 0 with the read head unmoved.
++ */
++static int aa_is_u16(struct aa_ext *e, u16 *data, const char *name)
++{
++	void *pos = e->pos;
++	if (aa_is_nameX(e, AA_U16, name)) {
++		if (!aa_inbounds(e, sizeof(u16)))
++			goto fail;
++		if (data)
++			*data = le16_to_cpu(get_unaligned((u16 *)e->pos));
++		e->pos += sizeof(u16);
++		return 1;
++	}
++fail:
++	e->pos = pos;
++	return 0;
++}
++
++static int aa_is_u32(struct aa_ext *e, u32 *data, const char *name)
++{
++	void *pos = e->pos;
++	if (aa_is_nameX(e, AA_U32, name)) {
++		if (!aa_inbounds(e, sizeof(u32)))
++			goto fail;
++		if (data)
++			*data = le32_to_cpu(get_unaligned((u32 *)e->pos));
++		e->pos += sizeof(u32);
++		return 1;
++	}
++fail:
++	e->pos = pos;
++	return 0;
++}
++
++static int aa_is_u64(struct aa_ext *e, u64 *data, const char *name)
++{
++	void *pos = e->pos;
++	if (aa_is_nameX(e, AA_U64, name)) {
++		if (!aa_inbounds(e, sizeof(u64)))
++			goto fail;
++		if (data)
++			*data = le64_to_cpu(get_unaligned((u64 *)e->pos));
++		e->pos += sizeof(u64);
++		return 1;
++	}
++fail:
++	e->pos = pos;
++	return 0;
++}
++
++/*
++ * aa_is_array - read an array header: AA_ARRAY followed by a u16 count
++ * Returns the element count.  0 means either an empty array or no array
++ * header at all; in the latter case the read head is not moved.
++ */
++static size_t aa_is_array(struct aa_ext *e, const char *name)
++{
++	void *pos = e->pos;
++	if (aa_is_nameX(e, AA_ARRAY, name)) {
++		int size;
++		if (!aa_inbounds(e, sizeof(u16)))
++			goto fail;
++		size = (int) le16_to_cpu(get_unaligned((u16 *)e->pos));
++		e->pos += sizeof(u16);
++		return size;
++	}
++fail:
++	e->pos = pos;
++	return 0;
++}
++
++/*
++ * aa_is_blob - read a blob: AA_BLOB, a u32 length, then the bytes
++ * On success @blob points into the input buffer (nothing is copied) and
++ * the length is returned.  Returns 0 with the read head unmoved when there
++ * is no blob or it does not fit in the buffer.
++ */
++static size_t aa_is_blob(struct aa_ext *e, char **blob, const char *name)
++{
++	void *pos = e->pos;
++	if (aa_is_nameX(e, AA_BLOB, name)) {
++		u32 size;
++		if (!aa_inbounds(e, sizeof(u32)))
++			goto fail;
++		size = le32_to_cpu(get_unaligned((u32 *)e->pos));
++		e->pos += sizeof(u32);
++		if (aa_inbounds(e, (size_t) size)) {
++			*blob = e->pos;
++			e->pos += size;
++			return size;
++		}
++	}
++fail:
++	e->pos = pos;
++	return 0;
++}
++
++/*
++ * aa_is_dynstring - read a string and return a kmalloc()ed copy of it
++ * @string is set to the copy, which the caller must kfree(), or to NULL.
++ * The serialized string must end in a NUL byte inside its chunk (embedded
++ * NULs are allowed), so the copy can safely be used as a C string.
++ * Returns the chunk size including the terminator, or 0 on any failure,
++ * in which case the read head is not moved.
++ */
++static int aa_is_dynstring(struct aa_ext *e, char **string, const char *name)
++{
++	char *src_str;
++	char *str;
++	size_t size;
++	void *pos = e->pos;
++
++	*string = NULL;
++	if (!aa_is_nameX(e, AA_STRING, name))
++		return 0;	/* aa_is_nameX() left the read head alone */
++
++	size = aa_is_u16_chunk(e, &src_str);
++	if (!size || src_str[size - 1] != '\0')
++		goto fail;
++
++	str = kmalloc(size, GFP_KERNEL);
++	if (!str)
++		goto fail;
++	memcpy(str, src_str, size);
++	*string = str;
++	return size;
++
++fail:
++	/* the type code was already consumed: put the read head back */
++	e->pos = pos;
++	return 0;
++}
++
++/**
++ * aa_unpack_dfa - unpack a file rule dfa
++ * @e: serialized data extent information
++ *
++ * returns dfa, %NULL when the stream holds no "aadfa" blob, or ERR_PTR
++ */
++static struct aa_dfa *aa_unpack_dfa(struct aa_ext *e)
++{
++	char *blob = NULL;
++	size_t size;
++	int error = 0;
++	struct aa_dfa *dfa = NULL;
++
++	size = aa_is_blob(e, &blob, "aadfa");
++	if (size) {
++		dfa = aa_match_alloc();
++		if (dfa) {
++			/*
++			 * The dfa is aligned within the blob to 8 bytes
++			 * from the beginning of the stream.
++			 */
++			size_t sz = blob - (char *) e->start;
++			size_t pad = ALIGN(sz, 8) - sz;
++
++			/* a blob shorter than its padding holds no dfa */
++			if (pad > size)
++				error = -EPROTO;
++			else
++				error = unpack_dfa(dfa, blob + pad,
++						   size - pad);
++			if (!error)
++				error = verify_dfa(dfa);
++		} else {
++			error = -ENOMEM;
++		}
++
++		if (error) {
++			aa_match_free(dfa);
++			dfa = ERR_PTR(error);
++		}
++	}
++
++	return dfa;
++}
++
++/*
++ * aa_unpack_exec_table - unpack the optional "xtable" struct
++ * Returns 1 on success or when no table is present, 0 on a malformed
++ * table.  Entries unpacked before a failure stay in @profile->exec_table.
++ * NOTE(review): presumably free_aa_profile() releases them - confirm.
++ */
++static int aa_unpack_exec_table(struct aa_ext *e, struct aa_profile *profile)
++{
++	void *pos = e->pos;
++
++	/* exec table is optional */
++	if (aa_is_nameX(e, AA_STRUCT, "xtable")) {
++		int i, size;
++
++		size = aa_is_array(e, NULL);
++		/* currently 4 exec bits and entries 0-3 are reserved iupcx */
++		if (size > 16 - 4)
++			goto fail;
++		profile->exec_table = kzalloc(sizeof(char *) * size,
++					      GFP_KERNEL);
++		if (!profile->exec_table)
++			goto fail;
++
++		for (i = 0; i < size; i++) {
++			char *tmp;
++			if (!aa_is_dynstring(e, &tmp, NULL))
++				goto fail;
++			/* note: strings beginning with a : have an embedded
++			   \0 separating the profile ns name from the profile
++			   name */
++			profile->exec_table[i] = tmp;
++		}
++		if (!aa_is_nameX(e, AA_ARRAYEND, NULL))
++			goto fail;
++		if (!aa_is_nameX(e, AA_STRUCTEND, NULL))
++			goto fail;
++	}
++	return 1;
++
++fail:
++	e->pos = pos;
++	return 0;
++}
++
++/*
++ * aa_unpack_rlimits - unpack the optional "rlimits" struct
++ * Reads a u32 mask followed by an array of at most RLIM_NLIMITS u64
++ * values that become the rlim_max of the limit with the same index.
++ * Returns 1 on success or when no limits are present, 0 when malformed.
++ */
++int aa_unpack_rlimits(struct aa_ext *e, struct aa_profile *profile)
++{
++	void *pos = e->pos;
++
++	/* rlimits are optional */
++	if (aa_is_nameX(e, AA_STRUCT, "rlimits")) {
++		int i, size;
++		u32 mask = 0;
++		if (!aa_is_u32(e, &mask, NULL))
++			goto fail;
++		profile->rlimits.mask = mask;
++
++		size = aa_is_array(e, NULL);
++		if (size > RLIM_NLIMITS)
++			goto fail;
++		for (i = 0; i < size; i++) {
++			u64 limit = 0;
++			if (!aa_is_u64(e, &limit, NULL))
++				goto fail;
++			profile->rlimits.limits[i].rlim_max = limit;
++		}
++		if (!aa_is_nameX(e, AA_ARRAYEND, NULL))
++			goto fail;
++		if (!aa_is_nameX(e, AA_STRUCTEND, NULL))
++			goto fail;
++	}
++	return 1;
++
++fail:
++	e->pos = pos;
++	return 0;
++}
++
++/**
++ * aa_unpack_profile - unpack a serialized profile
++ * @e: serialized data extent 
information
++ * @sa: audit struct for the operation
++ *
++ * Returns the new profile, or an ERR_PTR after auditing the failure.
++ */
++static struct aa_profile *aa_unpack_profile(struct aa_ext *e,
++					    struct aa_audit *sa)
++{
++	struct aa_profile *profile = NULL;
++	size_t size = 0;
++	int i, error = -EPROTO;
++
++	profile = alloc_aa_profile();
++	if (!profile)
++		return ERR_PTR(-ENOMEM);
++
++	/* check that we have the right struct being passed */
++	if (!aa_is_nameX(e, AA_STRUCT, "profile"))
++		goto fail;
++	if (!aa_is_dynstring(e, &profile->name, NULL))
++		goto fail;
++
++	/* per profile debug flags (complain, audit) */
++	if (!aa_is_nameX(e, AA_STRUCT, "flags"))
++		goto fail;
++	if (!aa_is_u32(e, &(profile->flags.hat), NULL))
++		goto fail;
++	if (!aa_is_u32(e, &(profile->flags.complain), NULL))
++		goto fail;
++	if (!aa_is_u32(e, &(profile->flags.audit), NULL))
++		goto fail;
++	if (!aa_is_nameX(e, AA_STRUCTEND, NULL))
++		goto fail;
++
++	/* lower 32 bits of the four capability sets */
++	if (!aa_is_u32(e, &(profile->capabilities.cap[0]), NULL))
++		goto fail;
++	if (!aa_is_u32(e, &(profile->audit_caps.cap[0]), NULL))
++		goto fail;
++	if (!aa_is_u32(e, &(profile->quiet_caps.cap[0]), NULL))
++		goto fail;
++	if (!aa_is_u32(e, &(profile->set_caps.cap[0]), NULL))
++		goto fail;
++
++	if (aa_is_nameX(e, AA_STRUCT, "caps64")) {
++		/* optional upper half of 64 bit caps */
++		if (!aa_is_u32(e, &(profile->capabilities.cap[1]), NULL))
++			goto fail;
++		if (!aa_is_u32(e, &(profile->audit_caps.cap[1]), NULL))
++			goto fail;
++		if (!aa_is_u32(e, &(profile->quiet_caps.cap[1]), NULL))
++			goto fail;
++		if (!aa_is_u32(e, &(profile->set_caps.cap[1]), NULL))
++			goto fail;
++		if (!aa_is_nameX(e, AA_STRUCTEND, NULL))
++			goto fail;
++	}
++
++	if (!aa_unpack_rlimits(e, profile))
++		goto fail;
++
++	/* optional per address family network masks, three u16 per family */
++	size = aa_is_array(e, "net_allowed_af");
++	if (size) {
++		if (size > AF_MAX)
++			goto fail;
++
++		for (i = 0; i < size; i++) {
++			if (!aa_is_u16(e, &profile->network_families[i], NULL))
++				goto fail;
++			if (!aa_is_u16(e, &profile->audit_network[i], NULL))
++				goto fail;
++			if (!aa_is_u16(e, &profile->quiet_network[i], NULL))
++				goto fail;
++		}
++		if (!aa_is_nameX(e, AA_ARRAYEND, NULL))
++			goto fail;
++		/* allow unix domain and netlink sockets they are handled
++		 * by IPC
++		 */
++	}
++	profile->network_families[AF_UNIX] = 0xffff;
++	profile->network_families[AF_NETLINK] = 0xffff;
++
++	/* get file rules */
++	profile->file_rules = aa_unpack_dfa(e);
++	if (IS_ERR(profile->file_rules)) {
++		error = PTR_ERR(profile->file_rules);
++		/* never leave an ERR_PTR behind for free_aa_profile() */
++		profile->file_rules = NULL;
++		goto fail;
++	}
++
++	if (!aa_unpack_exec_table(e, profile))
++		goto fail;
++
++	if (!aa_is_nameX(e, AA_STRUCTEND, NULL))
++		goto fail;
++
++	return profile;
++
++fail:
++	sa->name = profile && profile->name ? profile->name : "unknown";
++	if (!sa->info)
++		sa->info = "failed to unpack profile";
++	aa_audit_status(NULL, sa);
++
++	if (profile)
++		free_aa_profile(profile);
++
++	return ERR_PTR(error);
++}
++
++/**
++ * aa_verify_header - unpack serialized stream header
++ * @e: serialized data read head
++ * @sa: audit struct of the operation the header is being verified for
++ *
++ * On success @e->version holds the interface version and @e->ns_name a
++ * kmalloc()ed namespace name, or NULL when the stream names none.
++ *
++ * returns error or 0 if header is good
++ */
++static int aa_verify_header(struct aa_ext *e, struct aa_audit *sa)
++{
++	/* get the interface version */
++	if (!aa_is_u32(e, &e->version, "version")) {
++		sa->info = "invalid profile format";
++		aa_audit_status(NULL, sa);
++		return -EPROTONOSUPPORT;
++	}
++
++	/* check that the interface version is currently supported */
++	if (e->version != 5) {
++		sa->info = "unsupported interface version";
++		aa_audit_status(NULL, sa);
++		return -EPROTONOSUPPORT;
++	}
++
++	/* read the namespace if present */
++	if (!aa_is_dynstring(e, &e->ns_name, "namespace")) {
++		e->ns_name = NULL;
++	}
++
++	return 0;
++}
++
++/**
++ * aa_add_profile - Unpack and add a new profile to the profile list
++ * @data: serialized data stream
++ * @size: size of the serialized data stream
++ *
++ * Returns @size on success, -EEXIST when the namespace already holds a
++ * profile of that name, or another negative error.
++ */
++ssize_t aa_add_profile(void *data, size_t size)
++{
++	struct aa_profile *profile = NULL;
++	struct aa_namespace *ns = NULL;
++	struct aa_ext e = {
++		.start = data,
++		.end = data + size,
++		.pos = data,
++		.ns_name = NULL
++	};
++	ssize_t error;
++	struct aa_audit sa;
++	memset(&sa, 0, sizeof(sa));
++	sa.operation = "profile_load";
++	sa.gfp_mask = GFP_KERNEL;
++
++	error = aa_verify_header(&e, &sa);
++	if (error)
++		return error;
++
++	profile = aa_unpack_profile(&e, &sa);
++	if (IS_ERR(profile)) {
++		/* the name copied by aa_verify_header() is still only ours */
++		kfree(e.ns_name);
++		return PTR_ERR(profile);
++	}
++
++	/*
++	 * NOTE(review): on the paths below e.ns_name is never freed here;
++	 * whether alloc_aa_namespace() takes ownership of it cannot be seen
++	 * from this file section - confirm before adding a kfree().
++	 */
++	mutex_lock(&aa_interface_lock);
++	write_lock(&profile_ns_list_lock);
++	if (e.ns_name)
++		ns = __aa_find_namespace(e.ns_name, &profile_ns_list);
++	else
++		ns = default_namespace;
++	if (!ns) {
++		struct aa_namespace *new_ns;
++		/* drop the list lock around the namespace allocation */
++		write_unlock(&profile_ns_list_lock);
++		new_ns = alloc_aa_namespace(e.ns_name);
++		if (!new_ns) {
++			mutex_unlock(&aa_interface_lock);
++			/* do not leak the profile unpacked above */
++			aa_put_profile(profile);
++			return -ENOMEM;
++		}
++		write_lock(&profile_ns_list_lock);
++		/* look again: the list lock was released in between */
++		ns = __aa_find_namespace(e.ns_name, &profile_ns_list);
++		if (!ns) {
++			list_add(&new_ns->list, &profile_ns_list);
++			ns = new_ns;
++		} else
++			free_aa_namespace(new_ns);
++	}
++
++	write_lock(&ns->lock);
++	if (__aa_find_profile(profile->name, &ns->profiles)) {
++		/* A profile with this name exists already. */
++		write_unlock(&ns->lock);
++		write_unlock(&profile_ns_list_lock);
++		sa.name = profile->name;
++		sa.name2 = ns->name;
++		sa.info = "failed: profile already loaded";
++		aa_audit_status(NULL, &sa);
++		mutex_unlock(&aa_interface_lock);
++		aa_put_profile(profile);
++		return -EEXIST;
++	}
++	profile->ns = aa_get_namespace(ns);
++	ns->profile_count++;
++	list_add(&profile->list, &ns->profiles);
++	write_unlock(&ns->lock);
++	write_unlock(&profile_ns_list_lock);
++
++	sa.name = profile->name;
++	sa.name2 = ns->name;
++	aa_audit_status(NULL, &sa);
++	mutex_unlock(&aa_interface_lock);
++	return size;
++}
++
++/**
++ * task_replace - replace a task's profile
++ * @task: task to replace profile on; it must currently have a task context
++ * @new_cxt: new aa_task_context to do replacement with
++ * @new_profile: new profile
++ *
++ * The cookie and previous profile of the current context carry over.
++ */
++static inline void task_replace(struct task_struct *task,
++				struct aa_task_context *new_cxt,
++				struct aa_profile *new_profile)
++{
++	struct aa_task_context *cxt = aa_task_context(task);
++
++	AA_DEBUG("%s: replacing profile for task %d "
++		 "profile=%s (%p)\n",
++		 __FUNCTION__,
++		 cxt->task->pid,
++		 cxt->profile->name, cxt->profile);
++
++	aa_change_task_context(task, new_cxt, new_profile, cxt->cookie,
++			       cxt->previous_profile);
++}
++
++/**
++ * aa_replace_profile - replace a profile on the profile list
++ * @udata: serialized data stream
++ * @size: size of the serialized data stream
++ *
++ * unpack and replace a profile on the profile list and uses of that profile
++ * by any aa_task_context. If the profile does not exist on the profile list
++ * it is added. Return %0 or error.
++ */ ++ssize_t aa_replace_profile(void *udata, size_t size) ++{ ++ struct aa_profile *old_profile, *new_profile; ++ struct aa_namespace *ns; ++ struct aa_task_context *new_cxt; ++ struct aa_ext e = { ++ .start = udata, ++ .end = udata + size, ++ .pos = udata, ++ .ns_name = NULL ++ }; ++ ssize_t error; ++ struct aa_audit sa; ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = "profile_replace"; ++ sa.gfp_mask = GFP_KERNEL; ++ ++ error = aa_verify_header(&e, &sa); ++ if (error) ++ return error; ++ ++ new_profile = aa_unpack_profile(&e, &sa); ++ if (IS_ERR(new_profile)) ++ return PTR_ERR(new_profile); ++ ++ mutex_lock(&aa_interface_lock); ++ write_lock(&profile_ns_list_lock); ++ if (e.ns_name) ++ ns = __aa_find_namespace(e.ns_name, &profile_ns_list); ++ else ++ ns = default_namespace; ++ if (!ns) { ++ struct aa_namespace *new_ns; ++ write_unlock(&profile_ns_list_lock); ++ new_ns = alloc_aa_namespace(e.ns_name); ++ if (!new_ns) { ++ mutex_unlock(&aa_interface_lock); ++ return -ENOMEM; ++ } ++ write_lock(&profile_ns_list_lock); ++ ns = __aa_find_namespace(e.ns_name, &profile_ns_list); ++ if (!ns) { ++ list_add(&new_ns->list, &profile_ns_list); ++ ns = new_ns; ++ } else ++ free_aa_namespace(new_ns); ++ } ++ ++ write_lock(&ns->lock); ++ old_profile = __aa_find_profile(new_profile->name, &ns->profiles); ++ if (old_profile) { ++ lock_profile(old_profile); ++ old_profile->isstale = 1; ++ list_del_init(&old_profile->list); ++ unlock_profile(old_profile); ++ ns->profile_count--; ++ } ++ new_profile->ns = aa_get_namespace(ns); ++ ns->profile_count++; ++ /* not don't need an extra ref count to keep new_profile as ++ * it is protect by the interface mutex */ ++ list_add(&new_profile->list, &ns->profiles); ++ write_unlock(&ns->lock); ++ write_unlock(&profile_ns_list_lock); ++ ++ if (!old_profile) { ++ sa.operation = "profile_load"; ++ goto out; ++ } ++ /* do not fail replacement based off of profile's NPROC rlimit */ ++ ++ /* ++ * Replacement needs to allocate a new aa_task_context 
for each ++ * task confined by old_profile. To do this the profile locks ++ * are only held when the actual switch is done per task. While ++ * looping to allocate a new aa_task_context the old_task list ++ * may get shorter if tasks exit/change their profile but will ++ * not get longer as new task will not use old_profile detecting ++ * that is stale. ++ */ ++ do { ++ new_cxt = aa_alloc_task_context(GFP_KERNEL | __GFP_NOFAIL); ++ ++ lock_both_profiles(old_profile, new_profile); ++ if (!list_empty(&old_profile->task_contexts)) { ++ struct task_struct *task = ++ list_entry(old_profile->task_contexts.next, ++ struct aa_task_context, list)->task; ++ task_lock(task); ++ task_replace(task, new_cxt, new_profile); ++ task_unlock(task); ++ aa_set_rlimits(task, new_profile); ++ new_cxt = NULL; ++ } ++ unlock_both_profiles(old_profile, new_profile); ++ } while (!new_cxt); ++ aa_free_task_context(new_cxt); ++ aa_put_profile(old_profile); ++ ++out: ++ sa.name = new_profile->name; ++ sa.name2 = ns->name; ++ aa_audit_status(NULL, &sa); ++ mutex_unlock(&aa_interface_lock); ++ return size; ++} ++ ++/** ++ * aa_remove_profile - remove a profile from the system ++ * @name: name of the profile to remove ++ * @size: size of the name ++ * ++ * remove a profile from the profile list and all aa_task_context references ++ * to said profile. 
++ * NOTE: removing confinement does not restore rlimits to preconfinemnet values ++ */ ++ssize_t aa_remove_profile(char *name, size_t size) ++{ ++ struct aa_namespace *ns; ++ struct aa_profile *profile; ++ struct aa_audit sa; ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = "profile_remove"; ++ sa.gfp_mask = GFP_KERNEL; ++ ++ mutex_lock(&aa_interface_lock); ++ write_lock(&profile_ns_list_lock); ++ ++ if (name[0] == ':') { /* ":NS:PROFILE" form selects a namespace */ ++ char *split = strchr(name + 1, ':'); ++ if (!split) ++ goto noent; ++ *split = 0; /* terminate the namespace part in place; this writes into the caller's buffer */ ++ ns = __aa_find_namespace(name + 1, &profile_ns_list); ++ name = split + 1; ++ } else { ++ ns = default_namespace; ++ } ++ ++ if (!ns) ++ goto noent; ++ sa.name2 = ns->name; ++ write_lock(&ns->lock); ++ profile = __aa_find_profile(name, &ns->profiles); ++ if (!profile) { ++ write_unlock(&ns->lock); ++ goto noent; ++ } ++ sa.name = profile->name; ++ ++ /* Remove the profile from each task context it is on. */ ++ lock_profile(profile); ++ profile->isstale = 1; ++ aa_unconfine_tasks(profile); ++ list_del_init(&profile->list); ++ ns->profile_count--; ++ unlock_profile(profile); ++ /* Release the profile itself.
*/ ++ write_unlock(&ns->lock); ++ /* check to see if the namespace has become stale */ ++ if (ns != default_namespace && ns->profile_count == 0) { ++ list_del_init(&ns->list); ++ aa_put_namespace(ns); ++ } ++ write_unlock(&profile_ns_list_lock); ++ ++ aa_audit_status(NULL, &sa); ++ mutex_unlock(&aa_interface_lock); ++ aa_put_profile(profile); /* put only after auditing: sa.name points at profile->name */ ++ ++ return size; ++ ++noent: ++ write_unlock(&profile_ns_list_lock); ++ sa.info = "failed: profile does not exist"; ++ aa_audit_status(NULL, &sa); ++ mutex_unlock(&aa_interface_lock); ++ return -ENOENT; ++} ++ ++/** ++ * free_aa_namespace_kref - free aa_namespace by kref (see aa_put_namespace) ++ * @kref: kref embedded in the namespace being freed ++ */ ++void free_aa_namespace_kref(struct kref *kref) ++{ ++ struct aa_namespace *ns=container_of(kref, struct aa_namespace, count); ++ ++ free_aa_namespace(ns); ++} ++ ++/** ++ * alloc_aa_namespace - allocate, initialize and return a new namespace ++ * @name: a preallocated name ++ * Returns NULL on failure.
++ */ ++struct aa_namespace *alloc_aa_namespace(char *name) ++{ ++ struct aa_namespace *ns; ++ ++ ns = kzalloc(sizeof(*ns), GFP_KERNEL); ++ AA_DEBUG("%s(%p)\n", __FUNCTION__, ns); ++ if (ns) { ++ ns->name = name; /* stored, not copied; free_aa_namespace() kfree()s it */ ++ INIT_LIST_HEAD(&ns->list); ++ INIT_LIST_HEAD(&ns->profiles); ++ kref_init(&ns->count); ++ rwlock_init(&ns->lock); ++ ++ ns->null_complain_profile = alloc_aa_profile(); ++ if (!ns->null_complain_profile) { ++ if (!name) ++ kfree(ns->name); /* NOTE(review): reached only when @name is NULL, so this kfree is a no-op; on failure @name is left to the caller */ ++ kfree(ns); ++ return NULL; ++ } ++ ns->null_complain_profile->name = ++ kstrdup("null-complain-profile", GFP_KERNEL); ++ if (!ns->null_complain_profile->name) { ++ free_aa_profile(ns->null_complain_profile); ++ if (!name) ++ kfree(ns->name); /* same no-op as in the failure path above */ ++ kfree(ns); ++ return NULL; ++ } ++ ns->null_complain_profile->flags.complain = 1; ++ /* null_complain_profile doesn't contribute to ns ref count */ ++ ns->null_complain_profile->ns = ns; ++ } ++ return ns; ++} ++ ++/** ++ * free_aa_namespace - free a profile namespace ++ * @ns: the namespace to free (NULL is accepted and ignored) ++ * ++ * Free a namespace. All references to the namespace must have been put. ++ * If the namespace was referenced by a profile confining a task, ++ * free_aa_namespace will be called indirectly (through free_aa_profile) ++ * from an rcu callback routine, so we must not sleep here.
++ */ ++void free_aa_namespace(struct aa_namespace *ns) ++{ ++ AA_DEBUG("%s(%p)\n", __FUNCTION__, ns); ++ ++ if (!ns) ++ return; ++ ++ /* namespace still contains profiles -- invalid */ ++ if (!list_empty(&ns->profiles)) { ++ AA_ERROR("%s: internal error, " ++ "namespace '%s' still contains profiles\n", ++ __FUNCTION__, ++ ns->name); ++ BUG(); ++ } ++ /* namespace is still linked on a namespace list -- invalid */ if (!list_empty(&ns->list)) { ++ AA_ERROR("%s: internal error, " ++ "namespace '%s' still on list\n", ++ __FUNCTION__, ++ ns->name); ++ BUG(); ++ } ++ /* null_complain_profile doesn't contribute to ns ref counting */ ++ ns->null_complain_profile->ns = NULL; ++ aa_put_profile(ns->null_complain_profile); ++ kfree(ns->name); /* the name handed to alloc_aa_namespace() is released here */ ++ kfree(ns); ++} ++ ++/** ++ * free_aa_profile_kref - free aa_profile by kref (called by aa_put_profile) ++ * @kref: kref embedded in the profile being freed ++ */ ++void free_aa_profile_kref(struct kref *kref) ++{ ++ struct aa_profile *p=container_of(kref, struct aa_profile, count); ++ ++ free_aa_profile(p); ++} ++ ++/** ++ * alloc_aa_profile - allocate, initialize and return a new profile ++ * Returns NULL on failure. ++ */ ++struct aa_profile *alloc_aa_profile(void) ++{ ++ struct aa_profile *profile; ++ ++ profile = kzalloc(sizeof(*profile), GFP_KERNEL); /* zeroed, so name and ns start out NULL */ ++ AA_DEBUG("%s(%p)\n", __FUNCTION__, profile); ++ if (profile) { ++ INIT_LIST_HEAD(&profile->list); ++ kref_init(&profile->count); ++ INIT_LIST_HEAD(&profile->task_contexts); ++ spin_lock_init(&profile->lock); ++ } ++ return profile; ++} ++ ++/** ++ * free_aa_profile - free a profile ++ * @profile: the profile to free ++ * ++ * Free a profile, its hats and null_profile. All references to the profile, ++ * its hats and null_profile must have been put. ++ * ++ * If the profile was referenced from a task context, free_aa_profile() will ++ * be called from an rcu callback routine, so we must not sleep here.
++ */ ++void free_aa_profile(struct aa_profile *profile) ++{ ++ AA_DEBUG("%s(%p)\n", __FUNCTION__, profile); ++ ++ if (!profile) ++ return; ++ ++ /* profile is still on profile namespace list -- invalid */ ++ if (!list_empty(&profile->list)) { ++ AA_ERROR("%s: internal error, " ++ "profile '%s' still on global list\n", ++ __FUNCTION__, ++ profile->name); ++ BUG(); ++ } ++ aa_put_namespace(profile->ns); ++ ++ aa_match_free(profile->file_rules); ++ ++ if (profile->name) { ++ AA_DEBUG("%s: %s\n", __FUNCTION__, profile->name); ++ kfree(profile->name); ++ } ++ ++ kfree(profile); ++} ++ ++/** ++ * aa_unconfine_tasks - remove tasks on a profile's task context list ++ * @profile: profile to remove tasks from ++ * ++ * Assumes that @profile lock is held. ++ */ ++void aa_unconfine_tasks(struct aa_profile *profile) ++{ ++ while (!list_empty(&profile->task_contexts)) { ++ struct task_struct *task = ++ list_entry(profile->task_contexts.next, ++ struct aa_task_context, list)->task; ++ task_lock(task); ++ aa_change_task_context(task, NULL, NULL, 0, NULL); /* presumably also unlinks the task from profile->task_contexts, otherwise this loop would not end -- confirm */ ++ task_unlock(task); ++ } ++} +diff -uprN e/security/apparmor/procattr.c f/security/apparmor/procattr.c +--- e/security/apparmor/procattr.c 1970-01-01 00:00:00.000000000 +0000 ++++ f/security/apparmor/procattr.c 2008-05-28 20:29:29.410207000 +0000 +@@ -0,0 +1,195 @@ ++/* ++ * Copyright (C) 1998-2007 Novell/SUSE ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation, version 2 of the ++ * License. ++ * ++ * AppArmor /proc/pid/attr handling ++ */ ++ ++#include "apparmor.h" ++#include "inline.h" ++ ++int aa_getprocattr(struct aa_profile *profile, char **string, unsigned *len) ++{ ++ char *str; ++ ++ if (profile) { ++ const char *mode_str = PROFILE_COMPLAIN(profile) ?
++ " (complain)" : " (enforce)"; ++ int mode_len, name_len, ns_len = 0; ++ ++ mode_len = strlen(mode_str); ++ name_len = strlen(profile->name); ++ if (profile->ns != default_namespace) ++ ns_len = strlen(profile->ns->name) + 2; /* +2 for the ':' on either side of the namespace name */ ++ *len = mode_len + ns_len + name_len + 1; /* +1 for the trailing newline; no NUL terminator is stored */ ++ str = kmalloc(*len, GFP_ATOMIC); ++ if (!str) ++ return -ENOMEM; ++ ++ if (ns_len) { ++ *str++ = ':'; ++ memcpy(str, profile->ns->name, ns_len - 2); ++ str += ns_len - 2; ++ *str++ = ':'; ++ } ++ memcpy(str, profile->name, name_len); ++ str += name_len; ++ memcpy(str, mode_str, mode_len); ++ str += mode_len; ++ *str++ = '\n'; ++ str -= *len; /* rewind to the start of the buffer */ ++ } else { ++ const char *unconfined_str = "unconfined\n"; ++ ++ *len = strlen(unconfined_str); ++ str = kmalloc(*len, GFP_ATOMIC); ++ if (!str) ++ return -ENOMEM; ++ ++ memcpy(str, unconfined_str, *len); ++ } ++ *string = str; /* buffer is returned through @string together with its length in @len */ ++ ++ return 0; ++} ++ ++static char *split_token_from_name(const char *op, char *args, u64 *cookie) ++{ ++ char *name; ++ ++ *cookie = simple_strtoull(args, &name, 16); /* input is a hex cookie, then '^', then an optional name */ ++ if ((name == args) || *name != '^') { ++ AA_ERROR("%s: Invalid input '%s'", op, args); ++ return ERR_PTR(-EINVAL); ++ } ++ ++ name++; /* skip ^ */ ++ if (!*name) ++ name = NULL; /* an empty name is reported as NULL */ ++ return name; ++} ++ ++int aa_setprocattr_changehat(char *args) ++{ ++ char *hat; ++ u64 cookie; ++ ++ hat = split_token_from_name("change_hat", args, &cookie); ++ if (IS_ERR(hat)) ++ return PTR_ERR(hat); ++ ++ if (!hat && !cookie) { ++ AA_ERROR("change_hat: Invalid input, NULL hat and NULL magic"); ++ return -EINVAL; ++ } ++ ++ AA_DEBUG("%s: Magic 0x%llx Hat '%s'\n", ++ __FUNCTION__, cookie, hat ?
hat : NULL); /* NOTE(review): the conditional is redundant; a NULL hat is still passed to %s */ ++ ++ return aa_change_hat(hat, cookie); ++} ++ ++int aa_setprocattr_changeprofile(char *args) ++{ ++ char *name = args, *ns_name = NULL; ++ ++ if (name[0] == ':') { /* ":NS:PROFILE" selects a namespace; otherwise the whole string is the profile name */ ++ char *split = strchr(&name[1], ':'); ++ if (split) { ++ *split = 0; ++ ns_name = &name[1]; ++ name = split + 1; ++ } ++ } ++ ++ return aa_change_profile(ns_name, name); ++} ++ ++int aa_setprocattr_setprofile(struct task_struct *task, char *args) ++{ ++ struct aa_profile *old_profile, *new_profile; ++ struct aa_namespace *ns; ++ struct aa_audit sa; ++ char *name, *ns_name = NULL; ++ ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = "profile_set"; ++ sa.gfp_mask = GFP_KERNEL; ++ sa.task = task->pid; ++ ++ AA_DEBUG("%s: current %d\n", ++ __FUNCTION__, current->pid); ++ ++ name = args; ++ if (args[0] != '/') { /* "NS:PROFILE" form (no leading ':'), only tried when the name does not start with '/' */ ++ char *split = strchr(args, ':'); ++ if (split) { ++ *split = 0; ++ ns_name = args; ++ name = split + 1; ++ } ++ } ++ if (ns_name) ++ ns = aa_find_namespace(ns_name); ++ else ++ ns = aa_get_namespace(default_namespace); ++ if (!ns) { ++ sa.name = ns_name; ++ sa.info = "unknown namespace"; ++ aa_audit_reject(NULL, &sa); ++ aa_put_namespace(ns); /* NOTE(review): ns is NULL on this path, so this put has nothing to release */ ++ return -EINVAL; ++ } ++ ++repeat: ++ if (strcmp(name, "unconfined") == 0) ++ new_profile = NULL; ++ else { ++ new_profile = aa_find_profile(ns, name); ++ if (!new_profile) { ++ sa.name = ns_name; ++ sa.name2 = name; ++ sa.info = "unknown profile"; ++ aa_audit_reject(NULL, &sa); ++ aa_put_namespace(ns); ++ return -EINVAL; ++ } ++ } ++ ++ old_profile = __aa_replace_profile(task, new_profile); ++ if (IS_ERR(old_profile)) { ++ int error; ++ ++ aa_put_profile(new_profile); ++ error = PTR_ERR(old_profile); ++ if (error == -ESTALE) ++ goto repeat; /* -ESTALE: look the profile up again and retry */ ++ aa_put_namespace(ns); ++ return error; ++ } ++ ++ if (new_profile) { ++ sa.name = ns_name; ++ sa.name2 = name; ++ sa.name3 = old_profile ?
old_profile->name : ++ "unconfined"; ++ aa_audit_status(NULL, &sa); ++ } else { ++ if (old_profile) { ++ sa.name = "unconfined"; ++ sa.name2 = old_profile->name; ++ aa_audit_status(NULL, &sa); ++ } else { ++ sa.info = "task is unconfined"; ++ aa_audit_status(NULL, &sa); ++ } ++ } ++ aa_put_namespace(ns); ++ aa_put_profile(old_profile); ++ aa_put_profile(new_profile); ++ return 0; ++} +diff -uprN e/security/commoncap.c f/security/commoncap.c +--- e/security/commoncap.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/security/commoncap.c 2008-05-28 20:29:29.410207000 +0000 +@@ -386,8 +386,8 @@ int cap_bprm_secureexec (struct linux_bi + current->egid != current->gid); + } + +-int cap_inode_setxattr(struct dentry *dentry, char *name, void *value, +- size_t size, int flags) ++int cap_inode_setxattr(struct dentry *dentry, struct vfsmount *mnt, char *name, ++ void *value, size_t size, int flags, struct file *file) + { + if (!strcmp(name, XATTR_NAME_CAPS)) { + if (!capable(CAP_SETFCAP)) +@@ -400,7 +400,8 @@ int cap_inode_setxattr(struct dentry *de + return 0; + } + +-int cap_inode_removexattr(struct dentry *dentry, char *name) ++int cap_inode_removexattr(struct dentry *dentry, struct vfsmount *mnt, ++ char *name, struct file *file) + { + if (!strcmp(name, XATTR_NAME_CAPS)) { + if (!capable(CAP_SETFCAP)) +diff -uprN e/security/dummy.c f/security/dummy.c +--- e/security/dummy.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/security/dummy.c 2008-05-28 20:29:29.410207000 +0000 +@@ -287,54 +287,60 @@ static int dummy_inode_init_security (st + } + + static int dummy_inode_create (struct inode *inode, struct dentry *dentry, +- int mask) ++ struct vfsmount *mnt, int mask) + { + return 0; + } + +-static int dummy_inode_link (struct dentry *old_dentry, struct inode *inode, +- struct dentry *new_dentry) ++static int dummy_inode_link (struct dentry *old_dentry, ++ struct vfsmount *old_mnt, struct inode *inode, ++ struct dentry *new_dentry, ++ struct vfsmount *new_mnt) + { + return 0; + } + 
+-static int dummy_inode_unlink (struct inode *inode, struct dentry *dentry) ++static int dummy_inode_unlink (struct inode *inode, struct dentry *dentry, ++ struct vfsmount *mnt) + { + return 0; + } + + static int dummy_inode_symlink (struct inode *inode, struct dentry *dentry, +- const char *name) ++ struct vfsmount *mnt, const char *name) + { + return 0; + } + + static int dummy_inode_mkdir (struct inode *inode, struct dentry *dentry, +- int mask) ++ struct vfsmount *mnt, int mask) + { + return 0; + } + +-static int dummy_inode_rmdir (struct inode *inode, struct dentry *dentry) ++static int dummy_inode_rmdir (struct inode *inode, struct dentry *dentry, ++ struct vfsmount *mnt) + { + return 0; + } + + static int dummy_inode_mknod (struct inode *inode, struct dentry *dentry, +- int mode, dev_t dev) ++ struct vfsmount *mnt, int mode, dev_t dev) + { + return 0; + } + + static int dummy_inode_rename (struct inode *old_inode, + struct dentry *old_dentry, ++ struct vfsmount *old_mnt, + struct inode *new_inode, +- struct dentry *new_dentry) ++ struct dentry *new_dentry, ++ struct vfsmount *new_mnt) + { + return 0; + } + +-static int dummy_inode_readlink (struct dentry *dentry) ++static int dummy_inode_readlink (struct dentry *dentry, struct vfsmount *mnt) + { + return 0; + } +@@ -350,7 +356,8 @@ static int dummy_inode_permission (struc + return 0; + } + +-static int dummy_inode_setattr (struct dentry *dentry, struct iattr *iattr) ++static int dummy_inode_setattr (struct dentry *dentry, struct vfsmount *mnt, ++ struct iattr *iattr) + { + return 0; + } +@@ -365,8 +372,9 @@ static void dummy_inode_delete (struct i + return; + } + +-static int dummy_inode_setxattr (struct dentry *dentry, char *name, void *value, +- size_t size, int flags) ++static int dummy_inode_setxattr (struct dentry *dentry, struct vfsmount *mnt, ++ char *name, void *value, size_t size, ++ int flags, struct file *file) + { + if (!strncmp(name, XATTR_SECURITY_PREFIX, + sizeof(XATTR_SECURITY_PREFIX) - 1) 
&& +@@ -375,22 +383,28 @@ static int dummy_inode_setxattr (struct + return 0; + } + +-static void dummy_inode_post_setxattr (struct dentry *dentry, char *name, void *value, ++static void dummy_inode_post_setxattr (struct dentry *dentry, ++ struct vfsmount *mnt, ++ char *name, void *value, + size_t size, int flags) + { + } + +-static int dummy_inode_getxattr (struct dentry *dentry, char *name) ++static int dummy_inode_getxattr (struct dentry *dentry, ++ struct vfsmount *mnt, char *name, ++ struct file *file) + { + return 0; + } + +-static int dummy_inode_listxattr (struct dentry *dentry) ++static int dummy_inode_listxattr (struct dentry *dentry, struct vfsmount *mnt, ++ struct file *file) + { + return 0; + } + +-static int dummy_inode_removexattr (struct dentry *dentry, char *name) ++static int dummy_inode_removexattr (struct dentry *dentry, struct vfsmount *mnt, ++ char *name, struct file *file) + { + if (!strncmp(name, XATTR_SECURITY_PREFIX, + sizeof(XATTR_SECURITY_PREFIX) - 1) && +diff -uprN e/security/security.c f/security/security.c +--- e/security/security.c 2008-04-17 02:49:44.000000000 +0000 ++++ f/security/security.c 2008-05-28 20:29:29.410207000 +0000 +@@ -352,72 +352,80 @@ int security_inode_init_security(struct + } + EXPORT_SYMBOL(security_inode_init_security); + +-int security_inode_create(struct inode *dir, struct dentry *dentry, int mode) ++int security_inode_create(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, int mode) + { + if (unlikely(IS_PRIVATE(dir))) + return 0; +- return security_ops->inode_create(dir, dentry, mode); ++ return security_ops->inode_create(dir, dentry, mnt, mode); + } + +-int security_inode_link(struct dentry *old_dentry, struct inode *dir, +- struct dentry *new_dentry) ++int security_inode_link(struct dentry *old_dentry, struct vfsmount *old_mnt, ++ struct inode *dir, struct dentry *new_dentry, ++ struct vfsmount *new_mnt) + { + if (unlikely(IS_PRIVATE(old_dentry->d_inode))) + return 0; +- return 
security_ops->inode_link(old_dentry, dir, new_dentry); ++ return security_ops->inode_link(old_dentry, old_mnt, dir, ++ new_dentry, new_mnt); + } + +-int security_inode_unlink(struct inode *dir, struct dentry *dentry) ++int security_inode_unlink(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt) + { + if (unlikely(IS_PRIVATE(dentry->d_inode))) + return 0; +- return security_ops->inode_unlink(dir, dentry); ++ return security_ops->inode_unlink(dir, dentry, mnt); + } + + int security_inode_symlink(struct inode *dir, struct dentry *dentry, +- const char *old_name) ++ struct vfsmount *mnt, const char *old_name) + { + if (unlikely(IS_PRIVATE(dir))) + return 0; +- return security_ops->inode_symlink(dir, dentry, old_name); ++ return security_ops->inode_symlink(dir, dentry, mnt, old_name); + } + +-int security_inode_mkdir(struct inode *dir, struct dentry *dentry, int mode) ++int security_inode_mkdir(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, int mode) + { + if (unlikely(IS_PRIVATE(dir))) + return 0; +- return security_ops->inode_mkdir(dir, dentry, mode); ++ return security_ops->inode_mkdir(dir, dentry, mnt, mode); + } + +-int security_inode_rmdir(struct inode *dir, struct dentry *dentry) ++int security_inode_rmdir(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt) + { + if (unlikely(IS_PRIVATE(dentry->d_inode))) + return 0; +- return security_ops->inode_rmdir(dir, dentry); ++ return security_ops->inode_rmdir(dir, dentry, mnt); + } + +-int security_inode_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) ++int security_inode_mknod(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, int mode, dev_t dev) + { + if (unlikely(IS_PRIVATE(dir))) + return 0; +- return security_ops->inode_mknod(dir, dentry, mode, dev); ++ return security_ops->inode_mknod(dir, dentry, mnt, mode, dev); + } + + int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, +- struct inode *new_dir, 
struct dentry *new_dentry) ++ struct vfsmount *old_mnt, struct inode *new_dir, ++ struct dentry *new_dentry, struct vfsmount *new_mnt) + { + if (unlikely(IS_PRIVATE(old_dentry->d_inode) || + (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode)))) + return 0; +- return security_ops->inode_rename(old_dir, old_dentry, +- new_dir, new_dentry); ++ return security_ops->inode_rename(old_dir, old_dentry, old_mnt, ++ new_dir, new_dentry, new_mnt); + } + +-int security_inode_readlink(struct dentry *dentry) ++int security_inode_readlink(struct dentry *dentry, struct vfsmount *mnt) + { + if (unlikely(IS_PRIVATE(dentry->d_inode))) + return 0; +- return security_ops->inode_readlink(dentry); ++ return security_ops->inode_readlink(dentry, mnt); + } + + int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd) +@@ -434,11 +442,12 @@ int security_inode_permission(struct ino + return security_ops->inode_permission(inode, mask, nd); + } + +-int security_inode_setattr(struct dentry *dentry, struct iattr *attr) ++int security_inode_setattr(struct dentry *dentry, struct vfsmount *mnt, ++ struct iattr *attr) + { + if (unlikely(IS_PRIVATE(dentry->d_inode))) + return 0; +- return security_ops->inode_setattr(dentry, attr); ++ return security_ops->inode_setattr(dentry, mnt, attr); + } + + int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry) +@@ -455,41 +464,48 @@ void security_inode_delete(struct inode + security_ops->inode_delete(inode); + } + +-int security_inode_setxattr(struct dentry *dentry, char *name, +- void *value, size_t size, int flags) ++int security_inode_setxattr(struct dentry *dentry, struct vfsmount *mnt, ++ char *name, void *value, size_t size, int flags, ++ struct file *file) + { + if (unlikely(IS_PRIVATE(dentry->d_inode))) + return 0; +- return security_ops->inode_setxattr(dentry, name, value, size, flags); ++ return security_ops->inode_setxattr(dentry, mnt, name, value, size, ++ flags, file); + } + +-void 
security_inode_post_setxattr(struct dentry *dentry, char *name, +- void *value, size_t size, int flags) ++void security_inode_post_setxattr(struct dentry *dentry, struct vfsmount *mnt, ++ char *name, void *value, size_t size, ++ int flags) + { + if (unlikely(IS_PRIVATE(dentry->d_inode))) + return; +- security_ops->inode_post_setxattr(dentry, name, value, size, flags); ++ security_ops->inode_post_setxattr(dentry, mnt, name, value, size, ++ flags); + } + +-int security_inode_getxattr(struct dentry *dentry, char *name) ++int security_inode_getxattr(struct dentry *dentry, struct vfsmount *mnt, ++ char *name, struct file *file) + { + if (unlikely(IS_PRIVATE(dentry->d_inode))) + return 0; +- return security_ops->inode_getxattr(dentry, name); ++ return security_ops->inode_getxattr(dentry, mnt, name, file); + } + +-int security_inode_listxattr(struct dentry *dentry) ++int security_inode_listxattr(struct dentry *dentry, struct vfsmount *mnt, ++ struct file *file) + { + if (unlikely(IS_PRIVATE(dentry->d_inode))) + return 0; +- return security_ops->inode_listxattr(dentry); ++ return security_ops->inode_listxattr(dentry, mnt, file); + } + +-int security_inode_removexattr(struct dentry *dentry, char *name) ++int security_inode_removexattr(struct dentry *dentry, struct vfsmount *mnt, ++ char *name, struct file *file) + { + if (unlikely(IS_PRIVATE(dentry->d_inode))) + return 0; +- return security_ops->inode_removexattr(dentry, name); ++ return security_ops->inode_removexattr(dentry, mnt, name, file); + } + + int security_inode_need_killpriv(struct dentry *dentry) +diff -uprN e/security/selinux/hooks.c f/security/selinux/hooks.c +--- e/security/selinux/hooks.c 2008-05-28 20:32:27.897940261 +0000 ++++ f/security/selinux/hooks.c 2008-05-28 20:29:29.410207000 +0000 +@@ -1712,40 +1712,15 @@ static int selinux_capable(struct task_s + + static int selinux_sysctl_get_sid(ctl_table *table, u16 tclass, u32 *sid) + { +- int buflen, rc; +- char *buffer, *path, *end; ++ char *buffer, *path; 
++ int rc = -ENOMEM; + +- rc = -ENOMEM; + buffer = (char*)__get_free_page(GFP_KERNEL); + if (!buffer) + goto out; +- +- buflen = PAGE_SIZE; +- end = buffer+buflen; +- *--end = '\0'; +- buflen--; +- path = end-1; +- *path = '/'; +- while (table) { +- const char *name = table->procname; +- size_t namelen = strlen(name); +- buflen -= namelen + 1; +- if (buflen < 0) +- goto out_free; +- end -= namelen; +- memcpy(end, name, namelen); +- *--end = '/'; +- path = end; +- table = table->parent; +- } +- buflen -= 4; +- if (buflen < 0) +- goto out_free; +- end -= 4; +- memcpy(end, "/sys", 4); +- path = end; +- rc = security_genfs_sid("proc", path, tclass, sid); +-out_free: ++ path = sysctl_pathname(table, buffer, PAGE_SIZE); ++ if (path) ++ rc = security_genfs_sid("proc", path, tclass, sid); + free_page((unsigned long)buffer); + out: + return rc; +@@ -2458,64 +2433,79 @@ static int selinux_inode_init_security(s + return 0; + } + +-static int selinux_inode_create(struct inode *dir, struct dentry *dentry, int mask) ++static int selinux_inode_create(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, int mask) + { + return may_create(dir, dentry, SECCLASS_FILE); + } + +-static int selinux_inode_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) ++static int selinux_inode_link(struct dentry *old_dentry, ++ struct vfsmount *old_mnt, ++ struct inode *dir, ++ struct dentry *new_dentry, ++ struct vfsmount *new_mnt) + { + int rc; + +- rc = secondary_ops->inode_link(old_dentry,dir,new_dentry); ++ rc = secondary_ops->inode_link(old_dentry, old_mnt, dir, new_dentry, ++ new_mnt); + if (rc) + return rc; + return may_link(dir, old_dentry, MAY_LINK); + } + +-static int selinux_inode_unlink(struct inode *dir, struct dentry *dentry) ++static int selinux_inode_unlink(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt) + { + int rc; + +- rc = secondary_ops->inode_unlink(dir, dentry); ++ rc = secondary_ops->inode_unlink(dir, dentry, mnt); + 
if (rc) + return rc; + return may_link(dir, dentry, MAY_UNLINK); + } + +-static int selinux_inode_symlink(struct inode *dir, struct dentry *dentry, const char *name) ++static int selinux_inode_symlink(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, const char *name) + { + return may_create(dir, dentry, SECCLASS_LNK_FILE); + } + +-static int selinux_inode_mkdir(struct inode *dir, struct dentry *dentry, int mask) ++static int selinux_inode_mkdir(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, int mask) + { + return may_create(dir, dentry, SECCLASS_DIR); + } + +-static int selinux_inode_rmdir(struct inode *dir, struct dentry *dentry) ++static int selinux_inode_rmdir(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt) + { + return may_link(dir, dentry, MAY_RMDIR); + } + +-static int selinux_inode_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) ++static int selinux_inode_mknod(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, int mode, dev_t dev) + { + int rc; + +- rc = secondary_ops->inode_mknod(dir, dentry, mode, dev); ++ rc = secondary_ops->inode_mknod(dir, dentry, mnt, mode, dev); + if (rc) + return rc; + + return may_create(dir, dentry, inode_mode_to_security_class(mode)); + } + +-static int selinux_inode_rename(struct inode *old_inode, struct dentry *old_dentry, +- struct inode *new_inode, struct dentry *new_dentry) ++static int selinux_inode_rename(struct inode *old_inode, ++ struct dentry *old_dentry, ++ struct vfsmount *old_mnt, ++ struct inode *new_inode, ++ struct dentry *new_dentry, ++ struct vfsmount *new_mnt) + { + return may_rename(old_inode, old_dentry, new_inode, new_dentry); + } + +-static int selinux_inode_readlink(struct dentry *dentry) ++static int selinux_inode_readlink(struct dentry *dentry, struct vfsmount *mnt) + { + return dentry_has_perm(current, NULL, dentry, FILE__READ); + } +@@ -2548,11 +2538,12 @@ static int selinux_inode_permission(stru + 
file_mask_to_av(inode->i_mode, mask), NULL); + } + +-static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr) ++static int selinux_inode_setattr(struct dentry *dentry, struct vfsmount *mnt, ++ struct iattr *iattr) + { + int rc; + +- rc = secondary_ops->inode_setattr(dentry, iattr); ++ rc = secondary_ops->inode_setattr(dentry, mnt, iattr); + if (rc) + return rc; + +@@ -2590,7 +2581,9 @@ static int selinux_inode_setotherxattr(s + return dentry_has_perm(current, NULL, dentry, FILE__SETATTR); + } + +-static int selinux_inode_setxattr(struct dentry *dentry, char *name, void *value, size_t size, int flags) ++static int selinux_inode_setxattr(struct dentry *dentry, struct vfsmount *mnt, ++ char *name, void *value, size_t size, ++ int flags, struct file *file) + { + struct task_security_struct *tsec = current->security; + struct inode *inode = dentry->d_inode; +@@ -2639,7 +2632,9 @@ static int selinux_inode_setxattr(struct + &ad); + } + +-static void selinux_inode_post_setxattr(struct dentry *dentry, char *name, ++static void selinux_inode_post_setxattr(struct dentry *dentry, ++ struct vfsmount *mnt, ++ char *name, + void *value, size_t size, int flags) + { + struct inode *inode = dentry->d_inode; +@@ -2663,17 +2658,21 @@ static void selinux_inode_post_setxattr( + return; + } + +-static int selinux_inode_getxattr (struct dentry *dentry, char *name) ++static int selinux_inode_getxattr (struct dentry *dentry, struct vfsmount *mnt, ++ char *name, struct file *file) + { + return dentry_has_perm(current, NULL, dentry, FILE__GETATTR); + } + +-static int selinux_inode_listxattr (struct dentry *dentry) ++static int selinux_inode_listxattr (struct dentry *dentry, struct vfsmount *mnt, ++ struct file *file) + { + return dentry_has_perm(current, NULL, dentry, FILE__GETATTR); + } + +-static int selinux_inode_removexattr (struct dentry *dentry, char *name) ++static int selinux_inode_removexattr (struct dentry *dentry, ++ struct vfsmount *mnt, char *name, ++ struct 
file *file) + { + if (strcmp(name, XATTR_NAME_SELINUX)) + return selinux_inode_setotherxattr(dentry, name); +--- s/fs/namei.c~ 2008-05-29 00:47:22.000000000 +0200 ++++ s/fs/namei.c 2008-05-29 00:53:55.814585231 +0200 +@@ -3022,8 +3022,8 @@ + new_path.dentry->d_name.len, + old_path.dentry->d_name.len, old_path.dentry->d_name.name, + old_path.dentry->d_name.len); +- ret = vfs_rename(dir_nd.path.dentry->d_inode, new_path.dentry, +- old_nd.path.dentry->d_parent->d_inode, old_path.dentry); ++ ret = vfs_rename(dir_nd.path.dentry->d_inode, new_path.dentry, new_path.mnt, ++ old_nd.path.dentry->d_parent->d_inode, old_path.dentry, old_path.mnt); + vxdprintk(VXD_CBIT(misc, 2), "vfs_rename: %d", ret); + res = new_path.dentry; + +--- s/fs/namei.c~ 2008-05-29 00:59:12.000000000 +0200 ++++ s/fs/namei.c 2008-05-29 00:59:40.790875766 +0200 +@@ -3048,7 +3048,7 @@ + goto out_redo; + + /* error path cleanup */ +- vfs_unlink(dir->d_inode, new_path.dentry, &dir_nd); ++ vfs_unlink(dir->d_inode, new_path.dentry, &dir_nd, new_path.mnt); + dput(new_path.dentry); + + out_redo: +--- s/include/linux/xattr.h~ 2008-05-29 01:40:01.000000000 +0200 ++++ s/include/linux/xattr.h 2008-05-29 01:40:06.624153911 +0200 +@@ -16,6 +16,7 @@ + #ifdef __KERNEL__ + + #include ++#include + + /* Namespaces */ + #define XATTR_OS2_PREFIX "os2." 
+--- + security/apparmor/main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/security/apparmor/main.c ++++ b/security/apparmor/main.c +@@ -503,10 +503,10 @@ static char *aa_get_name(struct dentry * + *buffer = buf; + return name; + } ++ kfree(buf); + if (PTR_ERR(name) != -ENAMETOOLONG) + return name; + +- kfree(buf); + size <<= 1; + if (size > apparmor_path_max) + return ERR_PTR(-ENAMETOOLONG); diff --git a/kernel-atm-vbr.patch b/kernel-atm-vbr.patch new file mode 100644 index 00000000..ca4b665c --- /dev/null +++ b/kernel-atm-vbr.patch @@ -0,0 +1,162 @@ +Index: linux/include/linux/atm.h +=================================================================== +RCS file: /afs/cmf/project/cvsroot/linux/include/linux/atm.h,v +retrieving revision 1.2 +diff -u -r1.2 atm.h +--- linux/include/linux/atm.h 12 Feb 2003 20:56:33 -0000 1.2 ++++ linux/include/linux/atm.h 9 Apr 2003 12:08:38 -0000 +@@ -72,7 +72,7 @@ + /* connection identifier range; socket must be + bound or connected */ + #define SO_ATMQOS __SO_ENCODE(SOL_ATM,2,struct atm_qos) +- /* Quality of Service setting */ ++ /* Quality of Service setting (with vbr support) */ + #define SO_ATMSAP __SO_ENCODE(SOL_ATM,3,struct atm_sap) + /* Service Access Point */ + #define SO_ATMPVC __SO_ENCODE(SOL_ATM,4,struct sockaddr_atmpvc) +@@ -127,9 +127,11 @@ + #define ATM_NONE 0 /* no traffic */ + #define ATM_UBR 1 + #define ATM_CBR 2 +-#define ATM_VBR 3 ++#define ATM_VBR_NRT 3 ++#define ATM_VBR ATM_VBR_NRT /* for backward compatibility */ + #define ATM_ABR 4 + #define ATM_ANYCLASS 5 /* compatible with everything */ ++#define ATM_VBR_RT 6 + + #define ATM_MAX_PCR -1 /* maximum available PCR */ + +@@ -140,6 +142,11 @@ + int min_pcr; /* minimum PCR in cells per second */ + int max_cdv; /* maximum CDV in microseconds */ + int max_sdu; /* maximum SDU in bytes */ ++ ++ /* extra params for VBR */ ++ int scr; /* sustained rate in cells per second */ ++ int mbs; /* maximum burst size (MBS) in cells */ ++ + /* extra params for 
ABR */ + unsigned int icr; /* Initial Cell Rate (24-bit) */ + unsigned int tbe; /* Transient Buffer Exposure (24-bit) */ +@@ -243,4 +251,37 @@ + }; + + typedef unsigned short atm_backend_t; ++struct atm_trafprm_compat { ++ unsigned char traffic_class; /* traffic class (ATM_UBR, ...) */ ++ int max_pcr; /* maximum PCR in cells per second */ ++ int pcr; /* desired PCR in cells per second */ ++ int min_pcr; /* minimum PCR in cells per second */ ++ int max_cdv; /* maximum CDV in microseconds */ ++ int max_sdu; /* maximum SDU in bytes */ ++ /* extra params for ABR */ ++ unsigned int icr; /* Initial Cell Rate (24-bit) */ ++ unsigned int tbe; /* Transient Buffer Exposure (24-bit) */ ++ unsigned int frtt : 24; /* Fixed Round Trip Time (24-bit) */ ++ unsigned int rif : 4; /* Rate Increment Factor (4-bit) */ ++ unsigned int rdf : 4; /* Rate Decrease Factor (4-bit) */ ++ unsigned int nrm_pres :1; /* nrm present bit */ ++ unsigned int trm_pres :1; /* rm present bit */ ++ unsigned int adtf_pres :1; /* adtf present bit */ ++ unsigned int cdf_pres :1; /* cdf present bit*/ ++ unsigned int nrm :3; /* Max # of Cells for each forward RM cell (3-bit) */ ++ unsigned int trm :3; /* Time between forward RM cells (3-bit) */ ++ unsigned int adtf :10; /* ACR Decrease Time Factor (10-bit) */ ++ unsigned int cdf :3; /* Cutoff Decrease Factor (3-bit) */ ++ unsigned int spare :9; /* spare bits */ ++}; ++ ++struct atm_qos_compat { ++ struct atm_trafprm_compat txtp; /* parameters in TX direction */ ++ struct atm_trafprm_compat rxtp __ATM_API_ALIGN; ++ /* parameters in RX direction */ ++ unsigned char aal __ATM_API_ALIGN; ++}; ++ ++#define SO_ATMQOS_COMPAT __SO_ENCODE(SOL_ATM,2,struct atm_qos_compat) ++ /* Quality of Service setting (no vbr support) */ + #endif +Index: linux/net/atm/common.c +=================================================================== +RCS file: /afs/cmf/project/cvsroot/linux/net/atm/common.c,v +retrieving revision 1.13 +diff -u -r1.13 common.c +--- linux/net/atm/common.c 
17 Mar 2003 16:13:12 -0000 1.13 ++++ linux/net/atm/common.c 9 Apr 2003 12:10:28 -0000 +@@ -1085,6 +1085,43 @@ + + vcc = ATM_SD(sock); + switch (optname) { ++ case SO_ATMQOS_COMPAT: ++ { ++ struct atm_qos_compat qos_compat; ++ struct atm_qos qos; ++ ++ if (copy_from_user(&qos_compat,optval,sizeof(qos_compat))) ++ return -EFAULT; ++ ++ /* convert old atm_qos to new atm_qos */ ++ qos.aal = qos_compat.aal; ++ qos.rxtp.traffic_class = qos_compat.rxtp.traffic_class; ++ qos.rxtp.max_pcr = qos_compat.rxtp.max_pcr; ++ qos.rxtp.pcr = qos_compat.rxtp.pcr; ++ qos.rxtp.min_pcr = qos_compat.rxtp.min_pcr; ++ qos.rxtp.max_cdv = qos_compat.rxtp.max_cdv; ++ qos.rxtp.max_sdu = qos_compat.rxtp.max_sdu; ++ qos.rxtp.scr = 0; ++ qos.rxtp.mbs = 0; ++ qos.txtp.traffic_class = qos_compat.txtp.traffic_class; ++ qos.txtp.max_pcr = qos_compat.txtp.max_pcr; ++ qos.txtp.pcr = qos_compat.txtp.pcr; ++ qos.txtp.min_pcr = qos_compat.txtp.min_pcr; ++ qos.txtp.max_cdv = qos_compat.txtp.max_cdv; ++ qos.txtp.max_sdu = qos_compat.txtp.max_sdu; ++ qos.txtp.scr = 0; ++ qos.txtp.mbs = 0; ++ ++ error = check_qos(&qos); ++ if (error) return error; ++ if (sock->state == SS_CONNECTED) ++ return atm_change_qos(vcc,&qos); ++ if (sock->state != SS_UNCONNECTED) ++ return -EBADFD; ++ vcc->qos = qos; ++ set_bit(ATM_VF_HASQOS,&vcc->flags); ++ return 0; ++ } + case SO_ATMQOS: + { + struct atm_qos qos; +@@ -1132,6 +1169,31 @@ + + vcc = ATM_SD(sock); + switch (optname) { ++ case SO_ATMQOS_COMPAT: ++ { ++ struct atm_qos_compat qos_compat; ++ ++ if (!test_bit(ATM_VF_HASQOS,&vcc->flags)) ++ return -EINVAL; ++ ++ /* convert new atm_qos to old atm_qos */ ++ qos_compat.aal = vcc->qos.aal; ++ qos_compat.rxtp.traffic_class = vcc->qos.rxtp.traffic_class; ++ qos_compat.rxtp.max_pcr = vcc->qos.rxtp.max_pcr; ++ qos_compat.rxtp.pcr = vcc->qos.rxtp.pcr; ++ qos_compat.rxtp.min_pcr = vcc->qos.rxtp.min_pcr; ++ qos_compat.rxtp.max_cdv = vcc->qos.rxtp.max_cdv; ++ qos_compat.rxtp.max_sdu = vcc->qos.rxtp.max_sdu; ++ 
qos_compat.txtp.traffic_class = vcc->qos.txtp.traffic_class; ++ qos_compat.txtp.max_pcr = vcc->qos.txtp.max_pcr; ++ qos_compat.txtp.pcr = vcc->qos.txtp.pcr; ++ qos_compat.txtp.min_pcr = vcc->qos.txtp.min_pcr; ++ qos_compat.txtp.max_cdv = vcc->qos.txtp.max_cdv; ++ qos_compat.txtp.max_sdu = vcc->qos.txtp.max_sdu; ++ ++ return copy_to_user(optval,&qos_compat,sizeof(qos_compat)) ? ++ -EFAULT : 0; ++ } + case SO_ATMQOS: + if (!test_bit(ATM_VF_HASQOS,&vcc->flags)) + return -EINVAL; diff --git a/kernel-atmdd.patch b/kernel-atmdd.patch new file mode 100644 index 00000000..be2cb534 --- /dev/null +++ b/kernel-atmdd.patch @@ -0,0 +1,954 @@ +diff -urN linux-2.4.25/drivers/atm/Makefile linux-2.4.25-atmdd/drivers/atm/Makefile +--- linux-2.4.25/drivers/atm/Makefile 2004-02-23 15:18:29.000000000 +0100 ++++ linux-2.4.25-atmdd/drivers/atm/Makefile 2004-02-29 22:51:26.000000000 +0100 +@@ -31,6 +31,7 @@ + endif + + obj-$(CONFIG_ATM_DUMMY) += adummy.o ++obj-$(CONFIG_ATM_DD) += atmdd.o + obj-$(CONFIG_ATM_TCP) += atmtcp.o + obj-$(CONFIG_ATM_FIRESTREAM) += firestream.o + obj-$(CONFIG_ATM_LANAI) += lanai.o +diff -urN linux-2.4.25/drivers/atm/Kconfig linux-2.4.25-atmdd/drivers/atm/Kconfig +--- linux-2.4.25/drivers/atm/Kconfig 2003-08-25 13:44:41.000000000 +0200 ++++ linux-2.4.25-atmdd/drivers/atm/Kconfig 2004-02-29 22:52:59.000000000 +0100 +@@ -4,6 +4,14 @@ + default y + + if ATM_DRIVERS && NETDEVICES && ATM ++ ++config ATM_DD ++ tristate "ATM loopback" ++ depends on INET && ATM ++ help ++ This is an example atm driver. It does not require any actual ATM ++ hardware. It supports AAL5 and AAL0. Frames are merely looped back ++ to the sender on the same VC they were sent.
+ + config ATM_DUMMY + tristate "Dummy ATM driver" +diff -urN linux-2.4.25/drivers/atm/atmdd.c linux-2.4.25-atmdd/drivers/atm/atmdd.c +--- linux-2.4.25/drivers/atm/atmdd.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.4.25-atmdd/drivers/atm/atmdd.c 2004-02-29 22:58:11.000000000 +0100 +@@ -0,0 +1,921 @@ ++/* ++####################################################################### ++# ++# (C) Copyright 2001 ++# Alex Zeffertt, Cambridge Broadband Ltd, ajz@cambridgebroadband.com ++# ++# This program is free software; you can redistribute it and/or ++# modify it under the terms of the GNU General Public License as ++# published by the Free Software Foundation; either version 2 of ++# the License, or (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, write to the Free Software ++# Foundation, Inc., 59 Temple Place, Suite 330, Boston, ++# MA 02111-1307 USA ++####################################################################### ++# Notes: ++# ++# This is an example atm driver. It does not require any actual ATM ++# hardware. It supports AAL5 and AAL0. frames are merely looped back ++# to the sender on the same VC they were sent. ++# ++####################################################################### ++*/ ++ ++/*############ Includes ###############################################*/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include /* for xtime */ ++ ++/*############ Defines ################################################*/ ++ ++#define MYATMDD "atmdd" ++#define KLOG_PREAMBLE MYATMDD ": " ++#define MYATMDD_VPI_BITS 1 /* Allow ?.1.? but not ?.2.? 
*/ ++#define MYATMDD_VCI_BITS 11 /* Allow ?.?.2047 but not ?.?.2048 */ ++#define MYATMDD_PCR 100000 ++#define RXQ_SZ 16 ++#define TXQ_SZ 16 ++#define AAL5_MTU (1510+8) /* Default AAL5 Maximum Transmission Unit (and length of AAL5 buffers) */ ++#define AAL5_BUFLEN (((AAL5_MTU + 47)/48)*48) /* Round up to n*48 bytes */ ++#if 0 ++# define DEBUG(format,args...) printk(format,##args) ++#else ++# define DEBUG(format,args...) ++#endif ++/*############ Types ##################################################*/ ++ ++/* status flags shared between s/w and emulated h/w */ ++typedef enum { ++ RX_EMPTY, /* No sk_buff present */ ++ RX_FULL, /* sk_buff present and awaiting data */ ++ RX_RECVD, /* sk_buff present and contains valid data */ ++} myatmdd_rxstatus_e; ++ ++/* status flags shared between s/w and emulated h/w */ ++typedef enum { ++ TX_EMPTY, /* No sk_buff present */ ++ TX_FULL, /* sk_buff present and awaiting transmission */ ++ TX_SENT, /* sk_buff present and has been sent */ ++} myatmdd_txstatus_e; ++ ++typedef struct { ++ struct sk_buff **start; ++ struct sk_buff **end; ++ struct sk_buff **head; ++ struct sk_buff **tail; ++ ++ /* everything below this line emulates h/w */ ++ myatmdd_rxstatus_e *status; ++ struct sk_buff **hw_ptr; ++ int *pkt_len; ++ ++} myatmdd_rxq_t; ++ ++typedef struct { ++ struct sk_buff **start; ++ struct sk_buff **end; ++ struct sk_buff **head; ++ struct sk_buff **tail; ++ ++ /* everything below this line emulates h/w */ ++ myatmdd_txstatus_e *status; ++ struct sk_buff **hw_ptr; ++ int *pkt_len; ++ ++} myatmdd_txq_t; ++ ++typedef struct { ++} myatmdd_devdata_t; ++ ++typedef struct { ++ myatmdd_rxq_t rxqueue; ++ myatmdd_txq_t txqueue; ++} myatmdd_vccdata_t; ++ ++/*############ Module parameters ######################################*/ ++ ++MODULE_AUTHOR("Alex Zeffertt, ajz@cambridgebroadband.com"); ++MODULE_DESCRIPTION("Example ATM device driver (loopback)"); ++#ifdef MODULE_LICENSE ++MODULE_LICENSE("GPL"); ++#endif ++/*#################### Forward
declarations ###########################*/ ++ ++static void myatmdd_emulate_loopback_hardware(struct atm_vcc *vcc); ++ ++static void myatmdd_free_tx_skb(struct sk_buff *skb); ++ ++/* these functions will need modifying in a real ATM driver */ ++static void myatmdd_rx_interrupt(struct atm_vcc *vcc); ++static void myatmdd_tx_interrupt(struct atm_vcc *vcc); ++ ++/* functions for manipulating circular bufs */ ++static int myatmdd_init_rxq(myatmdd_rxq_t *queue, int size); ++static int myatmdd_init_txq(myatmdd_txq_t *queue, int size); ++static int myatmdd_release_rxq(myatmdd_rxq_t *queue); ++static int myatmdd_release_txq(myatmdd_txq_t *queue); ++static int myatmdd_txq_enqueue(myatmdd_txq_t *queue, struct sk_buff *skb); ++static int myatmdd_rxq_enqueue(myatmdd_rxq_t *queue, struct sk_buff *skb /* empty buffer */); ++static struct sk_buff *myatmdd_txq_dequeue(myatmdd_txq_t *queue); ++static struct sk_buff *myatmdd_rxq_dequeue(myatmdd_rxq_t *queue, int *pkt_len); ++ ++/* myatmdd_ops registered by ATM device */ ++static int myatmdd_open(struct atm_vcc *vcc); ++static void myatmdd_close(struct atm_vcc *vcc); ++static int myatmdd_ioctl(struct atm_dev *dev, unsigned int cmd,void *arg); ++static int myatmdd_setsockopt(struct atm_vcc *vcc,int level,int optname, void *optval,int optlen); ++static int myatmdd_getsockopt(struct atm_vcc *vcc,int level,int optname, void *optval,int optlen); ++static int myatmdd_send(struct atm_vcc *vcc,struct sk_buff *skb); ++static int myatmdd_change_qos(struct atm_vcc *vcc,struct atm_qos *qos,int flgs); ++static int myatmdd_proc_read(struct atm_dev *dev,loff_t *pos,char *page); ++ ++/* myatmdd_phy_ops registered by phy driver */ ++static void myatmdd_phy_int(struct atm_dev *dev); ++static int myatmdd_phy_start(struct atm_dev *dev); /* <-- This is the only thing exported by PHY driver */ ++static int myatmdd_phy_ioctl(struct atm_dev *dev,unsigned int cmd,void *arg); ++ ++/*#################### Global scope variables #########################*/ ++ 
++/* operations registered by the atm device */ ++static const struct atmdev_ops myatmdd_ops = ++{ ++ open: myatmdd_open, ++ close: myatmdd_close, ++ ioctl: myatmdd_ioctl, ++ getsockopt: myatmdd_getsockopt, ++ setsockopt: myatmdd_setsockopt, ++ send: myatmdd_send, ++ change_qos: myatmdd_change_qos, ++ proc_read: myatmdd_proc_read, ++ owner: THIS_MODULE, ++}; ++ ++/* operations registered by the phy driver */ ++static const struct atmphy_ops myatmdd_phy_ops = { ++ start: myatmdd_phy_start, ++ ioctl: myatmdd_phy_ioctl, ++ interrupt: myatmdd_phy_int, ++}; ++ ++struct atm_dev *myatmdd_dev; ++ ++/*#################### Function definitions ###########################*/ ++ ++ ++/* ++######################################################### ++# ++# Function : myatmdd_rx_interrupt, and myatmdd_tx_interrupt ++# ++# Purpose : handle interrupt from hardware. In first ++# case this means extract recvd buffers and pass ++# it up protocol stack. In 2nd case this means ++# free the sent buffers. ++# ++# Args : pointer to private data of the VCC concerned ++# ++# Returns : nowt ++# ++# Notes : ++# ++########################################################## ++# Edit history: ++# Who When What ++# AJZ 10Apr03 Created ++########################################################## ++*/ ++ ++static void myatmdd_rx_interrupt(struct atm_vcc *vcc) ++{ ++ struct sk_buff *skb; ++ myatmdd_vccdata_t *priv = vcc->dev_data; ++ int pkt_len; ++ ++ DEBUG("%s\n", __FUNCTION__); ++ ++ while ((skb = myatmdd_rxq_dequeue(&priv->rxqueue, &pkt_len))) ++ { ++ struct sk_buff *newskb; ++ struct timeval stamp; ++ ++ /* Get a new skb to replace the one just consumed */ ++ if (!(newskb = dev_alloc_skb(AAL5_BUFLEN))) ++ { ++ atomic_inc(&vcc->stats->rx_err); ++ printk(KERN_ERR KLOG_PREAMBLE "cannot receive packet - out of memory\n"); ++ /* put skb back in rx queue) */ ++ myatmdd_rxq_enqueue(&priv->rxqueue, skb); ++ return; ++ } ++ myatmdd_rxq_enqueue(&priv->rxqueue, newskb); ++ ++ if (!atm_charge (vcc, 
skb->truesize)) ++ { ++ /* Exceeded memory quota for this vcc ++ * NOTE: if atm_charge succeeds you must then push or accounting will screw up ++ */ ++ dev_kfree_skb(skb); ++ /* &vcc->stats->drop stats incremented in atm_charge */ ++ } ++ else ++ { ++ /* sk_buff passed all sanity checks! */ ++ ++ /* Add received length to socket buffer */ ++ skb_put(skb, pkt_len); ++ ++ /* update device stats */ ++ atomic_inc(&vcc->stats->rx); ++ ++ /* add timestamp for upper layers to use */ ++ do_gettimeofday(&stamp); ++ skb->tstamp = timeval_to_ktime(stamp); ++ ++ /* Point socket buffer at the right VCC before giving to socket layer */ ++ ATM_SKB(skb)->vcc = vcc; ++ ++ /* push socket buffer up to ATM layer */ ++ vcc->push(vcc, skb); ++ } ++ } ++} ++ ++static void myatmdd_tx_interrupt(struct atm_vcc *vcc) ++{ ++ struct sk_buff *skb; ++ myatmdd_vccdata_t *priv = vcc->dev_data; ++ ++ DEBUG("%s\n", __FUNCTION__); ++ ++ while ((skb = myatmdd_txq_dequeue(&priv->txqueue))) ++ { ++ // Update channel stats and free the memory ++ atomic_inc(&vcc->stats->tx); ++ myatmdd_free_tx_skb(skb); ++ } ++} ++ ++/* ++######################################################### ++# ++# Function : myatmdd_emulate_loopback_hardware ++# ++# Purpose : emulate things normally done by hardware ++# i.e. copying tx bufs to rx bufs (we're modelling ++# a loopback system here), calling the tx done ++# interrupt, and calling the rx done interrupt. 
++# ++# Args : vcc = VCC whose private tx/rx queues are looped back ++# ++# Returns : nowt ++# ++# Notes : ++# ++########################################################## ++# Edit history: ++# Who When What ++# AJZ 10Apr03 Created ++########################################################## ++*/ ++static void myatmdd_emulate_loopback_hardware(struct atm_vcc *vcc) ++{ ++ myatmdd_vccdata_t *priv = vcc->dev_data; ++ struct sk_buff **ptxskb; ++ struct sk_buff **prxskb; ++ ++ DEBUG("%s\n", __FUNCTION__); ++ ++ ptxskb = priv->txqueue.hw_ptr; ++ prxskb = priv->rxqueue.hw_ptr; ++ ++ /* Send each tx buff waiting to go */ ++ while (priv->txqueue.status[ptxskb - priv->txqueue.start] == TX_FULL) ++ { ++ struct sk_buff *txskb = *ptxskb; ++ struct sk_buff *rxskb = *prxskb; ++ int pkt_len = priv->txqueue.pkt_len[ptxskb - priv->txqueue.start]; ++ ++ /* Is there an rx buffer? */ ++ if (priv->rxqueue.status[prxskb - priv->rxqueue.start] == RX_FULL) ++ { ++ /* Yes - Is the length in range? */ ++ if (pkt_len <= AAL5_BUFLEN) ++ { ++ /* Yes - do the copy */ ++ memcpy(rxskb->data, txskb->data,pkt_len); ++ priv->rxqueue.pkt_len[prxskb - priv->rxqueue.start] = pkt_len; ++ ++ /* Indicate rx buffer recvd */ ++ priv->rxqueue.status[prxskb - priv->rxqueue.start] = RX_RECVD; ++ ++ /* increment and maybe wrap rx pointer */ ++ if (++prxskb == priv->rxqueue.end) ++ prxskb = priv->rxqueue.start; ++ priv->rxqueue.hw_ptr = prxskb; ++ } ++ else ++ { ++ /* No - then h/w cannot do a recv */ ++ printk(KERN_ERR KLOG_PREAMBLE "recvd frame too long - discarded\n"); ++ } ++ } ++ else ++ { ++ /* No - then h/w cannot do a recv */ ++ printk(KERN_ERR KLOG_PREAMBLE "no rx buffers available\n"); ++ } ++ ++ /* Indicate tx buffer sent */ ++ priv->txqueue.status[ptxskb - priv->txqueue.start] = TX_SENT; ++ ++ /* increment and maybe wrap tx pointer */ ++ if (++ptxskb == priv->txqueue.end) ++ ptxskb = priv->txqueue.start; ++ priv->txqueue.hw_ptr = ptxskb; ++ ++ /* Call tx ring interrupt handler */ ++ myatmdd_tx_interrupt(vcc); ++ ++ /* Call rx
ring interrupt handler */ ++ myatmdd_rx_interrupt(vcc); ++ } ++} ++ ++/* ++######################################################### ++# ++# Function : functions for manipulating circular buffs ++# ++# Purpose : ++# ++# Args : ++# ++# Returns : ++# ++# Notes : ++# ++########################################################## ++# Edit history: ++# Who When What ++# AJZ 10Apr03 Created ++########################################################## ++*/ ++ ++static int myatmdd_init_rxq(myatmdd_rxq_t *queue, int size) ++{ ++ /* TODO - cope with kmalloc failure */ ++ struct sk_buff **pskb; ++ int i; ++ ++ DEBUG("%s\n", __FUNCTION__); ++ queue->hw_ptr = queue->head = queue->tail = ++ queue->start = kmalloc(size * sizeof(struct sk_buff *), GFP_KERNEL); ++ queue->end = &queue->start[size]; ++ for (pskb = queue->start; pskb < queue->end; pskb++) ++ *pskb = NULL; ++ ++ queue->status = kmalloc(size * sizeof(myatmdd_rxstatus_e),GFP_KERNEL); ++ for (i = 0; i < size; i++) ++ queue->status[i] = RX_EMPTY; ++ ++ queue->pkt_len = kmalloc(size * sizeof(int),GFP_KERNEL); ++ for (i = 0; i < size; i++) ++ queue->pkt_len[i] = 0; ++ ++ return 0; ++} ++ ++static int myatmdd_init_txq(myatmdd_txq_t *queue, int size) ++{ ++ /* TODO - cope with kmalloc failure */ ++ struct sk_buff **pskb; ++ int i; ++ ++ DEBUG("%s\n", __FUNCTION__); ++ queue->hw_ptr = queue->head = queue->tail = ++ queue->start = kmalloc(size * sizeof(struct sk_buff *), GFP_KERNEL); ++ queue->end = &queue->start[size]; ++ for (pskb = queue->start; pskb < queue->end; pskb++) ++ *pskb = NULL; ++ ++ queue->status = kmalloc(size * sizeof(myatmdd_rxstatus_e),GFP_KERNEL); ++ for (i = 0; i < size; i++) ++ queue->status[i] = TX_EMPTY; ++ ++ queue->pkt_len = kmalloc(size * sizeof(int),GFP_KERNEL); ++ for (i = 0; i < size; i++) ++ queue->pkt_len[i] = 0; ++ ++ return 0; ++} ++ ++static int myatmdd_release_rxq(myatmdd_rxq_t *queue) ++{ ++ struct sk_buff **pskb; ++ ++ DEBUG("%s\n", __FUNCTION__); ++ for (pskb = queue->start; pskb < 
queue->end; pskb++) ++ { ++ /* Is there an skb here */ ++ if (*pskb == NULL) ++ continue; /* No, so skip this entry in ring */ ++ ++ /* Yes - free it */ ++ dev_kfree_skb(*pskb); ++ } ++ kfree(queue->start); ++ kfree(queue->status); ++ kfree(queue->pkt_len); ++ ++ return 0; ++} ++ ++static int myatmdd_release_txq(myatmdd_txq_t *queue) ++{ ++ struct sk_buff **pskb; ++ ++ DEBUG("%s\n", __FUNCTION__); ++ /* Scan through all TX bd's and cleanup */ ++ for (pskb = queue->start; pskb < queue->end; pskb++) ++ { ++ /* Is this buffer currently unused - i.e. no skb */ ++ if (*pskb == NULL) ++ continue; /* Yes, so ignore it */ ++ ++ /* If we reach here, we have found a socket buffer that ++ * exists in the TX ring and is waiting to be released. ++ */ ++ printk(KERN_WARNING KLOG_PREAMBLE "discarding unsent tx sk_buff\n"); ++ atomic_inc(&ATM_SKB(*pskb)->vcc->stats->tx_err); ++ myatmdd_free_tx_skb(*pskb); ++ } ++ kfree(queue->start); ++ kfree(queue->status); ++ kfree(queue->pkt_len); ++ ++ return 0; ++} ++ ++/* returns non-zero for "out of space" */ ++static int myatmdd_txq_enqueue(myatmdd_txq_t *queue, struct sk_buff *skb) ++{ ++ /* increment head and wrap */ ++ struct sk_buff **newhead = queue->head + 1; ++ if (newhead == queue->end) ++ newhead = queue->start; ++ ++ DEBUG("%s\n", __FUNCTION__); ++ ++ /* abort if tx ring full */ ++ if (newhead == queue->tail) ++ return -1; ++ ++ /* all is okay if we're here */ ++ *queue->head = skb; ++ /* Tell hardware there's a buffer to send */ ++ queue->status[queue->head - queue->start] = TX_FULL; ++ queue->pkt_len[queue->head - queue->start] = skb->len; ++ queue->head = newhead; ++ return 0; ++} ++ ++/* returns non-zero for "out of space" */ ++static int myatmdd_rxq_enqueue(myatmdd_rxq_t *queue, struct sk_buff *skb /* empty buffer */) ++{ ++ /* increment head and wrap */ ++ struct sk_buff **newhead = queue->head + 1; ++ if (newhead == queue->end) ++ newhead = queue->start; ++ ++ DEBUG("%s\n", __FUNCTION__); ++ ++ /* abort if rx ring full */ 
++ if (newhead == queue->tail) ++ return -1; ++ ++ /* all is okay if we're here */ ++ *queue->head = skb; ++ /* Tell hardware there's an empty buffer to receive into */ ++ queue->status[queue->head - queue->start] = RX_FULL; ++ queue->head = newhead; ++ return 0; ++} ++ ++static struct sk_buff *myatmdd_txq_dequeue(myatmdd_txq_t *queue) ++{ ++ DEBUG("%s\n", __FUNCTION__); ++ if (queue->tail != queue->head && queue->status[queue->tail - queue->start] == TX_SENT) ++ { ++ struct sk_buff *skb = *queue->tail; ++ ++ /* increment tail and wrap */ ++ struct sk_buff **newtail = queue->tail + 1; ++ if (newtail == queue->end) ++ newtail = queue->start; ++ *queue->tail = NULL; ++ queue->status[queue->tail - queue->start] = TX_EMPTY; ++ queue->tail = newtail; ++ return skb; ++ } ++ return NULL; ++} ++ ++/* returns NULL for "no new recvd frames" */ ++static struct sk_buff *myatmdd_rxq_dequeue(myatmdd_rxq_t *queue, int *pkt_len) ++{ ++ DEBUG("%s\n", __FUNCTION__); ++ if (queue->tail != queue->head && queue->status[queue->tail - queue->start] == RX_RECVD) ++ { ++ struct sk_buff *skb = *queue->tail; ++ ++ /* increment tail and wrap */ ++ struct sk_buff **newtail = queue->tail + 1; ++ if (newtail == queue->end) ++ newtail = queue->start; ++ *queue->tail = NULL; ++ queue->status[queue->tail - queue->start] = RX_EMPTY; ++ *pkt_len = queue->pkt_len[queue->tail - queue->start]; ++ queue->tail = newtail; ++ return skb; ++ } ++ return NULL; ++} ++ ++/* ++######################################################### ++# ++# Functions : Implementations of function ptrs in ++# myatmdd_phy_ops. This is the phy driver ++# start: myatmdd_phy_start, ++# ioctl: myatmdd_phy_ioctl, ++# interrupt: myatmdd_phy_int, ++# ++# Purpose : See ATM device driver interface v0.1 ++# ++# Notes : Conforming to Linux ATM device driver i/f ++# interface.
Draft version 0.1 ++# ++# Designed to work with multiple devices ++########################################################## ++# Edit history: ++# Who When What ++# AJZ 10Apr03 Created ++########################################################## ++*/ ++static int myatmdd_phy_start(struct atm_dev *dev) ++{ ++ /* Provide ATM driver with a pointer via which it ++ * may invoke PHY driver's IOCTL or interrupt ++ * handlers. ++ */ ++ dev->phy = &myatmdd_phy_ops; ++ ++ /* If required allocate phy private data and save ++ * pointer in dev->phy_data; ++ */ ++ ++ /* TODO Initialise PHY hardware... */ ++ ++ return 0; ++} ++ ++/* Should be called by SAR driver when it needs to handle an interrupt ++ * triggered by PHY. ++ */ ++static void myatmdd_phy_int(struct atm_dev *dev) ++{ ++ /* Handle interrupt triggered by PHY */ ++} ++ ++/* Gets called by SAR driver IOCTL handler for IOCTLS it doesn't recognise */ ++static int myatmdd_phy_ioctl(struct atm_dev *dev,unsigned int cmd,void *arg) ++{ ++ switch (cmd) ++ { ++// case SONET_GETSTATZ: ++ default: ++ return -EINVAL; ++ } ++} ++ ++/* ++######################################################### ++# ++# Function : myatmdd_free_tx_skb ++# ++# Purpose : frees an sk_buff. 
++# ++# Args : skb=pointer to socket buffer ++# ++# Notes : Tries to use the upper layer pop() function ++# but uses dev_kfree_skb() if this doesn't exist ++########################################################## ++# Edit history: ++# Who When What ++# AJZ 10Apr03 Created ++########################################################## ++*/ ++static void myatmdd_free_tx_skb(struct sk_buff *skb) ++{ ++ struct atm_vcc *vcc; ++ ++ DEBUG("%s\n", __FUNCTION__); ++ ++ /* See if we can use the VCC pop function */ ++ if (((vcc = ATM_SKB(skb)->vcc) != NULL) && (vcc->pop != NULL)) ++ { ++ /* Yes, so use ATM socket layer pop function */ ++ vcc->pop(vcc, skb); ++ } ++ else ++ { ++ printk(KERN_WARNING KLOG_PREAMBLE "unable to call skb free function\n"); ++ /* No, so free socket buffer */ ++ dev_kfree_skb(skb); ++ } ++} ++ ++/* ++######################################################### ++# ++# Functions : Implementations of function ptrs in ++# myatmdd_ops. ++# myatmdd_open(), ++# myatmdd_close(), ++# myatmdd_ioctl(), ++# myatmdd_getsockopt(), ++# myatmdd_setsockopt(), ++# myatmdd_send(), ++# myatmdd_sg_send(), ++# myatmdd_change_qos(), ++# myatmdd_proc_read() ++# ++# ++# Purpose : See ATM device driver interface v0.1 ++# ++# Notes : Conforming to Linux ATM device driver i/f ++# interface. 
Draft version 0.1 ++# ++# Designed to work with multiple devices ++########################################################## ++# Edit history: ++# Who When What ++# AJZ 10Apr03 Created ++########################################################## ++*/ ++static int myatmdd_open(struct atm_vcc *vcc) ++{ ++ myatmdd_vccdata_t *priv; ++ int i; ++ ++ DEBUG("%s\n", __FUNCTION__); ++ ++ /* Make sure we are opening a AAL0 or AAL5 connection */ ++ if ((vcc->qos.aal != ATM_AAL5) && (vcc->qos.aal != ATM_AAL0)) ++ { ++ printk(KERN_WARNING KLOG_PREAMBLE "invalid AAL\n"); ++ return -EINVAL; ++ } ++ ++ /* Address is in use */ ++ set_bit(ATM_VF_ADDR, &vcc->flags); ++ ++ /* Allocate some vcc-private memory */ ++ vcc->dev_data = kmalloc(sizeof(myatmdd_vccdata_t), GFP_KERNEL); ++ if (vcc->dev_data == NULL) ++ return -ENOMEM; ++ priv = vcc->dev_data; ++ ++ /* Setup the hardware for new VC... */ ++ ++ /* Do not allow half open VCs - otherwise the example driver will not be able ++ * to loop back frames sent ! ++ */ ++ if (vcc->qos.rxtp.traffic_class == ATM_NONE || vcc->qos.txtp.traffic_class == ATM_NONE) ++ { ++ kfree(vcc->dev_data); ++ return -EPERM; ++ } ++ ++ /* Create rx/tx queues for this VC */ ++ myatmdd_init_txq(&priv->txqueue, TXQ_SZ); ++ myatmdd_init_rxq(&priv->rxqueue, RXQ_SZ); ++ ++ /* Fill rx queue with empty skbuffs */ ++ for (i = 0 ; i < RXQ_SZ - 1; i++) ++ { ++ struct sk_buff *skb = dev_alloc_skb(AAL5_BUFLEN); ++ myatmdd_rxq_enqueue(&priv->rxqueue,skb); ++ } ++ ++ /* Connection is now ready to receive data */ ++ set_bit(ATM_VF_READY, &vcc->flags); ++ ++ return 0; ++} ++ ++static void myatmdd_close(struct atm_vcc *vcc) ++{ ++ myatmdd_vccdata_t *priv = vcc->dev_data; ++ ++ DEBUG("%s\n", __FUNCTION__); ++ ++ /* Indicate channel closed */ ++ clear_bit(ATM_VF_READY, &vcc->flags); ++ ++ /* TODO Uninitialise the hardware for this VC... 
*/ ++ ++ /* empty the rx and tx queues */ ++ myatmdd_release_txq(&priv->txqueue); ++ myatmdd_release_rxq(&priv->rxqueue); ++ ++ /* Free the vcc-private memory */ ++ kfree(vcc->dev_data); ++} ++ ++static int myatmdd_ioctl(struct atm_dev *dev, unsigned int cmd,void *arg) ++{ ++ /* myatmdd does not currently have an ioctl interface so pass ioctl onto PHY */ ++ if (dev->phy && dev->phy->ioctl) { ++ return dev->phy->ioctl(dev, cmd, arg); ++ } ++ return -EINVAL; ++} ++ ++static int myatmdd_getsockopt(struct atm_vcc *vcc,int level,int optname, void *optval,int optlen) ++{ ++ return -EINVAL; ++} ++ ++static int myatmdd_setsockopt(struct atm_vcc *vcc,int level,int optname, void *optval,int optlen) ++{ ++ return -EINVAL; ++} ++ ++/* Note may be called in either process or interrupt context! */ ++static int myatmdd_send(struct atm_vcc *vcc,struct sk_buff *skb) ++{ ++ myatmdd_vccdata_t *priv = vcc->dev_data; ++ ++ DEBUG("%s\n", __FUNCTION__); ++ ++ /* Assign VCC to socket buffer ++ * Note: this must be done before attempting to call ++ * myatmdd_free_tx_skb() as this may use ATM_SKB(skb)->vcc->pop() ++ */ ++ ATM_SKB(skb)->vcc = vcc; ++ ++ /* Setup hardware to send and arrange callback of myatmdd_send_complete... */ ++ ++ /* In this example ATM device driver all VCs are looped back. ++ * So copy to the rxq and emulate an rx interrupt ++ */ ++ ++ /* Can we accept another skb to send ? 
*/ ++ if (myatmdd_txq_enqueue(&priv->txqueue, skb)) ++ { ++ /* No - free socket buffer */ ++ myatmdd_free_tx_skb(skb); ++ ++ /* Update tx channel stats */ ++ atomic_inc(&vcc->stats->tx_err); ++ ++ /* Tell protocol layer to back off */ ++ return(-EBUSY); ++ } ++ ++ /* This is the bit which copies the tx ring to the rx ring, ++ * and triggers emulated rx and tx interrupts ++ */ ++ myatmdd_emulate_loopback_hardware(vcc); ++ ++ return 0; ++} ++ ++static int myatmdd_change_qos(struct atm_vcc *vcc,struct atm_qos *qos,int flgs) ++{ ++ return 0; ++} ++ ++static int myatmdd_proc_read(struct atm_dev *dev,loff_t *pos,char *page) ++{ ++ int left = (int) *pos; ++ ++ if (!left--) ++ return sprintf(page, "1st line of stats\n"); ++ if (!left--) ++ return sprintf(page, "2nd line of stats\n"); ++ if (!left--) ++ return sprintf(page, "3rd line of stats\n"); ++ ++ return 0; ++} ++ ++/* ++######################################################### ++# ++# Function : myatmdd_init ++# ++# Purpose : init the module, init and register the ATM device ++# ++# Args : none ++# ++# Returns : return code ++# ++# Notes : ++# ++########################################################## ++# Edit history: ++# Who When What ++# AJZ 10Apr03 Created ++########################################################## ++*/ ++int __init myatmdd_init(void) ++{ ++ myatmdd_devdata_t *priv = kmalloc(sizeof(myatmdd_devdata_t),GFP_KERNEL); ++ ++ if (priv == NULL) ++ return -ENOMEM; ++ ++ /* Register the new device */ ++ myatmdd_dev = atm_dev_register(MYATMDD,&myatmdd_ops,-1,NULL); ++ ++ /* Were we able to register this device? 
*/ ++ if (myatmdd_dev == NULL) ++ { ++ printk(KERN_ERR KLOG_PREAMBLE "failed to register CPM ATM device\n"); ++ return -EPERM; ++ } ++ ++ /* Save pointer to device private data */ ++ myatmdd_dev->dev_data = priv; ++ ++ /* Initialise device parameters */ ++ myatmdd_dev->ci_range.vpi_bits = MYATMDD_VPI_BITS; ++ myatmdd_dev->ci_range.vci_bits = MYATMDD_VCI_BITS; ++ myatmdd_dev->link_rate = MYATMDD_PCR; ++ ++ /* Set up phy device */ ++ myatmdd_phy_start(myatmdd_dev); ++ ++ /* TODO Initialise SAR hardware... */ ++ ++ /* Console output */ ++ printk(KERN_INFO KLOG_PREAMBLE "Initialised\n"); ++ ++ return 0; ++} ++ ++/* NOTE: ++ * module_init() is called by insmod, if built as module, ++ * or by do_initcalls(), if built as a resident driver. ++ */ ++module_init(myatmdd_init); ++ ++/* ++######################################################### ++# ++# Function : myatmdd_exit ++# ++# Purpose : delete module, uninit and dereg ATM device ++# ++# Args : none ++# ++# Returns : none ++# ++# Notes : ++# ++########################################################## ++# Edit history: ++# Who When What ++# AJZ 10Apr03 Created ++########################################################## ++*/ ++ ++#ifdef MODULE ++static void __exit myatmdd_exit(void) ++{ ++ /* Disable SAR hardware... 
*/ ++ ++ /* Console output */ ++ printk(KERN_ERR KLOG_PREAMBLE "Uninitialised\n"); ++ kfree(myatmdd_dev->dev_data); ++ atm_dev_deregister(myatmdd_dev); ++} ++module_exit(myatmdd_exit); ++ ++#endif /* MODULE */ diff --git a/kernel-aufs-support.patch b/kernel-aufs-support.patch index 14f7056e..264339e0 100644 --- a/kernel-aufs-support.patch +++ b/kernel-aufs-support.patch @@ -6,11 +6,14 @@ retrieving revision 1.4 diff -u -p -r1.2 -r1.4 --- linux-2.6.23/fs/namei.c 29 Oct 2007 07:01:03 -0000 1.2 +++ linux-2.6.23/fs/namei.c 29 Oct 2007 07:05:53 -0000 1.4 -@@ -2827,3 +2827,4 @@ EXPORT_SYMBOL(vfs_symlink); +@@ -2893,6 +2893,7 @@ EXPORT_SYMBOL(vfs_unlink); EXPORT_SYMBOL(dentry_unhash); EXPORT_SYMBOL(generic_readlink); +EXPORT_SYMBOL(deny_write_access); + + /* to be mentioned only in INIT_TASK */ + struct fs_struct init_fs = { Index: linux-2.6.27/fs/ecryptfs/inode.c =================================================================== retrieving revision 1.1 diff --git a/kernel-aufs.patch b/kernel-aufs.patch index 5b406012..d5a226bf 100644 --- a/kernel-aufs.patch +++ b/kernel-aufs.patch @@ -7511,7 +7511,7 @@ diff -urN linux/fs/aufs/export.c linux-aufs/fs/aufs/export.c + if (nsi_lock) + si_read_unlock(parent->d_sb); + path_get(path); -+ file = dentry_open(parent, path->mnt, au_dir_roflags); ++ file = dentry_open(parent, path->mnt, au_dir_roflags, current_cred()); + dentry = (void *)file; + if (IS_ERR(file)) + goto out; @@ -8056,7 +8056,7 @@ diff -urN linux/fs/aufs/file.c linux-aufs/fs/aufs/file.c + if (file && au_test_nfs(h_dentry->d_sb)) + h_file = au_h_intent(dentry, bindex, file); + if (!h_file) -+ h_file = dentry_open(dget(h_dentry), mntget(br->br_mnt), flags); ++ h_file = dentry_open(dget(h_dentry), mntget(br->br_mnt), flags, current_cred()); + + /* + * a dirty trick for handling FMODE_EXEC and deny_write_access(). 
@@ -28491,7 +28491,7 @@ diff -urN linux/fs/aufs/xino.c linux-aufs/fs/aufs/xino.c + goto out_dput; + } + file = dentry_open(dget(dentry), mntget(base_file->f_vfsmnt), -+ O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE); ++ O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE, current_cred()); + if (IS_ERR(file)) { + AuErr("%.*s open err %ld\n", AuLNPair(name), PTR_ERR(file)); + goto out_dput; diff --git a/kernel-autoconf.h b/kernel-autoconf.h index ca46a29a..2f68be0c 100644 --- a/kernel-autoconf.h +++ b/kernel-autoconf.h @@ -1,24 +1,2 @@ -#ifndef __pld_kernel_autoconf_h__ -#define __pld_kernel_autoconf_h__ - -/* - * Define some nasty macros o we can construct the file names - * we want to include - */ - -#if defined(__pld_autoconf_included_file__) -#undef __pld_autoconf_included_file__ -#endif /* __pld_autoconf_included_file__ */ - -#if defined(__KERNEL_SMP) -#include -#define __pld_autoconf_included_file__ 1 -#endif /* __KERNEL_SMP */ - -#if !defined(__pld_autoconf_included_file__) -#include -#else -#undef __pld_autoconf_included_file__ -#endif /* __pld_autoconf_included_file__ */ - -#endif /* __pld_kernel_autoconf_h__ */ +#error Runtime kernel config detection not available. +#error Please use the autoconf-dist.h instead. diff --git a/kernel-bzip2-lzma.patch b/kernel-bzip2-lzma.patch new file mode 100644 index 00000000..76899a6c --- /dev/null +++ b/kernel-bzip2-lzma.patch @@ -0,0 +1,3252 @@ +Linus, + +Please pull the latest bzip2-lzma-for-linus git tree from: + + git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git bzip2-lzma-for-linus + +We are sending this as a separate tree as it affects generic +files as well. + +Highlights: + + - Add kernel image compression mode config options: + + Kernel compression mode + > 1. Gzip (KERNEL_GZIP) (NEW) + 2. Bzip2 (KERNEL_BZIP2) (NEW) + 3. LZMA (KERNEL_LZMA) (NEW) + + Initial ramdisk compressed using gzip (RD_GZIP) [Y/n/?] (NEW) + Initial ramdisk compressed using bzip2 (RD_BZIP2) [N/y/?] 
(NEW) + Initial ramdisk compressed using lzma (RD_LZMA) [N/y/?] (NEW) + + Built-in initramfs compression mode + > 1. None (INITRAMFS_COMPRESSION_NONE) (NEW) + 2. Gzip (INITRAMFS_COMPRESSION_GZIP) (NEW) + choice[1-2?]: + + ... and matching compression and decompression implementations. + +Risks: + + - This has been a historically problematic topic thus we skipped + the v2.6.28 and v2.6.29 merge windows with it. Boot failures, + panics, initrd decompression problems have been observed and + fixed. + + - On x86 we switch over from lib/inflate.c to zlib - same + functionality but different library. Other architectures are not + affected by that, use of this new facility is opt-in. (This was + the last in-kernel user of lib/inflate.c on x86.) + + - To build a kernel with a different compressor the 'bzip2' or 'lzma' + tools are needed. If the kernel is built with CONFIG_KERNEL_LZMA=y + and the tool is not available the kernel build will fail with a clear + message. This tool is available in all major kernel distros, but + it is not generally a default-installed package. bzip2 is generally + installed by default. + + - There are no known regressions. + + Thanks, + + Ingo + +------------------> +Alain Knaff (8): + bzip2/lzma: library support for gzip, bzip2 and lzma decompression + bzip2/lzma: config and initramfs support for bzip2/lzma decompression + bzip2/lzma: x86 kernel compression support + bzip2/lzma: fix built-in initramfs vs CONFIG_RD_GZIP + bzip2/lzma: fix decompress_inflate.c vs multi-block-with-embedded-filename + bzip2/lzma: don't stop search at first unconfigured compression + bzip2/lzma: don't leave empty files around on failure + bzip2/lzma: make internal initramfs compression configurable + +Cyrill Gorcunov (1): + x86: headers cleanup - boot.h + +H. 
Peter Anvin (11): + bzip2/lzma: use a table to search for initramfs compression formats + bzip2/lzma: handle failures from bzip2 and lzma correctly + bzip2/lzma: make config machinery an arch configurable + bzip2/lzma: proper Kconfig dependencies for the ramdisk options + bzip2/lzma: DECOMPRESS_GZIP should select ZLIB_INFLATE + bzip2/lzma: move initrd/ramfs options out of BLK_DEV + bzip2/lzma: fix constant in decompress_inflate + bzip2/lzma: centralize format detection + bzip2/lzma: comprehensible error messages for missing decompressor + init: make initrd/initramfs decompression failure a KERN_EMERG event + bzip2/lzma: update boot protocol specification + +Ingo Molnar (1): + bzip2/lzma: make flush_buffer() unconditional + + + Documentation/x86/boot.txt | 5 +- + arch/x86/Kconfig | 3 + + arch/x86/boot/compressed/Makefile | 21 +- + arch/x86/boot/compressed/misc.c | 118 +----- + arch/x86/include/asm/boot.h | 16 +- + include/linux/decompress/bunzip2.h | 10 + + include/linux/decompress/generic.h | 33 ++ + include/linux/decompress/inflate.h | 13 + + include/linux/decompress/mm.h | 87 +++++ + include/linux/decompress/unlzma.h | 12 + + init/Kconfig | 60 +++ + init/do_mounts_rd.c | 178 +++------- + init/initramfs.c | 122 ++----- + lib/Kconfig | 14 + + lib/Makefile | 7 +- + lib/decompress.c | 54 +++ + lib/decompress_bunzip2.c | 735 ++++++++++++++++++++++++++++++++++++ + lib/decompress_inflate.c | 167 ++++++++ + lib/decompress_unlzma.c | 647 +++++++++++++++++++++++++++++++ + lib/zlib_inflate/inflate.h | 4 + + lib/zlib_inflate/inftrees.h | 4 + + scripts/Makefile.lib | 14 + + scripts/bin_size | 10 + + scripts/gen_initramfs_list.sh | 18 +- + usr/Kconfig | 89 +++++ + usr/Makefile | 36 ++- + usr/initramfs_data.S | 2 +- + usr/initramfs_data.bz2.S | 29 ++ + usr/initramfs_data.gz.S | 29 ++ + usr/initramfs_data.lzma.S | 29 ++ + 30 files changed, 2222 insertions(+), 344 deletions(-) + create mode 100644 include/linux/decompress/bunzip2.h + create mode 100644 
include/linux/decompress/generic.h + create mode 100644 include/linux/decompress/inflate.h + create mode 100644 include/linux/decompress/mm.h + create mode 100644 include/linux/decompress/unlzma.h + create mode 100644 lib/decompress.c + create mode 100644 lib/decompress_bunzip2.c + create mode 100644 lib/decompress_inflate.c + create mode 100644 lib/decompress_unlzma.c + create mode 100644 scripts/bin_size + create mode 100644 usr/initramfs_data.bz2.S + create mode 100644 usr/initramfs_data.gz.S + create mode 100644 usr/initramfs_data.lzma.S + +diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt +index 7b4596a..d05730e 100644 +--- a/Documentation/x86/boot.txt ++++ b/Documentation/x86/boot.txt +@@ -542,7 +542,10 @@ Protocol: 2.08+ + + The payload may be compressed. The format of both the compressed and + uncompressed data should be determined using the standard magic +- numbers. Currently only gzip compressed ELF is used. ++ numbers. The currently supported compression formats are gzip ++ (magic numbers 1F 8B or 1F 9E), bzip2 (magic number 42 5A) and LZMA ++ (magic number 5D 00). The uncompressed payload is currently always ELF ++ (magic number 7F 45 4C 46). 
+ + Field name: payload_length + Type: read +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index bc2fbad..a233768 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -40,6 +40,9 @@ config X86 + select HAVE_GENERIC_DMA_COHERENT if X86_32 + select HAVE_EFFICIENT_UNALIGNED_ACCESS + select USER_STACKTRACE_SUPPORT ++ select HAVE_KERNEL_GZIP ++ select HAVE_KERNEL_BZIP2 ++ select HAVE_KERNEL_LZMA + + config ARCH_DEFCONFIG + string +diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile +index 1771c80..3ca4c19 100644 +--- a/arch/x86/boot/compressed/Makefile ++++ b/arch/x86/boot/compressed/Makefile +@@ -4,7 +4,7 @@ + # create a compressed vmlinux image from the original vmlinux + # + +-targets := vmlinux vmlinux.bin vmlinux.bin.gz head_$(BITS).o misc.o piggy.o ++targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma head_$(BITS).o misc.o piggy.o + + KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 + KBUILD_CFLAGS += -fno-strict-aliasing -fPIC +@@ -47,18 +47,35 @@ ifeq ($(CONFIG_X86_32),y) + ifdef CONFIG_RELOCATABLE + $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin.all FORCE + $(call if_changed,gzip) ++$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin.all FORCE ++ $(call if_changed,bzip2) ++$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin.all FORCE ++ $(call if_changed,lzma) + else + $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE + $(call if_changed,gzip) ++$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE ++ $(call if_changed,bzip2) ++$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE ++ $(call if_changed,lzma) + endif + LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T + + else ++ + $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE + $(call if_changed,gzip) ++$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE ++ $(call if_changed,bzip2) ++$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE ++ $(call if_changed,lzma) + + LDFLAGS_piggy.o := -r --format binary --oformat elf64-x86-64 -T + endif + +-$(obj)/piggy.o: 
$(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE ++suffix_$(CONFIG_KERNEL_GZIP) = gz ++suffix_$(CONFIG_KERNEL_BZIP2) = bz2 ++suffix_$(CONFIG_KERNEL_LZMA) = lzma ++ ++$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix_y) FORCE + $(call if_changed,ld) +diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c +index da06221..e45be73 100644 +--- a/arch/x86/boot/compressed/misc.c ++++ b/arch/x86/boot/compressed/misc.c +@@ -116,71 +116,13 @@ + /* + * gzip declarations + */ +- +-#define OF(args) args + #define STATIC static + + #undef memset + #undef memcpy + #define memzero(s, n) memset((s), 0, (n)) + +-typedef unsigned char uch; +-typedef unsigned short ush; +-typedef unsigned long ulg; +- +-/* +- * Window size must be at least 32k, and a power of two. +- * We don't actually have a window just a huge output buffer, +- * so we report a 2G window size, as that should always be +- * larger than our output buffer: +- */ +-#define WSIZE 0x80000000 +- +-/* Input buffer: */ +-static unsigned char *inbuf; +- +-/* Sliding window buffer (and final output buffer): */ +-static unsigned char *window; +- +-/* Valid bytes in inbuf: */ +-static unsigned insize; +- +-/* Index of next byte to be processed in inbuf: */ +-static unsigned inptr; +- +-/* Bytes in output buffer: */ +-static unsigned outcnt; +- +-/* gzip flag byte */ +-#define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */ +-#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gz file */ +-#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */ +-#define ORIG_NAM 0x08 /* bit 3 set: original file name present */ +-#define COMMENT 0x10 /* bit 4 set: file comment present */ +-#define ENCRYPTED 0x20 /* bit 5 set: file is encrypted */ +-#define RESERVED 0xC0 /* bit 6, 7: reserved */ +- +-#define get_byte() (inptr < insize ? 
inbuf[inptr++] : fill_inbuf()) +- +-/* Diagnostic functions */ +-#ifdef DEBUG +-# define Assert(cond, msg) do { if (!(cond)) error(msg); } while (0) +-# define Trace(x) do { fprintf x; } while (0) +-# define Tracev(x) do { if (verbose) fprintf x ; } while (0) +-# define Tracevv(x) do { if (verbose > 1) fprintf x ; } while (0) +-# define Tracec(c, x) do { if (verbose && (c)) fprintf x ; } while (0) +-# define Tracecv(c, x) do { if (verbose > 1 && (c)) fprintf x ; } while (0) +-#else +-# define Assert(cond, msg) +-# define Trace(x) +-# define Tracev(x) +-# define Tracevv(x) +-# define Tracec(c, x) +-# define Tracecv(c, x) +-#endif + +-static int fill_inbuf(void); +-static void flush_window(void); + static void error(char *m); + + /* +@@ -189,13 +131,8 @@ static void error(char *m); + static struct boot_params *real_mode; /* Pointer to real-mode data */ + static int quiet; + +-extern unsigned char input_data[]; +-extern int input_len; +- +-static long bytes_out; +- + static void *memset(void *s, int c, unsigned n); +-static void *memcpy(void *dest, const void *src, unsigned n); ++void *memcpy(void *dest, const void *src, unsigned n); + + static void __putstr(int, const char *); + #define putstr(__x) __putstr(0, __x) +@@ -213,7 +150,17 @@ static char *vidmem; + static int vidport; + static int lines, cols; + +-#include "../../../../lib/inflate.c" ++#ifdef CONFIG_KERNEL_GZIP ++#include "../../../../lib/decompress_inflate.c" ++#endif ++ ++#ifdef CONFIG_KERNEL_BZIP2 ++#include "../../../../lib/decompress_bunzip2.c" ++#endif ++ ++#ifdef CONFIG_KERNEL_LZMA ++#include "../../../../lib/decompress_unlzma.c" ++#endif + + static void scroll(void) + { +@@ -282,7 +229,7 @@ static void *memset(void *s, int c, unsigned n) + return s; + } + +-static void *memcpy(void *dest, const void *src, unsigned n) ++void *memcpy(void *dest, const void *src, unsigned n) + { + int i; + const char *s = src; +@@ -293,38 +240,6 @@ static void *memcpy(void *dest, const void *src, unsigned n) + return 
dest; + } + +-/* =========================================================================== +- * Fill the input buffer. This is called only when the buffer is empty +- * and at least one byte is really needed. +- */ +-static int fill_inbuf(void) +-{ +- error("ran out of input data"); +- return 0; +-} +- +-/* =========================================================================== +- * Write the output window window[0..outcnt-1] and update crc and bytes_out. +- * (Used for the decompressed data only.) +- */ +-static void flush_window(void) +-{ +- /* With my window equal to my output buffer +- * I only need to compute the crc here. +- */ +- unsigned long c = crc; /* temporary variable */ +- unsigned n; +- unsigned char *in, ch; +- +- in = window; +- for (n = 0; n < outcnt; n++) { +- ch = *in++; +- c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); +- } +- crc = c; +- bytes_out += (unsigned long)outcnt; +- outcnt = 0; +-} + + static void error(char *x) + { +@@ -407,12 +322,8 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, + lines = real_mode->screen_info.orig_video_lines; + cols = real_mode->screen_info.orig_video_cols; + +- window = output; /* Output buffer (Normally at 1M) */ + free_mem_ptr = heap; /* Heap */ + free_mem_end_ptr = heap + BOOT_HEAP_SIZE; +- inbuf = input_data; /* Input buffer */ +- insize = input_len; +- inptr = 0; + + #ifdef CONFIG_X86_64 + if ((unsigned long)output & (__KERNEL_ALIGN - 1)) +@@ -430,10 +341,9 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, + #endif + #endif + +- makecrc(); + if (!quiet) + putstr("\nDecompressing Linux... 
"); +- gunzip(); ++ decompress(input_data, input_len, NULL, NULL, output, NULL, error); + parse_elf(output); + if (!quiet) + putstr("done.\nBooting the kernel.\n"); +diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h +index dd61616..6526cf0 100644 +--- a/arch/x86/include/asm/boot.h ++++ b/arch/x86/include/asm/boot.h +@@ -10,17 +10,31 @@ + #define EXTENDED_VGA 0xfffe /* 80x50 mode */ + #define ASK_VGA 0xfffd /* ask for it at bootup */ + ++#ifdef __KERNEL__ ++ + /* Physical address where kernel should be loaded. */ + #define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ + + (CONFIG_PHYSICAL_ALIGN - 1)) \ + & ~(CONFIG_PHYSICAL_ALIGN - 1)) + ++#ifdef CONFIG_KERNEL_BZIP2 ++#define BOOT_HEAP_SIZE 0x400000 ++#else /* !CONFIG_KERNEL_BZIP2 */ ++ + #ifdef CONFIG_X86_64 + #define BOOT_HEAP_SIZE 0x7000 +-#define BOOT_STACK_SIZE 0x4000 + #else + #define BOOT_HEAP_SIZE 0x4000 ++#endif ++ ++#endif /* !CONFIG_KERNEL_BZIP2 */ ++ ++#ifdef CONFIG_X86_64 ++#define BOOT_STACK_SIZE 0x4000 ++#else + #define BOOT_STACK_SIZE 0x1000 + #endif + ++#endif /* __KERNEL__ */ ++ + #endif /* _ASM_X86_BOOT_H */ +diff --git a/include/linux/decompress/bunzip2.h b/include/linux/decompress/bunzip2.h +new file mode 100644 +index 0000000..1152721 +--- /dev/null ++++ b/include/linux/decompress/bunzip2.h +@@ -0,0 +1,10 @@ ++#ifndef DECOMPRESS_BUNZIP2_H ++#define DECOMPRESS_BUNZIP2_H ++ ++int bunzip2(unsigned char *inbuf, int len, ++ int(*fill)(void*, unsigned int), ++ int(*flush)(void*, unsigned int), ++ unsigned char *output, ++ int *pos, ++ void(*error)(char *x)); ++#endif +diff --git a/include/linux/decompress/generic.h b/include/linux/decompress/generic.h +new file mode 100644 +index 0000000..6dfb856 +--- /dev/null ++++ b/include/linux/decompress/generic.h +@@ -0,0 +1,33 @@ ++#ifndef DECOMPRESS_GENERIC_H ++#define DECOMPRESS_GENERIC_H ++ ++/* Minimal chunksize to be read. 
++ *Bzip2 prefers at least 4096 ++ *Lzma prefers 0x10000 */ ++#define COMPR_IOBUF_SIZE 4096 ++ ++typedef int (*decompress_fn) (unsigned char *inbuf, int len, ++ int(*fill)(void*, unsigned int), ++ int(*writebb)(void*, unsigned int), ++ unsigned char *output, ++ int *posp, ++ void(*error)(char *x)); ++ ++/* inbuf - input buffer ++ *len - len of pre-read data in inbuf ++ *fill - function to fill inbuf if empty ++ *writebb - function to write out outbuf ++ *posp - if non-null, input position (number of bytes read) will be ++ * returned here ++ * ++ *If len != 0, the inbuf is initialized (with as much data), and fill ++ *should not be called ++ *If len = 0, the inbuf is allocated, but empty. Its size is IOBUF_SIZE ++ *fill should be called (repeatedly...) to read data, at most IOBUF_SIZE ++ */ ++ ++/* Utility routine to detect the decompression method */ ++decompress_fn decompress_method(const unsigned char *inbuf, int len, ++ const char **name); ++ ++#endif +diff --git a/include/linux/decompress/inflate.h b/include/linux/decompress/inflate.h +new file mode 100644 +index 0000000..f9b06cc +--- /dev/null ++++ b/include/linux/decompress/inflate.h +@@ -0,0 +1,13 @@ ++#ifndef INFLATE_H ++#define INFLATE_H ++ ++/* Other housekeeping constants */ ++#define INBUFSIZ 4096 ++ ++int gunzip(unsigned char *inbuf, int len, ++ int(*fill)(void*, unsigned int), ++ int(*flush)(void*, unsigned int), ++ unsigned char *output, ++ int *pos, ++ void(*error_fn)(char *x)); ++#endif +diff --git a/include/linux/decompress/mm.h b/include/linux/decompress/mm.h +new file mode 100644 +index 0000000..12ff8c3 +--- /dev/null ++++ b/include/linux/decompress/mm.h +@@ -0,0 +1,87 @@ ++/* ++ * linux/compr_mm.h ++ * ++ * Memory management for pre-boot and ramdisk uncompressors ++ * ++ * Authors: Alain Knaff ++ * ++ */ ++ ++#ifndef DECOMPR_MM_H ++#define DECOMPR_MM_H ++ ++#ifdef STATIC ++ ++/* Code active when included from pre-boot environment: */ ++ ++/* A trivial malloc implementation, adapted from ++ * 
malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994 ++ */ ++static unsigned long malloc_ptr; ++static int malloc_count; ++ ++static void *malloc(int size) ++{ ++ void *p; ++ ++ if (size < 0) ++ error("Malloc error"); ++ if (!malloc_ptr) ++ malloc_ptr = free_mem_ptr; ++ ++ malloc_ptr = (malloc_ptr + 3) & ~3; /* Align */ ++ ++ p = (void *)malloc_ptr; ++ malloc_ptr += size; ++ ++ if (free_mem_end_ptr && malloc_ptr >= free_mem_end_ptr) ++ error("Out of memory"); ++ ++ malloc_count++; ++ return p; ++} ++ ++static void free(void *where) ++{ ++ malloc_count--; ++ if (!malloc_count) ++ malloc_ptr = free_mem_ptr; ++} ++ ++#define large_malloc(a) malloc(a) ++#define large_free(a) free(a) ++ ++#define set_error_fn(x) ++ ++#define INIT ++ ++#else /* STATIC */ ++ ++/* Code active when compiled standalone for use when loading ramdisk: */ ++ ++#include ++#include ++#include ++#include ++ ++/* Use defines rather than static inline in order to avoid spurious ++ * warnings when not needed (indeed large_malloc / large_free are not ++ * needed by inflate */ ++ ++#define malloc(a) kmalloc(a, GFP_KERNEL) ++#define free(a) kfree(a) ++ ++#define large_malloc(a) vmalloc(a) ++#define large_free(a) vfree(a) ++ ++static void(*error)(char *m); ++#define set_error_fn(x) error = x; ++ ++#define INIT __init ++#define STATIC ++ ++#include ++ ++#endif /* STATIC */ ++ ++#endif /* DECOMPR_MM_H */ +diff --git a/include/linux/decompress/unlzma.h b/include/linux/decompress/unlzma.h +new file mode 100644 +index 0000000..7796538 +--- /dev/null ++++ b/include/linux/decompress/unlzma.h +@@ -0,0 +1,12 @@ ++#ifndef DECOMPRESS_UNLZMA_H ++#define DECOMPRESS_UNLZMA_H ++ ++int unlzma(unsigned char *, int, ++ int(*fill)(void*, unsigned int), ++ int(*flush)(void*, unsigned int), ++ unsigned char *output, ++ int *posp, ++ void(*error)(char *x) ++ ); ++ ++#endif +diff --git a/init/Kconfig b/init/Kconfig +index 6a5c5fe..38396ec 100644 +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -101,6 +101,66 @@ config 
LOCALVERSION_AUTO + + which is done within the script "scripts/setlocalversion".) + ++config HAVE_KERNEL_GZIP ++ bool ++ ++config HAVE_KERNEL_BZIP2 ++ bool ++ ++config HAVE_KERNEL_LZMA ++ bool ++ ++choice ++ prompt "Kernel compression mode" ++ default KERNEL_GZIP ++ depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA ++ help ++ The linux kernel is a kind of self-extracting executable. ++ Several compression algorithms are available, which differ ++ in efficiency, compression and decompression speed. ++ Compression speed is only relevant when building a kernel. ++ Decompression speed is relevant at each boot. ++ ++ If you have any problems with bzip2 or lzma compressed ++ kernels, mail me (Alain Knaff) . (An older ++ version of this functionality (bzip2 only), for 2.4, was ++ supplied by Christian Ludwig) ++ ++ High compression options are mostly useful for users, who ++ are low on disk space (embedded systems), but for whom ram ++ size matters less. ++ ++ If in doubt, select 'gzip' ++ ++config KERNEL_GZIP ++ bool "Gzip" ++ depends on HAVE_KERNEL_GZIP ++ help ++ The old and tried gzip compression. Its compression ratio is ++ the poorest among the 3 choices; however its speed (both ++ compression and decompression) is the fastest. ++ ++config KERNEL_BZIP2 ++ bool "Bzip2" ++ depends on HAVE_KERNEL_BZIP2 ++ help ++ Its compression ratio and speed is intermediate. ++ Decompression speed is slowest among the three. The kernel ++ size is about 10% smaller with bzip2, in comparison to gzip. ++ Bzip2 uses a large amount of memory. For modern kernels you ++ will need at least 8MB RAM or more for booting. ++ ++config KERNEL_LZMA ++ bool "LZMA" ++ depends on HAVE_KERNEL_LZMA ++ help ++ The most recent compression algorithm. ++ Its ratio is best, decompression speed is between the other ++ two. Compression is slowest. The kernel size is about 33% ++ smaller with LZMA in comparison to gzip. 
++ ++endchoice ++ + config SWAP + bool "Support for paging of anonymous memory (swap)" + depends on MMU && BLOCK +diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c +index 0f0f0cf..027a402 100644 +--- a/init/do_mounts_rd.c ++++ b/init/do_mounts_rd.c +@@ -11,6 +11,9 @@ + #include "do_mounts.h" + #include "../fs/squashfs/squashfs_fs.h" + ++#include ++ ++ + int __initdata rd_prompt = 1;/* 1 = prompt for RAM disk, 0 = don't prompt */ + + static int __init prompt_ramdisk(char *str) +@@ -29,7 +32,7 @@ static int __init ramdisk_start_setup(char *str) + } + __setup("ramdisk_start=", ramdisk_start_setup); + +-static int __init crd_load(int in_fd, int out_fd); ++static int __init crd_load(int in_fd, int out_fd, decompress_fn deco); + + /* + * This routine tries to find a RAM disk image to load, and returns the +@@ -38,15 +41,15 @@ static int __init crd_load(int in_fd, int out_fd); + * numbers could not be found. + * + * We currently check for the following magic numbers: +- * minix +- * ext2 ++ * minix ++ * ext2 + * romfs + * cramfs + * squashfs +- * gzip ++ * gzip + */ +-static int __init +-identify_ramdisk_image(int fd, int start_block) ++static int __init ++identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) + { + const int size = 512; + struct minix_super_block *minixsb; +@@ -56,6 +59,7 @@ identify_ramdisk_image(int fd, int start_block) + struct squashfs_super_block *squashfsb; + int nblocks = -1; + unsigned char *buf; ++ const char *compress_name; + + buf = kmalloc(size, GFP_KERNEL); + if (!buf) +@@ -69,18 +73,19 @@ identify_ramdisk_image(int fd, int start_block) + memset(buf, 0xe5, size); + + /* +- * Read block 0 to test for gzipped kernel ++ * Read block 0 to test for compressed kernel + */ + sys_lseek(fd, start_block * BLOCK_SIZE, 0); + sys_read(fd, buf, size); + +- /* +- * If it matches the gzip magic numbers, return 0 +- */ +- if (buf[0] == 037 && ((buf[1] == 0213) || (buf[1] == 0236))) { +- printk(KERN_NOTICE +- "RAMDISK: Compressed 
image found at block %d\n", +- start_block); ++ *decompressor = decompress_method(buf, size, &compress_name); ++ if (compress_name) { ++ printk(KERN_NOTICE "RAMDISK: %s image found at block %d\n", ++ compress_name, start_block); ++ if (!*decompressor) ++ printk(KERN_EMERG ++ "RAMDISK: %s decompressor not configured!\n", ++ compress_name); + nblocks = 0; + goto done; + } +@@ -142,7 +147,7 @@ identify_ramdisk_image(int fd, int start_block) + printk(KERN_NOTICE + "RAMDISK: Couldn't find valid RAM disk image starting at %d.\n", + start_block); +- ++ + done: + sys_lseek(fd, start_block * BLOCK_SIZE, 0); + kfree(buf); +@@ -157,6 +162,7 @@ int __init rd_load_image(char *from) + int nblocks, i, disk; + char *buf = NULL; + unsigned short rotate = 0; ++ decompress_fn decompressor = NULL; + #if !defined(CONFIG_S390) && !defined(CONFIG_PPC_ISERIES) + char rotator[4] = { '|' , '/' , '-' , '\\' }; + #endif +@@ -169,12 +175,12 @@ int __init rd_load_image(char *from) + if (in_fd < 0) + goto noclose_input; + +- nblocks = identify_ramdisk_image(in_fd, rd_image_start); ++ nblocks = identify_ramdisk_image(in_fd, rd_image_start, &decompressor); + if (nblocks < 0) + goto done; + + if (nblocks == 0) { +- if (crd_load(in_fd, out_fd) == 0) ++ if (crd_load(in_fd, out_fd, decompressor) == 0) + goto successful_load; + goto done; + } +@@ -200,7 +206,7 @@ int __init rd_load_image(char *from) + nblocks, rd_blocks); + goto done; + } +- ++ + /* + * OK, time to copy in the data + */ +@@ -273,138 +279,48 @@ int __init rd_load_disk(int n) + return rd_load_image("/dev/root"); + } + +-/* +- * gzip declarations +- */ +- +-#define OF(args) args +- +-#ifndef memzero +-#define memzero(s, n) memset ((s), 0, (n)) +-#endif +- +-typedef unsigned char uch; +-typedef unsigned short ush; +-typedef unsigned long ulg; +- +-#define INBUFSIZ 4096 +-#define WSIZE 0x8000 /* window size--must be a power of two, and */ +- /* at least 32K for zip's deflate method */ +- +-static uch *inbuf; +-static uch *window; +- 
+-static unsigned insize; /* valid bytes in inbuf */ +-static unsigned inptr; /* index of next byte to be processed in inbuf */ +-static unsigned outcnt; /* bytes in output buffer */ + static int exit_code; +-static int unzip_error; +-static long bytes_out; ++static int decompress_error; + static int crd_infd, crd_outfd; + +-#define get_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf()) +- +-/* Diagnostic functions (stubbed out) */ +-#define Assert(cond,msg) +-#define Trace(x) +-#define Tracev(x) +-#define Tracevv(x) +-#define Tracec(c,x) +-#define Tracecv(c,x) +- +-#define STATIC static +-#define INIT __init +- +-static int __init fill_inbuf(void); +-static void __init flush_window(void); +-static void __init error(char *m); +- +-#define NO_INFLATE_MALLOC +- +-#include "../lib/inflate.c" +- +-/* =========================================================================== +- * Fill the input buffer. This is called only when the buffer is empty +- * and at least one byte is really needed. +- * Returning -1 does not guarantee that gunzip() will ever return. +- */ +-static int __init fill_inbuf(void) ++static int __init compr_fill(void *buf, unsigned int len) + { +- if (exit_code) return -1; +- +- insize = sys_read(crd_infd, inbuf, INBUFSIZ); +- if (insize == 0) { +- error("RAMDISK: ran out of compressed data"); +- return -1; +- } +- +- inptr = 1; +- +- return inbuf[0]; ++ int r = sys_read(crd_infd, buf, len); ++ if (r < 0) ++ printk(KERN_ERR "RAMDISK: error while reading compressed data\n"); ++ else if (r == 0) ++ printk(KERN_ERR "RAMDISK: EOF while reading compressed data\n"); ++ return r; /* sys_read() result: <0 read error, 0 EOF */ + } + +-/* =========================================================================== +- * Write the output window window[0..outcnt-1] and update crc and bytes_out. +- * (Used for the decompressed data only.) 
+- */ +-static void __init flush_window(void) ++static int __init compr_flush(void *window, unsigned int outcnt) + { +- ulg c = crc; /* temporary variable */ +- unsigned n, written; +- uch *in, ch; +- +- written = sys_write(crd_outfd, window, outcnt); +- if (written != outcnt && unzip_error == 0) { +- printk(KERN_ERR "RAMDISK: incomplete write (%d != %d) %ld\n", +- written, outcnt, bytes_out); +- unzip_error = 1; +- } +- in = window; +- for (n = 0; n < outcnt; n++) { +- ch = *in++; +- c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); +- } +- crc = c; +- bytes_out += (ulg)outcnt; +- outcnt = 0; ++ int written = sys_write(crd_outfd, window, outcnt); ++ if (written != outcnt) { ++ if (decompress_error == 0) ++ printk(KERN_ERR ++ "RAMDISK: incomplete write (%d != %d)\n", ++ written, outcnt); ++ decompress_error = 1; ++ return -1; ++ } ++ return outcnt; + } + + static void __init error(char *x) + { + printk(KERN_ERR "%s\n", x); + exit_code = 1; +- unzip_error = 1; ++ decompress_error = 1; + } + +-static int __init crd_load(int in_fd, int out_fd) ++static int __init crd_load(int in_fd, int out_fd, decompress_fn deco) + { + int result; +- +- insize = 0; /* valid bytes in inbuf */ +- inptr = 0; /* index of next byte to be processed in inbuf */ +- outcnt = 0; /* bytes in output buffer */ +- exit_code = 0; +- bytes_out = 0; +- crc = (ulg)0xffffffffL; /* shift register contents */ +- + crd_infd = in_fd; + crd_outfd = out_fd; +- inbuf = kmalloc(INBUFSIZ, GFP_KERNEL); +- if (!inbuf) { +- printk(KERN_ERR "RAMDISK: Couldn't allocate gzip buffer\n"); +- return -1; +- } +- window = kmalloc(WSIZE, GFP_KERNEL); +- if (!window) { +- printk(KERN_ERR "RAMDISK: Couldn't allocate gzip window\n"); +- kfree(inbuf); +- return -1; +- } +- makecrc(); +- result = gunzip(); +- if (unzip_error) ++ result = deco(NULL, 0, compr_fill, compr_flush, NULL, NULL, error); ++ if (decompress_error) + result = 1; +- kfree(inbuf); +- kfree(window); + return result; + } +diff --git a/init/initramfs.c 
b/init/initramfs.c +index d9c941c..7dcde7e 100644 +--- a/init/initramfs.c ++++ b/init/initramfs.c +@@ -390,11 +390,13 @@ static int __init write_buffer(char *buf, unsigned len) + return len - count; + } + +-static void __init flush_buffer(char *buf, unsigned len) ++static int __init flush_buffer(void *bufv, unsigned len) + { ++ char *buf = (char *) bufv; + int written; ++ int origLen = len; + if (message) +- return; ++ return -1; + while ((written = write_buffer(buf, len)) < len && !message) { + char c = buf[written]; + if (c == '0') { +@@ -408,84 +410,28 @@ static void __init flush_buffer(char *buf, unsigned len) + } else + error("junk in compressed archive"); + } ++ return origLen; + } + +-/* +- * gzip declarations +- */ ++static unsigned my_inptr; /* index of next byte to be processed in inbuf */ + +-#define OF(args) args +- +-#ifndef memzero +-#define memzero(s, n) memset ((s), 0, (n)) +-#endif +- +-typedef unsigned char uch; +-typedef unsigned short ush; +-typedef unsigned long ulg; +- +-#define WSIZE 0x8000 /* window size--must be a power of two, and */ +- /* at least 32K for zip's deflate method */ +- +-static uch *inbuf; +-static uch *window; +- +-static unsigned insize; /* valid bytes in inbuf */ +-static unsigned inptr; /* index of next byte to be processed in inbuf */ +-static unsigned outcnt; /* bytes in output buffer */ +-static long bytes_out; +- +-#define get_byte() (inptr < insize ? inbuf[inptr++] : -1) +- +-/* Diagnostic functions (stubbed out) */ +-#define Assert(cond,msg) +-#define Trace(x) +-#define Tracev(x) +-#define Tracevv(x) +-#define Tracec(c,x) +-#define Tracecv(c,x) +- +-#define STATIC static +-#define INIT __init +- +-static void __init flush_window(void); +-static void __init error(char *m); +- +-#define NO_INFLATE_MALLOC +- +-#include "../lib/inflate.c" +- +-/* =========================================================================== +- * Write the output window window[0..outcnt-1] and update crc and bytes_out. 
+- * (Used for the decompressed data only.) +- */ +-static void __init flush_window(void) +-{ +- ulg c = crc; /* temporary variable */ +- unsigned n; +- uch *in, ch; +- +- flush_buffer(window, outcnt); +- in = window; +- for (n = 0; n < outcnt; n++) { +- ch = *in++; +- c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); +- } +- crc = c; +- bytes_out += (ulg)outcnt; +- outcnt = 0; +-} ++#include + + static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only) + { + int written; ++ decompress_fn decompress; ++ const char *compress_name; ++ static __initdata char msg_buf[64]; ++ + dry_run = check_only; + header_buf = kmalloc(110, GFP_KERNEL); + symlink_buf = kmalloc(PATH_MAX + N_ALIGN(PATH_MAX) + 1, GFP_KERNEL); + name_buf = kmalloc(N_ALIGN(PATH_MAX), GFP_KERNEL); +- window = kmalloc(WSIZE, GFP_KERNEL); +- if (!window || !header_buf || !symlink_buf || !name_buf) ++ ++ if (!header_buf || !symlink_buf || !name_buf) + panic("can't allocate buffers"); ++ + state = Start; + this_header = 0; + message = NULL; +@@ -505,22 +451,25 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only) + continue; + } + this_header = 0; +- insize = len; +- inbuf = buf; +- inptr = 0; +- outcnt = 0; /* bytes in output buffer */ +- bytes_out = 0; +- crc = (ulg)0xffffffffL; /* shift register contents */ +- makecrc(); +- gunzip(); ++ decompress = decompress_method(buf, len, &compress_name); ++ if (decompress) ++ decompress(buf, len, NULL, flush_buffer, NULL, ++ &my_inptr, error); ++ else if (compress_name) { ++ if (!message) { ++ snprintf(msg_buf, sizeof msg_buf, ++ "compression method %s not configured", ++ compress_name); ++ message = msg_buf; ++ } ++ } + if (state != Reset) +- error("junk in gzipped archive"); +- this_header = saved_offset + inptr; +- buf += inptr; +- len -= inptr; ++ error("junk in compressed archive"); ++ this_header = saved_offset + my_inptr; ++ buf += my_inptr; ++ len -= my_inptr; + } + dir_utime(); +- kfree(window); + 
kfree(name_buf); + kfree(symlink_buf); + kfree(header_buf); +@@ -579,7 +528,7 @@ static int __init populate_rootfs(void) + char *err = unpack_to_rootfs(__initramfs_start, + __initramfs_end - __initramfs_start, 0); + if (err) +- panic(err); ++ panic(err); /* Failed to decompress INTERNAL initramfs */ + if (initrd_start) { + #ifdef CONFIG_BLK_DEV_RAM + int fd; +@@ -605,9 +554,12 @@ static int __init populate_rootfs(void) + printk(KERN_INFO "Unpacking initramfs..."); + err = unpack_to_rootfs((char *)initrd_start, + initrd_end - initrd_start, 0); +- if (err) +- panic(err); +- printk(" done\n"); ++ if (err) { ++ printk(" failed!\n"); ++ printk(KERN_EMERG "%s\n", err); ++ } else { ++ printk(" done\n"); ++ } + free_initrd(); + #endif + } +diff --git a/lib/Kconfig b/lib/Kconfig +index 03c2c24..daa4818 100644 +--- a/lib/Kconfig ++++ b/lib/Kconfig +@@ -98,6 +98,20 @@ config LZO_DECOMPRESS + tristate + + # ++# These all provide a common interface (hence the apparent duplication with ++# ZLIB_INFLATE; DECOMPRESS_GZIP is just a wrapper.) 
++# ++config DECOMPRESS_GZIP ++ select ZLIB_INFLATE ++ tristate ++ ++config DECOMPRESS_BZIP2 ++ tristate ++ ++config DECOMPRESS_LZMA ++ tristate ++ ++# + # Generic allocator support is selected if needed + # + config GENERIC_ALLOCATOR +diff --git a/lib/Makefile b/lib/Makefile +index 32b0e64..790de7c 100644 +--- a/lib/Makefile ++++ b/lib/Makefile +@@ -11,7 +11,8 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ + rbtree.o radix-tree.o dump_stack.o \ + idr.o int_sqrt.o extable.o prio_tree.o \ + sha1.o irq_regs.o reciprocal_div.o argv_split.o \ +- proportions.o prio_heap.o ratelimit.o show_mem.o is_single_threaded.o ++ proportions.o prio_heap.o ratelimit.o show_mem.o \ ++ is_single_threaded.o decompress.o + + lib-$(CONFIG_MMU) += ioremap.o + lib-$(CONFIG_SMP) += cpumask.o +@@ -65,6 +66,10 @@ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ + obj-$(CONFIG_LZO_COMPRESS) += lzo/ + obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ + ++lib-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o ++lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o ++lib-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o ++ + obj-$(CONFIG_TEXTSEARCH) += textsearch.o + obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o + obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o +diff --git a/lib/decompress.c b/lib/decompress.c +new file mode 100644 +index 0000000..d2842f5 +--- /dev/null ++++ b/lib/decompress.c +@@ -0,0 +1,54 @@ ++/* ++ * decompress.c ++ * ++ * Detect the decompression method based on magic number ++ */ ++ ++#include ++ ++#include ++#include ++#include ++ ++#include ++#include ++ ++#ifndef CONFIG_DECOMPRESS_GZIP ++# define gunzip NULL ++#endif ++#ifndef CONFIG_DECOMPRESS_BZIP2 ++# define bunzip2 NULL ++#endif ++#ifndef CONFIG_DECOMPRESS_LZMA ++# define unlzma NULL ++#endif ++ ++static const struct compress_format { ++ unsigned char magic[2]; ++ const char *name; ++ decompress_fn decompressor; ++} compressed_formats[] = { ++ { {037, 0213}, "gzip", gunzip }, ++ { {037, 0236}, "gzip", gunzip }, ++ { {0x42, 0x5a}, "bzip2", 
bunzip2 }, ++ { {0x5d, 0x00}, "lzma", unlzma }, ++ { {0, 0}, NULL, NULL } ++}; ++ ++decompress_fn decompress_method(const unsigned char *inbuf, int len, ++ const char **name) ++{ ++ const struct compress_format *cf; ++ ++ if (len < 2) ++ return NULL; /* Need at least this much... */ ++ ++ for (cf = compressed_formats; cf->name; cf++) { ++ if (!memcmp(inbuf, cf->magic, 2)) ++ break; ++ ++ } ++ if (name) ++ *name = cf->name; ++ return cf->decompressor; ++} +diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c +new file mode 100644 +index 0000000..5d3ddb5 +--- /dev/null ++++ b/lib/decompress_bunzip2.c +@@ -0,0 +1,735 @@ ++/* vi: set sw = 4 ts = 4: */ ++/* Small bzip2 deflate implementation, by Rob Landley (rob@landley.net). ++ ++ Based on bzip2 decompression code by Julian R Seward (jseward@acm.org), ++ which also acknowledges contributions by Mike Burrows, David Wheeler, ++ Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten, ++ Robert Sedgewick, and Jon L. Bentley. ++ ++ This code is licensed under the LGPLv2: ++ LGPL (http://www.gnu.org/copyleft/lgpl.html ++*/ ++ ++/* ++ Size and speed optimizations by Manuel Novoa III (mjn3@codepoet.org). ++ ++ More efficient reading of Huffman codes, a streamlined read_bunzip() ++ function, and various other tweaks. In (limited) tests, approximately ++ 20% faster than bzcat on x86 and about 10% faster on arm. ++ ++ Note that about 2/3 of the time is spent in read_unzip() reversing ++ the Burrows-Wheeler transformation. Much of that time is delay ++ resulting from cache misses. ++ ++ I would ask that anyone benefiting from this work, especially those ++ using it in commercial products, consider making a donation to my local ++ non-profit hospice organization in the name of the woman I loved, who ++ passed away Feb. 12, 2003. ++ ++ In memory of Toni W. Hagan ++ ++ Hospice of Acadiana, Inc. 
++ 2600 Johnston St., Suite 200 ++ Lafayette, LA 70503-3240 ++ ++ Phone (337) 232-1234 or 1-800-738-2226 ++ Fax (337) 232-1297 ++ ++ http://www.hospiceacadiana.com/ ++ ++ Manuel ++ */ ++ ++/* ++ Made it fit for running in Linux Kernel by Alain Knaff (alain@knaff.lu) ++*/ ++ ++ ++#ifndef STATIC ++#include ++#endif /* !STATIC */ ++ ++#include ++ ++#ifndef INT_MAX ++#define INT_MAX 0x7fffffff ++#endif ++ ++/* Constants for Huffman coding */ ++#define MAX_GROUPS 6 ++#define GROUP_SIZE 50 /* 64 would have been more efficient */ ++#define MAX_HUFCODE_BITS 20 /* Longest Huffman code allowed */ ++#define MAX_SYMBOLS 258 /* 256 literals + RUNA + RUNB */ ++#define SYMBOL_RUNA 0 ++#define SYMBOL_RUNB 1 ++ ++/* Status return values */ ++#define RETVAL_OK 0 ++#define RETVAL_LAST_BLOCK (-1) ++#define RETVAL_NOT_BZIP_DATA (-2) ++#define RETVAL_UNEXPECTED_INPUT_EOF (-3) ++#define RETVAL_UNEXPECTED_OUTPUT_EOF (-4) ++#define RETVAL_DATA_ERROR (-5) ++#define RETVAL_OUT_OF_MEMORY (-6) ++#define RETVAL_OBSOLETE_INPUT (-7) ++ ++/* Other housekeeping constants */ ++#define BZIP2_IOBUF_SIZE 4096 ++ ++/* This is what we know about each Huffman coding group */ ++struct group_data { ++ /* We have an extra slot at the end of limit[] for a sentinal value. */ ++ int limit[MAX_HUFCODE_BITS+1]; ++ int base[MAX_HUFCODE_BITS]; ++ int permute[MAX_SYMBOLS]; ++ int minLen, maxLen; ++}; ++ ++/* Structure holding all the housekeeping data, including IO buffers and ++ memory that persists between calls to bunzip */ ++struct bunzip_data { ++ /* State for interrupting output loop */ ++ int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent; ++ /* I/O tracking data (file handles, buffers, positions, etc.) 
*/ ++ int (*fill)(void*, unsigned int); ++ int inbufCount, inbufPos /*, outbufPos*/; ++ unsigned char *inbuf /*,*outbuf*/; ++ unsigned int inbufBitCount, inbufBits; ++ /* The CRC values stored in the block header and calculated from the ++ data */ ++ unsigned int crc32Table[256], headerCRC, totalCRC, writeCRC; ++ /* Intermediate buffer and its size (in bytes) */ ++ unsigned int *dbuf, dbufSize; ++ /* These things are a bit too big to go on the stack */ ++ unsigned char selectors[32768]; /* nSelectors = 15 bits */ ++ struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */ ++ int io_error; /* non-zero if we have IO error */ ++}; ++ ++ ++/* Return the next nnn bits of input. All reads from the compressed input ++ are done through this function. All reads are big endian */ ++static unsigned int INIT get_bits(struct bunzip_data *bd, char bits_wanted) ++{ ++ unsigned int bits = 0; ++ ++ /* If we need to get more data from the byte buffer, do so. ++ (Loop getting one byte at a time to enforce endianness and avoid ++ unaligned access.) */ ++ while (bd->inbufBitCount < bits_wanted) { ++ /* If we need to read more data from file into byte buffer, do ++ so */ ++ if (bd->inbufPos == bd->inbufCount) { ++ if (bd->io_error) ++ return 0; ++ bd->inbufCount = bd->fill(bd->inbuf, BZIP2_IOBUF_SIZE); ++ if (bd->inbufCount <= 0) { ++ bd->io_error = RETVAL_UNEXPECTED_INPUT_EOF; ++ return 0; ++ } ++ bd->inbufPos = 0; ++ } ++ /* Avoid 32-bit overflow (dump bit buffer to top of output) */ ++ if (bd->inbufBitCount >= 24) { ++ bits = bd->inbufBits&((1 << bd->inbufBitCount)-1); ++ bits_wanted -= bd->inbufBitCount; ++ bits <<= bits_wanted; ++ bd->inbufBitCount = 0; ++ } ++ /* Grab next 8 bits of input from buffer. 
*/ ++ bd->inbufBits = (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++]; ++ bd->inbufBitCount += 8; ++ } ++ /* Calculate result */ ++ bd->inbufBitCount -= bits_wanted; ++ bits |= (bd->inbufBits >> bd->inbufBitCount)&((1 << bits_wanted)-1); ++ ++ return bits; ++} ++ ++/* Unpacks the next block and sets up for the inverse burrows-wheeler step. */ ++ ++static int INIT get_next_block(struct bunzip_data *bd) ++{ ++ struct group_data *hufGroup = NULL; ++ int *base = NULL; ++ int *limit = NULL; ++ int dbufCount, nextSym, dbufSize, groupCount, selector, ++ i, j, k, t, runPos, symCount, symTotal, nSelectors, ++ byteCount[256]; ++ unsigned char uc, symToByte[256], mtfSymbol[256], *selectors; ++ unsigned int *dbuf, origPtr; ++ ++ dbuf = bd->dbuf; ++ dbufSize = bd->dbufSize; ++ selectors = bd->selectors; ++ ++ /* Read in header signature and CRC, then validate signature. ++ (last block signature means CRC is for whole file, return now) */ ++ i = get_bits(bd, 24); ++ j = get_bits(bd, 24); ++ bd->headerCRC = get_bits(bd, 32); ++ if ((i == 0x177245) && (j == 0x385090)) ++ return RETVAL_LAST_BLOCK; ++ if ((i != 0x314159) || (j != 0x265359)) ++ return RETVAL_NOT_BZIP_DATA; ++ /* We can add support for blockRandomised if anybody complains. ++ There was some code for this in busybox 1.0.0-pre3, but nobody ever ++ noticed that it didn't actually work. */ ++ if (get_bits(bd, 1)) ++ return RETVAL_OBSOLETE_INPUT; ++ origPtr = get_bits(bd, 24); ++ if (origPtr > dbufSize) ++ return RETVAL_DATA_ERROR; ++ /* mapping table: if some byte values are never used (encoding things ++ like ascii text), the compression code removes the gaps to have fewer ++ symbols to deal with, and writes a sparse bitfield indicating which ++ values were present. We make a translation table to convert the ++ symbols back to the corresponding bytes. 
*/ ++ t = get_bits(bd, 16); ++ symTotal = 0; ++ for (i = 0; i < 16; i++) { ++ if (t&(1 << (15-i))) { ++ k = get_bits(bd, 16); ++ for (j = 0; j < 16; j++) ++ if (k&(1 << (15-j))) ++ symToByte[symTotal++] = (16*i)+j; ++ } ++ } ++ /* How many different Huffman coding groups does this block use? */ ++ groupCount = get_bits(bd, 3); ++ if (groupCount < 2 || groupCount > MAX_GROUPS) ++ return RETVAL_DATA_ERROR; ++ /* nSelectors: Every GROUP_SIZE many symbols we select a new ++ Huffman coding group. Read in the group selector list, ++ which is stored as MTF encoded bit runs. (MTF = Move To ++ Front, as each value is used it's moved to the start of the ++ list.) */ ++ nSelectors = get_bits(bd, 15); ++ if (!nSelectors) ++ return RETVAL_DATA_ERROR; ++ for (i = 0; i < groupCount; i++) ++ mtfSymbol[i] = i; ++ for (i = 0; i < nSelectors; i++) { ++ /* Get next value */ ++ for (j = 0; get_bits(bd, 1); j++) ++ if (j >= groupCount) ++ return RETVAL_DATA_ERROR; ++ /* Decode MTF to get the next selector */ ++ uc = mtfSymbol[j]; ++ for (; j; j--) ++ mtfSymbol[j] = mtfSymbol[j-1]; ++ mtfSymbol[0] = selectors[i] = uc; ++ } ++ /* Read the Huffman coding tables for each group, which code ++ for symTotal literal symbols, plus two run symbols (RUNA, ++ RUNB) */ ++ symCount = symTotal+2; ++ for (j = 0; j < groupCount; j++) { ++ unsigned char length[MAX_SYMBOLS], temp[MAX_HUFCODE_BITS+1]; ++ int minLen, maxLen, pp; ++ /* Read Huffman code lengths for each symbol. They're ++ stored in a way similar to mtf; record a starting ++ value for the first symbol, and an offset from the ++ previous value for everys symbol after that. ++ (Subtracting 1 before the loop and then adding it ++ back at the end is an optimization that makes the ++ test inside the loop simpler: symbol length 0 ++ becomes negative, so an unsigned inequality catches ++ it.) 
*/ ++ t = get_bits(bd, 5)-1; ++ for (i = 0; i < symCount; i++) { ++ for (;;) { ++ if (((unsigned)t) > (MAX_HUFCODE_BITS-1)) ++ return RETVAL_DATA_ERROR; ++ ++ /* If first bit is 0, stop. Else ++ second bit indicates whether to ++ increment or decrement the value. ++ Optimization: grab 2 bits and unget ++ the second if the first was 0. */ ++ ++ k = get_bits(bd, 2); ++ if (k < 2) { ++ bd->inbufBitCount++; ++ break; ++ } ++ /* Add one if second bit 1, else ++ * subtract 1. Avoids if/else */ ++ t += (((k+1)&2)-1); ++ } ++ /* Correct for the initial -1, to get the ++ * final symbol length */ ++ length[i] = t+1; ++ } ++ /* Find largest and smallest lengths in this group */ ++ minLen = maxLen = length[0]; ++ ++ for (i = 1; i < symCount; i++) { ++ if (length[i] > maxLen) ++ maxLen = length[i]; ++ else if (length[i] < minLen) ++ minLen = length[i]; ++ } ++ ++ /* Calculate permute[], base[], and limit[] tables from ++ * length[]. ++ * ++ * permute[] is the lookup table for converting ++ * Huffman coded symbols into decoded symbols. base[] ++ * is the amount to subtract from the value of a ++ * Huffman symbol of a given length when using ++ * permute[]. ++ * ++ * limit[] indicates the largest numerical value a ++ * symbol with a given number of bits can have. This ++ * is how the Huffman codes can vary in length: each ++ * code with a value > limit[length] needs another ++ * bit. ++ */ ++ hufGroup = bd->groups+j; ++ hufGroup->minLen = minLen; ++ hufGroup->maxLen = maxLen; ++ /* Note that minLen can't be smaller than 1, so we ++ adjust the base and limit array pointers so we're ++ not always wasting the first entry. We do this ++ again when using them (during symbol decoding).*/ ++ base = hufGroup->base-1; ++ limit = hufGroup->limit-1; ++ /* Calculate permute[]. Concurently, initialize ++ * temp[] and limit[]. 
*/ ++ pp = 0; ++ for (i = minLen; i <= maxLen; i++) { ++ temp[i] = limit[i] = 0; ++ for (t = 0; t < symCount; t++) ++ if (length[t] == i) ++ hufGroup->permute[pp++] = t; ++ } ++ /* Count symbols coded for at each bit length */ ++ for (i = 0; i < symCount; i++) ++ temp[length[i]]++; ++ /* Calculate limit[] (the largest symbol-coding value ++ *at each bit length, which is (previous limit << ++ *1)+symbols at this level), and base[] (number of ++ *symbols to ignore at each bit length, which is limit ++ *minus the cumulative count of symbols coded for ++ *already). */ ++ pp = t = 0; ++ for (i = minLen; i < maxLen; i++) { ++ pp += temp[i]; ++ /* We read the largest possible symbol size ++ and then unget bits after determining how ++ many we need, and those extra bits could be ++ set to anything. (They're noise from ++ future symbols.) At each level we're ++ really only interested in the first few ++ bits, so here we set all the trailing ++ to-be-ignored bits to 1 so they don't ++ affect the value > limit[length] ++ comparison. */ ++ limit[i] = (pp << (maxLen - i)) - 1; ++ pp <<= 1; ++ base[i+1] = pp-(t += temp[i]); ++ } ++ limit[maxLen+1] = INT_MAX; /* Sentinal value for ++ * reading next sym. */ ++ limit[maxLen] = pp+temp[maxLen]-1; ++ base[minLen] = 0; ++ } ++ /* We've finished reading and digesting the block header. Now ++ read this block's Huffman coded symbols from the file and ++ undo the Huffman coding and run length encoding, saving the ++ result into dbuf[dbufCount++] = uc */ ++ ++ /* Initialize symbol occurrence counters and symbol Move To ++ * Front table */ ++ for (i = 0; i < 256; i++) { ++ byteCount[i] = 0; ++ mtfSymbol[i] = (unsigned char)i; ++ } ++ /* Loop through compressed symbols. */ ++ runPos = dbufCount = symCount = selector = 0; ++ for (;;) { ++ /* Determine which Huffman coding group to use. 
*/ ++ if (!(symCount--)) { ++ symCount = GROUP_SIZE-1; ++ if (selector >= nSelectors) ++ return RETVAL_DATA_ERROR; ++ hufGroup = bd->groups+selectors[selector++]; ++ base = hufGroup->base-1; ++ limit = hufGroup->limit-1; ++ } ++ /* Read next Huffman-coded symbol. */ ++ /* Note: It is far cheaper to read maxLen bits and ++ back up than it is to read minLen bits and then an ++ additional bit at a time, testing as we go. ++ Because there is a trailing last block (with file ++ CRC), there is no danger of the overread causing an ++ unexpected EOF for a valid compressed file. As a ++ further optimization, we do the read inline ++ (falling back to a call to get_bits if the buffer ++ runs dry). The following (up to got_huff_bits:) is ++ equivalent to j = get_bits(bd, hufGroup->maxLen); ++ */ ++ while (bd->inbufBitCount < hufGroup->maxLen) { ++ if (bd->inbufPos == bd->inbufCount) { ++ j = get_bits(bd, hufGroup->maxLen); ++ goto got_huff_bits; ++ } ++ bd->inbufBits = ++ (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++]; ++ bd->inbufBitCount += 8; ++ }; ++ bd->inbufBitCount -= hufGroup->maxLen; ++ j = (bd->inbufBits >> bd->inbufBitCount)& ++ ((1 << hufGroup->maxLen)-1); ++got_huff_bits: ++ /* Figure how how many bits are in next symbol and ++ * unget extras */ ++ i = hufGroup->minLen; ++ while (j > limit[i]) ++ ++i; ++ bd->inbufBitCount += (hufGroup->maxLen - i); ++ /* Huffman decode value to get nextSym (with bounds checking) */ ++ if ((i > hufGroup->maxLen) ++ || (((unsigned)(j = (j>>(hufGroup->maxLen-i))-base[i])) ++ >= MAX_SYMBOLS)) ++ return RETVAL_DATA_ERROR; ++ nextSym = hufGroup->permute[j]; ++ /* We have now decoded the symbol, which indicates ++ either a new literal byte, or a repeated run of the ++ most recent literal byte. First, check if nextSym ++ indicates a repeated run, and if so loop collecting ++ how many times to repeat the last literal. 
*/ ++ if (((unsigned)nextSym) <= SYMBOL_RUNB) { /* RUNA or RUNB */ ++ /* If this is the start of a new run, zero out ++ * counter */ ++ if (!runPos) { ++ runPos = 1; ++ t = 0; ++ } ++ /* Neat trick that saves 1 symbol: instead of ++ or-ing 0 or 1 at each bit position, add 1 ++ or 2 instead. For example, 1011 is 1 << 0 ++ + 1 << 1 + 2 << 2. 1010 is 2 << 0 + 2 << 1 ++ + 1 << 2. You can make any bit pattern ++ that way using 1 less symbol than the basic ++ or 0/1 method (except all bits 0, which ++ would use no symbols, but a run of length 0 ++ doesn't mean anything in this context). ++ Thus space is saved. */ ++ t += (runPos << nextSym); ++ /* +runPos if RUNA; +2*runPos if RUNB */ ++ ++ runPos <<= 1; ++ continue; ++ } ++ /* When we hit the first non-run symbol after a run, ++ we now know how many times to repeat the last ++ literal, so append that many copies to our buffer ++ of decoded symbols (dbuf) now. (The last literal ++ used is the one at the head of the mtfSymbol ++ array.) */ ++ if (runPos) { ++ runPos = 0; ++ if (dbufCount+t >= dbufSize) ++ return RETVAL_DATA_ERROR; ++ ++ uc = symToByte[mtfSymbol[0]]; ++ byteCount[uc] += t; ++ while (t--) ++ dbuf[dbufCount++] = uc; ++ } ++ /* Is this the terminating symbol? */ ++ if (nextSym > symTotal) ++ break; ++ /* At this point, nextSym indicates a new literal ++ character. Subtract one to get the position in the ++ MTF array at which this literal is currently to be ++ found. (Note that the result can't be -1 or 0, ++ because 0 and 1 are RUNA and RUNB. But another ++ instance of the first symbol in the mtf array, ++ position 0, would have been handled as part of a ++ run above. Therefore 1 unused mtf position minus 2 ++ non-literal nextSym values equals -1.) */ ++ if (dbufCount >= dbufSize) ++ return RETVAL_DATA_ERROR; ++ i = nextSym - 1; ++ uc = mtfSymbol[i]; ++ /* Adjust the MTF array. 
Since we typically expect to ++ *move only a small number of symbols, and are bound ++ *by 256 in any case, using memmove here would ++ *typically be bigger and slower due to function call ++ *overhead and other assorted setup costs. */ ++ do { ++ mtfSymbol[i] = mtfSymbol[i-1]; ++ } while (--i); ++ mtfSymbol[0] = uc; ++ uc = symToByte[uc]; ++ /* We have our literal byte. Save it into dbuf. */ ++ byteCount[uc]++; ++ dbuf[dbufCount++] = (unsigned int)uc; ++ } ++ /* At this point, we've read all the Huffman-coded symbols ++ (and repeated runs) for this block from the input stream, ++ and decoded them into the intermediate buffer. There are ++ dbufCount many decoded bytes in dbuf[]. Now undo the ++ Burrows-Wheeler transform on dbuf. See ++ http://dogma.net/markn/articles/bwt/bwt.htm ++ */ ++ /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */ ++ j = 0; ++ for (i = 0; i < 256; i++) { ++ k = j+byteCount[i]; ++ byteCount[i] = j; ++ j = k; ++ } ++ /* Figure out what order dbuf would be in if we sorted it. */ ++ for (i = 0; i < dbufCount; i++) { ++ uc = (unsigned char)(dbuf[i] & 0xff); ++ dbuf[byteCount[uc]] |= (i << 8); ++ byteCount[uc]++; ++ } ++ /* Decode first byte by hand to initialize "previous" byte. ++ Note that it doesn't get output, and if the first three ++ characters are identical it doesn't qualify as a run (hence ++ writeRunCountdown = 5). */ ++ if (dbufCount) { ++ if (origPtr >= dbufCount) ++ return RETVAL_DATA_ERROR; ++ bd->writePos = dbuf[origPtr]; ++ bd->writeCurrent = (unsigned char)(bd->writePos&0xff); ++ bd->writePos >>= 8; ++ bd->writeRunCountdown = 5; ++ } ++ bd->writeCount = dbufCount; ++ ++ return RETVAL_OK; ++} ++ ++/* Undo burrows-wheeler transform on intermediate buffer to produce output. ++ If start_bunzip was initialized with out_fd =-1, then up to len bytes of ++ data are written to outbuf. Return value is number of bytes written or ++ error (all errors are negative numbers). 
If out_fd!=-1, outbuf and len ++ are ignored, data is written to out_fd and return is RETVAL_OK or error. ++*/ ++ ++static int INIT read_bunzip(struct bunzip_data *bd, char *outbuf, int len) ++{ ++ const unsigned int *dbuf; ++ int pos, xcurrent, previous, gotcount; ++ ++ /* If last read was short due to end of file, return last block now */ ++ if (bd->writeCount < 0) ++ return bd->writeCount; ++ ++ gotcount = 0; ++ dbuf = bd->dbuf; ++ pos = bd->writePos; ++ xcurrent = bd->writeCurrent; ++ ++ /* We will always have pending decoded data to write into the output ++ buffer unless this is the very first call (in which case we haven't ++ Huffman-decoded a block into the intermediate buffer yet). */ ++ ++ if (bd->writeCopies) { ++ /* Inside the loop, writeCopies means extra copies (beyond 1) */ ++ --bd->writeCopies; ++ /* Loop outputting bytes */ ++ for (;;) { ++ /* If the output buffer is full, snapshot ++ * state and return */ ++ if (gotcount >= len) { ++ bd->writePos = pos; ++ bd->writeCurrent = xcurrent; ++ bd->writeCopies++; ++ return len; ++ } ++ /* Write next byte into output buffer, updating CRC */ ++ outbuf[gotcount++] = xcurrent; ++ bd->writeCRC = (((bd->writeCRC) << 8) ++ ^bd->crc32Table[((bd->writeCRC) >> 24) ++ ^xcurrent]); ++ /* Loop now if we're outputting multiple ++ * copies of this byte */ ++ if (bd->writeCopies) { ++ --bd->writeCopies; ++ continue; ++ } ++decode_next_byte: ++ if (!bd->writeCount--) ++ break; ++ /* Follow sequence vector to undo ++ * Burrows-Wheeler transform */ ++ previous = xcurrent; ++ pos = dbuf[pos]; ++ xcurrent = pos&0xff; ++ pos >>= 8; ++ /* After 3 consecutive copies of the same ++ byte, the 4th is a repeat count. 
We count ++ down from 4 instead *of counting up because ++ testing for non-zero is faster */ ++ if (--bd->writeRunCountdown) { ++ if (xcurrent != previous) ++ bd->writeRunCountdown = 4; ++ } else { ++ /* We have a repeated run, this byte ++ * indicates the count */ ++ bd->writeCopies = xcurrent; ++ xcurrent = previous; ++ bd->writeRunCountdown = 5; ++ /* Sometimes there are just 3 bytes ++ * (run length 0) */ ++ if (!bd->writeCopies) ++ goto decode_next_byte; ++ /* Subtract the 1 copy we'd output ++ * anyway to get extras */ ++ --bd->writeCopies; ++ } ++ } ++ /* Decompression of this block completed successfully */ ++ bd->writeCRC = ~bd->writeCRC; ++ bd->totalCRC = ((bd->totalCRC << 1) | ++ (bd->totalCRC >> 31)) ^ bd->writeCRC; ++ /* If this block had a CRC error, force file level CRC error. */ ++ if (bd->writeCRC != bd->headerCRC) { ++ bd->totalCRC = bd->headerCRC+1; ++ return RETVAL_LAST_BLOCK; ++ } ++ } ++ ++ /* Refill the intermediate buffer by Huffman-decoding next ++ * block of input */ ++ /* (previous is just a convenient unused temp variable here) */ ++ previous = get_next_block(bd); ++ if (previous) { ++ bd->writeCount = previous; ++ return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount; ++ } ++ bd->writeCRC = 0xffffffffUL; ++ pos = bd->writePos; ++ xcurrent = bd->writeCurrent; ++ goto decode_next_byte; ++} ++ ++static int INIT nofill(void *buf, unsigned int len) ++{ ++ return -1; ++} ++ ++/* Allocate the structure, read file header. If in_fd ==-1, inbuf must contain ++ a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are ++ ignored, and data is read from file handle into temporary buffer. 
*/ ++static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len, ++ int (*fill)(void*, unsigned int)) ++{ ++ struct bunzip_data *bd; ++ unsigned int i, j, c; ++ const unsigned int BZh0 = ++ (((unsigned int)'B') << 24)+(((unsigned int)'Z') << 16) ++ +(((unsigned int)'h') << 8)+(unsigned int)'0'; ++ ++ /* Figure out how much data to allocate */ ++ i = sizeof(struct bunzip_data); ++ ++ /* Allocate bunzip_data. Most fields initialize to zero. */ ++ bd = *bdp = malloc(i); ++ memset(bd, 0, sizeof(struct bunzip_data)); ++ /* Setup input buffer */ ++ bd->inbuf = inbuf; ++ bd->inbufCount = len; ++ if (fill != NULL) ++ bd->fill = fill; ++ else ++ bd->fill = nofill; ++ ++ /* Init the CRC32 table (big endian) */ ++ for (i = 0; i < 256; i++) { ++ c = i << 24; ++ for (j = 8; j; j--) ++ c = c&0x80000000 ? (c << 1)^0x04c11db7 : (c << 1); ++ bd->crc32Table[i] = c; ++ } ++ ++ /* Ensure that file starts with "BZh['1'-'9']." */ ++ i = get_bits(bd, 32); ++ if (((unsigned int)(i-BZh0-1)) >= 9) ++ return RETVAL_NOT_BZIP_DATA; ++ ++ /* Fourth byte (ascii '1'-'9'), indicates block size in units of 100k of ++ uncompressed data. Allocate intermediate buffer for block. */ ++ bd->dbufSize = 100000*(i-BZh0); ++ ++ bd->dbuf = large_malloc(bd->dbufSize * sizeof(int)); ++ return RETVAL_OK; ++} ++ ++/* Example usage: decompress src_fd to dst_fd. (Stops at end of bzip2 data, ++ not end of file.) 
*/ ++STATIC int INIT bunzip2(unsigned char *buf, int len, ++ int(*fill)(void*, unsigned int), ++ int(*flush)(void*, unsigned int), ++ unsigned char *outbuf, ++ int *pos, ++ void(*error_fn)(char *x)) ++{ ++ struct bunzip_data *bd; ++ int i = -1; ++ unsigned char *inbuf; ++ ++ set_error_fn(error_fn); ++ if (flush) ++ outbuf = malloc(BZIP2_IOBUF_SIZE); ++ else ++ len -= 4; /* Uncompressed size hack active in pre-boot ++ environment */ ++ if (!outbuf) { ++ error("Could not allocate output bufer"); ++ return -1; ++ } ++ if (buf) ++ inbuf = buf; ++ else ++ inbuf = malloc(BZIP2_IOBUF_SIZE); ++ if (!inbuf) { ++ error("Could not allocate input bufer"); ++ goto exit_0; ++ } ++ i = start_bunzip(&bd, inbuf, len, fill); ++ if (!i) { ++ for (;;) { ++ i = read_bunzip(bd, outbuf, BZIP2_IOBUF_SIZE); ++ if (i <= 0) ++ break; ++ if (!flush) ++ outbuf += i; ++ else ++ if (i != flush(outbuf, i)) { ++ i = RETVAL_UNEXPECTED_OUTPUT_EOF; ++ break; ++ } ++ } ++ } ++ /* Check CRC and release memory */ ++ if (i == RETVAL_LAST_BLOCK) { ++ if (bd->headerCRC != bd->totalCRC) ++ error("Data integrity error when decompressing."); ++ else ++ i = RETVAL_OK; ++ } else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) { ++ error("Compressed file ends unexpectedly"); ++ } ++ if (bd->dbuf) ++ large_free(bd->dbuf); ++ if (pos) ++ *pos = bd->inbufPos; ++ free(bd); ++ if (!buf) ++ free(inbuf); ++exit_0: ++ if (flush) ++ free(outbuf); ++ return i; ++} ++ ++#define decompress bunzip2 +diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c +new file mode 100644 +index 0000000..839a329 +--- /dev/null ++++ b/lib/decompress_inflate.c +@@ -0,0 +1,167 @@ ++#ifdef STATIC ++/* Pre-boot environment: included */ ++ ++/* prevent inclusion of _LINUX_KERNEL_H in pre-boot environment: lots ++ * errors about console_printk etc... 
on ARM */ ++#define _LINUX_KERNEL_H ++ ++#include "zlib_inflate/inftrees.c" ++#include "zlib_inflate/inffast.c" ++#include "zlib_inflate/inflate.c" ++ ++#else /* STATIC */ ++/* initramfs et al: linked */ ++ ++#include ++ ++#include "zlib_inflate/inftrees.h" ++#include "zlib_inflate/inffast.h" ++#include "zlib_inflate/inflate.h" ++ ++#include "zlib_inflate/infutil.h" ++ ++#endif /* STATIC */ ++ ++#include ++ ++#define INBUF_LEN (16*1024) ++ ++/* Included from initramfs et al code */ ++STATIC int INIT gunzip(unsigned char *buf, int len, ++ int(*fill)(void*, unsigned int), ++ int(*flush)(void*, unsigned int), ++ unsigned char *out_buf, ++ int *pos, ++ void(*error_fn)(char *x)) { ++ u8 *zbuf; ++ struct z_stream_s *strm; ++ int rc; ++ size_t out_len; ++ ++ set_error_fn(error_fn); ++ rc = -1; ++ if (flush) { ++ out_len = 0x8000; /* 32 K */ ++ out_buf = malloc(out_len); ++ } else { ++ out_len = 0x7fffffff; /* no limit */ ++ } ++ if (!out_buf) { ++ error("Out of memory while allocating output buffer"); ++ goto gunzip_nomem1; ++ } ++ ++ if (buf) ++ zbuf = buf; ++ else { ++ zbuf = malloc(INBUF_LEN); ++ len = 0; ++ } ++ if (!zbuf) { ++ error("Out of memory while allocating input buffer"); ++ goto gunzip_nomem2; ++ } ++ ++ strm = malloc(sizeof(*strm)); ++ if (strm == NULL) { ++ error("Out of memory while allocating z_stream"); ++ goto gunzip_nomem3; ++ } ++ ++ strm->workspace = malloc(flush ? zlib_inflate_workspacesize() : ++ sizeof(struct inflate_state)); ++ if (strm->workspace == NULL) { ++ error("Out of memory while allocating workspace"); ++ goto gunzip_nomem4; ++ } ++ ++ if (len == 0) ++ len = fill(zbuf, INBUF_LEN); ++ ++ /* verify the gzip header */ ++ if (len < 10 || ++ zbuf[0] != 0x1f || zbuf[1] != 0x8b || zbuf[2] != 0x08) { ++ if (pos) ++ *pos = 0; ++ error("Not a gzip file"); ++ goto gunzip_5; ++ } ++ ++ /* skip over gzip header (1f,8b,08... 
10 bytes total + ++ * possible asciz filename) ++ */ ++ strm->next_in = zbuf + 10; ++ /* skip over asciz filename */ ++ if (zbuf[3] & 0x8) { ++ while (strm->next_in[0]) ++ strm->next_in++; ++ strm->next_in++; ++ } ++ strm->avail_in = len - (strm->next_in - zbuf); ++ ++ strm->next_out = out_buf; ++ strm->avail_out = out_len; ++ ++ rc = zlib_inflateInit2(strm, -MAX_WBITS); ++ ++ if (!flush) { ++ WS(strm)->inflate_state.wsize = 0; ++ WS(strm)->inflate_state.window = NULL; ++ } ++ ++ while (rc == Z_OK) { ++ if (strm->avail_in == 0) { ++ /* TODO: handle case where both pos and fill are set */ ++ len = fill(zbuf, INBUF_LEN); ++ if (len < 0) { ++ rc = -1; ++ error("read error"); ++ break; ++ } ++ strm->next_in = zbuf; ++ strm->avail_in = len; ++ } ++ rc = zlib_inflate(strm, 0); ++ ++ /* Write any data generated */ ++ if (flush && strm->next_out > out_buf) { ++ int l = strm->next_out - out_buf; ++ if (l != flush(out_buf, l)) { ++ rc = -1; ++ error("write error"); ++ break; ++ } ++ strm->next_out = out_buf; ++ strm->avail_out = out_len; ++ } ++ ++ /* after Z_FINISH, only Z_STREAM_END is "we unpacked it all" */ ++ if (rc == Z_STREAM_END) { ++ rc = 0; ++ break; ++ } else if (rc != Z_OK) { ++ error("uncompression error"); ++ rc = -1; ++ } ++ } ++ ++ zlib_inflateEnd(strm); ++ if (pos) ++ /* add + 8 to skip over trailer */ ++ *pos = strm->next_in - zbuf+8; ++ ++gunzip_5: ++ free(strm->workspace); ++gunzip_nomem4: ++ free(strm); ++gunzip_nomem3: ++ if (!buf) ++ free(zbuf); ++gunzip_nomem2: ++ if (flush) ++ free(out_buf); ++gunzip_nomem1: ++ return rc; /* returns Z_OK (0) if successful */ ++} ++ ++#define decompress gunzip +diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c +new file mode 100644 +index 0000000..546f2f4 +--- /dev/null ++++ b/lib/decompress_unlzma.c +@@ -0,0 +1,647 @@ ++/* Lzma decompressor for Linux kernel. 
Shamelessly snarfed ++ *from busybox 1.1.1 ++ * ++ *Linux kernel adaptation ++ *Copyright (C) 2006 Alain < alain@knaff.lu > ++ * ++ *Based on small lzma deflate implementation/Small range coder ++ *implementation for lzma. ++ *Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > ++ * ++ *Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) ++ *Copyright (C) 1999-2005 Igor Pavlov ++ * ++ *Copyrights of the parts, see headers below. ++ * ++ * ++ *This program is free software; you can redistribute it and/or ++ *modify it under the terms of the GNU Lesser General Public ++ *License as published by the Free Software Foundation; either ++ *version 2.1 of the License, or (at your option) any later version. ++ * ++ *This program is distributed in the hope that it will be useful, ++ *but WITHOUT ANY WARRANTY; without even the implied warranty of ++ *MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ *Lesser General Public License for more details. ++ * ++ *You should have received a copy of the GNU Lesser General Public ++ *License along with this library; if not, write to the Free Software ++ *Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#ifndef STATIC ++#include ++#endif /* STATIC */ ++ ++#include ++ ++#define MIN(a, b) (((a) < (b)) ? (a) : (b)) ++ ++static long long INIT read_int(unsigned char *ptr, int size) ++{ ++ int i; ++ long long ret = 0; ++ ++ for (i = 0; i < size; i++) ++ ret = (ret << 8) | ptr[size-i-1]; ++ return ret; ++} ++ ++#define ENDIAN_CONVERT(x) \ ++ x = (typeof(x))read_int((unsigned char *)&x, sizeof(x)) ++ ++ ++/* Small range coder implementation for lzma. 
++ *Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > ++ * ++ *Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) ++ *Copyright (c) 1999-2005 Igor Pavlov ++ */ ++ ++#include ++ ++#define LZMA_IOBUF_SIZE 0x10000 ++ ++struct rc { ++ int (*fill)(void*, unsigned int); ++ uint8_t *ptr; ++ uint8_t *buffer; ++ uint8_t *buffer_end; ++ int buffer_size; ++ uint32_t code; ++ uint32_t range; ++ uint32_t bound; ++}; ++ ++ ++#define RC_TOP_BITS 24 ++#define RC_MOVE_BITS 5 ++#define RC_MODEL_TOTAL_BITS 11 ++ ++ ++/* Called twice: once at startup and once in rc_normalize() */ ++static void INIT rc_read(struct rc *rc) ++{ ++ rc->buffer_size = rc->fill((char *)rc->buffer, LZMA_IOBUF_SIZE); ++ if (rc->buffer_size <= 0) ++ error("unexpected EOF"); ++ rc->ptr = rc->buffer; ++ rc->buffer_end = rc->buffer + rc->buffer_size; ++} ++ ++/* Called once */ ++static inline void INIT rc_init(struct rc *rc, ++ int (*fill)(void*, unsigned int), ++ char *buffer, int buffer_size) ++{ ++ rc->fill = fill; ++ rc->buffer = (uint8_t *)buffer; ++ rc->buffer_size = buffer_size; ++ rc->buffer_end = rc->buffer + rc->buffer_size; ++ rc->ptr = rc->buffer; ++ ++ rc->code = 0; ++ rc->range = 0xFFFFFFFF; ++} ++ ++static inline void INIT rc_init_code(struct rc *rc) ++{ ++ int i; ++ ++ for (i = 0; i < 5; i++) { ++ if (rc->ptr >= rc->buffer_end) ++ rc_read(rc); ++ rc->code = (rc->code << 8) | *rc->ptr++; ++ } ++} ++ ++ ++/* Called once. TODO: bb_maybe_free() */ ++static inline void INIT rc_free(struct rc *rc) ++{ ++ free(rc->buffer); ++} ++ ++/* Called twice, but one callsite is in inline'd rc_is_bit_0_helper() */ ++static void INIT rc_do_normalize(struct rc *rc) ++{ ++ if (rc->ptr >= rc->buffer_end) ++ rc_read(rc); ++ rc->range <<= 8; ++ rc->code = (rc->code << 8) | *rc->ptr++; ++} ++static inline void INIT rc_normalize(struct rc *rc) ++{ ++ if (rc->range < (1 << RC_TOP_BITS)) ++ rc_do_normalize(rc); ++} ++ ++/* Called 9 times */ ++/* Why rc_is_bit_0_helper exists? 
++ *Because we want to always expose (rc->code < rc->bound) to optimizer ++ */ ++static inline uint32_t INIT rc_is_bit_0_helper(struct rc *rc, uint16_t *p) ++{ ++ rc_normalize(rc); ++ rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS); ++ return rc->bound; ++} ++static inline int INIT rc_is_bit_0(struct rc *rc, uint16_t *p) ++{ ++ uint32_t t = rc_is_bit_0_helper(rc, p); ++ return rc->code < t; ++} ++ ++/* Called ~10 times, but very small, thus inlined */ ++static inline void INIT rc_update_bit_0(struct rc *rc, uint16_t *p) ++{ ++ rc->range = rc->bound; ++ *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS; ++} ++static inline void rc_update_bit_1(struct rc *rc, uint16_t *p) ++{ ++ rc->range -= rc->bound; ++ rc->code -= rc->bound; ++ *p -= *p >> RC_MOVE_BITS; ++} ++ ++/* Called 4 times in unlzma loop */ ++static int INIT rc_get_bit(struct rc *rc, uint16_t *p, int *symbol) ++{ ++ if (rc_is_bit_0(rc, p)) { ++ rc_update_bit_0(rc, p); ++ *symbol *= 2; ++ return 0; ++ } else { ++ rc_update_bit_1(rc, p); ++ *symbol = *symbol * 2 + 1; ++ return 1; ++ } ++} ++ ++/* Called once */ ++static inline int INIT rc_direct_bit(struct rc *rc) ++{ ++ rc_normalize(rc); ++ rc->range >>= 1; ++ if (rc->code >= rc->range) { ++ rc->code -= rc->range; ++ return 1; ++ } ++ return 0; ++} ++ ++/* Called twice */ ++static inline void INIT ++rc_bit_tree_decode(struct rc *rc, uint16_t *p, int num_levels, int *symbol) ++{ ++ int i = num_levels; ++ ++ *symbol = 1; ++ while (i--) ++ rc_get_bit(rc, p + *symbol, symbol); ++ *symbol -= 1 << num_levels; ++} ++ ++ ++/* ++ * Small lzma deflate implementation. 
++ * Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > ++ * ++ * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) ++ * Copyright (C) 1999-2005 Igor Pavlov ++ */ ++ ++ ++struct lzma_header { ++ uint8_t pos; ++ uint32_t dict_size; ++ uint64_t dst_size; ++} __attribute__ ((packed)) ; ++ ++ ++#define LZMA_BASE_SIZE 1846 ++#define LZMA_LIT_SIZE 768 ++ ++#define LZMA_NUM_POS_BITS_MAX 4 ++ ++#define LZMA_LEN_NUM_LOW_BITS 3 ++#define LZMA_LEN_NUM_MID_BITS 3 ++#define LZMA_LEN_NUM_HIGH_BITS 8 ++ ++#define LZMA_LEN_CHOICE 0 ++#define LZMA_LEN_CHOICE_2 (LZMA_LEN_CHOICE + 1) ++#define LZMA_LEN_LOW (LZMA_LEN_CHOICE_2 + 1) ++#define LZMA_LEN_MID (LZMA_LEN_LOW \ ++ + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS))) ++#define LZMA_LEN_HIGH (LZMA_LEN_MID \ ++ +(1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS))) ++#define LZMA_NUM_LEN_PROBS (LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS)) ++ ++#define LZMA_NUM_STATES 12 ++#define LZMA_NUM_LIT_STATES 7 ++ ++#define LZMA_START_POS_MODEL_INDEX 4 ++#define LZMA_END_POS_MODEL_INDEX 14 ++#define LZMA_NUM_FULL_DISTANCES (1 << (LZMA_END_POS_MODEL_INDEX >> 1)) ++ ++#define LZMA_NUM_POS_SLOT_BITS 6 ++#define LZMA_NUM_LEN_TO_POS_STATES 4 ++ ++#define LZMA_NUM_ALIGN_BITS 4 ++ ++#define LZMA_MATCH_MIN_LEN 2 ++ ++#define LZMA_IS_MATCH 0 ++#define LZMA_IS_REP (LZMA_IS_MATCH + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)) ++#define LZMA_IS_REP_G0 (LZMA_IS_REP + LZMA_NUM_STATES) ++#define LZMA_IS_REP_G1 (LZMA_IS_REP_G0 + LZMA_NUM_STATES) ++#define LZMA_IS_REP_G2 (LZMA_IS_REP_G1 + LZMA_NUM_STATES) ++#define LZMA_IS_REP_0_LONG (LZMA_IS_REP_G2 + LZMA_NUM_STATES) ++#define LZMA_POS_SLOT (LZMA_IS_REP_0_LONG \ ++ + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)) ++#define LZMA_SPEC_POS (LZMA_POS_SLOT \ ++ +(LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS)) ++#define LZMA_ALIGN (LZMA_SPEC_POS \ ++ + LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX) ++#define LZMA_LEN_CODER (LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS)) 
++#define LZMA_REP_LEN_CODER (LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS) ++#define LZMA_LITERAL (LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS) ++ ++ ++struct writer { ++ uint8_t *buffer; ++ uint8_t previous_byte; ++ size_t buffer_pos; ++ int bufsize; ++ size_t global_pos; ++ int(*flush)(void*, unsigned int); ++ struct lzma_header *header; ++}; ++ ++struct cstate { ++ int state; ++ uint32_t rep0, rep1, rep2, rep3; ++}; ++ ++static inline size_t INIT get_pos(struct writer *wr) ++{ ++ return ++ wr->global_pos + wr->buffer_pos; ++} ++ ++static inline uint8_t INIT peek_old_byte(struct writer *wr, ++ uint32_t offs) ++{ ++ if (!wr->flush) { ++ int32_t pos; ++ while (offs > wr->header->dict_size) ++ offs -= wr->header->dict_size; ++ pos = wr->buffer_pos - offs; ++ return wr->buffer[pos]; ++ } else { ++ uint32_t pos = wr->buffer_pos - offs; ++ while (pos >= wr->header->dict_size) ++ pos += wr->header->dict_size; ++ return wr->buffer[pos]; ++ } ++ ++} ++ ++static inline void INIT write_byte(struct writer *wr, uint8_t byte) ++{ ++ wr->buffer[wr->buffer_pos++] = wr->previous_byte = byte; ++ if (wr->flush && wr->buffer_pos == wr->header->dict_size) { ++ wr->buffer_pos = 0; ++ wr->global_pos += wr->header->dict_size; ++ wr->flush((char *)wr->buffer, wr->header->dict_size); ++ } ++} ++ ++ ++static inline void INIT copy_byte(struct writer *wr, uint32_t offs) ++{ ++ write_byte(wr, peek_old_byte(wr, offs)); ++} ++ ++static inline void INIT copy_bytes(struct writer *wr, ++ uint32_t rep0, int len) ++{ ++ do { ++ copy_byte(wr, rep0); ++ len--; ++ } while (len != 0 && wr->buffer_pos < wr->header->dst_size); ++} ++ ++static inline void INIT process_bit0(struct writer *wr, struct rc *rc, ++ struct cstate *cst, uint16_t *p, ++ int pos_state, uint16_t *prob, ++ int lc, uint32_t literal_pos_mask) { ++ int mi = 1; ++ rc_update_bit_0(rc, prob); ++ prob = (p + LZMA_LITERAL + ++ (LZMA_LIT_SIZE ++ * (((get_pos(wr) & literal_pos_mask) << lc) ++ + (wr->previous_byte >> (8 - lc)))) ++ ); ++ ++ if (cst->state 
>= LZMA_NUM_LIT_STATES) { ++ int match_byte = peek_old_byte(wr, cst->rep0); ++ do { ++ int bit; ++ uint16_t *prob_lit; ++ ++ match_byte <<= 1; ++ bit = match_byte & 0x100; ++ prob_lit = prob + 0x100 + bit + mi; ++ if (rc_get_bit(rc, prob_lit, &mi)) { ++ if (!bit) ++ break; ++ } else { ++ if (bit) ++ break; ++ } ++ } while (mi < 0x100); ++ } ++ while (mi < 0x100) { ++ uint16_t *prob_lit = prob + mi; ++ rc_get_bit(rc, prob_lit, &mi); ++ } ++ write_byte(wr, mi); ++ if (cst->state < 4) ++ cst->state = 0; ++ else if (cst->state < 10) ++ cst->state -= 3; ++ else ++ cst->state -= 6; ++} ++ ++static inline void INIT process_bit1(struct writer *wr, struct rc *rc, ++ struct cstate *cst, uint16_t *p, ++ int pos_state, uint16_t *prob) { ++ int offset; ++ uint16_t *prob_len; ++ int num_bits; ++ int len; ++ ++ rc_update_bit_1(rc, prob); ++ prob = p + LZMA_IS_REP + cst->state; ++ if (rc_is_bit_0(rc, prob)) { ++ rc_update_bit_0(rc, prob); ++ cst->rep3 = cst->rep2; ++ cst->rep2 = cst->rep1; ++ cst->rep1 = cst->rep0; ++ cst->state = cst->state < LZMA_NUM_LIT_STATES ? 0 : 3; ++ prob = p + LZMA_LEN_CODER; ++ } else { ++ rc_update_bit_1(rc, prob); ++ prob = p + LZMA_IS_REP_G0 + cst->state; ++ if (rc_is_bit_0(rc, prob)) { ++ rc_update_bit_0(rc, prob); ++ prob = (p + LZMA_IS_REP_0_LONG ++ + (cst->state << ++ LZMA_NUM_POS_BITS_MAX) + ++ pos_state); ++ if (rc_is_bit_0(rc, prob)) { ++ rc_update_bit_0(rc, prob); ++ ++ cst->state = cst->state < LZMA_NUM_LIT_STATES ? 
++ 9 : 11; ++ copy_byte(wr, cst->rep0); ++ return; ++ } else { ++ rc_update_bit_1(rc, prob); ++ } ++ } else { ++ uint32_t distance; ++ ++ rc_update_bit_1(rc, prob); ++ prob = p + LZMA_IS_REP_G1 + cst->state; ++ if (rc_is_bit_0(rc, prob)) { ++ rc_update_bit_0(rc, prob); ++ distance = cst->rep1; ++ } else { ++ rc_update_bit_1(rc, prob); ++ prob = p + LZMA_IS_REP_G2 + cst->state; ++ if (rc_is_bit_0(rc, prob)) { ++ rc_update_bit_0(rc, prob); ++ distance = cst->rep2; ++ } else { ++ rc_update_bit_1(rc, prob); ++ distance = cst->rep3; ++ cst->rep3 = cst->rep2; ++ } ++ cst->rep2 = cst->rep1; ++ } ++ cst->rep1 = cst->rep0; ++ cst->rep0 = distance; ++ } ++ cst->state = cst->state < LZMA_NUM_LIT_STATES ? 8 : 11; ++ prob = p + LZMA_REP_LEN_CODER; ++ } ++ ++ prob_len = prob + LZMA_LEN_CHOICE; ++ if (rc_is_bit_0(rc, prob_len)) { ++ rc_update_bit_0(rc, prob_len); ++ prob_len = (prob + LZMA_LEN_LOW ++ + (pos_state << ++ LZMA_LEN_NUM_LOW_BITS)); ++ offset = 0; ++ num_bits = LZMA_LEN_NUM_LOW_BITS; ++ } else { ++ rc_update_bit_1(rc, prob_len); ++ prob_len = prob + LZMA_LEN_CHOICE_2; ++ if (rc_is_bit_0(rc, prob_len)) { ++ rc_update_bit_0(rc, prob_len); ++ prob_len = (prob + LZMA_LEN_MID ++ + (pos_state << ++ LZMA_LEN_NUM_MID_BITS)); ++ offset = 1 << LZMA_LEN_NUM_LOW_BITS; ++ num_bits = LZMA_LEN_NUM_MID_BITS; ++ } else { ++ rc_update_bit_1(rc, prob_len); ++ prob_len = prob + LZMA_LEN_HIGH; ++ offset = ((1 << LZMA_LEN_NUM_LOW_BITS) ++ + (1 << LZMA_LEN_NUM_MID_BITS)); ++ num_bits = LZMA_LEN_NUM_HIGH_BITS; ++ } ++ } ++ ++ rc_bit_tree_decode(rc, prob_len, num_bits, &len); ++ len += offset; ++ ++ if (cst->state < 4) { ++ int pos_slot; ++ ++ cst->state += LZMA_NUM_LIT_STATES; ++ prob = ++ p + LZMA_POS_SLOT + ++ ((len < ++ LZMA_NUM_LEN_TO_POS_STATES ? 
len : ++ LZMA_NUM_LEN_TO_POS_STATES - 1) ++ << LZMA_NUM_POS_SLOT_BITS); ++ rc_bit_tree_decode(rc, prob, ++ LZMA_NUM_POS_SLOT_BITS, ++ &pos_slot); ++ if (pos_slot >= LZMA_START_POS_MODEL_INDEX) { ++ int i, mi; ++ num_bits = (pos_slot >> 1) - 1; ++ cst->rep0 = 2 | (pos_slot & 1); ++ if (pos_slot < LZMA_END_POS_MODEL_INDEX) { ++ cst->rep0 <<= num_bits; ++ prob = p + LZMA_SPEC_POS + ++ cst->rep0 - pos_slot - 1; ++ } else { ++ num_bits -= LZMA_NUM_ALIGN_BITS; ++ while (num_bits--) ++ cst->rep0 = (cst->rep0 << 1) | ++ rc_direct_bit(rc); ++ prob = p + LZMA_ALIGN; ++ cst->rep0 <<= LZMA_NUM_ALIGN_BITS; ++ num_bits = LZMA_NUM_ALIGN_BITS; ++ } ++ i = 1; ++ mi = 1; ++ while (num_bits--) { ++ if (rc_get_bit(rc, prob + mi, &mi)) ++ cst->rep0 |= i; ++ i <<= 1; ++ } ++ } else ++ cst->rep0 = pos_slot; ++ if (++(cst->rep0) == 0) ++ return; ++ } ++ ++ len += LZMA_MATCH_MIN_LEN; ++ ++ copy_bytes(wr, cst->rep0, len); ++} ++ ++ ++ ++STATIC inline int INIT unlzma(unsigned char *buf, int in_len, ++ int(*fill)(void*, unsigned int), ++ int(*flush)(void*, unsigned int), ++ unsigned char *output, ++ int *posp, ++ void(*error_fn)(char *x) ++ ) ++{ ++ struct lzma_header header; ++ int lc, pb, lp; ++ uint32_t pos_state_mask; ++ uint32_t literal_pos_mask; ++ uint16_t *p; ++ int num_probs; ++ struct rc rc; ++ int i, mi; ++ struct writer wr; ++ struct cstate cst; ++ unsigned char *inbuf; ++ int ret = -1; ++ ++ set_error_fn(error_fn); ++ if (!flush) ++ in_len -= 4; /* Uncompressed size hack active in pre-boot ++ environment */ ++ if (buf) ++ inbuf = buf; ++ else ++ inbuf = malloc(LZMA_IOBUF_SIZE); ++ if (!inbuf) { ++ error("Could not allocate input bufer"); ++ goto exit_0; ++ } ++ ++ cst.state = 0; ++ cst.rep0 = cst.rep1 = cst.rep2 = cst.rep3 = 1; ++ ++ wr.header = &header; ++ wr.flush = flush; ++ wr.global_pos = 0; ++ wr.previous_byte = 0; ++ wr.buffer_pos = 0; ++ ++ rc_init(&rc, fill, inbuf, in_len); ++ ++ for (i = 0; i < sizeof(header); i++) { ++ if (rc.ptr >= rc.buffer_end) ++ rc_read(&rc); ++ 
((unsigned char *)&header)[i] = *rc.ptr++; ++ } ++ ++ if (header.pos >= (9 * 5 * 5)) ++ error("bad header"); ++ ++ mi = 0; ++ lc = header.pos; ++ while (lc >= 9) { ++ mi++; ++ lc -= 9; ++ } ++ pb = 0; ++ lp = mi; ++ while (lp >= 5) { ++ pb++; ++ lp -= 5; ++ } ++ pos_state_mask = (1 << pb) - 1; ++ literal_pos_mask = (1 << lp) - 1; ++ ++ ENDIAN_CONVERT(header.dict_size); ++ ENDIAN_CONVERT(header.dst_size); ++ ++ if (header.dict_size == 0) ++ header.dict_size = 1; ++ ++ if (output) ++ wr.buffer = output; ++ else { ++ wr.bufsize = MIN(header.dst_size, header.dict_size); ++ wr.buffer = large_malloc(wr.bufsize); ++ } ++ if (wr.buffer == NULL) ++ goto exit_1; ++ ++ num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp)); ++ p = (uint16_t *) large_malloc(num_probs * sizeof(*p)); ++ if (p == 0) ++ goto exit_2; ++ num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp)); ++ for (i = 0; i < num_probs; i++) ++ p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1; ++ ++ rc_init_code(&rc); ++ ++ while (get_pos(&wr) < header.dst_size) { ++ int pos_state = get_pos(&wr) & pos_state_mask; ++ uint16_t *prob = p + LZMA_IS_MATCH + ++ (cst.state << LZMA_NUM_POS_BITS_MAX) + pos_state; ++ if (rc_is_bit_0(&rc, prob)) ++ process_bit0(&wr, &rc, &cst, p, pos_state, prob, ++ lc, literal_pos_mask); ++ else { ++ process_bit1(&wr, &rc, &cst, p, pos_state, prob); ++ if (cst.rep0 == 0) ++ break; ++ } ++ } ++ ++ if (posp) ++ *posp = rc.ptr-rc.buffer; ++ if (wr.flush) ++ wr.flush(wr.buffer, wr.buffer_pos); ++ ret = 0; ++ large_free(p); ++exit_2: ++ if (!output) ++ large_free(wr.buffer); ++exit_1: ++ if (!buf) ++ free(inbuf); ++exit_0: ++ return ret; ++} ++ ++#define decompress unlzma +diff --git a/lib/zlib_inflate/inflate.h b/lib/zlib_inflate/inflate.h +index df8a6c9..3d17b3d 100644 +--- a/lib/zlib_inflate/inflate.h ++++ b/lib/zlib_inflate/inflate.h +@@ -1,3 +1,6 @@ ++#ifndef INFLATE_H ++#define INFLATE_H ++ + /* inflate.h -- internal inflate state definition + * Copyright (C) 1995-2004 Mark Adler + * For 
conditions of distribution and use, see copyright notice in zlib.h +@@ -105,3 +108,4 @@ struct inflate_state { + unsigned short work[288]; /* work area for code table building */ + code codes[ENOUGH]; /* space for code tables */ + }; ++#endif +diff --git a/lib/zlib_inflate/inftrees.h b/lib/zlib_inflate/inftrees.h +index 5f5219b..b70b473 100644 +--- a/lib/zlib_inflate/inftrees.h ++++ b/lib/zlib_inflate/inftrees.h +@@ -1,3 +1,6 @@ ++#ifndef INFTREES_H ++#define INFTREES_H ++ + /* inftrees.h -- header to use inftrees.c + * Copyright (C) 1995-2005 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h +@@ -53,3 +56,4 @@ typedef enum { + extern int zlib_inflate_table (codetype type, unsigned short *lens, + unsigned codes, code **table, + unsigned *bits, unsigned short *work); ++#endif +diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib +index e063657..3b949a3 100644 +--- a/scripts/Makefile.lib ++++ b/scripts/Makefile.lib +@@ -186,3 +186,17 @@ quiet_cmd_gzip = GZIP $@ + cmd_gzip = gzip -f -9 < $< > $@ + + ++# Bzip2 ++# --------------------------------------------------------------------------- ++ ++# Bzip2 does not include size in file... 
so we have to fake that ++size_append=$(CONFIG_SHELL) $(srctree)/scripts/bin_size ++ ++quiet_cmd_bzip2 = BZIP2 $@ ++cmd_bzip2 = (bzip2 -9 < $< && $(size_append) $<) > $@ || (rm -f $@ ; false) ++ ++# Lzma ++# --------------------------------------------------------------------------- ++ ++quiet_cmd_lzma = LZMA $@ ++cmd_lzma = (lzma -9 -c $< && $(size_append) $<) >$@ || (rm -f $@ ; false) +diff --git a/scripts/bin_size b/scripts/bin_size +new file mode 100644 +index 0000000..43e1b36 +--- /dev/null ++++ b/scripts/bin_size +@@ -0,0 +1,10 @@ ++#!/bin/sh ++ ++if [ $# = 0 ] ; then ++ echo Usage: $0 file ++fi ++ ++size_dec=`stat -c "%s" $1` ++size_hex_echo_string=`printf "%08x" $size_dec | ++ sed 's/\(..\)\(..\)\(..\)\(..\)/\\\\x\4\\\\x\3\\\\x\2\\\\x\1/g'` ++/bin/echo -ne $size_hex_echo_string +diff --git a/scripts/gen_initramfs_list.sh b/scripts/gen_initramfs_list.sh +index 5f3415f..3eea8f1 100644 +--- a/scripts/gen_initramfs_list.sh ++++ b/scripts/gen_initramfs_list.sh +@@ -5,7 +5,7 @@ + # Released under the terms of the GNU GPL + # + # Generate a cpio packed initramfs. It uses gen_init_cpio to generate +-# the cpio archive, and gzip to pack it. ++# the cpio archive, and then compresses it. + # The script may also be used to generate the inputfile used for gen_init_cpio + # This script assumes that gen_init_cpio is located in usr/ directory + +@@ -16,8 +16,8 @@ usage() { + cat << EOF + Usage: + $0 [-o ] [-u ] [-g ] {-d | } ... +- -o Create gzipped initramfs file named using +- gen_init_cpio and gzip ++ -o Create compressed initramfs file named using ++ gen_init_cpio and compressor depending on the extension + -u User ID to map to user ID 0 (root). + is only meaningful if is a + directory. "squash" forces all files to uid 0. 
+@@ -225,6 +225,7 @@ cpio_list= + output="/dev/stdout" + output_file="" + is_cpio_compressed= ++compr="gzip -9 -f" + + arg="$1" + case "$arg" in +@@ -233,11 +234,15 @@ case "$arg" in + echo "deps_initramfs := \\" + shift + ;; +- "-o") # generate gzipped cpio image named $1 ++ "-o") # generate compressed cpio image named $1 + shift + output_file="$1" + cpio_list="$(mktemp ${TMPDIR:-/tmp}/cpiolist.XXXXXX)" + output=${cpio_list} ++ echo "$output_file" | grep -q "\.gz$" && compr="gzip -9 -f" ++ echo "$output_file" | grep -q "\.bz2$" && compr="bzip2 -9 -f" ++ echo "$output_file" | grep -q "\.lzma$" && compr="lzma -9 -f" ++ echo "$output_file" | grep -q "\.cpio$" && compr="cat" + shift + ;; + esac +@@ -274,7 +279,7 @@ while [ $# -gt 0 ]; do + esac + done + +-# If output_file is set we will generate cpio archive and gzip it ++# If output_file is set we will generate cpio archive and compress it + # we are carefull to delete tmp files + if [ ! -z ${output_file} ]; then + if [ -z ${cpio_file} ]; then +@@ -287,7 +292,8 @@ if [ ! -z ${output_file} ]; then + if [ "${is_cpio_compressed}" = "compressed" ]; then + cat ${cpio_tfile} > ${output_file} + else +- cat ${cpio_tfile} | gzip -f -9 - > ${output_file} ++ (cat ${cpio_tfile} | ${compr} - > ${output_file}) \ ++ || (rm -f ${output_file} ; false) + fi + [ -z ${cpio_file} ] && rm ${cpio_tfile} + fi +diff --git a/usr/Kconfig b/usr/Kconfig +index 86cecb5..43a3a0f 100644 +--- a/usr/Kconfig ++++ b/usr/Kconfig +@@ -44,3 +44,92 @@ config INITRAMFS_ROOT_GID + owned by group root in the initial ramdisk image. + + If you are not sure, leave it set to "0". ++ ++config RD_GZIP ++ bool "Initial ramdisk compressed using gzip" ++ default y ++ depends on BLK_DEV_INITRD=y ++ select DECOMPRESS_GZIP ++ help ++ Support loading of a gzip encoded initial ramdisk or cpio buffer. ++ If unsure, say Y. 
++ ++config RD_BZIP2 ++ bool "Initial ramdisk compressed using bzip2" ++ default n ++ depends on BLK_DEV_INITRD=y ++ select DECOMPRESS_BZIP2 ++ help ++ Support loading of a bzip2 encoded initial ramdisk or cpio buffer ++ If unsure, say N. ++ ++config RD_LZMA ++ bool "Initial ramdisk compressed using lzma" ++ default n ++ depends on BLK_DEV_INITRD=y ++ select DECOMPRESS_LZMA ++ help ++ Support loading of a lzma encoded initial ramdisk or cpio buffer ++ If unsure, say N. ++ ++choice ++ prompt "Built-in initramfs compression mode" ++ help ++ This setting is only meaningful if the INITRAMFS_SOURCE is ++ set. It decides by which algorithm the INITRAMFS_SOURCE will ++ be compressed. ++ Several compression algorithms are available, which differ ++ in efficiency, compression and decompression speed. ++ Compression speed is only relevant when building a kernel. ++ Decompression speed is relevant at each boot. ++ ++ If you have any problems with bzip2 or lzma compressed ++ initramfs, mail me (Alain Knaff) . ++ ++ High compression options are mostly useful for users who ++ are low on disk space (embedded systems), but for whom ram ++ size matters less. ++ ++ If in doubt, select 'gzip' ++ ++config INITRAMFS_COMPRESSION_NONE ++ bool "None" ++ help ++ Do not compress the built-in initramfs at all. This may ++ sound wasteful in space, but, you should be aware that the ++ built-in initramfs will be compressed at a later stage ++ anyways along with the rest of the kernel, on those ++ architectures that support this. ++ However, not compressing the initramfs may lead to slightly ++ higher memory consumption during a short time at boot, while ++ both the cpio image and the unpacked filesystem image will ++ be present in memory simultaneously ++ ++config INITRAMFS_COMPRESSION_GZIP ++ bool "Gzip" ++ depends on RD_GZIP ++ help ++ The old and tried gzip compression. 
Its compression ratio is ++ the poorest among the 3 choices; however its speed (both ++ compression and decompression) is the fastest. ++ ++config INITRAMFS_COMPRESSION_BZIP2 ++ bool "Bzip2" ++ depends on RD_BZIP2 ++ help ++ Its compression ratio and speed is intermediate. ++ Decompression speed is slowest among the three. The initramfs ++ size is about 10% smaller with bzip2, in comparison to gzip. ++ Bzip2 uses a large amount of memory. For modern kernels you ++ will need at least 8MB RAM or more for booting. ++ ++config INITRAMFS_COMPRESSION_LZMA ++ bool "LZMA" ++ depends on RD_LZMA ++ help ++ The most recent compression algorithm. ++ Its ratio is best, decompression speed is between the other ++ two. Compression is slowest. The initramfs size is about 33% ++ smaller with LZMA in comparison to gzip. ++ ++endchoice +diff --git a/usr/Makefile b/usr/Makefile +index 201f27f..b84894b 100644 +--- a/usr/Makefile ++++ b/usr/Makefile +@@ -6,13 +6,25 @@ klibcdirs:; + PHONY += klibcdirs + + ++# No compression ++suffix_$(CONFIG_INITRAMFS_COMPRESSION_NONE) = ++ ++# Gzip, but no bzip2 ++suffix_$(CONFIG_INITRAMFS_COMPRESSION_GZIP) = .gz ++ ++# Bzip2 ++suffix_$(CONFIG_INITRAMFS_COMPRESSION_BZIP2) = .bz2 ++ ++# Lzma ++suffix_$(CONFIG_INITRAMFS_COMPRESSION_LZMA) = .lzma ++ + # Generate builtin.o based on initramfs_data.o +-obj-$(CONFIG_BLK_DEV_INITRD) := initramfs_data.o ++obj-$(CONFIG_BLK_DEV_INITRD) := initramfs_data$(suffix_y).o + +-# initramfs_data.o contains the initramfs_data.cpio.gz image. ++# initramfs_data.o contains the compressed initramfs_data.cpio image. + # The image is included using .incbin, a dependency which is not + # tracked automatically. 
+-$(obj)/initramfs_data.o: $(obj)/initramfs_data.cpio.gz FORCE ++$(obj)/initramfs_data$(suffix_y).o: $(obj)/initramfs_data.cpio$(suffix_y) FORCE + + ##### + # Generate the initramfs cpio archive +@@ -25,28 +37,28 @@ ramfs-args := \ + $(if $(CONFIG_INITRAMFS_ROOT_UID), -u $(CONFIG_INITRAMFS_ROOT_UID)) \ + $(if $(CONFIG_INITRAMFS_ROOT_GID), -g $(CONFIG_INITRAMFS_ROOT_GID)) + +-# .initramfs_data.cpio.gz.d is used to identify all files included ++# .initramfs_data.cpio.d is used to identify all files included + # in initramfs and to detect if any files are added/removed. + # Removed files are identified by directory timestamp being updated + # The dependency list is generated by gen_initramfs.sh -l +-ifneq ($(wildcard $(obj)/.initramfs_data.cpio.gz.d),) +- include $(obj)/.initramfs_data.cpio.gz.d ++ifneq ($(wildcard $(obj)/.initramfs_data.cpio.d),) ++ include $(obj)/.initramfs_data.cpio.d + endif + + quiet_cmd_initfs = GEN $@ + cmd_initfs = $(initramfs) -o $@ $(ramfs-args) $(ramfs-input) + +-targets := initramfs_data.cpio.gz ++targets := initramfs_data.cpio.gz initramfs_data.cpio.bz2 initramfs_data.cpio.lzma initramfs_data.cpio + # do not try to update files included in initramfs + $(deps_initramfs): ; + + $(deps_initramfs): klibcdirs +-# We rebuild initramfs_data.cpio.gz if: +-# 1) Any included file is newer then initramfs_data.cpio.gz ++# We rebuild initramfs_data.cpio if: ++# 1) Any included file is newer then initramfs_data.cpio + # 2) There are changes in which files are included (added or deleted) +-# 3) If gen_init_cpio are newer than initramfs_data.cpio.gz ++# 3) If gen_init_cpio are newer than initramfs_data.cpio + # 4) arguments to gen_initramfs.sh changes +-$(obj)/initramfs_data.cpio.gz: $(obj)/gen_init_cpio $(deps_initramfs) klibcdirs +- $(Q)$(initramfs) -l $(ramfs-input) > $(obj)/.initramfs_data.cpio.gz.d ++$(obj)/initramfs_data.cpio$(suffix_y): $(obj)/gen_init_cpio $(deps_initramfs) klibcdirs ++ $(Q)$(initramfs) -l $(ramfs-input) > 
$(obj)/.initramfs_data.cpio.d + $(call if_changed,initfs) + +diff --git a/usr/initramfs_data.S b/usr/initramfs_data.S +index c2e1ad4..7c6973d 100644 +--- a/usr/initramfs_data.S ++++ b/usr/initramfs_data.S +@@ -26,5 +26,5 @@ SECTIONS + */ + + .section .init.ramfs,"a" +-.incbin "usr/initramfs_data.cpio.gz" ++.incbin "usr/initramfs_data.cpio" + +diff --git a/usr/initramfs_data.bz2.S b/usr/initramfs_data.bz2.S +new file mode 100644 +index 0000000..bc54d09 +--- /dev/null ++++ b/usr/initramfs_data.bz2.S +@@ -0,0 +1,29 @@ ++/* ++ initramfs_data includes the compressed binary that is the ++ filesystem used for early user space. ++ Note: Older versions of "as" (prior to binutils 2.11.90.0.23 ++ released on 2001-07-14) did not support .incbin. ++ If you are forced to use older binutils than that then the ++ following trick can be applied to create the resulting binary: ++ ++ ++ ld -m elf_i386 --format binary --oformat elf32-i386 -r \ ++ -T initramfs_data.scr initramfs_data.cpio.gz -o initramfs_data.o ++ ld -m elf_i386 -r -o built-in.o initramfs_data.o ++ ++ initramfs_data.scr looks like this: ++SECTIONS ++{ ++ .init.ramfs : { *(.data) } ++} ++ ++ The above example is for i386 - the parameters vary from architectures. ++ Eventually look up LDFLAGS_BLOB in an older version of the ++ arch/$(ARCH)/Makefile to see the flags used before .incbin was introduced. ++ ++ Using .incbin has the advantage over ld that the correct flags are set ++ in the ELF header, as required by certain architectures. ++*/ ++ ++.section .init.ramfs,"a" ++.incbin "usr/initramfs_data.cpio.bz2" +diff --git a/usr/initramfs_data.gz.S b/usr/initramfs_data.gz.S +new file mode 100644 +index 0000000..890c8dd +--- /dev/null ++++ b/usr/initramfs_data.gz.S +@@ -0,0 +1,29 @@ ++/* ++ initramfs_data includes the compressed binary that is the ++ filesystem used for early user space. ++ Note: Older versions of "as" (prior to binutils 2.11.90.0.23 ++ released on 2001-07-14) did not support .incbin.
++ If you are forced to use older binutils than that then the ++ following trick can be applied to create the resulting binary: ++ ++ ++ ld -m elf_i386 --format binary --oformat elf32-i386 -r \ ++ -T initramfs_data.scr initramfs_data.cpio.gz -o initramfs_data.o ++ ld -m elf_i386 -r -o built-in.o initramfs_data.o ++ ++ initramfs_data.scr looks like this: ++SECTIONS ++{ ++ .init.ramfs : { *(.data) } ++} ++ ++ The above example is for i386 - the parameters vary from architectures. ++ Eventually look up LDFLAGS_BLOB in an older version of the ++ arch/$(ARCH)/Makefile to see the flags used before .incbin was introduced. ++ ++ Using .incbin has the advantage over ld that the correct flags are set ++ in the ELF header, as required by certain architectures. ++*/ ++ ++.section .init.ramfs,"a" ++.incbin "usr/initramfs_data.cpio.gz" +diff --git a/usr/initramfs_data.lzma.S b/usr/initramfs_data.lzma.S +new file mode 100644 +index 0000000..e11469e +--- /dev/null ++++ b/usr/initramfs_data.lzma.S +@@ -0,0 +1,29 @@ ++/* ++ initramfs_data includes the compressed binary that is the ++ filesystem used for early user space. ++ Note: Older versions of "as" (prior to binutils 2.11.90.0.23 ++ released on 2001-07-14) did not support .incbin.
++*/ ++ ++.section .init.ramfs,"a" ++.incbin "usr/initramfs_data.cpio.lzma" +-- +To unsubscribe from this list: send the line "unsubscribe linux-kernel" in +the body of a message to majordomo@vger.kernel.org +More majordomo info at http://vger.kernel.org/majordomo-info.html +Please read the FAQ at http://www.tux.org/lkml/ diff --git a/kernel-config.h b/kernel-config.h new file mode 100644 index 00000000..9d1c14f7 --- /dev/null +++ b/kernel-config.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_CONFIG_H +#define _LINUX_CONFIG_H + +#include + +#endif diff --git a/kernel-esfq.patch b/kernel-esfq.patch new file mode 100644 index 00000000..4f5457da --- /dev/null +++ b/kernel-esfq.patch @@ -0,0 +1,796 @@ +diff -Naur linux-2.6.24.orig/include/linux/pkt_sched.h linux-2.6.24/include/linux/pkt_sched.h +--- linux-2.6.24.orig/include/linux/pkt_sched.h 2008-01-24 14:58:37.000000000 -0800 ++++ linux-2.6.24/include/linux/pkt_sched.h 2008-01-28 00:27:12.000000000 -0800 +@@ -157,6 +157,33 @@ + * to change these parameters in compile time. 
+ */ + ++/* ESFQ section */ ++ ++enum ++{ ++ /* traditional */ ++ TCA_ESFQ_HASH_CLASSIC, ++ TCA_ESFQ_HASH_DST, ++ TCA_ESFQ_HASH_SRC, ++ TCA_ESFQ_HASH_FWMARK, ++ /* conntrack */ ++ TCA_ESFQ_HASH_CTORIGDST, ++ TCA_ESFQ_HASH_CTORIGSRC, ++ TCA_ESFQ_HASH_CTREPLDST, ++ TCA_ESFQ_HASH_CTREPLSRC, ++ TCA_ESFQ_HASH_CTNATCHG, ++}; ++ ++struct tc_esfq_qopt ++{ ++ unsigned quantum; /* Bytes per round allocated to flow */ ++ int perturb_period; /* Period of hash perturbation */ ++ __u32 limit; /* Maximal packets in queue */ ++ unsigned divisor; /* Hash divisor */ ++ unsigned flows; /* Maximal number of flows */ ++ unsigned hash_kind; /* Hash function to use for flow identification */ ++}; ++ + /* RED section */ + + enum +diff -Naur linux-2.6.24.orig/net/sched/Kconfig linux-2.6.24/net/sched/Kconfig +--- linux-2.6.24.orig/net/sched/Kconfig 2008-01-24 14:58:37.000000000 -0800 ++++ linux-2.6.24/net/sched/Kconfig 2008-01-28 00:27:12.000000000 -0800 +@@ -139,6 +139,37 @@ + To compile this code as a module, choose M here: the + module will be called sch_sfq. + ++config NET_SCH_ESFQ ++ tristate "Enhanced Stochastic Fairness Queueing (ESFQ)" ++ ---help--- ++ Say Y here if you want to use the Enhanced Stochastic Fairness ++ Queueing (ESFQ) packet scheduling algorithm for some of your network ++ devices or as a leaf discipline for a classful qdisc such as HTB or ++ CBQ (see the top of <file:net/sched/sch_esfq.c> for details and ++ references to the SFQ algorithm). ++ ++ This is an enhanced SFQ version which allows you to control some ++ hardcoded values in the SFQ scheduler. ++ ++ ESFQ also adds control of the hash function used to identify packet ++ flows. The original SFQ discipline hashes by connection; ESFQ adds ++ several other hashing methods, such as by src IP or by dst IP, which ++ can be more fair to users in some networking situations. ++ ++ To compile this code as a module, choose M here: the ++ module will be called sch_esfq.
++ ++config NET_SCH_ESFQ_NFCT ++ bool "Connection Tracking Hash Types" ++ depends on NET_SCH_ESFQ && NF_CONNTRACK ++ ---help--- ++ Say Y here to enable support for hashing based on netfilter connection ++ tracking information. This is useful for a router that is also using ++ NAT to connect privately-addressed hosts to the Internet. If you want ++ to provide fair distribution of upstream bandwidth, ESFQ must use ++ connection tracking information, since all outgoing packets will share ++ the same source address. ++ + config NET_SCH_TEQL + tristate "True Link Equalizer (TEQL)" + ---help--- +diff -Naur linux-2.6.24.orig/net/sched/Makefile linux-2.6.24/net/sched/Makefile +--- linux-2.6.24.orig/net/sched/Makefile 2008-01-24 14:58:37.000000000 -0800 ++++ linux-2.6.24/net/sched/Makefile 2008-01-28 00:27:12.000000000 -0800 +@@ -23,6 +23,7 @@ + obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o + obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o + obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o ++obj-$(CONFIG_NET_SCH_ESFQ) += sch_esfq.o + obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o + obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o + obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o +diff -Naur linux-2.6.24.orig/net/sched/sch_esfq.c linux-2.6.24/net/sched/sch_esfq.c +--- linux-2.6.24.orig/net/sched/sch_esfq.c 1969-12-31 16:00:00.000000000 -0800 ++++ linux-2.6.24/net/sched/sch_esfq.c 2008-01-28 00:27:22.000000000 -0800 +@@ -0,0 +1,703 @@ ++/* ++ * net/sched/sch_esfq.c Extended Stochastic Fairness Queueing discipline. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ * ++ * Authors: Alexey Kuznetsov, ++ * ++ * Changes: Alexander Atanasov, ++ * Added dynamic depth,limit,divisor,hash_kind options. ++ * Added dst and src hashes. ++ * ++ * Alexander Clouter, ++ * Ported ESFQ to Linux 2.6. 
++ * ++ * Corey Hickey, ++ * Maintenance of the Linux 2.6 port. ++ * Added fwmark hash (thanks to Robert Kurjata). ++ * Added usage of jhash. ++ * Added conntrack support. ++ * Added ctnatchg hash (thanks to Ben Pfountz). ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* Stochastic Fairness Queuing algorithm. ++ For more comments look at sch_sfq.c. ++ The difference is that you can change limit, depth, ++ hash table size and choose alternate hash types. ++ ++ classic: same as in sch_sfq.c ++ dst: destination IP address ++ src: source IP address ++ fwmark: netfilter mark value ++ ctorigdst: original destination IP address ++ ctorigsrc: original source IP address ++ ctrepldst: reply destination IP address ++ ctreplsrc: reply source IP ++ ++*/ ++ ++#define ESFQ_HEAD 0 ++#define ESFQ_TAIL 1 ++ ++/* This type should contain at least SFQ_DEPTH*2 values */ ++typedef unsigned int esfq_index; ++ ++struct esfq_head ++{ ++ esfq_index next; ++ esfq_index prev; ++}; ++ ++struct esfq_sched_data ++{ ++/* Parameters */ ++ int perturb_period; ++ unsigned quantum; /* Allotment per round: MUST BE >= MTU */ ++ int limit; ++ unsigned depth; ++ unsigned hash_divisor; ++ unsigned hash_kind; ++/* Variables */ ++ struct timer_list perturb_timer; ++ int perturbation; ++ esfq_index tail; /* Index of current slot in round */ ++ esfq_index max_depth; /* Maximal depth */ ++ ++ esfq_index *ht; /* Hash table */ ++ esfq_index *next; /* Active slots link */ ++ short *allot; /* Current allotment per slot */ ++ unsigned short *hash; /* Hash value indexed by slots */ ++ struct sk_buff_head *qs; /* Slot queue */ ++ struct esfq_head *dep; /* Linked list of slots, indexed by depth */ ++}; ++ ++/* This contains the info we 
will hash. */ ++struct esfq_packet_info ++{ ++ u32 proto; /* protocol or port */ ++ u32 src; /* source from packet header */ ++ u32 dst; /* destination from packet header */ ++ u32 ctorigsrc; /* original source from conntrack */ ++ u32 ctorigdst; /* original destination from conntrack */ ++ u32 ctreplsrc; /* reply source from conntrack */ ++ u32 ctrepldst; /* reply destination from conntrack */ ++ u32 mark; /* netfilter mark (fwmark) */ ++}; ++ ++static __inline__ unsigned esfq_jhash_1word(struct esfq_sched_data *q,u32 a) ++{ ++ return jhash_1word(a, q->perturbation) & (q->hash_divisor-1); ++} ++ ++static __inline__ unsigned esfq_jhash_2words(struct esfq_sched_data *q, u32 a, u32 b) ++{ ++ return jhash_2words(a, b, q->perturbation) & (q->hash_divisor-1); ++} ++ ++static __inline__ unsigned esfq_jhash_3words(struct esfq_sched_data *q, u32 a, u32 b, u32 c) ++{ ++ return jhash_3words(a, b, c, q->perturbation) & (q->hash_divisor-1); ++} ++ ++static unsigned esfq_hash(struct esfq_sched_data *q, struct sk_buff *skb) ++{ ++ struct esfq_packet_info info; ++#ifdef CONFIG_NET_SCH_ESFQ_NFCT ++ enum ip_conntrack_info ctinfo; ++ struct nf_conn *ct = nf_ct_get(skb, &ctinfo); ++#endif ++ ++ switch (skb->protocol) { ++ case __constant_htons(ETH_P_IP): ++ { ++ struct iphdr *iph = ip_hdr(skb); ++ info.dst = iph->daddr; ++ info.src = iph->saddr; ++ if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && ++ (iph->protocol == IPPROTO_TCP || ++ iph->protocol == IPPROTO_UDP || ++ iph->protocol == IPPROTO_SCTP || ++ iph->protocol == IPPROTO_DCCP || ++ iph->protocol == IPPROTO_ESP)) ++ info.proto = *(((u32*)iph) + iph->ihl); ++ else ++ info.proto = iph->protocol; ++ break; ++ } ++ case __constant_htons(ETH_P_IPV6): ++ { ++ struct ipv6hdr *iph = ipv6_hdr(skb); ++ /* Hash ipv6 addresses into a u32. This isn't ideal, ++ * but the code is simple. 
*/ ++ info.dst = jhash2(iph->daddr.s6_addr32, 4, q->perturbation); ++ info.src = jhash2(iph->saddr.s6_addr32, 4, q->perturbation); ++ if (iph->nexthdr == IPPROTO_TCP || ++ iph->nexthdr == IPPROTO_UDP || ++ iph->nexthdr == IPPROTO_SCTP || ++ iph->nexthdr == IPPROTO_DCCP || ++ iph->nexthdr == IPPROTO_ESP) ++ info.proto = *(u32*)&iph[1]; ++ else ++ info.proto = iph->nexthdr; ++ break; ++ } ++ default: ++ info.dst = (u32)(unsigned long)skb->dst; ++ info.src = (u32)(unsigned long)skb->sk; ++ info.proto = skb->protocol; ++ } ++ ++ info.mark = skb->mark; ++ ++#ifdef CONFIG_NET_SCH_ESFQ_NFCT ++ /* defaults if there is no conntrack info */ ++ info.ctorigsrc = info.src; ++ info.ctorigdst = info.dst; ++ info.ctreplsrc = info.dst; ++ info.ctrepldst = info.src; ++ /* collect conntrack info */ ++ if (ct && ct != &nf_conntrack_untracked) { ++ if (skb->protocol == __constant_htons(ETH_P_IP)) { ++ info.ctorigsrc = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; ++ info.ctorigdst = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip; ++ info.ctreplsrc = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip; ++ info.ctrepldst = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip; ++ } ++ else if (skb->protocol == __constant_htons(ETH_P_IPV6)) { ++ /* Again, hash ipv6 addresses into a single u32. 
*/ ++ info.ctorigsrc = jhash2(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6, 4, q->perturbation); ++ info.ctorigdst = jhash2(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip6, 4, q->perturbation); ++ info.ctreplsrc = jhash2(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6, 4, q->perturbation); ++ info.ctrepldst = jhash2(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6, 4, q->perturbation); ++ } ++ ++ } ++#endif ++ ++ switch(q->hash_kind) { ++ case TCA_ESFQ_HASH_CLASSIC: ++ return esfq_jhash_3words(q, info.dst, info.src, info.proto); ++ case TCA_ESFQ_HASH_DST: ++ return esfq_jhash_1word(q, info.dst); ++ case TCA_ESFQ_HASH_SRC: ++ return esfq_jhash_1word(q, info.src); ++ case TCA_ESFQ_HASH_FWMARK: ++ return esfq_jhash_1word(q, info.mark); ++#ifdef CONFIG_NET_SCH_ESFQ_NFCT ++ case TCA_ESFQ_HASH_CTORIGDST: ++ return esfq_jhash_1word(q, info.ctorigdst); ++ case TCA_ESFQ_HASH_CTORIGSRC: ++ return esfq_jhash_1word(q, info.ctorigsrc); ++ case TCA_ESFQ_HASH_CTREPLDST: ++ return esfq_jhash_1word(q, info.ctrepldst); ++ case TCA_ESFQ_HASH_CTREPLSRC: ++ return esfq_jhash_1word(q, info.ctreplsrc); ++ case TCA_ESFQ_HASH_CTNATCHG: ++ { ++ if (info.ctorigdst == info.ctreplsrc) ++ return esfq_jhash_1word(q, info.ctorigsrc); ++ return esfq_jhash_1word(q, info.ctreplsrc); ++ } ++#endif ++ default: ++ if (net_ratelimit()) ++ printk(KERN_WARNING "ESFQ: Unknown hash method. 
Falling back to classic.\n"); ++ } ++ return esfq_jhash_3words(q, info.dst, info.src, info.proto); ++} ++ ++static inline void esfq_link(struct esfq_sched_data *q, esfq_index x) ++{ ++ esfq_index p, n; ++ int d = q->qs[x].qlen + q->depth; ++ ++ p = d; ++ n = q->dep[d].next; ++ q->dep[x].next = n; ++ q->dep[x].prev = p; ++ q->dep[p].next = q->dep[n].prev = x; ++} ++ ++static inline void esfq_dec(struct esfq_sched_data *q, esfq_index x) ++{ ++ esfq_index p, n; ++ ++ n = q->dep[x].next; ++ p = q->dep[x].prev; ++ q->dep[p].next = n; ++ q->dep[n].prev = p; ++ ++ if (n == p && q->max_depth == q->qs[x].qlen + 1) ++ q->max_depth--; ++ ++ esfq_link(q, x); ++} ++ ++static inline void esfq_inc(struct esfq_sched_data *q, esfq_index x) ++{ ++ esfq_index p, n; ++ int d; ++ ++ n = q->dep[x].next; ++ p = q->dep[x].prev; ++ q->dep[p].next = n; ++ q->dep[n].prev = p; ++ d = q->qs[x].qlen; ++ if (q->max_depth < d) ++ q->max_depth = d; ++ ++ esfq_link(q, x); ++} ++ ++static unsigned int esfq_drop(struct Qdisc *sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ esfq_index d = q->max_depth; ++ struct sk_buff *skb; ++ unsigned int len; ++ ++ /* Queue is full! Find the longest slot and ++ drop a packet from it */ ++ ++ if (d > 1) { ++ esfq_index x = q->dep[d+q->depth].next; ++ skb = q->qs[x].prev; ++ len = skb->len; ++ __skb_unlink(skb, &q->qs[x]); ++ kfree_skb(skb); ++ esfq_dec(q, x); ++ sch->q.qlen--; ++ sch->qstats.drops++; ++ sch->qstats.backlog -= len; ++ return len; ++ } ++ ++ if (d == 1) { ++ /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. 
*/ ++ d = q->next[q->tail]; ++ q->next[q->tail] = q->next[d]; ++ q->allot[q->next[d]] += q->quantum; ++ skb = q->qs[d].prev; ++ len = skb->len; ++ __skb_unlink(skb, &q->qs[d]); ++ kfree_skb(skb); ++ esfq_dec(q, d); ++ sch->q.qlen--; ++ q->ht[q->hash[d]] = q->depth; ++ sch->qstats.drops++; ++ sch->qstats.backlog -= len; ++ return len; ++ } ++ ++ return 0; ++} ++ ++static void esfq_q_enqueue(struct sk_buff *skb, struct esfq_sched_data *q, unsigned int end) ++{ ++ unsigned hash = esfq_hash(q, skb); ++ unsigned depth = q->depth; ++ esfq_index x; ++ ++ x = q->ht[hash]; ++ if (x == depth) { ++ q->ht[hash] = x = q->dep[depth].next; ++ q->hash[x] = hash; ++ } ++ ++ if (end == ESFQ_TAIL) ++ __skb_queue_tail(&q->qs[x], skb); ++ else ++ __skb_queue_head(&q->qs[x], skb); ++ ++ esfq_inc(q, x); ++ if (q->qs[x].qlen == 1) { /* The flow is new */ ++ if (q->tail == depth) { /* It is the first flow */ ++ q->tail = x; ++ q->next[x] = x; ++ q->allot[x] = q->quantum; ++ } else { ++ q->next[x] = q->next[q->tail]; ++ q->next[q->tail] = x; ++ q->tail = x; ++ } ++ } ++} ++ ++static int esfq_enqueue(struct sk_buff *skb, struct Qdisc* sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ esfq_q_enqueue(skb, q, ESFQ_TAIL); ++ sch->qstats.backlog += skb->len; ++ if (++sch->q.qlen < q->limit-1) { ++ sch->bstats.bytes += skb->len; ++ sch->bstats.packets++; ++ return 0; ++ } ++ ++ sch->qstats.drops++; ++ esfq_drop(sch); ++ return NET_XMIT_CN; ++} ++ ++ ++static int esfq_requeue(struct sk_buff *skb, struct Qdisc* sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ esfq_q_enqueue(skb, q, ESFQ_HEAD); ++ sch->qstats.backlog += skb->len; ++ if (++sch->q.qlen < q->limit - 1) { ++ sch->qstats.requeues++; ++ return 0; ++ } ++ ++ sch->qstats.drops++; ++ esfq_drop(sch); ++ return NET_XMIT_CN; ++} ++ ++static struct sk_buff *esfq_q_dequeue(struct esfq_sched_data *q) ++{ ++ struct sk_buff *skb; ++ unsigned depth = q->depth; ++ esfq_index a, old_a; ++ ++ /* No active slots */ ++ if (q->tail == 
depth) ++ return NULL; ++ ++ a = old_a = q->next[q->tail]; ++ ++ /* Grab packet */ ++ skb = __skb_dequeue(&q->qs[a]); ++ esfq_dec(q, a); ++ ++ /* Is the slot empty? */ ++ if (q->qs[a].qlen == 0) { ++ q->ht[q->hash[a]] = depth; ++ a = q->next[a]; ++ if (a == old_a) { ++ q->tail = depth; ++ return skb; ++ } ++ q->next[q->tail] = a; ++ q->allot[a] += q->quantum; ++ } else if ((q->allot[a] -= skb->len) <= 0) { ++ q->tail = a; ++ a = q->next[a]; ++ q->allot[a] += q->quantum; ++ } ++ ++ return skb; ++} ++ ++static struct sk_buff *esfq_dequeue(struct Qdisc* sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ struct sk_buff *skb; ++ ++ skb = esfq_q_dequeue(q); ++ if (skb == NULL) ++ return NULL; ++ sch->q.qlen--; ++ sch->qstats.backlog -= skb->len; ++ return skb; ++} ++ ++static void esfq_q_destroy(struct esfq_sched_data *q) ++{ ++ del_timer(&q->perturb_timer); ++ if(q->ht) ++ kfree(q->ht); ++ if(q->dep) ++ kfree(q->dep); ++ if(q->next) ++ kfree(q->next); ++ if(q->allot) ++ kfree(q->allot); ++ if(q->hash) ++ kfree(q->hash); ++ if(q->qs) ++ kfree(q->qs); ++} ++ ++static void esfq_destroy(struct Qdisc *sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ esfq_q_destroy(q); ++} ++ ++ ++static void esfq_reset(struct Qdisc* sch) ++{ ++ struct sk_buff *skb; ++ ++ while ((skb = esfq_dequeue(sch)) != NULL) ++ kfree_skb(skb); ++} ++ ++static void esfq_perturbation(unsigned long arg) ++{ ++ struct Qdisc *sch = (struct Qdisc*)arg; ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ ++ q->perturbation = net_random()&0x1F; ++ ++ if (q->perturb_period) { ++ q->perturb_timer.expires = jiffies + q->perturb_period; ++ add_timer(&q->perturb_timer); ++ } ++} ++ ++static unsigned int esfq_check_hash(unsigned int kind) ++{ ++ switch (kind) { ++ case TCA_ESFQ_HASH_CTORIGDST: ++ case TCA_ESFQ_HASH_CTORIGSRC: ++ case TCA_ESFQ_HASH_CTREPLDST: ++ case TCA_ESFQ_HASH_CTREPLSRC: ++ case TCA_ESFQ_HASH_CTNATCHG: ++#ifndef CONFIG_NET_SCH_ESFQ_NFCT ++ { ++ if (net_ratelimit()) ++ 
printk(KERN_WARNING "ESFQ: Conntrack hash types disabled in kernel config. Falling back to classic.\n"); ++ return TCA_ESFQ_HASH_CLASSIC; ++ } ++#endif ++ case TCA_ESFQ_HASH_CLASSIC: ++ case TCA_ESFQ_HASH_DST: ++ case TCA_ESFQ_HASH_SRC: ++ case TCA_ESFQ_HASH_FWMARK: ++ return kind; ++ default: ++ { ++ if (net_ratelimit()) ++ printk(KERN_WARNING "ESFQ: Unknown hash type. Falling back to classic.\n"); ++ return TCA_ESFQ_HASH_CLASSIC; ++ } ++ } ++} ++ ++static int esfq_q_init(struct esfq_sched_data *q, struct nlattr *opt) ++{ ++ struct tc_esfq_qopt *ctl = nla_data(opt); ++ esfq_index p = ~0U/2; ++ int i; ++ ++ if (opt && opt->nla_len < nla_attr_size(sizeof(*ctl))) ++ return -EINVAL; ++ ++ q->perturbation = 0; ++ q->hash_kind = TCA_ESFQ_HASH_CLASSIC; ++ q->max_depth = 0; ++ if (opt == NULL) { ++ q->perturb_period = 0; ++ q->hash_divisor = 1024; ++ q->tail = q->limit = q->depth = 128; ++ ++ } else { ++ struct tc_esfq_qopt *ctl = nla_data(opt); ++ if (ctl->quantum) ++ q->quantum = ctl->quantum; ++ q->perturb_period = ctl->perturb_period*HZ; ++ q->hash_divisor = ctl->divisor ? : 1024; ++ q->tail = q->limit = q->depth = ctl->flows ? 
: 128; ++ ++ if ( q->depth > p - 1 ) ++ return -EINVAL; ++ ++ if (ctl->limit) ++ q->limit = min_t(u32, ctl->limit, q->depth); ++ ++ if (ctl->hash_kind) { ++ q->hash_kind = esfq_check_hash(ctl->hash_kind); ++ } ++ } ++ ++ q->ht = kmalloc(q->hash_divisor*sizeof(esfq_index), GFP_KERNEL); ++ if (!q->ht) ++ goto err_case; ++ q->dep = kmalloc((1+q->depth*2)*sizeof(struct esfq_head), GFP_KERNEL); ++ if (!q->dep) ++ goto err_case; ++ q->next = kmalloc(q->depth*sizeof(esfq_index), GFP_KERNEL); ++ if (!q->next) ++ goto err_case; ++ q->allot = kmalloc(q->depth*sizeof(short), GFP_KERNEL); ++ if (!q->allot) ++ goto err_case; ++ q->hash = kmalloc(q->depth*sizeof(unsigned short), GFP_KERNEL); ++ if (!q->hash) ++ goto err_case; ++ q->qs = kmalloc(q->depth*sizeof(struct sk_buff_head), GFP_KERNEL); ++ if (!q->qs) ++ goto err_case; ++ ++ for (i=0; i< q->hash_divisor; i++) ++ q->ht[i] = q->depth; ++ for (i=0; i<q->depth; i++) { ++ skb_queue_head_init(&q->qs[i]); ++ q->dep[i+q->depth].next = i+q->depth; ++ q->dep[i+q->depth].prev = i+q->depth; ++ } ++ ++ for (i=0; i<q->depth; i++) ++ esfq_link(q, i); ++ return 0; ++err_case: ++ esfq_q_destroy(q); ++ return -ENOBUFS; ++} ++ ++static int esfq_init(struct Qdisc *sch, struct nlattr *opt) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ int err; ++ ++ q->quantum = psched_mtu(qdisc_dev(sch)); /* default */ ++ if ((err = esfq_q_init(q, opt))) ++ return err; ++ ++ init_timer(&q->perturb_timer); ++ q->perturb_timer.data = (unsigned long)sch; ++ q->perturb_timer.function = esfq_perturbation; ++ if (q->perturb_period) { ++ q->perturb_timer.expires = jiffies + q->perturb_period; ++ add_timer(&q->perturb_timer); ++ } ++ ++ return 0; ++} ++ ++static int esfq_change(struct Qdisc *sch, struct nlattr *opt) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ struct esfq_sched_data new; ++ struct sk_buff *skb; ++ int err; ++ ++ /* set up new queue */ ++ memset(&new, 0, sizeof(struct esfq_sched_data)); ++ new.quantum = psched_mtu(qdisc_dev(sch)); /* 
default */ ++ if ((err = esfq_q_init(&new, opt))) ++ return err; ++ ++ /* copy all packets from the old queue to the new queue */ ++ sch_tree_lock(sch); ++ while ((skb = esfq_q_dequeue(q)) != NULL) ++ esfq_q_enqueue(skb, &new, ESFQ_TAIL); ++ ++ /* clean up the old queue */ ++ esfq_q_destroy(q); ++ ++ /* copy elements of the new queue into the old queue */ ++ q->perturb_period = new.perturb_period; ++ q->quantum = new.quantum; ++ q->limit = new.limit; ++ q->depth = new.depth; ++ q->hash_divisor = new.hash_divisor; ++ q->hash_kind = new.hash_kind; ++ q->tail = new.tail; ++ q->max_depth = new.max_depth; ++ q->ht = new.ht; ++ q->dep = new.dep; ++ q->next = new.next; ++ q->allot = new.allot; ++ q->hash = new.hash; ++ q->qs = new.qs; ++ ++ /* finish up */ ++ if (q->perturb_period) { ++ q->perturb_timer.expires = jiffies + q->perturb_period; ++ add_timer(&q->perturb_timer); ++ } else { ++ q->perturbation = 0; ++ } ++ sch_tree_unlock(sch); ++ return 0; ++} ++ ++static int esfq_dump(struct Qdisc *sch, struct sk_buff *skb) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ unsigned char *b = skb_tail_pointer(skb); ++ struct tc_esfq_qopt opt; ++ ++ opt.quantum = q->quantum; ++ opt.perturb_period = q->perturb_period/HZ; ++ ++ opt.limit = q->limit; ++ opt.divisor = q->hash_divisor; ++ opt.flows = q->depth; ++ opt.hash_kind = q->hash_kind; ++ ++ NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); ++ ++ return skb->len; ++ ++nla_put_failure: ++ nlmsg_trim(skb, b); ++ return -1; ++} ++ ++static struct Qdisc_ops esfq_qdisc_ops = ++{ ++ .next = NULL, ++ .cl_ops = NULL, ++ .id = "esfq", ++ .priv_size = sizeof(struct esfq_sched_data), ++ .enqueue = esfq_enqueue, ++ .dequeue = esfq_dequeue, ++ .requeue = esfq_requeue, ++ .drop = esfq_drop, ++ .init = esfq_init, ++ .reset = esfq_reset, ++ .destroy = esfq_destroy, ++ .change = esfq_change, ++ .dump = esfq_dump, ++ .owner = THIS_MODULE, ++}; ++ ++static int __init esfq_module_init(void) ++{ ++ return register_qdisc(&esfq_qdisc_ops); ++} 
++static void __exit esfq_module_exit(void) ++{ ++ unregister_qdisc(&esfq_qdisc_ops); ++} ++module_init(esfq_module_init) ++module_exit(esfq_module_exit) ++MODULE_LICENSE("GPL"); diff --git a/kernel-fbcon-margins.patch b/kernel-fbcon-margins.patch new file mode 100644 index 00000000..fd77379f --- /dev/null +++ b/kernel-fbcon-margins.patch @@ -0,0 +1,29 @@ +This fixes "margin colour" (colour used to clear margins - e.g. a half of line +at the bottom of 100x37 console on 800x600 framebuffer). + +I don't know what was the intention behind using attr_bgcol_ec() here, but it +caused using of background colour of last erase character to clear margins - +which definitely isn't what we want... +This patch changes margin colour to black (or colour 0 in palette modes). + + -- Jakub Bogusz + +--- linux-2.6.9/drivers/video/console/bitblit.c.orig 2004-10-20 18:13:32.000000000 +0200 ++++ linux-2.6.9/drivers/video/console/bitblit.c 2004-10-20 18:22:35.153056112 +0200 +@@ -206,7 +206,6 @@ + static void bit_clear_margins(struct vc_data *vc, struct fb_info *info, + int bottom_only) + { +- int bgshift = (vc->vc_hi_font_mask) ? 13 : 12; + unsigned int cw = vc->vc_font.width; + unsigned int ch = vc->vc_font.height; + unsigned int rw = info->var.xres - (vc->vc_cols*cw); +@@ -215,7 +214,7 @@ + unsigned int bs = info->var.yres - bh; + struct fb_fillrect region; + +- region.color = attr_bgcol_ec(bgshift, vc, info); ++ region.color = 0; + region.rop = ROP_COPY; + + if (rw && !bottom_only) { diff --git a/kernel-fbcondecor.patch b/kernel-fbcondecor.patch new file mode 100644 index 00000000..d8e737ee --- /dev/null +++ b/kernel-fbcondecor.patch @@ -0,0 +1,1839 @@ +diff -Naurp -x .git /tmp/linux/Documentation/fb/00-INDEX ./Documentation/fb/00-INDEX +--- /tmp/linux/Documentation/fb/00-INDEX 2008-03-25 07:24:16.000000000 +0100 ++++ ./Documentation/fb/00-INDEX 2008-03-25 23:04:10.000000000 +0100 +@@ -17,6 +17,8 @@ deferred_io.txt + - an introduction to deferred IO. 
+ fbcon.txt + - intro to and usage guide for the framebuffer console (fbcon). ++fbcondecor.txt ++ - info on the Framebuffer Console Decoration + framebuffer.txt + - introduction to frame buffer devices. + imacfb.txt +diff -Naurp -x .git /tmp/linux/Documentation/fb/fbcondecor.txt ./Documentation/fb/fbcondecor.txt +--- /tmp/linux/Documentation/fb/fbcondecor.txt 1970-01-01 01:00:00.000000000 +0100 ++++ ./Documentation/fb/fbcondecor.txt 2008-03-25 23:04:10.000000000 +0100 +@@ -0,0 +1,207 @@ ++What is it? ++----------- ++ ++The framebuffer decorations are a kernel feature which allows displaying a ++background picture on selected consoles. ++ ++What do I need to get it to work? ++--------------------------------- ++ ++To get fbcondecor up-and-running you will have to: ++ 1) get a copy of splashutils [1] or a similar program ++ 2) get some fbcondecor themes ++ 3) build the kernel helper program ++ 4) build your kernel with the FB_CON_DECOR option enabled. ++ ++To get fbcondecor operational right after fbcon initialization is finished, you ++will have to include a theme and the kernel helper into your initramfs image. ++Please refer to splashutils documentation for instructions on how to do that. ++ ++[1] The splashutils package can be downloaded from: ++ http://dev.gentoo.org/~spock/projects/splashutils/ ++ ++The userspace helper ++-------------------- ++ ++The userspace fbcondecor helper (by default: /sbin/fbcondecor_helper) is called by the ++kernel whenever an important event occurs and the kernel needs some kind of ++job to be carried out. Important events include console switches and video ++mode switches (the kernel requests background images and configuration ++parameters for the current console). The fbcondecor helper must be accessible at ++all times. If it's not, fbcondecor will be switched off automatically. ++ ++It's possible to set path to the fbcondecor helper by writing it to ++/proc/sys/kernel/fbcondecor. 
++ ++***************************************************************************** ++ ++The information below is mostly technical stuff. There's probably no need to ++read it unless you plan to develop a userspace helper. ++ ++The fbcondecor protocol ++----------------------- ++ ++The fbcondecor protocol defines a communication interface between the kernel and ++the userspace fbcondecor helper. ++ ++The kernel side is responsible for: ++ ++ * rendering console text, using an image as a background (instead of a ++ standard solid color fbcon uses), ++ * accepting commands from the user via ioctls on the fbcondecor device, ++ * calling the userspace helper to set things up as soon as the fb subsystem ++ is initialized. ++ ++The userspace helper is responsible for everything else, including parsing ++configuration files, decompressing the image files whenever the kernel needs ++it, and communicating with the kernel if necessary. ++ ++The fbcondecor protocol specifies how communication is done in both ways: ++kernel->userspace and userspace->kernel. ++ ++Kernel -> Userspace ++------------------- ++ ++The kernel communicates with the userspace helper by calling it and specifying ++the task to be done in a series of arguments. ++ ++The arguments follow the pattern: ++ ++ ++All commands defined in fbcondecor protocol v2 have the following parameters: ++ virtual console ++ framebuffer number ++ theme ++ ++Fbcondecor protocol v1 specified an additional 'fbcondecor mode' after the ++framebuffer number. Fbcondecor protocol v1 is deprecated and should not be used. ++ ++Fbcondecor protocol v2 specifies the following commands: ++ ++getpic ++------ ++ The kernel issues this command to request image data. It's up to the ++ userspace helper to find a background image appropriate for the specified ++ theme and the current resolution. The userspace helper should respond by ++ issuing the FBIOCONDECOR_SETPIC ioctl. 
++ ++init ++---- ++ The kernel issues this command after the fbcondecor device is created and ++ the fbcondecor interface is initialized. Upon receiving 'init', the userspace ++ helper should parse the kernel command line (/proc/cmdline) or otherwise ++ decide whether fbcondecor is to be activated. ++ ++ To activate fbcondecor on the first console the helper should issue the ++ FBIOCONDECOR_SETCFG, FBIOCONDECOR_SETPIC and FBIOCONDECOR_SETSTATE commands, ++ in the above-mentioned order. ++ ++ When the userspace helper is called in an early phase of the boot process ++ (right after the initialization of fbcon), no filesystems will be mounted. ++ The helper program should mount sysfs and then create the appropriate ++ framebuffer, fbcondecor and tty0 devices (if they don't already exist) to get ++ current display settings and to be able to communicate with the kernel side. ++ It should probably also mount the procfs to be able to parse the kernel ++ command line parameters. ++ ++ Note that the console sem is not held when the kernel calls fbcondecor_helper ++ with the 'init' command. The fbcondecor helper should perform all ioctls with ++ origin set to FBCON_DECOR_IO_ORIG_USER. ++ ++modechange ++---------- ++ The kernel issues this command on a mode change. The helper's response should ++ be similar to the response to the 'init' command. Note that this time the ++ console sem is held and all ioctls must be performed with origin set to ++ FBCON_DECOR_IO_ORIG_KERNEL. ++ ++ ++Userspace -> Kernel ++------------------- ++ ++Userspace programs can communicate with fbcondecor via ioctls on the ++fbcondecor device. These ioctls are to be used by both the userspace helper ++(called only by the kernel) and userspace configuration tools (run by the users). ++ ++The fbcondecor helper should set the origin field to FBCON_DECOR_IO_ORIG_KERNEL ++when doing the appropriate ioctls. All userspace configuration tools should ++use FBCON_DECOR_IO_ORIG_USER. 
Failure to set the appropriate value in the origin ++field when performing ioctls from the kernel helper will most likely result ++in a console deadlock. ++ ++FBCON_DECOR_IO_ORIG_KERNEL instructs fbcondecor not to try to acquire the console ++semaphore. Not surprisingly, FBCON_DECOR_IO_ORIG_USER instructs it to acquire ++the console sem. ++ ++The framebuffer console decoration provides the following ioctls (all defined in ++linux/fb.h): ++ ++FBIOCONDECOR_SETPIC ++description: loads a background picture for a virtual console ++argument: struct fbcon_decor_iowrapper*; data: struct fb_image* ++notes: ++If called for consoles other than the current foreground one, the picture data ++will be ignored. ++ ++If the current virtual console is running in an 8-bpp mode, the cmap substruct ++of fb_image has to be filled appropriately: start should be set to 16 (first ++16 colors are reserved for fbcon), len to a value <= 240 and red, green and ++blue should point to valid cmap data. The transp field is ignored. The fields ++dx, dy, bg_color, fg_color in fb_image are ignored as well. ++ ++FBIOCONDECOR_SETCFG ++description: sets the fbcondecor config for a virtual console ++argument: struct fbcon_decor_iowrapper*; data: struct vc_decor* ++notes: The structure has to be filled with valid data. ++ ++FBIOCONDECOR_GETCFG ++description: gets the fbcondecor config for a virtual console ++argument: struct fbcon_decor_iowrapper*; data: struct vc_decor* ++ ++FBIOCONDECOR_SETSTATE ++description: sets the fbcondecor state for a virtual console ++argument: struct fbcon_decor_iowrapper*; data: unsigned int* ++ values: 0 = disabled, 1 = enabled. ++ ++FBIOCONDECOR_GETSTATE ++description: gets the fbcondecor state for a virtual console ++argument: struct fbcon_decor_iowrapper*; data: unsigned int* ++ values: as in FBIOCONDECOR_SETSTATE ++ ++Info on used structures: ++ ++Definition of struct vc_decor can be found in linux/console_decor.h. It's ++heavily commented. 
Note that the 'theme' field should point to a string ++no longer than FBCON_DECOR_THEME_LEN. When FBIOCONDECOR_GETCFG call is ++performed, the theme field should point to a char buffer of length ++FBCON_DECOR_THEME_LEN. ++ ++Definition of struct fbcon_decor_iowrapper can be found in linux/fb.h. ++The fields in this struct have the following meaning: ++ ++vc: ++Virtual console number. ++ ++origin: ++Specifies if the ioctl is performed as a response to a kernel request. The ++fbcondecor helper should set this field to FBCON_DECOR_IO_ORIG_KERNEL, userspace ++programs should set it to FBCON_DECOR_IO_ORIG_USER. This field is necessary to ++avoid console semaphore deadlocks. ++ ++data: ++Pointer to a data structure appropriate for the performed ioctl. Type of ++the data struct is specified in the ioctls description. ++ ++***************************************************************************** ++ ++Credit ++------ ++ ++Original 'bootsplash' project & implementation by: ++ Volker Poplawski , Stefan Reinauer , ++ Steffen Winterfeldt , Michael Schroeder , ++ Ken Wimer . ++ ++Fbcondecor, fbcondecor protocol design, current implementation & docs by: ++ Michal Januszewski ++ +diff -Naurp -x .git /tmp/linux/drivers/Makefile ./drivers/Makefile +--- /tmp/linux/drivers/Makefile 2008-03-25 07:24:16.000000000 +0100 ++++ ./drivers/Makefile 2008-03-25 23:04:10.000000000 +0100 +@@ -9,6 +9,9 @@ obj-$(CONFIG_HAVE_GPIO_LIB) += gpio/ + obj-$(CONFIG_PCI) += pci/ + obj-$(CONFIG_PARISC) += parisc/ + obj-$(CONFIG_RAPIDIO) += rapidio/ ++# char/ comes before serial/ etc so that the VT console is the boot-time ++# default. ++obj-y += char/ + obj-y += video/ + obj-$(CONFIG_ACPI) += acpi/ + # PnP must come after ACPI since it will eventually need to check if acpi +@@ -18,10 +21,6 @@ obj-$(CONFIG_ARM_AMBA) += amba/ + + obj-$(CONFIG_XEN) += xen/ + +-# char/ comes before serial/ etc so that the VT console is the boot-time +-# default. 
+-obj-y += char/ +- + obj-$(CONFIG_CONNECTOR) += connector/ + + # i810fb and intelfb depend on char/agp/ +diff -Naurp -x .git /tmp/linux/drivers/video/console/bitblit.c ./drivers/video/console/bitblit.c +--- /tmp/linux/drivers/video/console/bitblit.c 2008-03-25 07:24:16.000000000 +0100 ++++ ./drivers/video/console/bitblit.c 2008-03-25 23:04:10.000000000 +0100 +@@ -17,6 +17,7 @@ + #include + #include + #include "fbcon.h" ++#include "fbcondecor.h" + + /* + * Accelerated handlers. +@@ -54,6 +55,13 @@ static void bit_bmove(struct vc_data *vc + area.height = height * vc->vc_font.height; + area.width = width * vc->vc_font.width; + ++ if (fbcon_decor_active(info, vc)) { ++ area.sx += vc->vc_decor.tx; ++ area.sy += vc->vc_decor.ty; ++ area.dx += vc->vc_decor.tx; ++ area.dy += vc->vc_decor.ty; ++ } ++ + info->fbops->fb_copyarea(info, &area); + } + +@@ -379,11 +387,15 @@ static void bit_cursor(struct vc_data *v + cursor.image.depth = 1; + cursor.rop = ROP_XOR; + +- if (info->fbops->fb_cursor) +- err = info->fbops->fb_cursor(info, &cursor); ++ if (fbcon_decor_active(info, vc)) { ++ fbcon_decor_cursor(info, &cursor); ++ } else { ++ if (info->fbops->fb_cursor) ++ err = info->fbops->fb_cursor(info, &cursor); + +- if (err) +- soft_cursor(info, &cursor); ++ if (err) ++ soft_cursor(info, &cursor); ++ } + + ops->cursor_reset = 0; + } +diff -Naurp -x .git /tmp/linux/drivers/video/console/cfbcondecor.c ./drivers/video/console/cfbcondecor.c +--- /tmp/linux/drivers/video/console/cfbcondecor.c 1970-01-01 01:00:00.000000000 +0100 ++++ ./drivers/video/console/cfbcondecor.c 2008-03-25 23:11:30.000000000 +0100 +@@ -0,0 +1,471 @@ ++/* ++ * linux/drivers/video/cfbcon_decor.c -- Framebuffer decor render functions ++ * ++ * Copyright (C) 2004 Michal Januszewski ++ * ++ * Code based upon "Bootdecor" (C) 2001-2003 ++ * Volker Poplawski , ++ * Stefan Reinauer , ++ * Steffen Winterfeldt , ++ * Michael Schroeder , ++ * Ken Wimer . 
++ * ++ * This file is subject to the terms and conditions of the GNU General Public ++ * License. See the file COPYING in the main directory of this archive for ++ * more details. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "fbcon.h" ++#include "fbcondecor.h" ++ ++#define parse_pixel(shift,bpp,type) \ ++ do { \ ++ if (d & (0x80 >> (shift))) \ ++ dd2[(shift)] = fgx; \ ++ else \ ++ dd2[(shift)] = transparent ? *(type *)decor_src : bgx; \ ++ decor_src += (bpp); \ ++ } while (0) \ ++ ++extern int get_color(struct vc_data *vc, struct fb_info *info, ++ u16 c, int is_fg); ++ ++void fbcon_decor_fix_pseudo_pal(struct fb_info *info, struct vc_data *vc) ++{ ++ int i, j, k; ++ int minlen = min(min(info->var.red.length, info->var.green.length), ++ info->var.blue.length); ++ u32 col; ++ ++ for (j = i = 0; i < 16; i++) { ++ k = color_table[i]; ++ ++ col = ((vc->vc_palette[j++] >> (8-minlen)) ++ << info->var.red.offset); ++ col |= ((vc->vc_palette[j++] >> (8-minlen)) ++ << info->var.green.offset); ++ col |= ((vc->vc_palette[j++] >> (8-minlen)) ++ << info->var.blue.offset); ++ ((u32 *)info->pseudo_palette)[k] = col; ++ } ++} ++ ++void fbcon_decor_renderc(struct fb_info *info, int ypos, int xpos, int height, ++ int width, u8* src, u32 fgx, u32 bgx, u8 transparent) ++{ ++ unsigned int x, y; ++ u32 dd; ++ int bytespp = ((info->var.bits_per_pixel + 7) >> 3); ++ unsigned int d = ypos * info->fix.line_length + xpos * bytespp; ++ unsigned int ds = (ypos * info->var.xres + xpos) * bytespp; ++ u16 dd2[4]; ++ ++ u8* decor_src = (u8 *)(info->bgdecor.data + ds); ++ u8* dst = (u8 *)(info->screen_base + d); ++ ++ if ((ypos + height) > info->var.yres || (xpos + width) > info->var.xres) ++ return; ++ ++ for (y = 0; y < height; y++) { ++ switch (info->var.bits_per_pixel) { ++ ++ case 32: ++ for (x = 0; x < width; x++) { ++ ++ if ((x & 7) == 0) ++ d = *src++; ++ if (d & 0x80) ++ dd = fgx; ++ else ++ dd = transparent ? 
++ *(u32 *)decor_src : bgx; ++ ++ d <<= 1; ++ decor_src += 4; ++ fb_writel(dd, dst); ++ dst += 4; ++ } ++ break; ++ case 24: ++ for (x = 0; x < width; x++) { ++ ++ if ((x & 7) == 0) ++ d = *src++; ++ if (d & 0x80) ++ dd = fgx; ++ else ++ dd = transparent ? ++ (*(u32 *)decor_src & 0xffffff) : bgx; ++ ++ d <<= 1; ++ decor_src += 3; ++#ifdef __LITTLE_ENDIAN ++ fb_writew(dd & 0xffff, dst); ++ dst += 2; ++ fb_writeb((dd >> 16), dst); ++#else ++ fb_writew(dd >> 8, dst); ++ dst += 2; ++ fb_writeb(dd & 0xff, dst); ++#endif ++ dst++; ++ } ++ break; ++ case 16: ++ for (x = 0; x < width; x += 2) { ++ if ((x & 7) == 0) ++ d = *src++; ++ ++ parse_pixel(0, 2, u16); ++ parse_pixel(1, 2, u16); ++#ifdef __LITTLE_ENDIAN ++ dd = dd2[0] | (dd2[1] << 16); ++#else ++ dd = dd2[1] | (dd2[0] << 16); ++#endif ++ d <<= 2; ++ fb_writel(dd, dst); ++ dst += 4; ++ } ++ break; ++ ++ case 8: ++ for (x = 0; x < width; x += 4) { ++ if ((x & 7) == 0) ++ d = *src++; ++ ++ parse_pixel(0, 1, u8); ++ parse_pixel(1, 1, u8); ++ parse_pixel(2, 1, u8); ++ parse_pixel(3, 1, u8); ++ ++#ifdef __LITTLE_ENDIAN ++ dd = dd2[0] | (dd2[1] << 8) | (dd2[2] << 16) | (dd2[3] << 24); ++#else ++ dd = dd2[3] | (dd2[2] << 8) | (dd2[1] << 16) | (dd2[0] << 24); ++#endif ++ d <<= 4; ++ fb_writel(dd, dst); ++ dst += 4; ++ } ++ } ++ ++ dst += info->fix.line_length - width * bytespp; ++ decor_src += (info->var.xres - width) * bytespp; ++ } ++} ++ ++#define cc2cx(a) \ ++ ((info->fix.visual == FB_VISUAL_TRUECOLOR || \ ++ info->fix.visual == FB_VISUAL_DIRECTCOLOR) ? \ ++ ((u32*)info->pseudo_palette)[a] : a) ++ ++void fbcon_decor_putcs(struct vc_data *vc, struct fb_info *info, ++ const unsigned short *s, int count, int yy, int xx) ++{ ++ unsigned short charmask = vc->vc_hi_font_mask ? 
0x1ff : 0xff; ++ struct fbcon_ops *ops = info->fbcon_par; ++ int fg_color, bg_color, transparent; ++ u8 *src; ++ u32 bgx, fgx; ++ u16 c = scr_readw(s); ++ ++ fg_color = get_color(vc, info, c, 1); ++ bg_color = get_color(vc, info, c, 0); ++ ++ /* Don't paint the background image if console is blanked */ ++ transparent = ops->blank_state ? 0 : ++ (vc->vc_decor.bg_color == bg_color); ++ ++ xx = xx * vc->vc_font.width + vc->vc_decor.tx; ++ yy = yy * vc->vc_font.height + vc->vc_decor.ty; ++ ++ fgx = cc2cx(fg_color); ++ bgx = cc2cx(bg_color); ++ ++ while (count--) { ++ c = scr_readw(s++); ++ src = vc->vc_font.data + (c & charmask) * vc->vc_font.height * ++ ((vc->vc_font.width + 7) >> 3); ++ ++ fbcon_decor_renderc(info, yy, xx, vc->vc_font.height, ++ vc->vc_font.width, src, fgx, bgx, transparent); ++ xx += vc->vc_font.width; ++ } ++} ++ ++void fbcon_decor_cursor(struct fb_info *info, struct fb_cursor *cursor) ++{ ++ int i; ++ unsigned int dsize, s_pitch; ++ struct fbcon_ops *ops = info->fbcon_par; ++ struct vc_data* vc; ++ u8 *src; ++ ++ /* we really don't need any cursors while the console is blanked */ ++ if (info->state != FBINFO_STATE_RUNNING || ops->blank_state) ++ return; ++ ++ vc = vc_cons[ops->currcon].d; ++ ++ src = kmalloc(64 + sizeof(struct fb_image), GFP_ATOMIC); ++ if (!src) ++ return; ++ ++ s_pitch = (cursor->image.width + 7) >> 3; ++ dsize = s_pitch * cursor->image.height; ++ if (cursor->enable) { ++ switch (cursor->rop) { ++ case ROP_XOR: ++ for (i = 0; i < dsize; i++) ++ src[i] = cursor->image.data[i] ^ cursor->mask[i]; ++ break; ++ case ROP_COPY: ++ default: ++ for (i = 0; i < dsize; i++) ++ src[i] = cursor->image.data[i] & cursor->mask[i]; ++ break; ++ } ++ } else ++ memcpy(src, cursor->image.data, dsize); ++ ++ fbcon_decor_renderc(info, ++ cursor->image.dy + vc->vc_decor.ty, ++ cursor->image.dx + vc->vc_decor.tx, ++ cursor->image.height, ++ cursor->image.width, ++ (u8*)src, ++ cc2cx(cursor->image.fg_color), ++ cc2cx(cursor->image.bg_color), ++ 
cursor->image.bg_color == vc->vc_decor.bg_color); ++ ++ kfree(src); ++} ++ ++/* Fill a width x height pixel rectangle starting at dst with the colour bgx; ++ * dstbytes is the distance in bytes between two consecutive rows. */ ++static void decorset(u8 *dst, int height, int width, int dstbytes, ++ u32 bgx, int bpp) ++{ ++ int i; ++ ++ if (bpp == 8) ++ bgx |= bgx << 8; ++ if (bpp == 16 || bpp == 8) ++ bgx |= bgx << 16; ++ ++ while (height-- > 0) { ++ u8 *p = dst; ++ ++ switch (bpp) { ++ ++ case 32: ++ for (i=0; i < width; i++) { ++ fb_writel(bgx, p); p += 4; ++ } ++ break; ++ case 24: ++ for (i=0; i < width; i++) { ++#ifdef __LITTLE_ENDIAN ++ fb_writew((bgx & 0xffff),(u16*)p); p += 2; ++ fb_writeb((bgx >> 16),p++); ++#else ++ fb_writew((bgx >> 8),(u16*)p); p += 2; ++ fb_writeb((bgx & 0xff),p++); ++#endif ++ } ++ /* must not fall through: the 16bpp code would keep writing past the row */ ++ break; ++ case 16: ++ for (i=0; i < width/4; i++) { ++ fb_writel(bgx,p); p += 4; ++ fb_writel(bgx,p); p += 4; ++ } ++ if (width & 2) { ++ fb_writel(bgx,p); p += 4; ++ } ++ if (width & 1) ++ fb_writew(bgx,(u16*)p); ++ break; ++ case 8: ++ for (i=0; i < width/4; i++) { ++ fb_writel(bgx,p); p += 4; ++ } ++ ++ if (width & 2) { ++ fb_writew(bgx,p); p += 2; ++ } ++ if (width & 1) ++ fb_writeb(bgx,(u8*)p); ++ break; ++ ++ } ++ dst += dstbytes; ++ } ++} ++ ++void fbcon_decor_copy(u8 *dst, u8 *src, int height, int width, int linebytes, ++ int srclinebytes, int bpp) ++{ ++ int i; ++ ++ while (height-- > 0) { ++ u32 *p = (u32 *)dst; ++ u32 *q = (u32 *)src; ++ ++ switch (bpp) { ++ ++ case 32: ++ for (i=0; i < width; i++) ++ fb_writel(*q++, p++); ++ break; ++ case 24: ++ for (i=0; i < (width*3/4); i++) ++ fb_writel(*q++, p++); ++ if ((width*3) % 4) { ++ /* 1-3 bytes of the row do not fill a whole u32; copy them bytewise */ ++ u8 *pb = (u8*)p; ++ u8 *qb = (u8*)q; ++ int rem = (width*3) % 4; ++ ++ while (rem-- > 0) ++ fb_writeb(*qb++, pb++); ++ } ++ break; ++ case 16: ++ for (i=0; i < width/4; i++) { ++ fb_writel(*q++, p++); ++ fb_writel(*q++, p++); ++ } ++ if (width & 2) ++ fb_writel(*q++, p++); ++ if (width & 1) ++ fb_writew(*(u16*)q, (u16*)p); ++ break; ++ case 8: ++ for (i=0; i < width/4; i++) ++ fb_writel(*q++, p++); ++ ++ if (width & 2) { ++ 
fb_writew(*(u16*)q, (u16*)p); ++ q = (u32*) ((u16*)q + 1); ++ p = (u32*) ((u16*)p + 1); ++ } ++ if (width & 1) ++ fb_writeb(*(u8*)q, (u8*)p); ++ break; ++ } ++ ++ dst += linebytes; ++ src += srclinebytes; ++ } ++} ++ ++static void decorfill(struct fb_info *info, int sy, int sx, int height, ++ int width) ++{ ++ int bytespp = ((info->var.bits_per_pixel + 7) >> 3); ++ int d = sy * info->fix.line_length + sx * bytespp; ++ int ds = (sy * info->var.xres + sx) * bytespp; ++ ++ fbcon_decor_copy((u8 *)(info->screen_base + d), (u8 *)(info->bgdecor.data + ds), ++ height, width, info->fix.line_length, info->var.xres * bytespp, ++ info->var.bits_per_pixel); ++} ++ ++void fbcon_decor_clear(struct vc_data *vc, struct fb_info *info, int sy, int sx, ++ int height, int width) ++{ ++ int bgshift = (vc->vc_hi_font_mask) ? 13 : 12; ++ struct fbcon_ops *ops = info->fbcon_par; ++ u8 *dst; ++ int transparent, bg_color = attr_bgcol_ec(bgshift, vc, info); ++ ++ transparent = (vc->vc_decor.bg_color == bg_color); ++ sy = sy * vc->vc_font.height + vc->vc_decor.ty; ++ sx = sx * vc->vc_font.width + vc->vc_decor.tx; ++ height *= vc->vc_font.height; ++ width *= vc->vc_font.width; ++ ++ /* Don't paint the background image if console is blanked */ ++ if (transparent && !ops->blank_state) { ++ decorfill(info, sy, sx, height, width); ++ } else { ++ dst = (u8 *)(info->screen_base + sy * info->fix.line_length + ++ sx * ((info->var.bits_per_pixel + 7) >> 3)); ++ decorset(dst, height, width, info->fix.line_length, cc2cx(bg_color), ++ info->var.bits_per_pixel); ++ } ++} ++ ++void fbcon_decor_clear_margins(struct vc_data *vc, struct fb_info *info, ++ int bottom_only) ++{ ++ unsigned int tw = vc->vc_cols*vc->vc_font.width; ++ unsigned int th = vc->vc_rows*vc->vc_font.height; ++ ++ if (!bottom_only) { ++ /* top margin */ ++ decorfill(info, 0, 0, vc->vc_decor.ty, info->var.xres); ++ /* left margin */ ++ decorfill(info, vc->vc_decor.ty, 0, th, vc->vc_decor.tx); ++ /* right margin */ ++ decorfill(info, 
vc->vc_decor.ty, vc->vc_decor.tx + tw, th, ++ info->var.xres - vc->vc_decor.tx - tw); ++ } ++ decorfill(info, vc->vc_decor.ty + th, 0, ++ info->var.yres - vc->vc_decor.ty - th, info->var.xres); ++} ++ ++void fbcon_decor_bmove_redraw(struct vc_data *vc, struct fb_info *info, int y, ++ int sx, int dx, int width) ++{ ++ u16 *d = (u16 *) (vc->vc_origin + vc->vc_size_row * y + dx * 2); ++ u16 *s = d + (dx - sx); ++ u16 *start = d; ++ u16 *ls = d; ++ u16 *le = d + width; ++ u16 c; ++ int x = dx; ++ u16 attr = 1; ++ ++ do { ++ c = scr_readw(d); ++ if (attr != (c & 0xff00)) { ++ attr = c & 0xff00; ++ if (d > start) { ++ fbcon_decor_putcs(vc, info, start, d - start, y, x); ++ x += d - start; ++ start = d; ++ } ++ } ++ if (s >= ls && s < le && c == scr_readw(s)) { ++ if (d > start) { ++ fbcon_decor_putcs(vc, info, start, d - start, y, x); ++ x += d - start + 1; ++ start = d + 1; ++ } else { ++ x++; ++ start++; ++ } ++ } ++ s++; ++ d++; ++ } while (d < le); ++ if (d > start) ++ fbcon_decor_putcs(vc, info, start, d - start, y, x); ++} ++ ++void fbcon_decor_blank(struct vc_data *vc, struct fb_info *info, int blank) ++{ ++ if (blank) { ++ decorset((u8 *)info->screen_base, info->var.yres, info->var.xres, ++ info->fix.line_length, 0, info->var.bits_per_pixel); ++ } else { ++ update_screen(vc); ++ fbcon_decor_clear_margins(vc, info, 0); ++ } ++} ++ +diff -Naurp -x .git /tmp/linux/drivers/video/console/fbcon.c ./drivers/video/console/fbcon.c +--- /tmp/linux/drivers/video/console/fbcon.c 2008-03-25 07:24:16.000000000 +0100 ++++ ./drivers/video/console/fbcon.c 2008-03-25 23:04:10.000000000 +0100 +@@ -90,6 +90,7 @@ + #endif + + #include "fbcon.h" ++#include "fbcondecor.h" + + #ifdef FBCONDEBUG + # define DPRINTK(fmt, args...) 
printk(KERN_DEBUG "%s: " fmt, __FUNCTION__ , ## args) +@@ -105,7 +106,7 @@ enum { + + static struct display fb_display[MAX_NR_CONSOLES]; + +-static signed char con2fb_map[MAX_NR_CONSOLES]; ++signed char con2fb_map[MAX_NR_CONSOLES]; + static signed char con2fb_map_boot[MAX_NR_CONSOLES]; + #ifndef MODULE + static int logo_height; +@@ -315,7 +316,7 @@ static inline int fbcon_is_inactive(stru + vc->vc_mode != KD_TEXT || ops->graphics); + } + +-static inline int get_color(struct vc_data *vc, struct fb_info *info, ++inline int get_color(struct vc_data *vc, struct fb_info *info, + u16 c, int is_fg) + { + int depth = fb_get_color_depth(&info->var, &info->fix); +@@ -420,6 +421,7 @@ static void fb_flashcursor(struct work_s + CM_ERASE : CM_DRAW; + ops->cursor(vc, info, mode, softback_lines, get_color(vc, info, c, 1), + get_color(vc, info, c, 0)); ++ + release_console_sem(); + } + +@@ -590,6 +592,8 @@ static int fbcon_takeover(int show_logo) + info_idx = -1; + } + ++ fbcon_decor_init(); ++ + return err; + } + +@@ -1031,6 +1035,12 @@ static const char *fbcon_startup(void) + rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres); + cols /= vc->vc_font.width; + rows /= vc->vc_font.height; ++ ++ if (fbcon_decor_active(info, vc)) { ++ cols = vc->vc_decor.twidth / vc->vc_font.width; ++ rows = vc->vc_decor.theight / vc->vc_font.height; ++ } ++ + vc_resize(vc, cols, rows); + + DPRINTK("mode: %s\n", info->fix.id); +@@ -1114,7 +1124,7 @@ static void fbcon_init(struct vc_data *v + cap = info->flags; + + if (vc != svc || logo_shown == FBCON_LOGO_DONTSHOW || +- (info->fix.type == FB_TYPE_TEXT)) ++ (info->fix.type == FB_TYPE_TEXT) || fbcon_decor_active(info, vc)) + logo = 0; + + if (var_to_display(p, &info->var, info)) +@@ -1313,6 +1323,11 @@ static void fbcon_clear(struct vc_data * + if (!height || !width) + return; + ++ if (fbcon_decor_active(info, vc)) { ++ fbcon_decor_clear(vc, info, sy, sx, height, width); ++ return; ++ } ++ + /* Split blits that cross physical y_wrap boundary 
*/ + + y_break = p->vrows - p->yscroll; +@@ -1332,10 +1347,15 @@ static void fbcon_putcs(struct vc_data * + struct display *p = &fb_display[vc->vc_num]; + struct fbcon_ops *ops = info->fbcon_par; + +- if (!fbcon_is_inactive(vc, info)) +- ops->putcs(vc, info, s, count, real_y(p, ypos), xpos, +- get_color(vc, info, scr_readw(s), 1), +- get_color(vc, info, scr_readw(s), 0)); ++ if (!fbcon_is_inactive(vc, info)) { ++ ++ if (fbcon_decor_active(info, vc)) ++ fbcon_decor_putcs(vc, info, s, count, ypos, xpos); ++ else ++ ops->putcs(vc, info, s, count, real_y(p, ypos), xpos, ++ get_color(vc, info, scr_readw(s), 1), ++ get_color(vc, info, scr_readw(s), 0)); ++ } + } + + static void fbcon_putc(struct vc_data *vc, int c, int ypos, int xpos) +@@ -1351,8 +1371,13 @@ static void fbcon_clear_margins(struct v + struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]]; + struct fbcon_ops *ops = info->fbcon_par; + +- if (!fbcon_is_inactive(vc, info)) +- ops->clear_margins(vc, info, bottom_only); ++ if (!fbcon_is_inactive(vc, info)) { ++ if (fbcon_decor_active(info, vc)) { ++ fbcon_decor_clear_margins(vc, info, bottom_only); ++ } else { ++ ops->clear_margins(vc, info, bottom_only); ++ } ++ } + } + + static void fbcon_cursor(struct vc_data *vc, int mode) +@@ -1872,7 +1897,7 @@ static int fbcon_scroll(struct vc_data * + count = vc->vc_rows; + if (softback_top) + fbcon_softback_note(vc, t, count); +- if (logo_shown >= 0) ++ if (logo_shown >= 0 || fbcon_decor_active(info, vc)) + goto redraw_up; + switch (p->scrollmode) { + case SCROLL_MOVE: +@@ -1965,6 +1990,8 @@ static int fbcon_scroll(struct vc_data * + count = vc->vc_rows; + if (logo_shown >= 0) + goto redraw_down; ++ if (fbcon_decor_active(info, vc)) ++ goto redraw_down; + switch (p->scrollmode) { + case SCROLL_MOVE: + fbcon_redraw_blit(vc, info, p, b - 1, b - t - count, +@@ -2113,6 +2140,13 @@ static void fbcon_bmove_rec(struct vc_da + } + return; + } ++ ++ if (fbcon_decor_active(info, vc) && sy == dy && height == 1) { ++ /* must 
use slower redraw bmove to keep background pic intact */ ++ fbcon_decor_bmove_redraw(vc, info, sy, sx, dx, width); ++ return; ++ } ++ + ops->bmove(vc, info, real_y(p, sy), sx, real_y(p, dy), dx, + height, width); + } +@@ -2183,8 +2217,8 @@ static int fbcon_resize(struct vc_data * + var.yres = virt_h * virt_fh; + x_diff = info->var.xres - var.xres; + y_diff = info->var.yres - var.yres; +- if (x_diff < 0 || x_diff > virt_fw || +- y_diff < 0 || y_diff > virt_fh) { ++ if ((x_diff < 0 || x_diff > virt_fw || ++ y_diff < 0 || y_diff > virt_fh) && !vc->vc_decor.state) { + const struct fb_videomode *mode; + + DPRINTK("attempting resize %ix%i\n", var.xres, var.yres); +@@ -2220,6 +2254,19 @@ static int fbcon_switch(struct vc_data * + + info = registered_fb[con2fb_map[vc->vc_num]]; + ops = info->fbcon_par; ++ prev_console = ops->currcon; ++ if (prev_console != -1) ++ old_info = registered_fb[con2fb_map[prev_console]]; ++ ++ if (!fbcon_decor_active_vc(vc) && info->fix.visual == FB_VISUAL_DIRECTCOLOR) { ++ struct vc_data *vc_curr = vc_cons[prev_console].d; ++ if (vc_curr && fbcon_decor_active_vc(vc_curr)) { ++ /* Clear the screen to avoid displaying funky colors during ++ * palette updates. */ ++ memset((u8*)info->screen_base + info->fix.line_length * info->var.yoffset, ++ 0, info->var.yres * info->fix.line_length); ++ } ++ } + + if (softback_top) { + if (softback_lines) +@@ -2238,9 +2285,6 @@ static int fbcon_switch(struct vc_data * + logo_shown = FBCON_LOGO_CANSHOW; + } + +- prev_console = ops->currcon; +- if (prev_console != -1) +- old_info = registered_fb[con2fb_map[prev_console]]; + /* + * FIXME: If we have multiple fbdev's loaded, we need to + * update all info->currcon. 
Perhaps, we can place this +@@ -2280,6 +2324,18 @@ static int fbcon_switch(struct vc_data * + fbcon_del_cursor_timer(old_info); + } + ++ if (fbcon_decor_active_vc(vc)) { ++ struct vc_data *vc_curr = vc_cons[prev_console].d; ++ ++ if (!vc_curr->vc_decor.theme || ++ strcmp(vc->vc_decor.theme, vc_curr->vc_decor.theme) || ++ (fbcon_decor_active_nores(info, vc_curr) && ++ !fbcon_decor_active(info, vc_curr))) { ++ if (fbcon_decor_call_helper("modechange", vc->vc_num)) ++ fbcon_decor_disable(vc, 0); ++ } ++ } ++ + if (fbcon_is_inactive(vc, info) || + ops->blank_state != FB_BLANK_UNBLANK) + fbcon_del_cursor_timer(info); +@@ -2394,8 +2450,12 @@ static int fbcon_blank(struct vc_data *v + fbcon_cursor(vc, blank ? CM_ERASE : CM_DRAW); + ops->cursor_flash = (!blank); + +- if (fb_blank(info, blank)) +- fbcon_generic_blank(vc, info, blank); ++ if (fb_blank(info, blank)) { ++ if (fbcon_decor_active(info, vc)) ++ fbcon_decor_blank(vc, info, blank); ++ else ++ fbcon_generic_blank(vc, info, blank); ++ } + } + + if (!blank) +@@ -2546,13 +2606,22 @@ static int fbcon_do_set_font(struct vc_d + } + + if (resize) { ++ /* reset wrap/pan */ + int cols, rows; + + cols = FBCON_SWAP(ops->rotate, info->var.xres, info->var.yres); + rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres); ++ ++ info->var.xoffset = info->var.yoffset = p->yscroll = 0; ++ if (fbcon_decor_active(info, vc)) { ++ cols = vc->vc_decor.twidth; ++ rows = vc->vc_decor.theight; ++ } + cols /= w; + rows /= h; ++ + vc_resize(vc, cols, rows); ++ + if (CON_IS_VISIBLE(vc) && softback_buf) + fbcon_update_softback(vc); + } else if (CON_IS_VISIBLE(vc) +@@ -2681,7 +2750,7 @@ static int fbcon_set_palette(struct vc_d + int i, j, k, depth; + u8 val; + +- if (fbcon_is_inactive(vc, info)) ++ if (fbcon_is_inactive(vc, info) || vc->vc_num != fg_console) + return -EINVAL; + + if (!CON_IS_VISIBLE(vc)) +@@ -2707,7 +2776,49 @@ static int fbcon_set_palette(struct vc_d + } else + fb_copy_cmap(fb_default_cmap(1 << depth), &palette_cmap); + 
+- return fb_set_cmap(&palette_cmap, info); ++ if (fbcon_decor_active(info, vc_cons[fg_console].d) && ++ info->fix.visual == FB_VISUAL_DIRECTCOLOR) { ++ ++ u16 *red, *green, *blue; ++ int minlen = min(min(info->var.red.length, info->var.green.length), ++ info->var.blue.length); ++ int h; ++ ++ struct fb_cmap cmap = { ++ .start = 0, ++ .len = (1 << minlen), ++ .red = NULL, ++ .green = NULL, ++ .blue = NULL, ++ .transp = NULL ++ }; ++ ++ red = kmalloc(256 * sizeof(u16) * 3, GFP_KERNEL); ++ ++ if (!red) ++ goto out; ++ ++ green = red + 256; ++ blue = green + 256; ++ cmap.red = red; ++ cmap.green = green; ++ cmap.blue = blue; ++ ++ for (i = 0; i < cmap.len; i++) { ++ red[i] = green[i] = blue[i] = (0xffff * i)/(cmap.len-1); ++ } ++ ++ h = fb_set_cmap(&cmap, info); ++ fbcon_decor_fix_pseudo_pal(info, vc_cons[fg_console].d); ++ kfree(red); ++ ++ return h; ++ ++ } else if (fbcon_decor_active(info, vc_cons[fg_console].d) && ++ info->var.bits_per_pixel == 8 && info->bgdecor.cmap.red != NULL) ++ fb_set_cmap(&info->bgdecor.cmap, info); ++ ++out: return fb_set_cmap(&palette_cmap, info); + } + + static u16 *fbcon_screen_pos(struct vc_data *vc, int offset) +@@ -2933,7 +3044,14 @@ static void fbcon_modechanged(struct fb_ + rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres); + cols /= vc->vc_font.width; + rows /= vc->vc_font.height; +- vc_resize(vc, cols, rows); ++ ++ if (!fbcon_decor_active_nores(info, vc)) { ++ vc_resize(vc, cols, rows); ++ } else { ++ if (fbcon_decor_call_helper("modechange", vc->vc_num)) ++ fbcon_decor_disable(vc, 0); ++ } ++ + updatescrollmode(p, info, vc); + scrollback_max = 0; + scrollback_current = 0; +@@ -3561,6 +3679,7 @@ static void fbcon_exit(void) + } + } + ++ fbcon_decor_exit(); + fbcon_has_exited = 1; + } + +diff -Naurp -x .git /tmp/linux/drivers/video/console/fbcondecor.c ./drivers/video/console/fbcondecor.c +--- /tmp/linux/drivers/video/console/fbcondecor.c 1970-01-01 01:00:00.000000000 +0100 ++++ ./drivers/video/console/fbcondecor.c 
2008-03-25 23:04:10.000000000 +0100 +@@ -0,0 +1,420 @@ ++/* ++ * linux/drivers/video/console/fbcondecor.c -- Framebuffer console decorations ++ * ++ * Copyright (C) 2004 Michal Januszewski ++ * ++ * Code based upon "Bootsplash" (C) 2001-2003 ++ * Volker Poplawski , ++ * Stefan Reinauer , ++ * Steffen Winterfeldt , ++ * Michael Schroeder , ++ * Ken Wimer . ++ * ++ * This file is subject to the terms and conditions of the GNU General Public ++ * License. See the file COPYING in the main directory of this archive for ++ * more details. ++ * ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#include "fbcon.h" ++#include "fbcondecor.h" ++ ++extern signed char con2fb_map[]; ++static int fbcon_decor_enable(struct vc_data *vc); ++char fbcon_decor_path[KMOD_PATH_LEN] = "/sbin/fbcondecor_helper"; ++static int initialized = 0; ++ ++int fbcon_decor_call_helper(char* cmd, unsigned short vc) ++{ ++ char *envp[] = { ++ "HOME=/", ++ "PATH=/sbin:/bin", ++ NULL ++ }; ++ ++ char tfb[5]; ++ char tcons[5]; ++ unsigned char fb = (int) con2fb_map[vc]; ++ ++ char *argv[] = { ++ fbcon_decor_path, ++ "2", ++ cmd, ++ tcons, ++ tfb, ++ vc_cons[vc].d->vc_decor.theme, ++ NULL ++ }; ++ ++ snprintf(tfb,5,"%d",fb); ++ snprintf(tcons,5,"%d",vc); ++ ++ return call_usermodehelper(fbcon_decor_path, argv, envp, 1); ++} ++ ++/* Disables fbcondecor on a virtual console; called with console sem held. 
*/ ++int fbcon_decor_disable(struct vc_data *vc, unsigned char redraw) ++{ ++ struct fb_info* info; ++ ++ if (!vc->vc_decor.state) ++ return -EINVAL; ++ ++ info = registered_fb[(int) con2fb_map[vc->vc_num]]; ++ ++ if (info == NULL) ++ return -EINVAL; ++ ++ vc->vc_decor.state = 0; ++ vc_resize(vc, info->var.xres / vc->vc_font.width, ++ info->var.yres / vc->vc_font.height); ++ ++ if (fg_console == vc->vc_num && redraw) { ++ redraw_screen(vc, 0); ++ update_region(vc, vc->vc_origin + ++ vc->vc_size_row * vc->vc_top, ++ vc->vc_size_row * (vc->vc_bottom - vc->vc_top) / 2); ++ } ++ ++ printk(KERN_INFO "fbcondecor: switched decor state to 'off' on console %d\n", ++ vc->vc_num); ++ ++ return 0; ++} ++ ++/* Enables fbcondecor on a virtual console; called with console sem held. */ ++static int fbcon_decor_enable(struct vc_data *vc) ++{ ++ struct fb_info* info; ++ ++ info = registered_fb[(int) con2fb_map[vc->vc_num]]; ++ ++ if (vc->vc_decor.twidth == 0 || vc->vc_decor.theight == 0 || ++ info == NULL || vc->vc_decor.state || (!info->bgdecor.data && ++ vc->vc_num == fg_console)) ++ return -EINVAL; ++ ++ vc->vc_decor.state = 1; ++ vc_resize(vc, vc->vc_decor.twidth / vc->vc_font.width, ++ vc->vc_decor.theight / vc->vc_font.height); ++ ++ if (fg_console == vc->vc_num) { ++ redraw_screen(vc, 0); ++ update_region(vc, vc->vc_origin + ++ vc->vc_size_row * vc->vc_top, ++ vc->vc_size_row * (vc->vc_bottom - vc->vc_top) / 2); ++ fbcon_decor_clear_margins(vc, info, 0); ++ } ++ ++ printk(KERN_INFO "fbcondecor: switched decor state to 'on' on console %d\n", ++ vc->vc_num); ++ ++ return 0; ++} ++ ++static inline int fbcon_decor_ioctl_dosetstate(struct vc_data *vc, unsigned int __user* state, unsigned char origin) ++{ ++ int tmp, ret; ++ ++ if (get_user(tmp, state)) ++ return -EFAULT; ++ ++ if (origin == FBCON_DECOR_IO_ORIG_USER) ++ acquire_console_sem(); ++ if (!tmp) ++ ret = fbcon_decor_disable(vc, 1); ++ else ++ ret = fbcon_decor_enable(vc); ++ if (origin == FBCON_DECOR_IO_ORIG_USER) ++ 
release_console_sem(); ++ ++ return ret; ++} ++ ++static inline int fbcon_decor_ioctl_dogetstate(struct vc_data *vc, unsigned int __user *state) ++{ ++ return put_user(vc->vc_decor.state, (unsigned int __user*) state); ++} ++ ++static int fbcon_decor_ioctl_dosetcfg(struct vc_data *vc, struct vc_decor __user *arg, unsigned char origin) ++{ ++ struct vc_decor cfg; ++ struct fb_info *info; ++ int len; ++ char *tmp; ++ ++ info = registered_fb[(int) con2fb_map[vc->vc_num]]; ++ ++ if (copy_from_user(&cfg, arg, sizeof(struct vc_decor))) ++ return -EFAULT; ++ if (info == NULL || !cfg.twidth || !cfg.theight || ++ cfg.tx + cfg.twidth > info->var.xres || ++ cfg.ty + cfg.theight > info->var.yres) ++ return -EINVAL; ++ ++ len = strlen_user(cfg.theme); ++ if (!len || len > FBCON_DECOR_THEME_LEN) ++ return -EINVAL; ++ tmp = kmalloc(len, GFP_KERNEL); ++ if (!tmp) ++ return -ENOMEM; ++ if (copy_from_user(tmp, (void __user *)cfg.theme, len)) { ++ /* do not leak the theme buffer when the copy faults */ ++ kfree(tmp); ++ return -EFAULT; ++ } ++ /* the user string may have changed since strlen_user(); force termination */ ++ tmp[len - 1] = '\0'; ++ cfg.theme = tmp; ++ cfg.state = 0; ++ ++ /* If this ioctl is a response to a request from kernel, the console sem ++ * is already held; we also don't need to disable decor because either the ++ * new config and background picture will be successfully loaded, and the ++ * decor will stay on, or in case of a failure it'll be turned off in fbcon. 
*/ ++ if (origin == FBCON_DECOR_IO_ORIG_USER) { ++ acquire_console_sem(); ++ if (vc->vc_decor.state) ++ fbcon_decor_disable(vc, 1); ++ } ++ ++ if (vc->vc_decor.theme) ++ kfree(vc->vc_decor.theme); ++ ++ vc->vc_decor = cfg; ++ ++ if (origin == FBCON_DECOR_IO_ORIG_USER) ++ release_console_sem(); ++ ++ printk(KERN_INFO "fbcondecor: console %d using theme '%s'\n", ++ vc->vc_num, vc->vc_decor.theme); ++ return 0; ++} ++ ++static int fbcon_decor_ioctl_dogetcfg(struct vc_data *vc, struct vc_decor __user *arg) ++{ ++ struct vc_decor decor; ++ char __user *tmp; ++ ++ if (get_user(tmp, &arg->theme)) ++ return -EFAULT; ++ ++ decor = vc->vc_decor; ++ decor.theme = tmp; ++ ++ if (vc->vc_decor.theme) { ++ if (copy_to_user(tmp, vc->vc_decor.theme, strlen(vc->vc_decor.theme) + 1)) ++ return -EFAULT; ++ } else ++ if (put_user(0, tmp)) ++ return -EFAULT; ++ ++ if (copy_to_user(arg, &decor, sizeof(struct vc_decor))) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++static int fbcon_decor_ioctl_dosetpic(struct vc_data *vc, struct fb_image __user *arg, unsigned char origin) ++{ ++ struct fb_image img; ++ struct fb_info *info; ++ int len; ++ u8 *tmp; ++ ++ if (vc->vc_num != fg_console) ++ return -EINVAL; ++ ++ info = registered_fb[(int) con2fb_map[vc->vc_num]]; ++ ++ if (info == NULL) ++ return -EINVAL; ++ ++ if (copy_from_user(&img, arg, sizeof(struct fb_image))) ++ return -EFAULT; ++ ++ if (img.width != info->var.xres || img.height != info->var.yres) { ++ printk(KERN_ERR "fbcondecor: picture dimensions mismatch\n"); ++ return -EINVAL; ++ } ++ ++ if (img.depth != info->var.bits_per_pixel) { ++ printk(KERN_ERR "fbcondecor: picture depth mismatch\n"); ++ return -EINVAL; ++ } ++ ++ if (img.depth == 8) { ++ /* An 8bpp palette has at most 256 entries; bounding the user-supplied ++ * length also keeps the size computations below from wrapping. */ ++ if (!img.cmap.len || img.cmap.len > 256 || ++ !img.cmap.red || !img.cmap.green || !img.cmap.blue) ++ return -EINVAL; ++ ++ tmp = vmalloc(img.cmap.len * 3 * 2); ++ if (!tmp) ++ return -ENOMEM; ++ ++ if (copy_from_user(tmp, (void __user*)img.cmap.red, img.cmap.len * 2) || ++ copy_from_user(tmp + (img.cmap.len << 1), 
++ (void __user*)img.cmap.green, (img.cmap.len << 1)) || ++ copy_from_user(tmp + (img.cmap.len << 2), ++ (void __user*)img.cmap.blue, (img.cmap.len << 1))) { ++ vfree(tmp); ++ return -EFAULT; ++ } ++ ++ img.cmap.transp = NULL; ++ img.cmap.red = (u16*)tmp; ++ img.cmap.green = img.cmap.red + img.cmap.len; ++ img.cmap.blue = img.cmap.green + img.cmap.len; ++ } else { ++ img.cmap.red = NULL; ++ } ++ ++ len = ((img.depth + 7) >> 3) * img.width * img.height; ++ tmp = vmalloc(len); ++ ++ if (!tmp) ++ goto out; ++ ++ if (copy_from_user(tmp, (void __user*)img.data, len)) { ++ /* a faulting user pointer is not an allocation failure */ ++ vfree(tmp); ++ if (img.cmap.red) ++ vfree(img.cmap.red); ++ return -EFAULT; ++ } ++ ++ img.data = tmp; ++ ++ /* If this ioctl is a response to a request from kernel, the console sem ++ * is already held. */ ++ if (origin == FBCON_DECOR_IO_ORIG_USER) ++ acquire_console_sem(); ++ ++ if (info->bgdecor.data) ++ vfree((u8*)info->bgdecor.data); ++ if (info->bgdecor.cmap.red) ++ vfree(info->bgdecor.cmap.red); ++ ++ info->bgdecor = img; ++ ++ if (origin == FBCON_DECOR_IO_ORIG_USER) ++ release_console_sem(); ++ ++ return 0; ++ ++out: if (img.cmap.red) ++ vfree(img.cmap.red); ++ if (tmp) ++ vfree(tmp); ++ return -ENOMEM; ++} ++ ++static int fbcon_decor_ioctl(struct inode * inode, struct file *filp, u_int cmd, ++ u_long arg) ++{ ++ struct fbcon_decor_iowrapper __user *wrapper = (void __user*) arg; ++ struct vc_data *vc = NULL; ++ unsigned short vc_num = 0; ++ unsigned char origin = 0; ++ void __user *data = NULL; ++ ++ if (!access_ok(VERIFY_READ, wrapper, ++ sizeof(struct fbcon_decor_iowrapper))) ++ return -EFAULT; ++ ++ /* reject the call if any field of the wrapper cannot be read */ ++ if (__get_user(vc_num, &wrapper->vc) || ++ __get_user(origin, &wrapper->origin) || ++ __get_user(data, &wrapper->data)) ++ return -EFAULT; ++ ++ if (!vc_cons_allocated(vc_num)) ++ return -EINVAL; ++ ++ vc = vc_cons[vc_num].d; ++ ++ switch (cmd) { ++ case FBIOCONDECOR_SETPIC: ++ return fbcon_decor_ioctl_dosetpic(vc, (struct fb_image __user*)data, origin); ++ case FBIOCONDECOR_SETCFG: ++ return fbcon_decor_ioctl_dosetcfg(vc, (struct vc_decor*)data, origin); ++ case FBIOCONDECOR_GETCFG: ++ return 
fbcon_decor_ioctl_dogetcfg(vc, (struct vc_decor*)data); ++ case FBIOCONDECOR_SETSTATE: ++ return fbcon_decor_ioctl_dosetstate(vc, (unsigned int *)data, origin); ++ case FBIOCONDECOR_GETSTATE: ++ return fbcon_decor_ioctl_dogetstate(vc, (unsigned int *)data); ++ default: ++ return -ENOIOCTLCMD; ++ } ++} ++ ++static struct file_operations fbcon_decor_ops = { ++ .owner = THIS_MODULE, ++ .ioctl = fbcon_decor_ioctl ++}; ++ ++static struct miscdevice fbcon_decor_dev = { ++ .minor = MISC_DYNAMIC_MINOR, ++ .name = "fbcondecor", ++ .fops = &fbcon_decor_ops ++}; ++ ++void fbcon_decor_reset(void) ++{ ++ struct fb_info *info; ++ struct vc_data *vc; ++ int i; ++ ++ vc = vc_cons[0].d; ++ info = registered_fb[0]; ++ ++ for (i = 0; i < num_registered_fb; i++) { ++ registered_fb[i]->bgdecor.data = NULL; ++ registered_fb[i]->bgdecor.cmap.red = NULL; ++ } ++ ++ for (i = 0; i < MAX_NR_CONSOLES && vc_cons[i].d; i++) { ++ vc_cons[i].d->vc_decor.state = vc_cons[i].d->vc_decor.twidth = ++ vc_cons[i].d->vc_decor.theight = 0; ++ vc_cons[i].d->vc_decor.theme = NULL; ++ } ++ ++ return; ++} ++ ++int fbcon_decor_init(void) ++{ ++ int i; ++ ++ fbcon_decor_reset(); ++ ++ if (initialized) ++ return 0; ++ ++ i = misc_register(&fbcon_decor_dev); ++ if (i) { ++ printk(KERN_ERR "fbcondecor: failed to register device\n"); ++ return i; ++ } ++ ++ fbcon_decor_call_helper("init", 0); ++ initialized = 1; ++ return 0; ++} ++ ++int fbcon_decor_exit(void) ++{ ++ fbcon_decor_reset(); ++ return 0; ++} ++ ++EXPORT_SYMBOL(fbcon_decor_path); +diff -Naurp -x .git /tmp/linux/drivers/video/console/fbcondecor.h ./drivers/video/console/fbcondecor.h +--- /tmp/linux/drivers/video/console/fbcondecor.h 1970-01-01 01:00:00.000000000 +0100 ++++ ./drivers/video/console/fbcondecor.h 2008-03-25 23:04:10.000000000 +0100 +@@ -0,0 +1,78 @@ ++/* ++ * linux/drivers/video/console/fbcondecor.h -- Framebuffer Console Decoration headers ++ * ++ * Copyright (C) 2004 Michal Januszewski ++ * ++ */ ++ ++#ifndef __FBCON_DECOR_H ++#define 
__FBCON_DECOR_H ++ ++#ifndef _LINUX_FB_H ++#include ++#endif ++ ++/* This is needed for vc_cons in fbcmap.c */ ++#include ++ ++struct fb_cursor; ++struct fb_info; ++struct vc_data; ++ ++#ifdef CONFIG_FB_CON_DECOR ++/* fbcondecor.c */ ++int fbcon_decor_init(void); ++int fbcon_decor_exit(void); ++int fbcon_decor_call_helper(char* cmd, unsigned short cons); ++int fbcon_decor_disable(struct vc_data *vc, unsigned char redraw); ++ ++/* cfbcondecor.c */ ++void fbcon_decor_putcs(struct vc_data *vc, struct fb_info *info, const unsigned short *s, int count, int yy, int xx); ++void fbcon_decor_cursor(struct fb_info *info, struct fb_cursor *cursor); ++void fbcon_decor_clear(struct vc_data *vc, struct fb_info *info, int sy, int sx, int height, int width); ++void fbcon_decor_clear_margins(struct vc_data *vc, struct fb_info *info, int bottom_only); ++void fbcon_decor_blank(struct vc_data *vc, struct fb_info *info, int blank); ++void fbcon_decor_bmove_redraw(struct vc_data *vc, struct fb_info *info, int y, int sx, int dx, int width); ++void fbcon_decor_copy(u8 *dst, u8 *src, int height, int width, int linebytes, int srclinesbytes, int bpp); ++void fbcon_decor_fix_pseudo_pal(struct fb_info *info, struct vc_data *vc); ++ ++/* vt.c */ ++void acquire_console_sem(void); ++void release_console_sem(void); ++void do_unblank_screen(int entering_gfx); ++ ++/* struct vc_data *y */ ++#define fbcon_decor_active_vc(y) (y->vc_decor.state && y->vc_decor.theme) ++ ++/* struct fb_info *x, struct vc_data *y */ ++#define fbcon_decor_active_nores(x,y) (x->bgdecor.data && fbcon_decor_active_vc(y)) ++ ++/* struct fb_info *x, struct vc_data *y */ ++#define fbcon_decor_active(x,y) (fbcon_decor_active_nores(x,y) && \ ++ x->bgdecor.width == x->var.xres && \ ++ x->bgdecor.height == x->var.yres && \ ++ x->bgdecor.depth == x->var.bits_per_pixel) ++ ++ ++#else /* CONFIG_FB_CON_DECOR */ ++ ++static inline void fbcon_decor_putcs(struct vc_data *vc, struct fb_info *info, const unsigned short *s, int count, int yy, 
int xx) {} ++static inline void fbcon_decor_putc(struct vc_data *vc, struct fb_info *info, int c, int ypos, int xpos) {} ++static inline void fbcon_decor_cursor(struct fb_info *info, struct fb_cursor *cursor) {} ++static inline void fbcon_decor_clear(struct vc_data *vc, struct fb_info *info, int sy, int sx, int height, int width) {} ++static inline void fbcon_decor_clear_margins(struct vc_data *vc, struct fb_info *info, int bottom_only) {} ++static inline void fbcon_decor_blank(struct vc_data *vc, struct fb_info *info, int blank) {} ++static inline void fbcon_decor_bmove_redraw(struct vc_data *vc, struct fb_info *info, int y, int sx, int dx, int width) {} ++static inline void fbcon_decor_fix_pseudo_pal(struct fb_info *info, struct vc_data *vc) {} ++static inline int fbcon_decor_call_helper(char* cmd, unsigned short cons) { return 0; } ++static inline int fbcon_decor_init(void) { return 0; } ++static inline int fbcon_decor_exit(void) { return 0; } ++static inline int fbcon_decor_disable(struct vc_data *vc, unsigned char redraw) { return 0; } ++ ++#define fbcon_decor_active_vc(y) (0) ++#define fbcon_decor_active_nores(x,y) (0) ++#define fbcon_decor_active(x,y) (0) ++ ++#endif /* CONFIG_FB_CON_DECOR */ ++ ++#endif /* __FBCON_DECOR_H */ +diff -Naurp -x .git /tmp/linux/drivers/video/console/Kconfig ./drivers/video/console/Kconfig +--- /tmp/linux/drivers/video/console/Kconfig 2008-03-25 07:24:16.000000000 +0100 ++++ ./drivers/video/console/Kconfig 2008-03-25 23:04:10.000000000 +0100 +@@ -144,6 +144,19 @@ config FRAMEBUFFER_CONSOLE_ROTATION + such that other users of the framebuffer will remain normally + oriented. + ++config FB_CON_DECOR ++ bool "Support for the Framebuffer Console Decorations" ++ depends on FRAMEBUFFER_CONSOLE=y && !FB_TILEBLITTING ++ default n ++ ---help--- ++ This option enables support for framebuffer console decorations which ++ makes it possible to display images in the background of the system ++ consoles. 
Note that userspace utilities are necessary in order to take ++ advantage of these features. Refer to Documentation/fb/fbcondecor.txt ++ for more information. ++ ++ If unsure, say N. ++ + config STI_CONSOLE + bool "STI text console" + depends on PARISC +diff -Naurp -x .git /tmp/linux/drivers/video/console/Makefile ./drivers/video/console/Makefile +--- /tmp/linux/drivers/video/console/Makefile 2008-03-25 07:24:16.000000000 +0100 ++++ ./drivers/video/console/Makefile 2008-03-25 23:04:10.000000000 +0100 +@@ -35,6 +35,7 @@ obj-$(CONFIG_FRAMEBUFFER_CONSOLE) += + fbcon_ccw.o + endif + ++obj-$(CONFIG_FB_CON_DECOR) += fbcondecor.o cfbcondecor.o + obj-$(CONFIG_FB_STI) += sticore.o font.o + + ifeq ($(CONFIG_USB_SISUSBVGA_CON),y) +diff -Naurp -x .git /tmp/linux/drivers/video/fbcmap.c ./drivers/video/fbcmap.c +--- /tmp/linux/drivers/video/fbcmap.c 2008-03-25 07:24:16.000000000 +0100 ++++ ./drivers/video/fbcmap.c 2008-03-25 23:04:10.000000000 +0100 +@@ -17,6 +17,8 @@ + #include + #include + ++#include "console/fbcondecor.h" ++ + static u16 red2[] __read_mostly = { + 0x0000, 0xaaaa + }; +@@ -234,14 +236,17 @@ int fb_set_cmap(struct fb_cmap *cmap, st + if (transp) + htransp = *transp++; + if (info->fbops->fb_setcolreg(start++, +- hred, hgreen, hblue, ++ hred, hgreen, hblue, + htransp, info)) + break; + } + } +- if (rc == 0) ++ if (rc == 0) { + fb_copy_cmap(cmap, &info->cmap); +- ++ if (fbcon_decor_active(info, vc_cons[fg_console].d) && ++ info->fix.visual == FB_VISUAL_DIRECTCOLOR) ++ fbcon_decor_fix_pseudo_pal(info, vc_cons[fg_console].d); ++ } + return rc; + } + +@@ -249,7 +254,7 @@ int fb_set_user_cmap(struct fb_cmap_user + { + int rc, size = cmap->len * sizeof(u16); + struct fb_cmap umap; +- ++ + if (cmap->start < 0 || (!info->fbops->fb_setcolreg && + !info->fbops->fb_setcmap)) + return -EINVAL; +diff -Naurp -x .git /tmp/linux/drivers/video/Kconfig ./drivers/video/Kconfig +--- /tmp/linux/drivers/video/Kconfig 2008-03-25 07:24:16.000000000 +0100 ++++ ./drivers/video/Kconfig 
2008-03-25 23:04:10.000000000 +0100 +@@ -1134,7 +1134,6 @@ config FB_MATROX + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT +- select FB_TILEBLITTING + select FB_MACMODES if PPC_PMAC + ---help--- + Say Y here if you have a Matrox Millennium, Matrox Millennium II, +diff -Naurp -x .git /tmp/linux/include/linux/console_decor.h ./include/linux/console_decor.h +--- /tmp/linux/include/linux/console_decor.h 1970-01-01 01:00:00.000000000 +0100 ++++ ./include/linux/console_decor.h 2008-03-25 23:04:10.000000000 +0100 +@@ -0,0 +1,13 @@ ++#ifndef _LINUX_CONSOLE_DECOR_H_ ++#define _LINUX_CONSOLE_DECOR_H_ 1 ++ ++/* A structure used by the framebuffer console decorations (drivers/video/console/fbcondecor.c) */ ++struct vc_decor { ++ __u8 bg_color; /* The color that is to be treated as transparent */ ++ __u8 state; /* Current decor state: 0 = off, 1 = on */ ++ __u16 tx, ty; /* Top left corner coordinates of the text field */ ++ __u16 twidth, theight; /* Width and height of the text field */ ++ char* theme; ++}; ++ ++#endif +diff -Naurp -x .git /tmp/linux/include/linux/console_struct.h ./include/linux/console_struct.h +--- /tmp/linux/include/linux/console_struct.h 2008-03-25 07:24:16.000000000 +0100 ++++ ./include/linux/console_struct.h 2008-03-25 23:04:10.000000000 +0100 +@@ -19,6 +19,7 @@ + struct vt_struct; + + #define NPAR 16 ++#include + + struct vc_data { + unsigned short vc_num; /* Console number */ +@@ -106,6 +107,8 @@ struct vc_data { + struct vc_data **vc_display_fg; /* [!] Ptr to var holding fg console for this display */ + unsigned long vc_uni_pagedir; + unsigned long *vc_uni_pagedir_loc; /* [!] 
Location of uni_pagedir variable for this console */ ++ ++ struct vc_decor vc_decor; + /* additional information is in vt_kern.h */ + }; + +diff -Naurp -x .git /tmp/linux/include/linux/fb.h ./include/linux/fb.h +--- /tmp/linux/include/linux/fb.h 2008-03-25 07:24:16.000000000 +0100 ++++ ./include/linux/fb.h 2008-03-25 23:04:10.000000000 +0100 +@@ -11,6 +11,13 @@ struct dentry; + #define FB_MAJOR 29 + #define FB_MAX 32 /* sufficient for now */ + ++struct fbcon_decor_iowrapper ++{ ++ unsigned short vc; /* Virtual console */ ++ unsigned char origin; /* Point of origin of the request */ ++ void *data; ++}; ++ + /* ioctls + 0x46 is 'F' */ + #define FBIOGET_VSCREENINFO 0x4600 +@@ -38,7 +45,15 @@ struct dentry; + #define FBIOGET_HWCINFO 0x4616 + #define FBIOPUT_MODEINFO 0x4617 + #define FBIOGET_DISPINFO 0x4618 +- ++#define FBIOCONDECOR_SETCFG _IOWR('F', 0x19, struct fbcon_decor_iowrapper) ++#define FBIOCONDECOR_GETCFG _IOR('F', 0x1A, struct fbcon_decor_iowrapper) ++#define FBIOCONDECOR_SETSTATE _IOWR('F', 0x1B, struct fbcon_decor_iowrapper) ++#define FBIOCONDECOR_GETSTATE _IOR('F', 0x1C, struct fbcon_decor_iowrapper) ++#define FBIOCONDECOR_SETPIC _IOWR('F', 0x1D, struct fbcon_decor_iowrapper) ++ ++#define FBCON_DECOR_THEME_LEN 128 /* Maximum lenght of a theme name */ ++#define FBCON_DECOR_IO_ORIG_KERNEL 0 /* Kernel ioctl origin */ ++#define FBCON_DECOR_IO_ORIG_USER 1 /* User ioctl origin */ + + #define FB_TYPE_PACKED_PIXELS 0 /* Packed Pixels */ + #define FB_TYPE_PLANES 1 /* Non interleaved planes */ +@@ -833,6 +848,9 @@ struct fb_info { + #define FBINFO_STATE_SUSPENDED 1 + u32 state; /* Hardware state i.e suspend */ + void *fbcon_par; /* fbcon use-only private area */ ++ ++ struct fb_image bgdecor; ++ + /* From here on everything is device dependent */ + void *par; + }; +diff -Naurp -x .git /tmp/linux/include/linux/fb.h.rej ./include/linux/fb.h.rej +--- /tmp/linux/include/linux/fb.h.rej 1970-01-01 01:00:00.000000000 +0100 ++++ ./include/linux/fb.h.rej 2006-08-21 
02:51:22.000000000 +0200 +@@ -0,0 +1,32 @@ ++*************** ++*** 1,7 **** ++ #ifndef _LINUX_FB_H ++ #define _LINUX_FB_H ++ ++- #include ++ #include ++ ++ /* Definitions of frame buffers */ ++--- 1,6 ---- ++ #ifndef _LINUX_FB_H ++ #define _LINUX_FB_H ++ ++ #include ++ ++ /* Definitions of frame buffers */ ++*************** ++*** 381,386 **** ++ #include ++ #include ++ #include ++ #include ++ ++ struct vm_area_struct; ++--- 380,386 ---- ++ #include ++ #include ++ #include +++ #include ++ #include ++ ++ struct vm_area_struct; +diff -Naurp -x .git /tmp/linux/kernel/sysctl.c ./kernel/sysctl.c +--- /tmp/linux/kernel/sysctl.c 2008-03-25 07:24:16.000000000 +0100 ++++ ./kernel/sysctl.c 2008-03-25 23:04:10.000000000 +0100 +@@ -107,6 +107,9 @@ static int ngroups_max = NGROUPS_MAX; + #ifdef CONFIG_KMOD + extern char modprobe_path[]; + #endif ++#ifdef CONFIG_FB_CON_DECOR ++extern char fbcon_decor_path[]; ++#endif + #ifdef CONFIG_CHR_DEV_SG + extern int sg_big_buff; + #endif +@@ -820,6 +823,18 @@ static struct ctl_table kern_table[] = { + .proc_handler = &proc_dostring, + .strategy = &sysctl_string, + }, ++#ifdef CONFIG_FB_CON_DECOR ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "fbcondecor", ++ .data = &fbcon_decor_path, ++ .maxlen = KMOD_PATH_LEN, ++ .mode = 0644, ++ .proc_handler = &proc_dostring, ++ .strategy = &sysctl_string, ++ }, ++#endif ++ + /* + * NOTE: do not add new entries to this table unless you have read + * Documentation/sysctl/ctl_unnumbered.txt diff --git a/kernel-forcedeth-WON.patch b/kernel-forcedeth-WON.patch index 308471d5..c5bb8c1b 100644 --- a/kernel-forcedeth-WON.patch +++ b/kernel-forcedeth-WON.patch @@ -1,15 +1,15 @@ --- linux-2.6.19/drivers/net/forcedeth.c.orig 2006-09-20 05:42:06.000000000 +0200 +++ linux-2.6.19/drivers/net/forcedeth.c 2006-11-25 14:19:31.000000000 +0100 -@@ -162,7 +162,8 @@ +@@ -166,7 +166,8 @@ * Hardware access: */ --#define DEV_NEED_TIMERIRQ 0x0001 /* set the timer irq flag in the irq mask */ -+#define 
DEV_NEED_TIMERIRQ_ORIG 0x0001 /* set the timer irq flag in the irq mask */ -+#define DEV_NEED_TIMERIRQ 0x0000 /* work-around for Wake-On-Lan functionality */ - #define DEV_NEED_LINKTIMER 0x0002 /* poll link settings. Relies on the timer irq */ - #define DEV_HAS_LARGEDESC 0x0004 /* device supports jumbo frames and needs packet format 2 */ - #define DEV_HAS_HIGH_DMA 0x0008 /* device supports 64bit dma */ +-#define DEV_NEED_TIMERIRQ 0x000001 /* set the timer irq flag in the irq mask */ ++#define DEV_NEED_TIMERIRQ_ORIG 0x000001 /* set the timer irq flag in the irq mask */ ++#define DEV_NEED_TIMERIRQ 0x000000 /* work-around for Wake-On-Lan functionality */ + #define DEV_NEED_LINKTIMER 0x000002 /* poll link settings. Relies on the timer irq */ + #define DEV_HAS_LARGEDESC 0x000004 /* device supports jumbo frames and needs packet format 2 */ + #define DEV_HAS_HIGH_DMA 0x000008 /* device supports 64bit dma */ @@ -4342,7 +4343,7 @@ np->msi_flags |= 0x0001; } diff --git a/kernel-grsec-caps.patch b/kernel-grsec-caps.patch new file mode 100644 index 00000000..e768ece8 --- /dev/null +++ b/kernel-grsec-caps.patch @@ -0,0 +1,12 @@ +--- e/grsecurity/gracl_cap.c~ 2008-05-18 23:53:55.000000000 +0200 ++++ e/grsecurity/gracl_cap.c 2008-05-18 23:55:05.591733291 +0200 +@@ -39,7 +39,8 @@ static const char *captab_log[] = { + "CAP_AUDIT_CONTROL", + "CAP_SETFCAP", + "CAP_MAC_OVERRIDE", +- "CAP_MAC_ADMIN" ++ "CAP_MAC_ADMIN", ++ "CAP_CONTEXT" + }; + + EXPORT_SYMBOL(gr_task_is_capable); diff --git a/kernel-grsec-common.patch b/kernel-grsec-common.patch new file mode 100644 index 00000000..5649139c --- /dev/null +++ b/kernel-grsec-common.patch @@ -0,0 +1,39 @@ +--- linux-2.6.20/grsecurity/gracl_shm.c~ 2007-03-24 04:54:27.000000000 +0100 ++++ linux-2.6.20/grsecurity/gracl_shm.c 2007-03-24 04:55:46.332159000 +0100 +@@ -6,6 +6,7 @@ + #include + #include + #include ++#include + + int + gr_handle_shmat(const pid_t shm_cprid, const pid_t shm_lapid, +--- linux-2.6.20/localversion-grsec 2007-03-24 
04:54:27.000000000 +0100 ++++ /dev/null 2007-03-24 05:05:10.455414500 +0100 +@@ -1 +0,0 @@ +--grsec +--- linux-2.6.20/grsecurity/grsec_sock.c~ 2007-03-24 05:38:40.000000000 +0100 ++++ linux-2.6.20/grsecurity/grsec_sock.c 2007-03-24 05:47:11.347998750 +0100 +@@ -7,6 +7,7 @@ + #include + #include + #include ++#include + #include + #include + #include +=== +=== analogous as capable() +=== +--- a/kernel/capability.c~ 2007-12-11 00:46:02.000000000 +0100 ++++ a/kernel/capability.c 2007-12-11 01:35:00.244481500 +0100 +@@ -322,6 +322,8 @@ + + int capable_nolog(int cap) + { ++ if (vs_check_bit(VXC_CAP_MASK, cap) && !vx_mcaps(1L << cap)) ++ return 0; + if (security_capable(cap) == 0 && gr_is_capable_nolog(cap)) { + current->flags |= PF_SUPERPRIV; + return 1; + diff --git a/kernel-grsec-minimal.patch b/kernel-grsec-minimal.patch new file mode 100644 index 00000000..e681c5ab --- /dev/null +++ b/kernel-grsec-minimal.patch @@ -0,0 +1,1189 @@ +diff -urNp linux-2.6.26.orig/arch/sparc/Makefile linux-2.6.26/arch/sparc/Makefile +--- linux-2.6.26.orig/arch/sparc/Makefile 2008-09-01 11:44:21.000000000 +0200 ++++ linux-2.6.26/arch/sparc/Makefile 2008-09-02 12:17:21.000000000 +0200 +@@ -36,7 +36,7 @@ drivers-$(CONFIG_OPROFILE) += arch/sparc + # Renaming is done to avoid confusing pattern matching rules in 2.5.45 (multy-) + INIT_Y := $(patsubst %/, %/built-in.o, $(init-y)) + CORE_Y := $(core-y) +-CORE_Y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ ++CORE_Y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ grsecurity/ + CORE_Y := $(patsubst %/, %/built-in.o, $(CORE_Y)) + DRIVERS_Y := $(patsubst %/, %/built-in.o, $(drivers-y)) + NET_Y := $(patsubst %/, %/built-in.o, $(net-y)) +diff -urNp linux-2.6.26.orig/drivers/char/keyboard.c linux-2.6.26/drivers/char/keyboard.c +--- linux-2.6.26.orig/drivers/char/keyboard.c 2008-09-01 11:43:37.000000000 +0200 ++++ linux-2.6.26/drivers/char/keyboard.c 2008-09-02 12:17:21.000000000 +0200 +@@ -633,6 +633,16 @@ static void k_spec(struct vc_data *vc, u + 
kbd->kbdmode == VC_MEDIUMRAW) && + value != KVAL(K_SAK)) + return; /* SAK is allowed even in raw mode */ ++ ++#if defined(CONFIG_GRKERNSEC_PROC) ++ { ++ void *func = fn_handler[value]; ++ if (func == fn_show_state || func == fn_show_ptregs || ++ func == fn_show_mem) ++ return; ++ } ++#endif ++ + fn_handler[value](vc); + } + +diff -urNp linux-2.6.26.orig/drivers/pci/proc.c linux-2.6.26/drivers/pci/proc.c +--- linux-2.6.26.orig/drivers/pci/proc.c 2008-09-01 11:43:47.000000000 +0200 ++++ linux-2.6.26/drivers/pci/proc.c 2008-09-02 12:17:21.000000000 +0200 +@@ -472,7 +472,16 @@ static const struct file_operations proc + static int __init pci_proc_init(void) + { + struct pci_dev *dev = NULL; ++ ++#ifdef CONFIG_GRKERNSEC_PROC_ADD ++#ifdef CONFIG_GRKERNSEC_PROC_USER ++ proc_bus_pci_dir = proc_mkdir_mode("bus/pci", S_IRUSR | S_IXUSR, NULL); ++#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ proc_bus_pci_dir = proc_mkdir_mode("bus/pci", S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP, NULL); ++#endif ++#else + proc_bus_pci_dir = proc_mkdir("bus/pci", NULL); ++#endif + proc_create("devices", 0, proc_bus_pci_dir, + &proc_bus_pci_dev_operations); + proc_initialized = 1; +diff -urNp linux-2.6.26.orig/fs/Kconfig linux-2.6.26/fs/Kconfig +--- linux-2.6.26.orig/fs/proc/Kconfig 2008-09-01 11:43:58.000000000 +0200 ++++ linux-2.6.26/fs/proc/Kconfig 2008-09-02 12:17:21.000000000 +0200 +@@ -926,12 +926,12 @@ config PROC_FS + + config PROC_KCORE + bool "/proc/kcore support" if !ARM +- depends on PROC_FS && MMU ++ depends on PROC_FS && MMU && !GRKERNSEC_PROC_ADD + + config PROC_VMCORE + bool "/proc/vmcore support (EXPERIMENTAL)" +- depends on PROC_FS && CRASH_DUMP +- default y ++ depends on PROC_FS && CRASH_DUMP && !GRKERNSEC ++ default n + help + Exports the dump image of crashed kernel in ELF format. 
+ +diff -urNp linux-2.6.26.orig/fs/namei.c linux-2.6.26/fs/namei.c +--- linux-2.6.26.orig/fs/namei.c 2008-09-01 11:43:59.000000000 +0200 ++++ linux-2.6.26/fs/namei.c 2008-09-02 12:17:21.000000000 +0200 +@@ -38,6 +38,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -740,6 +741,13 @@ static inline int do_follow_link(struct + err = security_inode_follow_link(path->dentry, nd); + if (err) + goto loop; ++ ++ if (gr_handle_follow_link(path->dentry->d_parent->d_inode, ++ path->dentry->d_inode, path->dentry)) { ++ err = -EACCES; ++ goto loop; ++ } ++ + current->link_count++; + current->total_link_count++; + nd->depth++; +@@ -1925,6 +1933,12 @@ do_last: + /* + * It already exists. + */ ++ ++ if (gr_handle_fifo(path.dentry, dir, flag, acc_mode)) { ++ error = -EACCES; ++ goto exit_mutex_unlock; ++ } ++ + mutex_unlock(&dir->d_inode->i_mutex); + audit_inode(pathname, path.dentry); + +@@ -2028,6 +2042,13 @@ do_link: + error = security_inode_follow_link(path.dentry, &nd); + if (error) + goto exit_dput; ++ ++ if (gr_handle_follow_link(path.dentry->d_parent->d_inode, path.dentry->d_inode, ++ path.dentry)) { ++ error = -EACCES; ++ goto exit_dput; ++ } ++ + error = __do_follow_link(&path, &nd); + if (error) { + /* Does someone understand code flow here? 
Or it is only +@@ -2669,6 +2690,13 @@ asmlinkage long sys_linkat(int olddfd, c + error = PTR_ERR(new_dentry); + if (IS_ERR(new_dentry)) + goto out_unlock; ++ ++ if (gr_handle_hardlink(old_path.dentry, old_path.dentry->d_inode, ++ old_path.dentry->d_inode->i_mode, to)) { ++ error = -EACCES; ++ goto out_dput; ++ } ++ + error = mnt_want_write(nd.path.mnt); + if (error) + goto out_dput; +diff -urNp linux-2.6.26.orig/fs/proc/array.c linux-2.6.26/fs/proc/array.c +--- linux-2.6.26.orig/fs/proc/array.c 2008-09-01 11:43:59.000000000 +0200 ++++ linux-2.6.26/fs/proc/array.c 2008-09-02 12:17:21.000000000 +0200 +@@ -639,3 +639,10 @@ int proc_pid_statm(struct seq_file *m, s + + return 0; + } ++ ++#ifdef CONFIG_GRKERNSEC_PROC_IPADDR ++int proc_pid_ipaddr(struct task_struct *task, char *buffer) ++{ ++ return sprintf(buffer, "%u.%u.%u.%u\n", NIPQUAD(task->signal->curr_ip)); ++} ++#endif +diff -urNp linux-2.6.26.orig/fs/proc/base.c linux-2.6.26/fs/proc/base.c +--- linux-2.6.26.orig/fs/proc/base.c 2008-09-01 11:43:59.000000000 +0200 ++++ linux-2.6.26/fs/proc/base.c 2008-09-02 12:23:45.000000000 +0200 +@@ -79,6 +79,8 @@ + #include + #include + #include ++#include ++ + #include "internal.h" + + /* NOTE: +@@ -307,9 +312,9 @@ static int proc_pid_auxv(struct task_str + struct mm_struct *mm = get_task_mm(task); + if (mm) { + unsigned int nwords = 0; +- do ++ do { + nwords += 2; +- while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ ++ } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ + res = nwords * sizeof(mm->saved_auxv[0]); + if (res > PAGE_SIZE) + res = PAGE_SIZE; +@@ -1412,7 +1417,11 @@ static struct inode *proc_pid_make_inode + inode->i_gid = 0; + if (task_dumpable(task)) { + inode->i_uid = task->euid; ++#ifdef CONFIG_GRKERNSEC_PROC_USERGROUP ++ inode->i_gid = CONFIG_GRKERNSEC_PROC_GID; ++#else + inode->i_gid = task->egid; ++#endif + } + /* procfs is xid tagged */ + inode->i_tag = (tag_t)vx_task_xid(task); +@@ -1430,17 +1439,39 @@ static int pid_getattr(struct vfsmount * + 
{ + struct inode *inode = dentry->d_inode; + struct task_struct *task; ++#if defined(CONFIG_GRKERNSEC_PROC_USER) || defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ struct task_struct *tmp = current; ++#endif ++ + generic_fillattr(inode, stat); + + rcu_read_lock(); + stat->uid = 0; + stat->gid = 0; + task = pid_task(proc_pid(inode), PIDTYPE_PID); +- if (task) { ++ ++ if (task ++#if defined(CONFIG_GRKERNSEC_PROC_USER) || defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ && (!tmp->uid || (tmp->uid == task->uid) ++#ifdef CONFIG_GRKERNSEC_PROC_USERGROUP ++ || in_group_p(CONFIG_GRKERNSEC_PROC_GID) ++#endif ++ ) ++#endif ++ ) { + if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || ++#ifdef CONFIG_GRKERNSEC_PROC_USER ++ (inode->i_mode == (S_IFDIR|S_IRUSR|S_IXUSR)) || ++#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ (inode->i_mode == (S_IFDIR|S_IRUSR|S_IRGRP|S_IXUSR|S_IXGRP)) || ++#endif + task_dumpable(task)) { + stat->uid = task->euid; ++#ifdef CONFIG_GRKERNSEC_PROC_USERGROUP ++ stat->gid = CONFIG_GRKERNSEC_PROC_GID; ++#else + stat->gid = task->egid; ++#endif + } + } + rcu_read_unlock(); +@@ -1468,11 +1505,21 @@ static int pid_revalidate(struct dentry + { + struct inode *inode = dentry->d_inode; + struct task_struct *task = get_proc_task(inode); ++ + if (task) { + if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || ++#ifdef CONFIG_GRKERNSEC_PROC_USER ++ (inode->i_mode == (S_IFDIR|S_IRUSR|S_IXUSR)) || ++#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ (inode->i_mode == (S_IFDIR|S_IRUSR|S_IRGRP|S_IXUSR|S_IXGRP)) || ++#endif + task_dumpable(task)) { + inode->i_uid = task->euid; ++#ifdef CONFIG_GRKERNSEC_PROC_USERGROUP ++ inode->i_gid = CONFIG_GRKERNSEC_PROC_GID; ++#else + inode->i_gid = task->egid; ++#endif + } else { + inode->i_uid = 0; + inode->i_gid = 0; +@@ -1841,12 +1888,19 @@ static int proc_fd_permission(struct ino + struct nameidata *nd) + { + int rv; ++ struct task_struct *task; + + rv = generic_permission(inode, mask, NULL); +- if (rv == 0) +- return 0; ++ + if 
(task_pid(current) == proc_pid(inode)) + rv = 0; ++ ++ task = get_proc_task(inode); ++ if (task == NULL) ++ return rv; ++ ++ put_task_struct(task); ++ + return rv; + } + +@@ -2617,7 +2683,14 @@ static struct dentry *proc_pid_instantia + if (!inode) + goto out; + ++#ifdef CONFIG_GRKERNSEC_PROC_USER ++ inode->i_mode = S_IFDIR|S_IRUSR|S_IXUSR; ++#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ inode->i_gid = CONFIG_GRKERNSEC_PROC_GID; ++ inode->i_mode = S_IFDIR|S_IRUSR|S_IRGRP|S_IXUSR|S_IXGRP; ++#else + inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; ++#endif + inode->i_op = &proc_tgid_base_inode_operations; + inode->i_fop = &proc_tgid_base_operations; + inode->i_flags|=S_IMMUTABLE; +@@ -2724,6 +2801,9 @@ int proc_pid_readdir(struct file * filp, + { + unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; + struct task_struct *reaper = get_proc_task_real(filp->f_path.dentry->d_inode); ++#if defined(CONFIG_GRKERNSEC_PROC_USER) || defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ struct task_struct *tmp = current; ++#endif + struct tgid_iter iter; + struct pid_namespace *ns; + +@@ -2742,6 +2822,15 @@ int proc_pid_readdir(struct file * filp, + for (iter = next_tgid(ns, iter); + iter.task; + iter.tgid += 1, iter = next_tgid(ns, iter)) { ++#if defined(CONFIG_GRKERNSEC_PROC_USER) || defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ if (tmp->uid && (iter.task->uid != tmp->uid) ++#ifdef CONFIG_GRKERNSEC_PROC_USERGROUP ++ && !in_group_p(CONFIG_GRKERNSEC_PROC_GID) ++#endif ++ ) ++ continue; /* must stay inside the #if: otherwise every task is skipped when proc restrictions are off */ ++#endif ++ + filp->f_pos = iter.tgid + TGID_OFFSET; + if (!vx_proc_task_visible(iter.task)) + continue; +@@ -2815,6 +2906,9 @@ static const struct pid_entry tid_base_s + #ifdef CONFIG_FAULT_INJECTION + REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), + #endif ++#ifdef CONFIG_GRKERNSEC_PROC_IPADDR ++ INF("ipaddr", S_IRUSR, pid_ipaddr), ++#endif + }; + + static int proc_tid_base_readdir(struct file * filp, +diff -urNp linux-2.6.26.orig/fs/proc/inode.c linux-2.6.26/fs/proc/inode.c +---
linux-2.6.26.orig/fs/proc/inode.c 2008-09-01 11:43:59.000000000 +0200 ++++ linux-2.6.26/fs/proc/inode.c 2008-09-02 12:17:21.000000000 +0200 +@@ -403,7 +403,11 @@ struct inode *proc_get_inode(struct supe + if (de->mode) { + inode->i_mode = de->mode; + inode->i_uid = de->uid; ++#ifdef CONFIG_GRKERNSEC_PROC_USERGROUP ++ inode->i_gid = CONFIG_GRKERNSEC_PROC_GID; ++#else + inode->i_gid = de->gid; ++#endif + } + if (de->vx_flags) + PROC_I(inode)->vx_flags = de->vx_flags; +diff -urNp linux-2.6.26.orig/fs/proc/internal.h linux-2.6.26/fs/proc/internal.h +--- linux-2.6.26.orig/fs/proc/internal.h 2008-09-01 11:43:59.000000000 +0200 ++++ linux-2.6.26/fs/proc/internal.h 2008-09-02 12:17:21.000000000 +0200 +@@ -58,6 +58,9 @@ extern int proc_pid_statm(struct seq_fil + struct pid *pid, struct task_struct *task); + extern int proc_pid_nsproxy(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task); ++#ifdef CONFIG_GRKERNSEC_PROC_IPADDR ++extern int proc_pid_ipaddr(struct task_struct *task, char *buffer); ++#endif + + extern loff_t mem_lseek(struct file *file, loff_t offset, int orig); + +--- linux-2.6.26.orig/fs/proc/cmdline.c 2008-12-25 00:26:37.000000000 +0100 ++++ linux-2.6.26/fs/proc/cmdline.c 2009-01-02 17:46:34.278247774 +0100 +@@ -23,7 +23,15 @@ + + static int __init proc_cmdline_init(void) + { +- proc_create("cmdline", 0, NULL, &cmdline_proc_fops); ++ int gr_mode = 0; /* 0 keeps the stock mode; tightened only under GRKERNSEC_PROC_ADD */ ++#ifdef CONFIG_GRKERNSEC_PROC_ADD ++#ifdef CONFIG_GRKERNSEC_PROC_USER ++ gr_mode = S_IRUSR; ++#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ gr_mode = S_IRUSR | S_IRGRP; ++#endif ++#endif ++ proc_create("cmdline", gr_mode, NULL, &cmdline_proc_fops); /* always register /proc/cmdline */ + return 0; + } + module_init(proc_cmdline_init); +--- linux-2.6.26.orig/fs/proc/devices.c 2008-12-25 00:26:37.000000000 +0100 ++++ linux-2.6.26/fs/proc/devices.c 2009-01-02 17:43:00.758269666 +0100 +@@ -64,7 +64,13 @@ + + static int __init proc_devices_init(void) + { +- proc_create("devices", 0, NULL,
&proc_devinfo_operations); ++ int gr_mode = 0; ++#ifdef CONFIG_GRKERNSEC_PROC_USER ++ gr_mode = S_IRUSR; ++#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ gr_mode = S_IRUSR | S_IRGRP; ++#endif ++ proc_create("devices", gr_mode, NULL, &proc_devinfo_operations); + return 0; + } + module_init(proc_devices_init); +--- linux-2.6.26.orig/fs/proc/kcore.c 2008-12-25 00:26:37.000000000 +0100 ++++ linux-2.6.26/fs/proc/kcore.c 2009-01-02 17:45:03.714922801 +0100 +@@ -404,10 +404,12 @@ + + static int __init proc_kcore_init(void) + { ++#if defined(CONFIG_PROC_KCORE) && !defined(CONFIG_GRKERNSEC_PROC_ADD) + proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations); + if (proc_root_kcore) + proc_root_kcore->size = + (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; ++#endif + return 0; + } + module_init(proc_kcore_init); +diff -urNp linux-2.6.26.orig/fs/proc/root.c linux-2.6.26/fs/proc/root.c +--- linux-2.6.26.orig/fs/proc/root.c 2008-09-01 11:43:59.000000000 +0200 ++++ linux-2.6.26/fs/proc/root.c 2008-09-02 12:17:21.000000000 +0200 +@@ -139,7 +139,15 @@ void __init proc_root_init(void) + #ifdef CONFIG_PROC_DEVICETREE + proc_device_tree_init(); + #endif ++#ifdef CONFIG_GRKERNSEC_PROC_ADD ++#ifdef CONFIG_GRKERNSEC_PROC_USER ++ proc_mkdir_mode("bus", S_IRUSR | S_IXUSR, NULL); ++#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ proc_mkdir_mode("bus", S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP, NULL); ++#endif ++#else + proc_mkdir("bus", NULL); ++#endif + proc_sys_init(); + proc_vx_init(); + } +diff -urNp linux-2.6.26.orig/grsecurity/grsec_disabled.c linux-2.6.26/grsecurity/grsec_disabled.c +--- linux-2.6.26.orig/grsecurity/grsec_disabled.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.26/grsecurity/grsec_disabled.c 2008-09-02 12:17:21.000000000 +0200 +@@ -0,0 +1,6 @@ ++void ++grsecurity_init(void) ++{ ++ return; ++} ++ +diff -urNp linux-2.6.26.orig/grsecurity/grsec_fifo.c linux-2.6.26/grsecurity/grsec_fifo.c +--- linux-2.6.26.orig/grsecurity/grsec_fifo.c 
1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.26/grsecurity/grsec_fifo.c 2008-09-02 12:17:21.000000000 +0200 +@@ -0,0 +1,20 @@ ++#include ++#include ++#include ++#include ++#include ++ ++int ++gr_handle_fifo(const struct dentry *dentry, const struct vfsmount *mnt, ++ const struct dentry *dir, const int flag, const int acc_mode) ++{ ++#ifdef CONFIG_GRKERNSEC_FIFO ++ if (grsec_enable_fifo && S_ISFIFO(dentry->d_inode->i_mode) && ++ !(flag & O_EXCL) && (dir->d_inode->i_mode & S_ISVTX) && ++ (dentry->d_inode->i_uid != dir->d_inode->i_uid) && ++ (current->fsuid != dentry->d_inode->i_uid)) { ++ return -EACCES; ++ } ++#endif ++ return 0; ++} +diff -urNp linux-2.6.26.orig/grsecurity/grsec_init.c linux-2.6.26/grsecurity/grsec_init.c +--- linux-2.6.26.orig/grsecurity/grsec_init.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.26/grsecurity/grsec_init.c 2008-09-02 12:17:21.000000000 +0200 +@@ -0,0 +1,29 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++int grsec_enable_link; ++int grsec_enable_fifo; ++int grsec_lock; ++ ++void ++grsecurity_init(void) ++{ ++#if !defined(CONFIG_GRKERNSEC_SYSCTL) || defined(CONFIG_GRKERNSEC_SYSCTL_ON) ++#ifndef CONFIG_GRKERNSEC_SYSCTL ++ grsec_lock = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_LINK ++ grsec_enable_link = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_FIFO ++ grsec_enable_fifo = 1; ++#endif ++#endif ++ ++ return; ++} +diff -urNp linux-2.6.26.orig/grsecurity/grsec_link.c linux-2.6.26/grsecurity/grsec_link.c +--- linux-2.6.26.orig/grsecurity/grsec_link.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.26/grsecurity/grsec_link.c 2008-09-02 12:17:21.000000000 +0200 +@@ -0,0 +1,37 @@ ++#include ++#include ++#include ++#include ++#include ++ ++int ++gr_handle_follow_link(const struct inode *parent, ++ const struct inode *inode, ++ const struct dentry *dentry, const struct vfsmount *mnt) ++{ ++#ifdef CONFIG_GRKERNSEC_LINK ++ if (grsec_enable_link && S_ISLNK(inode->i_mode) && ++ (parent->i_mode & S_ISVTX) 
&& (parent->i_uid != inode->i_uid) && ++ (parent->i_mode & S_IWOTH) && (current->fsuid != inode->i_uid)) { ++ return -EACCES; ++ } ++#endif ++ return 0; ++} ++ ++int ++gr_handle_hardlink(const struct dentry *dentry, ++ const struct vfsmount *mnt, ++ struct inode *inode, const int mode, const char *to) ++{ ++#ifdef CONFIG_GRKERNSEC_LINK ++ if (grsec_enable_link && current->fsuid != inode->i_uid && ++ (!S_ISREG(mode) || (mode & S_ISUID) || ++ ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) || ++ (generic_permission(inode, MAY_READ | MAY_WRITE, NULL))) && ++ !capable(CAP_FOWNER) && current->uid) { ++ return -EPERM; ++ } ++#endif ++ return 0; ++} +diff -urNp linux-2.6.26.orig/grsecurity/grsec_sock.c linux-2.6.26/grsecurity/grsec_sock.c +--- linux-2.6.26.orig/grsecurity/grsec_sock.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.26/grsecurity/grsec_sock.c 2008-09-02 12:17:21.000000000 +0200 +@@ -0,0 +1,170 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_GRKERNSEC ++#define gr_conn_table_size 32749 ++struct conn_table_entry { ++ struct conn_table_entry *next; ++ struct signal_struct *sig; ++}; ++ ++struct conn_table_entry *gr_conn_table[gr_conn_table_size]; ++spinlock_t gr_conn_table_lock = SPIN_LOCK_UNLOCKED; ++ ++extern const char * gr_socktype_to_name(unsigned char type); ++extern const char * gr_proto_to_name(unsigned char proto); ++ ++static __inline__ int ++conn_hash(__u32 saddr, __u32 daddr, __u16 sport, __u16 dport, unsigned int size) ++{ ++ return ((daddr + saddr + (sport << 8) + (dport << 16)) % size); ++} ++ ++static __inline__ int ++conn_match(const struct signal_struct *sig, __u32 saddr, __u32 daddr, ++ __u16 sport, __u16 dport) ++{ ++ if (unlikely(sig->gr_saddr == saddr && sig->gr_daddr == daddr && ++ sig->gr_sport == sport && sig->gr_dport == dport)) ++ return 1; ++ else ++ return 0; ++} ++ ++static void gr_add_to_task_ip_table_nolock(struct 
signal_struct *sig, struct conn_table_entry *newent) ++{ ++ struct conn_table_entry **match; ++ unsigned int index; ++ ++ index = conn_hash(sig->gr_saddr, sig->gr_daddr, ++ sig->gr_sport, sig->gr_dport, ++ gr_conn_table_size); ++ ++ newent->sig = sig; ++ ++ match = &gr_conn_table[index]; ++ newent->next = *match; ++ *match = newent; ++ ++ return; ++} ++ ++static void gr_del_task_from_ip_table_nolock(struct signal_struct *sig) ++{ ++ struct conn_table_entry *match, *last = NULL; ++ unsigned int index; ++ ++ index = conn_hash(sig->gr_saddr, sig->gr_daddr, ++ sig->gr_sport, sig->gr_dport, ++ gr_conn_table_size); ++ ++ match = gr_conn_table[index]; ++ while (match && !conn_match(match->sig, ++ sig->gr_saddr, sig->gr_daddr, sig->gr_sport, ++ sig->gr_dport)) { ++ last = match; ++ match = match->next; ++ } ++ ++ if (match) { ++ if (last) ++ last->next = match->next; ++ else ++ gr_conn_table[index] = match->next; /* unlink the head but keep the rest of the bucket chain */ ++ kfree(match); ++ } ++ ++ return; ++} ++ ++static struct signal_struct * gr_lookup_task_ip_table(__u32 saddr, __u32 daddr, ++ __u16 sport, __u16 dport) ++{ ++ struct conn_table_entry *match; ++ unsigned int index; ++ ++ index = conn_hash(saddr, daddr, sport, dport, gr_conn_table_size); ++ ++ match = gr_conn_table[index]; ++ while (match && !conn_match(match->sig, saddr, daddr, sport, dport)) ++ match = match->next; ++ ++ if (match) ++ return match->sig; ++ else ++ return NULL; ++} ++ ++#endif ++ ++void gr_update_task_in_ip_table(struct task_struct *task, const struct inet_sock *inet) ++{ ++#ifdef CONFIG_GRKERNSEC ++ struct signal_struct *sig = task->signal; ++ struct conn_table_entry *newent; ++ ++ newent = kmalloc(sizeof(struct conn_table_entry), GFP_ATOMIC); ++ if (newent == NULL) ++ return; ++ /* no bh lock needed since we are called with bh disabled */ ++ spin_lock(&gr_conn_table_lock); ++ gr_del_task_from_ip_table_nolock(sig); ++ sig->gr_saddr = inet->rcv_saddr; ++ sig->gr_daddr = inet->daddr; ++ sig->gr_sport = inet->sport; ++ sig->gr_dport = inet->dport; ++
gr_add_to_task_ip_table_nolock(sig, newent); ++ spin_unlock(&gr_conn_table_lock); ++#endif ++ return; ++} ++ ++void gr_del_task_from_ip_table(struct task_struct *task) ++{ ++#ifdef CONFIG_GRKERNSEC ++ spin_lock(&gr_conn_table_lock); ++ gr_del_task_from_ip_table_nolock(task->signal); ++ spin_unlock(&gr_conn_table_lock); ++#endif ++ return; ++} ++ ++void ++gr_attach_curr_ip(const struct sock *sk) ++{ ++#ifdef CONFIG_GRKERNSEC ++ struct signal_struct *p, *set; ++ const struct inet_sock *inet = inet_sk(sk); ++ ++ if (unlikely(sk->sk_protocol != IPPROTO_TCP)) ++ return; ++ ++ set = current->signal; ++ ++ spin_lock_bh(&gr_conn_table_lock); ++ p = gr_lookup_task_ip_table(inet->daddr, inet->rcv_saddr, ++ inet->dport, inet->sport); ++ if (unlikely(p != NULL)) { ++ set->curr_ip = p->curr_ip; ++ set->used_accept = 1; ++ gr_del_task_from_ip_table_nolock(p); ++ spin_unlock_bh(&gr_conn_table_lock); ++ return; ++ } ++ spin_unlock_bh(&gr_conn_table_lock); ++ ++ set->curr_ip = inet->daddr; ++ set->used_accept = 1; ++#endif ++ return; ++} ++ +diff -urNp linux-2.6.26.orig/grsecurity/grsec_sysctl.c linux-2.6.26/grsecurity/grsec_sysctl.c +--- linux-2.6.26.orig/grsecurity/grsec_sysctl.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.26/grsecurity/grsec_sysctl.c 2008-09-02 12:17:21.000000000 +0200 +@@ -0,0 +1,52 @@ ++#include ++#include ++#include ++#include ++#include ++ ++int ++gr_handle_sysctl_mod(const char *dirname, const char *name, const int op) ++{ ++#ifdef CONFIG_GRKERNSEC_SYSCTL ++ if (!strcmp(dirname, "grsecurity") && grsec_lock && (op & 002)) { ++ return -EACCES; ++ } ++#endif ++ return 0; ++} ++ ++#if defined(CONFIG_GRKERNSEC_SYSCTL) ++ctl_table grsecurity_table[] = { ++#ifdef CONFIG_GRKERNSEC_SYSCTL ++#ifdef CONFIG_GRKERNSEC_LINK ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "linking_restrictions", ++ .data = &grsec_enable_link, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_FIFO ++ { ++ 
.ctl_name = CTL_UNNUMBERED, ++ .procname = "fifo_restrictions", ++ .data = &grsec_enable_fifo, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "grsec_lock", ++ .data = &grsec_lock, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++ { .ctl_name = 0 } ++}; ++#endif +diff -urNp linux-2.6.26.orig/grsecurity/Kconfig linux-2.6.26/grsecurity/Kconfig +--- linux-2.6.26.orig/grsecurity/Kconfig 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.26/grsecurity/Kconfig 2008-09-02 12:17:21.000000000 +0200 +@@ -0,0 +1,123 @@ ++# ++# grecurity configuration ++# ++ ++menu "Grsecurity" ++ ++config GRKERNSEC ++ bool "Grsecurity" ++ select CRYPTO ++ select CRYPTO_SHA256 ++ select SECURITY ++ select SECURITY_CAPABILITIES ++ help ++ If you say Y here, you will be able to configure many features ++ that will enhance the security of your system. It is highly ++ recommended that you say Y here and read through the help ++ for each option so that you fully understand the features and ++ can evaluate their usefulness for your machine. ++ ++menu "Filesystem Protections" ++depends on GRKERNSEC ++ ++config GRKERNSEC_PROC ++ bool "Proc restrictions" ++ help ++ If you say Y here, the permissions of the /proc filesystem ++ will be altered to enhance system security and privacy. You MUST ++ choose either a user only restriction or a user and group restriction. ++ Depending upon the option you choose, you can either restrict users to ++ see only the processes they themselves run, or choose a group that can ++ view all processes and files normally restricted to root if you choose ++ the "restrict to user only" option. NOTE: If you're running identd as ++ a non-root user, you will have to run it as the group you specify here. 
++ ++config GRKERNSEC_PROC_USER ++ bool "Restrict /proc to user only" ++ depends on GRKERNSEC_PROC ++ help ++ If you say Y here, non-root users will only be able to view their own ++ processes, and restricts them from viewing network-related information, ++ and viewing kernel symbol and module information. ++ ++config GRKERNSEC_PROC_USERGROUP ++ bool "Allow special group" ++ depends on GRKERNSEC_PROC && !GRKERNSEC_PROC_USER ++ help ++ If you say Y here, you will be able to select a group that will be ++ able to view all processes, network-related information, and ++ kernel and symbol information. This option is useful if you want ++ to run identd as a non-root user. ++ ++config GRKERNSEC_PROC_GID ++ int "GID for special group" ++ depends on GRKERNSEC_PROC_USERGROUP ++ default 1001 ++ ++config GRKERNSEC_PROC_ADD ++ bool "Additional restrictions" ++ depends on GRKERNSEC_PROC_USER || GRKERNSEC_PROC_USERGROUP ++ help ++ If you say Y here, additional restrictions will be placed on ++ /proc that keep normal users from viewing device information and ++ slabinfo information that could be useful for exploits. ++ ++config GRKERNSEC_LINK ++ bool "Linking restrictions" ++ help ++ If you say Y here, /tmp race exploits will be prevented, since users ++ will no longer be able to follow symlinks owned by other users in ++ world-writable +t directories (i.e. /tmp), unless the owner of the ++ symlink is the owner of the directory. users will also not be ++ able to hardlink to files they do not own. If the sysctl option is ++ enabled, a sysctl option with name "linking_restrictions" is created. ++ ++config GRKERNSEC_FIFO ++ bool "FIFO restrictions" ++ help ++ If you say Y here, users will not be able to write to FIFOs they don't ++ own in world-writable +t directories (i.e. /tmp), unless the owner of ++ the FIFO is the same owner of the directory it's held in. If the sysctl ++ option is enabled, a sysctl option with name "fifo_restrictions" is ++ created. 
++ ++config GRKERNSEC_PROC_IPADDR ++ bool "/proc//ipaddr support" ++ help ++ If you say Y here, a new entry will be added to each /proc/ ++ directory that contains the IP address of the person using the task. ++ The IP is carried across local TCP and AF_UNIX stream sockets. ++ This information can be useful for IDS/IPSes to perform remote response ++ to a local attack. The entry is readable by only the owner of the ++ process (and root if he has CAP_DAC_OVERRIDE, which can be removed via ++ the RBAC system), and thus does not create privacy concerns. ++ ++endmenu ++ ++config GRKERNSEC_SYSCTL ++ bool "Sysctl support" ++ help ++ If you say Y here, you will be able to change the options that ++ grsecurity runs with at bootup, without having to recompile your ++ kernel. You can echo values to files in /proc/sys/kernel/grsecurity ++ to enable (1) or disable (0) various features. All the sysctl entries ++ are mutable until the "grsec_lock" entry is set to a non-zero value. ++ All features enabled in the kernel configuration are disabled at boot ++ if you do not say Y to the "Turn on features by default" option. ++ All options should be set at startup, and the grsec_lock entry should ++ be set to a non-zero value after all the options are set. ++ *THIS IS EXTREMELY IMPORTANT* ++ ++config GRKERNSEC_SYSCTL_ON ++ bool "Turn on features by default" ++ depends on GRKERNSEC_SYSCTL ++ help ++ If you say Y here, instead of having all features enabled in the ++ kernel configuration disabled at boot time, the features will be ++ enabled at boot time. It is recommended you say Y here unless ++ there is some reason you would want all sysctl-tunable features to ++ be disabled by default. As mentioned elsewhere, it is important ++ to enable the grsec_lock entry once you have finished modifying ++ the sysctl entries. 
++ ++endmenu +diff -urNp linux-2.6.26.orig/grsecurity/Makefile linux-2.6.26/grsecurity/Makefile +--- linux-2.6.26.orig/grsecurity/Makefile 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.26/grsecurity/Makefile 2008-09-02 12:17:21.000000000 +0200 +@@ -0,0 +1,11 @@ ++# All code in this directory and various hooks inserted throughout the kernel ++# are copyright Brad Spengler, and released under the GPL v2 or higher ++ ++obj-y = grsec_fifo.o grsec_sock.o grsec_sysctl.o grsec_link.o ++ ++obj-$(CONFIG_GRKERNSEC) += grsec_init.o ++ ++ifndef CONFIG_GRKERNSEC ++obj-y += grsec_disabled.o ++endif ++ +diff -urNp linux-2.6.26.orig/include/linux/grinternal.h linux-2.6.26/include/linux/grinternal.h +--- linux-2.6.26.orig/include/linux/grinternal.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.26/include/linux/grinternal.h 2008-09-02 12:17:21.000000000 +0200 +@@ -0,0 +1,14 @@ ++#ifndef __GRINTERNAL_H ++#define __GRINTERNAL_H ++ ++#ifdef CONFIG_GRKERNSEC ++ ++#include ++ ++extern int grsec_enable_link; ++extern int grsec_enable_fifo; ++extern int grsec_lock; ++ ++#endif ++ ++#endif +diff -urNp linux-2.6.26.orig/include/linux/grsecurity.h linux-2.6.26/include/linux/grsecurity.h +--- linux-2.6.26.orig/include/linux/grsecurity.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.26/include/linux/grsecurity.h 2008-09-02 12:17:21.000000000 +0200 +@@ -0,0 +1,18 @@ ++#ifndef GR_SECURITY_H ++#define GR_SECURITY_H ++#include ++#include ++ ++void gr_del_task_from_ip_table(struct task_struct *p); ++ ++int gr_handle_follow_link(const struct inode *parent, ++ const struct inode *inode, ++ const struct dentry *dentry); ++int gr_handle_fifo(const struct dentry *dentry, ++ const struct dentry *dir, const int flag, ++ const int acc_mode); ++int gr_handle_hardlink(const struct dentry *dentry, ++ struct inode *inode, ++ const int mode, const char *to); ++ ++#endif +diff -urNp linux-2.6.26.orig/include/linux/sched.h linux-2.6.26/include/linux/sched.h +--- 
linux-2.6.26.orig/include/linux/sched.h 2008-09-01 11:43:34.000000000 +0200 ++++ linux-2.6.26/include/linux/sched.h 2008-09-02 12:17:21.000000000 +0200 +@@ -544,6 +544,15 @@ struct signal_struct { + unsigned audit_tty; + struct tty_audit_buf *tty_audit_buf; + #endif ++ ++#ifdef CONFIG_GRKERNSEC ++ u32 curr_ip; ++ u32 gr_saddr; ++ u32 gr_daddr; ++ u16 gr_sport; ++ u16 gr_dport; ++ u8 used_accept:1; ++#endif + }; + + /* Context switch must be unlocked if interrupts are to be enabled */ +diff -urNp linux-2.6.26.orig/include/linux/sysctl.h linux-2.6.26/include/linux/sysctl.h +--- linux-2.6.26.orig/include/linux/sysctl.h 2008-09-01 11:43:34.000000000 +0200 ++++ linux-2.6.26/include/linux/sysctl.h 2008-09-02 12:17:21.000000000 +0200 +@@ -165,8 +165,11 @@ enum + KERN_MAX_LOCK_DEPTH=74, + KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */ + KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */ +-}; ++#ifdef CONFIG_GRKERNSEC ++ KERN_GRSECURITY=98, /* grsecurity */ ++#endif + ++}; + + + /* CTL_VM names: */ +diff -urNp linux-2.6.26.orig/kernel/configs.c linux-2.6.26/kernel/configs.c +--- linux-2.6.26.orig/kernel/configs.c 2008-09-01 11:43:58.000000000 +0200 ++++ linux-2.6.26/kernel/configs.c 2008-09-02 12:17:21.000000000 +0200 +@@ -79,8 +79,19 @@ static int __init ikconfig_init(void) + struct proc_dir_entry *entry; + + /* create the current config file */ ++#ifdef CONFIG_GRKERNSEC_PROC_ADD ++#ifdef CONFIG_GRKERNSEC_PROC_USER ++ entry = proc_create("config.gz", S_IFREG | S_IRUSR, NULL, ++ &ikconfig_file_ops); ++#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ entry = proc_create("config.gz", S_IFREG | S_IRUSR | S_IRGRP, NULL, ++ &ikconfig_file_ops); ++#endif ++#else + entry = proc_create("config.gz", S_IFREG | S_IRUGO, NULL, + &ikconfig_file_ops); ++#endif ++ + if (!entry) + return -ENOMEM; + +diff -urNp linux-2.6.26.orig/kernel/exit.c linux-2.6.26/kernel/exit.c +--- linux-2.6.26.orig/kernel/exit.c 2008-09-01 11:43:58.000000000 +0200 ++++ 
linux-2.6.26/kernel/exit.c 2008-09-02 12:17:21.000000000 +0200 +@@ -50,6 +50,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -137,6 +138,7 @@ static void __exit_signal(struct task_st + */ + flush_sigqueue(&tsk->pending); + ++ gr_del_task_from_ip_table(tsk); + tsk->signal = NULL; + tsk->sighand = NULL; + spin_unlock(&sighand->siglock); +diff -urNp linux-2.6.26.orig/kernel/kallsyms.c linux-2.6.26/kernel/kallsyms.c +--- linux-2.6.26.orig/kernel/kallsyms.c 2008-09-01 11:43:58.000000000 +0200 ++++ linux-2.6.26/kernel/kallsyms.c 2008-09-02 12:17:21.000000000 +0200 +@@ -472,7 +472,15 @@ static const struct file_operations kall + + static int __init kallsyms_init(void) + { ++#ifdef CONFIG_GRKERNSEC_PROC_ADD ++#ifdef CONFIG_GRKERNSEC_PROC_USER ++ proc_create("kallsyms", S_IFREG | S_IRUSR, NULL, &kallsyms_operations); ++#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ proc_create("kallsyms", S_IFREG | S_IRUSR | S_IRGRP, NULL, &kallsyms_operations); ++#endif ++#else + proc_create("kallsyms", 0444, NULL, &kallsyms_operations); ++#endif + return 0; + } + __initcall(kallsyms_init); +diff -urNp linux-2.6.26.orig/kernel/resource.c linux-2.6.26/kernel/resource.c +--- linux-2.6.26.orig/kernel/resource.c 2008-09-01 11:43:58.000000000 +0200 ++++ linux-2.6.26/kernel/resource.c 2008-09-02 12:17:21.000000000 +0200 +@@ -131,8 +131,18 @@ static const struct file_operations proc + + static int __init ioresources_init(void) + { ++#ifdef CONFIG_GRKERNSEC_PROC_ADD ++#ifdef CONFIG_GRKERNSEC_PROC_USER ++ proc_create("ioports", S_IRUSR, NULL, &proc_ioports_operations); ++ proc_create("iomem", S_IRUSR, NULL, &proc_iomem_operations); ++#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ proc_create("ioports", S_IRUSR | S_IRGRP, NULL, &proc_ioports_operations); ++ proc_create("iomem", S_IRUSR | S_IRGRP, NULL, &proc_iomem_operations); ++#endif ++#else + proc_create("ioports", 0, NULL, &proc_ioports_operations); + proc_create("iomem", 0, NULL, &proc_iomem_operations); 
++#endif + return 0; + } + __initcall(ioresources_init); +diff -urNp linux-2.6.26.orig/kernel/sysctl.c linux-2.6.26/kernel/sysctl.c +--- linux-2.6.26.orig/kernel/sysctl.c 2008-09-01 11:43:58.000000000 +0200 ++++ linux-2.6.26/kernel/sysctl.c 2008-09-02 12:17:21.000000000 +0200 +@@ -59,6 +59,11 @@ + static int deprecated_sysctl_warning(struct __sysctl_args *args); + + #if defined(CONFIG_SYSCTL) ++#include ++#include ++ ++extern int gr_handle_sysctl_mod(const char *dirname, const char *name, ++ const int op); + + /* External variables not in a header file. */ + extern int C_A_D; +@@ -153,6 +158,7 @@ static int proc_do_cad_pid(struct ctl_ta + static int proc_dointvec_taint(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos); + #endif ++extern ctl_table grsecurity_table[]; + + static struct ctl_table root_table[]; + static struct ctl_table_root sysctl_table_root; +@@ -823,6 +829,15 @@ static struct ctl_table kern_table[] = { + .child = key_sysctls, + }, + #endif ++ ++#if defined(CONFIG_GRKERNSEC_SYSCTL) ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "grsecurity", ++ .mode = 0500, ++ .child = grsecurity_table, ++ }, ++#endif + /* + * NOTE: do not add new entries to this table unless you have read + * Documentation/sysctl/ctl_unnumbered.txt +@@ -1585,6 +1600,10 @@ int sysctl_perm(struct ctl_table_root *r + int error; + int mode; + ++ if (table->parent != NULL && table->parent->procname != NULL && ++ table->procname != NULL && ++ gr_handle_sysctl_mod(table->parent->procname, table->procname, op)) ++ return -EACCES; + error = security_sysctl(table, op & (MAY_READ | MAY_WRITE | MAY_EXEC)); + if (error) + return error; +diff -urNp linux-2.6.26.orig/Makefile linux-2.6.26/Makefile +--- linux-2.6.26.orig/Makefile 2008-09-01 11:44:01.000000000 +0200 ++++ linux-2.6.26/Makefile 2008-09-02 12:17:21.000000000 +0200 +@@ -607,7 +607,7 @@ export mod_strip_cmd + + + ifeq ($(KBUILD_EXTMOD),) +-core-y += kernel/ mm/ fs/ ipc/ 
security/ crypto/ block/ ++core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ grsecurity/ + + vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ + $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ +diff -urNp linux-2.6.26.orig/net/ipv4/inet_hashtables.c linux-2.6.26/net/ipv4/inet_hashtables.c +--- linux-2.6.26.orig/net/ipv4/inet_hashtables.c 2008-09-01 11:43:37.000000000 +0200 ++++ linux-2.6.26/net/ipv4/inet_hashtables.c 2008-09-02 12:17:21.000000000 +0200 +@@ -18,12 +18,15 @@ + #include + #include + #include ++#include + + #include + #include + #include + #include + ++extern void gr_update_task_in_ip_table(struct task_struct *task, const struct inet_sock *inet); ++ + /* + * Allocate and initialize a new local port bind bucket. + * The bindhash mutex for snum's hash chain must be held here. +@@ -484,6 +487,8 @@ ok: + } + spin_unlock(&head->lock); + ++ gr_update_task_in_ip_table(current, inet_sk(sk)); ++ + if (tw) { + inet_twsk_deschedule(tw, death_row); + inet_twsk_put(tw); +diff -urNp linux-2.6.26.orig/net/socket.c linux-2.6.26/net/socket.c +--- linux-2.6.26.orig/net/socket.c 2008-09-01 11:43:36.000000000 +0200 ++++ linux-2.6.26/net/socket.c 2008-09-02 12:17:21.000000000 +0200 +@@ -85,6 +85,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -98,6 +99,8 @@ + #include + #include + ++extern void gr_attach_curr_ip(const struct sock *sk); ++ + static int sock_no_open(struct inode *irrelevant, struct file *dontcare); + static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos); +@@ -1502,6 +1505,7 @@ asmlinkage long sys_accept(int fd, struc + err = newfd; + + security_socket_post_accept(sock, newsock); ++ gr_attach_curr_ip(newsock->sk); + + out_put: + fput_light(sock->file, fput_needed); +diff -urNp linux-2.6.26.orig/security/Kconfig linux-2.6.26/security/Kconfig +--- linux-2.6.26.orig/security/Kconfig 2008-09-01 11:43:58.000000000 +0200 ++++ linux-2.6.26/security/Kconfig 
2008-09-02 12:17:21.000000000 +0200 +@@ -4,6 +4,8 @@ + + menu "Security options" + ++source grsecurity/Kconfig ++ + config KEYS + bool "Enable access key retention support" + help diff --git a/kernel-grsec-no-stupid-SbO.patch b/kernel-grsec-no-stupid-SbO.patch index f0a2f1bc..99dc06d4 100644 --- a/kernel-grsec-no-stupid-SbO.patch +++ b/kernel-grsec-no-stupid-SbO.patch @@ -1,7 +1,7 @@ ---- linux-2.6.27/init/Kconfig~ 2008-11-20 23:26:34.000000000 +0100 -+++ linux-2.6.27/init/Kconfig 2008-12-01 20:37:12.000000000 +0100 -@@ -781,8 +781,8 @@ - source "arch/Kconfig" +--- linux-2.6.28/fs/proc/Kconfig~ 2008-11-20 23:26:34.000000000 +0100 ++++ linux-2.6.28/fs/proc/Kconfig 2008-12-01 20:37:12.000000000 +0100 +@@ -59,8 +59,8 @@ + limited in memory. config PROC_PAGE_MONITOR - default n @@ -11,31 +11,23 @@ bool "Enable /proc page monitoring" if EMBEDDED help Various /proc files exist to monitor process memory utilization: -@@ -798,9 +798,9 @@ +--- linux-2.6.28/mm/slab.c~ 2009-01-18 02:10:12.395711069 +0100 ++++ linux-2.6.28/mm/slab.c 2009-01-18 02:18:05.632401077 +0100 +@@ -4496,8 +4496,15 @@ - config SLABINFO - bool -- depends on PROC_FS && !GRKERNSEC_PROC_ADD -+ depends on PROC_FS - depends on SLAB || SLUB_DEBUG -- default n -+ default y - - config RT_MUTEXES - boolean ---- linux-2.6.27/fs/proc/proc_misc.c~ 2008-11-20 23:26:30.000000000 +0100 -+++ linux-2.6.27/fs/proc/proc_misc.c 2008-12-01 20:35:55.000000000 +0100 -@@ -934,8 +934,12 @@ - #endif - proc_create("stat", 0, NULL, &proc_stat_operations); - proc_create("interrupts", 0, NULL, &proc_interrupts_operations); --#if defined(CONFIG_SLABINFO) && !defined(CONFIG_GRKERNSEC_PROC_ADD) -+#ifdef CONFIG_SLABINFO + static int __init slab_proc_init(void) + { +-#if !defined(CONFIG_GRKERNSEC_PROC_ADD) +- proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations); ++ int gr_mode = S_IWUSR|S_IRUGO; ++#ifdef CONFIG_GRKERNSEC_PROC_USER ++ gr_mode = S_IRUSR; ++#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ gr_mode = S_IRUSR 
| S_IRGRP; ++#endif ++ +#ifdef CONFIG_GRKERNSEC_PROC_ADD + proc_create("slabinfo",gr_mode,NULL,&proc_slabinfo_operations); -+#else - proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations); -+#endif #ifdef CONFIG_DEBUG_SLAB_LEAK proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations); #endif diff --git a/kernel-grsec.config b/kernel-grsec.config index 7f1a3346..bbac6e16 100644 --- a/kernel-grsec.config +++ b/kernel-grsec.config @@ -12,9 +12,10 @@ CONFIG_GRKERNSEC_CUSTOM=y # # CONFIG_GRKERNSEC_KMEM is not set # CONFIG_GRKERNSEC_IO is not set -CONFIG_GRKERNSEC_PROC_MEMMAP=y +# CONFIG_GRKERNSEC_PROC_MEMMAP is not set CONFIG_GRKERNSEC_BRUTE=y -CONFIG_GRKERNSEC_HIDESYM=y +CONFIG_GRKERNSEC_MODSTOP=y +# CONFIG_GRKERNSEC_HIDESYM is not set # # Role Based Access Control Options @@ -51,35 +52,34 @@ CONFIG_GRKERNSEC_CHROOT_CAPS=y # # Kernel Auditing # -# CONFIG_GRKERNSEC_AUDIT_GROUP is not set -# CONFIG_GRKERNSEC_EXECLOG is not set +CONFIG_GRKERNSEC_AUDIT_GROUP=y +CONFIG_GRKERNSEC_AUDIT_GID=1007 +CONFIG_GRKERNSEC_EXECLOG=y CONFIG_GRKERNSEC_RESLOG=y -# CONFIG_GRKERNSEC_CHROOT_EXECLOG is not set -# CONFIG_GRKERNSEC_AUDIT_CHDIR is not set -# CONFIG_GRKERNSEC_AUDIT_MOUNT is not set -# CONFIG_GRKERNSEC_AUDIT_IPC is not set +CONFIG_GRKERNSEC_CHROOT_EXECLOG=y +CONFIG_GRKERNSEC_AUDIT_CHDIR=y +CONFIG_GRKERNSEC_AUDIT_MOUNT=y +CONFIG_GRKERNSEC_AUDIT_IPC=y CONFIG_GRKERNSEC_SIGNAL=y CONFIG_GRKERNSEC_FORKFAIL=y CONFIG_GRKERNSEC_TIME=y CONFIG_GRKERNSEC_PROC_IPADDR=y -# CONFIG_GRKERNSEC_AUDIT_TEXTREL is not set +CONFIG_GRKERNSEC_AUDIT_TEXTREL=y # # Executable Protections # CONFIG_GRKERNSEC_EXECVE=y CONFIG_GRKERNSEC_DMESG=y -CONFIG_GRKERNSEC_RANDPID=y -# CONFIG_GRKERNSEC_TPE is not set +CONFIG_GRKERNSEC_TPE=y +CONFIG_GRKERNSEC_TPE_ALL=y +# CONFIG_GRKERNSEC_TPE_INVERT is not set +CONFIG_GRKERNSEC_TPE_GID=65500 # # Network Protections # CONFIG_GRKERNSEC_RANDNET=y -CONFIG_GRKERNSEC_RANDISN=y -CONFIG_GRKERNSEC_RANDID=y -CONFIG_GRKERNSEC_RANDSRC=y 
-CONFIG_GRKERNSEC_RANDRPC=y CONFIG_GRKERNSEC_SOCKET=y CONFIG_GRKERNSEC_SOCKET_ALL=y CONFIG_GRKERNSEC_SOCKET_ALL_GID=65501 @@ -92,14 +92,12 @@ CONFIG_GRKERNSEC_SOCKET_SERVER_GID=65503 # Sysctl support # CONFIG_GRKERNSEC_SYSCTL=y +# CONFIG_GRKERNSEC_SYSCTL_ON is not set # # Logging Options # CONFIG_GRKERNSEC_FLOODTIME=10 -CONFIG_GRKERNSEC_FLOODBURST=4 +CONFIG_GRKERNSEC_FLOODBURST=10 -# -# PaX -# -# CONFIG_PAX is not set +CONFIG_IP_NF_MATCH_STEALTH=m diff --git a/kernel-grsec_fixes.patch b/kernel-grsec_fixes.patch new file mode 100644 index 00000000..995fa76e --- /dev/null +++ b/kernel-grsec_fixes.patch @@ -0,0 +1,149 @@ +netlink +cap_dac* +diff -upr a/grsecurity/gracl_cap.c c/grsecurity/gracl_cap.c +--- a/grsecurity/gracl_cap.c 2007-12-01 00:54:57.312774500 +0000 ++++ c/grsecurity/gracl_cap.c 2007-12-01 01:09:34.923621750 +0000 +@@ -110,3 +110,19 @@ gr_is_capable_nolog(const int cap) + return 0; + } + ++void ++gr_log_cap_pid(const int cap, const pid_t pid) ++{ ++ struct task_struct *p; ++ ++ if (gr_acl_is_enabled()) { ++ read_lock(&tasklist_lock); ++ p = find_task_by_vpid(pid); ++ if (p) { ++ get_task_struct(p); ++ gr_log_cap(GR_DONT_AUDIT, GR_CAP_ACL_MSG, p, captab_log[cap]); ++ } ++ read_unlock(&tasklist_lock); ++ } ++ return; ++} +--- a/grsecurity/grsec_sock.c 2008-03-24 00:24:22.482633101 +0100 ++++ c/grsecurity/grsec_sock.c 2008-03-24 00:27:01.971671763 +0100 +@@ -247,23 +247,26 @@ + gr_cap_rtnetlink(struct sock *sock) + { + #ifdef CONFIG_GRKERNSEC ++ struct acl_subject_label *curracl; ++ kernel_cap_t cap_dropp = __cap_empty_set, cap_mask = __cap_empty_set; ++ + if (!gr_acl_is_enabled()) + return current_cap(); +- else if (sock->sk_protocol == NETLINK_ISCSI && +- cap_raised(current_cap(), CAP_SYS_ADMIN) && +- gr_is_capable(CAP_SYS_ADMIN)) +- return current_cap(); +- else if (sock->sk_protocol == NETLINK_AUDIT && +- cap_raised(current_cap(), CAP_AUDIT_WRITE) && +- gr_is_capable(CAP_AUDIT_WRITE) && +- cap_raised(current_cap(), CAP_AUDIT_CONTROL) && +- 
gr_is_capable(CAP_AUDIT_CONTROL)) +- return current_cap(); +- else if (cap_raised(current_cap(), CAP_NET_ADMIN) && +- gr_is_capable(CAP_NET_ADMIN)) +- return current_cap(); +- else +- return __cap_empty_set; ++ else { ++ curracl = current->acl; ++ ++ cap_dropp = curracl->cap_lower; ++ cap_mask = curracl->cap_mask; ++ ++ while ((curracl = curracl->parent_subject)) { ++ cap_dropp = cap_combine(cap_dropp, ++ cap_intersect(curracl->cap_lower, ++ cap_drop(cap_mask, curracl->cap_mask))); ++ cap_mask = cap_combine(cap_mask, curracl->cap_mask); ++ } ++ return cap_drop(current_cap(), ++ cap_intersect(cap_dropp, cap_mask)); ++ } + #else + return current_cap(); + #endif +diff -upr a/include/linux/grsecurity.h c/include/linux/grsecurity.h +--- a/include/linux/grsecurity.h 2007-12-01 00:54:57.224769000 +0000 ++++ c/include/linux/grsecurity.h 2007-12-01 01:09:34.923621750 +0000 +@@ -76,6 +76,7 @@ void gr_log_semrm(const uid_t uid, const + void gr_log_shmget(const int err, const int shmflg, const size_t size); + void gr_log_shmrm(const uid_t uid, const uid_t cuid); + void gr_log_textrel(struct vm_area_struct *vma); ++void gr_log_cap_pid(const int cap, pid_t pid); + + int gr_handle_follow_link(const struct inode *parent, + const struct inode *inode, +diff -upr a/security/commoncap.c c/security/commoncap.c +--- a/security/commoncap.c 2007-12-01 00:54:57.300773750 +0000 ++++ c/security/commoncap.c 2007-12-01 01:09:34.923621750 +0000 +@@ -55,8 +55,12 @@ + + int cap_netlink_recv(struct sk_buff *skb, int cap) + { +- if (!cap_raised(NETLINK_CB(skb).eff_cap, cap)) ++ if (!cap_raised(NETLINK_CB(skb).eff_cap, cap)) { ++#ifdef CONFIG_GRKERNSEC ++ gr_log_cap_pid(cap, NETLINK_CREDS(skb)->pid); ++#endif + return -EPERM; ++ } + return 0; + } + +=== +=== cap_dac_ succession with capable_nolog +=== +diff -upr a/fs./namei.c a/fs/namei.c +--- a/fs./namei.c 2008-04-05 01:23:49.741310000 +0200 ++++ a/fs/namei.c 2008-04-05 14:36:39.350275977 +0200 +@@ -215,6 +215,13 @@ int generic_permission(struct 
inode *ino + + check_capabilities: + /* ++ * Searching includes executable on directories, else just read. ++ */ ++ if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) ++ if (capable_nolog(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)) ++ return 0; ++ ++ /* + * Read/write DACs are always overridable. + * Executable DACs are overridable if at least one exec bit is set. + */ +@@ -223,13 +230,6 @@ int generic_permission(struct inode *ino + if (capable(CAP_DAC_OVERRIDE)) + return 0; + +- /* +- * Searching includes executable on directories, else just read. +- */ +- if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) +- if (capable(CAP_DAC_READ_SEARCH)) +- return 0; +- + return -EACCES; + } + +@@ -498,13 +498,13 @@ static int exec_permission_lite(struct i + if (mode & MAY_EXEC) + goto ok; + +- if ((inode->i_mode & S_IXUGO) && capable(CAP_DAC_OVERRIDE)) ++ if (S_ISDIR(inode->i_mode) && capable_nolog(CAP_DAC_OVERRIDE)) + goto ok; + +- if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_OVERRIDE)) ++ if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_READ_SEARCH)) + goto ok; + +- if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_READ_SEARCH)) ++ if ((inode->i_mode & S_IXUGO) && capable(CAP_DAC_OVERRIDE)) + goto ok; + + return -EACCES; + diff --git a/kernel-grsec_full.patch b/kernel-grsec_full.patch new file mode 100644 index 00000000..8f48ad0a --- /dev/null +++ b/kernel-grsec_full.patch @@ -0,0 +1,37330 @@ +diff -urNp linux-2.6.29/arch/alpha/include/asm/elf.h linux-2.6.29/arch/alpha/include/asm/elf.h +--- linux-2.6.29/arch/alpha/include/asm/elf.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/alpha/include/asm/elf.h 2009-03-28 14:26:18.000000000 -0400 +@@ -91,6 +91,13 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_N + + #define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) + ++#ifdef CONFIG_PAX_ASLR ++#define PAX_ELF_ET_DYN_BASE (current->personality & ADDR_LIMIT_32BIT ? 
0x10000 : 0x120000000UL) ++ ++#define PAX_DELTA_MMAP_LEN (current->personality & ADDR_LIMIT_32BIT ? 14 : 28) ++#define PAX_DELTA_STACK_LEN (current->personality & ADDR_LIMIT_32BIT ? 14 : 19) ++#endif ++ + /* $0 is set by ld.so to a pointer to a function which might be + registered using atexit. This provides a mean for the dynamic + linker to call DT_FINI functions for shared libraries that have +diff -urNp linux-2.6.29/arch/alpha/include/asm/kmap_types.h linux-2.6.29/arch/alpha/include/asm/kmap_types.h +--- linux-2.6.29/arch/alpha/include/asm/kmap_types.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/alpha/include/asm/kmap_types.h 2009-03-28 14:26:18.000000000 -0400 +@@ -24,7 +24,8 @@ D(9) KM_IRQ0, + D(10) KM_IRQ1, + D(11) KM_SOFTIRQ0, + D(12) KM_SOFTIRQ1, +-D(13) KM_TYPE_NR ++D(13) KM_CLEARPAGE, ++D(14) KM_TYPE_NR + }; + + #undef D +diff -urNp linux-2.6.29/arch/alpha/include/asm/pgtable.h linux-2.6.29/arch/alpha/include/asm/pgtable.h +--- linux-2.6.29/arch/alpha/include/asm/pgtable.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/alpha/include/asm/pgtable.h 2009-03-28 14:26:18.000000000 -0400 +@@ -101,6 +101,17 @@ struct vm_area_struct; + #define PAGE_SHARED __pgprot(_PAGE_VALID | __ACCESS_BITS) + #define PAGE_COPY __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW) + #define PAGE_READONLY __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW) ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++# define PAGE_SHARED_NOEXEC __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOE) ++# define PAGE_COPY_NOEXEC __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW | _PAGE_FOE) ++# define PAGE_READONLY_NOEXEC __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW | _PAGE_FOE) ++#else ++# define PAGE_SHARED_NOEXEC PAGE_SHARED ++# define PAGE_COPY_NOEXEC PAGE_COPY ++# define PAGE_READONLY_NOEXEC PAGE_READONLY ++#endif ++ + #define PAGE_KERNEL __pgprot(_PAGE_VALID | _PAGE_ASM | _PAGE_KRE | _PAGE_KWE) + + #define _PAGE_NORMAL(x) __pgprot(_PAGE_VALID | __ACCESS_BITS | (x)) +diff -urNp 
linux-2.6.29/arch/alpha/kernel/module.c linux-2.6.29/arch/alpha/kernel/module.c +--- linux-2.6.29/arch/alpha/kernel/module.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/alpha/kernel/module.c 2009-03-28 14:26:18.000000000 -0400 +@@ -182,7 +182,7 @@ apply_relocate_add(Elf64_Shdr *sechdrs, + + /* The small sections were sorted to the end of the segment. + The following should definitely cover them. */ +- gp = (u64)me->module_core + me->core_size - 0x8000; ++ gp = (u64)me->module_core_rw + me->core_size_rw - 0x8000; + got = sechdrs[me->arch.gotsecindex].sh_addr; + + for (i = 0; i < n; i++) { +diff -urNp linux-2.6.29/arch/alpha/kernel/osf_sys.c linux-2.6.29/arch/alpha/kernel/osf_sys.c +--- linux-2.6.29/arch/alpha/kernel/osf_sys.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/alpha/kernel/osf_sys.c 2009-03-28 14:26:18.000000000 -0400 +@@ -1217,6 +1217,10 @@ arch_get_unmapped_area(struct file *filp + merely specific addresses, but regions of memory -- perhaps + this feature should be incorporated into all ports? */ + ++#ifdef CONFIG_PAX_RANDMMAP ++ if (!(current->mm->pax_flags & MF_PAX_RANDMMAP)) ++#endif ++ + if (addr) { + addr = arch_get_unmapped_area_1 (PAGE_ALIGN(addr), len, limit); + if (addr != (unsigned long) -ENOMEM) +@@ -1224,8 +1228,8 @@ arch_get_unmapped_area(struct file *filp + } + + /* Next, try allocating at TASK_UNMAPPED_BASE. 
*/ +- addr = arch_get_unmapped_area_1 (PAGE_ALIGN(TASK_UNMAPPED_BASE), +- len, limit); ++ addr = arch_get_unmapped_area_1 (PAGE_ALIGN(current->mm->mmap_base), len, limit); ++ + if (addr != (unsigned long) -ENOMEM) + return addr; + +diff -urNp linux-2.6.29/arch/alpha/kernel/ptrace.c linux-2.6.29/arch/alpha/kernel/ptrace.c +--- linux-2.6.29/arch/alpha/kernel/ptrace.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/alpha/kernel/ptrace.c 2009-03-28 14:26:18.000000000 -0400 +@@ -266,6 +266,9 @@ long arch_ptrace(struct task_struct *chi + size_t copied; + long ret; + ++ if (gr_handle_ptrace(child, request)) ++ return -EPERM; ++ + switch (request) { + /* When I and D space are separate, these will need to be fixed. */ + case PTRACE_PEEKTEXT: /* read word at location addr. */ +diff -urNp linux-2.6.29/arch/alpha/mm/fault.c linux-2.6.29/arch/alpha/mm/fault.c +--- linux-2.6.29/arch/alpha/mm/fault.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/alpha/mm/fault.c 2009-03-28 14:26:18.000000000 -0400 +@@ -54,6 +54,124 @@ __load_new_mm_context(struct mm_struct * + __reload_thread(pcb); + } + ++#ifdef CONFIG_PAX_PAGEEXEC ++/* ++ * PaX: decide what to do with offenders (regs->pc = fault address) ++ * ++ * returns 1 when task should be killed ++ * 2 when patched PLT trampoline was detected ++ * 3 when unpatched PLT trampoline was detected ++ */ ++static int pax_handle_fetch_fault(struct pt_regs *regs) ++{ ++ ++#ifdef CONFIG_PAX_EMUPLT ++ int err; ++ ++ do { /* PaX: patched PLT emulation #1 */ ++ unsigned int ldah, ldq, jmp; ++ ++ err = get_user(ldah, (unsigned int *)regs->pc); ++ err |= get_user(ldq, (unsigned int *)(regs->pc+4)); ++ err |= get_user(jmp, (unsigned int *)(regs->pc+8)); ++ ++ if (err) ++ break; ++ ++ if ((ldah & 0xFFFF0000U) == 0x277B0000U && ++ (ldq & 0xFFFF0000U) == 0xA77B0000U && ++ jmp == 0x6BFB0000U) ++ { ++ unsigned long r27, addr; ++ unsigned long addrh = (ldah | 0xFFFFFFFFFFFF0000UL) << 16; ++ unsigned long addrl = ldq | 
0xFFFFFFFFFFFF0000UL; ++ ++ addr = regs->r27 + ((addrh ^ 0x80000000UL) + 0x80000000UL) + ((addrl ^ 0x8000UL) + 0x8000UL); ++ err = get_user(r27, (unsigned long *)addr); ++ if (err) ++ break; ++ ++ regs->r27 = r27; ++ regs->pc = r27; ++ return 2; ++ } ++ } while (0); ++ ++ do { /* PaX: patched PLT emulation #2 */ ++ unsigned int ldah, lda, br; ++ ++ err = get_user(ldah, (unsigned int *)regs->pc); ++ err |= get_user(lda, (unsigned int *)(regs->pc+4)); ++ err |= get_user(br, (unsigned int *)(regs->pc+8)); ++ ++ if (err) ++ break; ++ ++ if ((ldah & 0xFFFF0000U) == 0x277B0000U && ++ (lda & 0xFFFF0000U) == 0xA77B0000U && ++ (br & 0xFFE00000U) == 0xC3E00000U) ++ { ++ unsigned long addr = br | 0xFFFFFFFFFFE00000UL; ++ unsigned long addrh = (ldah | 0xFFFFFFFFFFFF0000UL) << 16; ++ unsigned long addrl = lda | 0xFFFFFFFFFFFF0000UL; ++ ++ regs->r27 += ((addrh ^ 0x80000000UL) + 0x80000000UL) + ((addrl ^ 0x8000UL) + 0x8000UL); ++ regs->pc += 12 + (((addr ^ 0x00100000UL) + 0x00100000UL) << 2); ++ return 2; ++ } ++ } while (0); ++ ++ do { /* PaX: unpatched PLT emulation */ ++ unsigned int br; ++ ++ err = get_user(br, (unsigned int *)regs->pc); ++ ++ if (!err && (br & 0xFFE00000U) == 0xC3800000U) { ++ unsigned int br2, ldq, nop, jmp; ++ unsigned long addr = br | 0xFFFFFFFFFFE00000UL, resolver; ++ ++ addr = regs->pc + 4 + (((addr ^ 0x00100000UL) + 0x00100000UL) << 2); ++ err = get_user(br2, (unsigned int *)addr); ++ err |= get_user(ldq, (unsigned int *)(addr+4)); ++ err |= get_user(nop, (unsigned int *)(addr+8)); ++ err |= get_user(jmp, (unsigned int *)(addr+12)); ++ err |= get_user(resolver, (unsigned long *)(addr+16)); ++ ++ if (err) ++ break; ++ ++ if (br2 == 0xC3600000U && ++ ldq == 0xA77B000CU && ++ nop == 0x47FF041FU && ++ jmp == 0x6B7B0000U) ++ { ++ regs->r28 = regs->pc+4; ++ regs->r27 = addr+16; ++ regs->pc = resolver; ++ return 3; ++ } ++ } ++ } while (0); ++#endif ++ ++ return 1; ++} ++ ++void pax_report_insns(void *pc, void *sp) ++{ ++ unsigned long i; ++ ++ 
printk(KERN_ERR "PAX: bytes at PC: "); ++ for (i = 0; i < 5; i++) { ++ unsigned int c; ++ if (get_user(c, (unsigned int *)pc+i)) ++ printk(KERN_CONT "???????? "); ++ else ++ printk(KERN_CONT "%08x ", c); ++ } ++ printk("\n"); ++} ++#endif + + /* + * This routine handles page faults. It determines the address, +@@ -131,8 +249,29 @@ do_page_fault(unsigned long address, uns + good_area: + si_code = SEGV_ACCERR; + if (cause < 0) { +- if (!(vma->vm_flags & VM_EXEC)) ++ if (!(vma->vm_flags & VM_EXEC)) { ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ if (!(mm->pax_flags & MF_PAX_PAGEEXEC) || address != regs->pc) ++ goto bad_area; ++ ++ up_read(&mm->mmap_sem); ++ switch (pax_handle_fetch_fault(regs)) { ++ ++#ifdef CONFIG_PAX_EMUPLT ++ case 2: ++ case 3: ++ return; ++#endif ++ ++ } ++ pax_report_fault(regs, (void *)regs->pc, (void *)rdusp()); ++ do_group_exit(SIGKILL); ++#else + goto bad_area; ++#endif ++ ++ } + } else if (!cause) { + /* Allow reads even for write-only mappings */ + if (!(vma->vm_flags & (VM_READ | VM_WRITE))) +diff -urNp linux-2.6.29/arch/arm/include/asm/elf.h linux-2.6.29/arch/arm/include/asm/elf.h +--- linux-2.6.29/arch/arm/include/asm/elf.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/arm/include/asm/elf.h 2009-03-28 14:26:18.000000000 -0400 +@@ -99,7 +99,14 @@ extern int arm_elf_read_implies_exec(con + the loader. We need to make sure that it is out of the way of the program + that it will "exec", and that there is sufficient room for the brk. */ + +-#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) ++#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) ++ ++#ifdef CONFIG_PAX_ASLR ++#define PAX_ELF_ET_DYN_BASE 0x00008000UL ++ ++#define PAX_DELTA_MMAP_LEN ((current->personality == PER_LINUX_32BIT) ? 16 : 10) ++#define PAX_DELTA_STACK_LEN ((current->personality == PER_LINUX_32BIT) ? 16 : 10) ++#endif + + /* When the program starts, a1 contains a pointer to a function to be + registered with atexit, as per the SVR4 ABI. 
A value of 0 means we +diff -urNp linux-2.6.29/arch/arm/include/asm/kmap_types.h linux-2.6.29/arch/arm/include/asm/kmap_types.h +--- linux-2.6.29/arch/arm/include/asm/kmap_types.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/arm/include/asm/kmap_types.h 2009-03-28 14:26:18.000000000 -0400 +@@ -18,6 +18,7 @@ enum km_type { + KM_IRQ1, + KM_SOFTIRQ0, + KM_SOFTIRQ1, ++ KM_CLEARPAGE, + KM_TYPE_NR + }; + +diff -urNp linux-2.6.29/arch/arm/mm/mmap.c linux-2.6.29/arch/arm/mm/mmap.c +--- linux-2.6.29/arch/arm/mm/mmap.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/arm/mm/mmap.c 2009-03-28 14:26:18.000000000 -0400 +@@ -62,6 +62,10 @@ arch_get_unmapped_area(struct file *filp + if (len > TASK_SIZE) + return -ENOMEM; + ++#ifdef CONFIG_PAX_RANDMMAP ++ if (!(mm->pax_flags & MF_PAX_RANDMMAP)) ++#endif ++ + if (addr) { + if (do_align) + addr = COLOUR_ALIGN(addr, pgoff); +@@ -74,10 +78,10 @@ arch_get_unmapped_area(struct file *filp + return addr; + } + if (len > mm->cached_hole_size) { +- start_addr = addr = mm->free_area_cache; ++ start_addr = addr = mm->free_area_cache; + } else { +- start_addr = addr = TASK_UNMAPPED_BASE; +- mm->cached_hole_size = 0; ++ start_addr = addr = mm->mmap_base; ++ mm->cached_hole_size = 0; + } + + full_search: +@@ -93,8 +97,8 @@ full_search: + * Start a new search - just in case we missed + * some holes. + */ +- if (start_addr != TASK_UNMAPPED_BASE) { +- start_addr = addr = TASK_UNMAPPED_BASE; ++ if (start_addr != mm->mmap_base) { ++ start_addr = addr = mm->mmap_base; + mm->cached_hole_size = 0; + goto full_search; + } +diff -urNp linux-2.6.29/arch/avr32/include/asm/elf.h linux-2.6.29/arch/avr32/include/asm/elf.h +--- linux-2.6.29/arch/avr32/include/asm/elf.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/avr32/include/asm/elf.h 2009-03-28 14:26:18.000000000 -0400 +@@ -85,8 +85,14 @@ typedef struct user_fpu_struct elf_fpreg + the loader. 
We need to make sure that it is out of the way of the program + that it will "exec", and that there is sufficient room for the brk. */ + +-#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) ++#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) + ++#ifdef CONFIG_PAX_ASLR ++#define PAX_ELF_ET_DYN_BASE 0x00001000UL ++ ++#define PAX_DELTA_MMAP_LEN 15 ++#define PAX_DELTA_STACK_LEN 15 ++#endif + + /* This yields a mask that user programs can use to figure out what + instruction set this CPU supports. This could be done in user space, +diff -urNp linux-2.6.29/arch/avr32/include/asm/kmap_types.h linux-2.6.29/arch/avr32/include/asm/kmap_types.h +--- linux-2.6.29/arch/avr32/include/asm/kmap_types.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/avr32/include/asm/kmap_types.h 2009-03-28 14:26:18.000000000 -0400 +@@ -22,7 +22,8 @@ D(10) KM_IRQ0, + D(11) KM_IRQ1, + D(12) KM_SOFTIRQ0, + D(13) KM_SOFTIRQ1, +-D(14) KM_TYPE_NR ++D(14) KM_CLEARPAGE, ++D(15) KM_TYPE_NR + }; + + #undef D +diff -urNp linux-2.6.29/arch/avr32/mm/fault.c linux-2.6.29/arch/avr32/mm/fault.c +--- linux-2.6.29/arch/avr32/mm/fault.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/avr32/mm/fault.c 2009-03-28 14:26:18.000000000 -0400 +@@ -41,6 +41,23 @@ static inline int notify_page_fault(stru + + int exception_trace = 1; + ++#ifdef CONFIG_PAX_PAGEEXEC ++void pax_report_insns(void *pc, void *sp) ++{ ++ unsigned long i; ++ ++ printk(KERN_ERR "PAX: bytes at PC: "); ++ for (i = 0; i < 20; i++) { ++ unsigned char c; ++ if (get_user(c, (unsigned char *)pc+i)) ++ printk(KERN_CONT "???????? "); ++ else ++ printk(KERN_CONT "%02x ", c); ++ } ++ printk("\n"); ++} ++#endif ++ + /* + * This routine handles page faults. It determines the address and the + * problem, and then passes it off to one of the appropriate routines. 
+@@ -157,6 +174,16 @@ bad_area: + up_read(&mm->mmap_sem); + + if (user_mode(regs)) { ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ if (mm->pax_flags & MF_PAX_PAGEEXEC) { ++ if (ecr == ECR_PROTECTION_X || ecr == ECR_TLB_MISS_X) { ++ pax_report_fault(regs, (void *)regs->pc, (void *)regs->sp); ++ do_group_exit(SIGKILL); ++ } ++ } ++#endif ++ + if (exception_trace && printk_ratelimit()) + printk("%s%s[%d]: segfault at %08lx pc %08lx " + "sp %08lx ecr %lu\n", +diff -urNp linux-2.6.29/arch/blackfin/include/asm/kmap_types.h linux-2.6.29/arch/blackfin/include/asm/kmap_types.h +--- linux-2.6.29/arch/blackfin/include/asm/kmap_types.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/blackfin/include/asm/kmap_types.h 2009-03-28 14:26:18.000000000 -0400 +@@ -15,6 +15,7 @@ enum km_type { + KM_IRQ1, + KM_SOFTIRQ0, + KM_SOFTIRQ1, ++ KM_CLEARPAGE, + KM_TYPE_NR + }; + +diff -urNp linux-2.6.29/arch/cris/include/asm/kmap_types.h linux-2.6.29/arch/cris/include/asm/kmap_types.h +--- linux-2.6.29/arch/cris/include/asm/kmap_types.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/cris/include/asm/kmap_types.h 2009-03-28 14:26:18.000000000 -0400 +@@ -19,6 +19,7 @@ enum km_type { + KM_IRQ1, + KM_SOFTIRQ0, + KM_SOFTIRQ1, ++ KM_CLEARPAGE, + KM_TYPE_NR + }; + +diff -urNp linux-2.6.29/arch/h8300/include/asm/kmap_types.h linux-2.6.29/arch/h8300/include/asm/kmap_types.h +--- linux-2.6.29/arch/h8300/include/asm/kmap_types.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/h8300/include/asm/kmap_types.h 2009-03-28 14:26:18.000000000 -0400 +@@ -15,6 +15,7 @@ enum km_type { + KM_IRQ1, + KM_SOFTIRQ0, + KM_SOFTIRQ1, ++ KM_CLEARPAGE, + KM_TYPE_NR + }; + +diff -urNp linux-2.6.29/arch/ia64/ia32/binfmt_elf32.c linux-2.6.29/arch/ia64/ia32/binfmt_elf32.c +--- linux-2.6.29/arch/ia64/ia32/binfmt_elf32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/ia64/ia32/binfmt_elf32.c 2009-03-28 14:26:18.000000000 -0400 +@@ -45,6 +45,13 @@ randomize_stack_top(unsigned long stack_ + + 
#define elf_read_implies_exec(ex, have_pt_gnu_stack) (!(have_pt_gnu_stack)) + ++#ifdef CONFIG_PAX_ASLR ++#define PAX_ELF_ET_DYN_BASE (current->personality == PER_LINUX32 ? 0x08048000UL : 0x4000000000000000UL) ++ ++#define PAX_DELTA_MMAP_LEN (current->personality == PER_LINUX32 ? 16 : 3*PAGE_SHIFT - 13) ++#define PAX_DELTA_STACK_LEN (current->personality == PER_LINUX32 ? 16 : 3*PAGE_SHIFT - 13) ++#endif ++ + /* Ugly but avoids duplication */ + #include "../../../fs/binfmt_elf.c" + +diff -urNp linux-2.6.29/arch/ia64/ia32/ia32priv.h linux-2.6.29/arch/ia64/ia32/ia32priv.h +--- linux-2.6.29/arch/ia64/ia32/ia32priv.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/ia64/ia32/ia32priv.h 2009-03-28 14:26:18.000000000 -0400 +@@ -296,7 +296,14 @@ typedef struct compat_siginfo { + #define ELF_DATA ELFDATA2LSB + #define ELF_ARCH EM_386 + +-#define IA32_STACK_TOP IA32_PAGE_OFFSET ++#ifdef CONFIG_PAX_RANDUSTACK ++#define __IA32_DELTA_STACK (current->mm->delta_stack) ++#else ++#define __IA32_DELTA_STACK 0UL ++#endif ++ ++#define IA32_STACK_TOP (IA32_PAGE_OFFSET - __IA32_DELTA_STACK) ++ + #define IA32_GATE_OFFSET IA32_PAGE_OFFSET + #define IA32_GATE_END IA32_PAGE_OFFSET + PAGE_SIZE + +diff -urNp linux-2.6.29/arch/ia64/include/asm/elf.h linux-2.6.29/arch/ia64/include/asm/elf.h +--- linux-2.6.29/arch/ia64/include/asm/elf.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/ia64/include/asm/elf.h 2009-03-28 14:26:18.000000000 -0400 +@@ -43,6 +43,13 @@ + */ + #define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x800000000UL) + ++#ifdef CONFIG_PAX_ASLR ++#define PAX_ELF_ET_DYN_BASE (current->personality == PER_LINUX32 ? 0x08048000UL : 0x4000000000000000UL) ++ ++#define PAX_DELTA_MMAP_LEN (current->personality == PER_LINUX32 ? 16 : 3*PAGE_SHIFT - 13) ++#define PAX_DELTA_STACK_LEN (current->personality == PER_LINUX32 ? 
16 : 3*PAGE_SHIFT - 13) ++#endif ++ + #define PT_IA_64_UNWIND 0x70000001 + + /* IA-64 relocations: */ +diff -urNp linux-2.6.29/arch/ia64/include/asm/kmap_types.h linux-2.6.29/arch/ia64/include/asm/kmap_types.h +--- linux-2.6.29/arch/ia64/include/asm/kmap_types.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/ia64/include/asm/kmap_types.h 2009-03-28 14:26:18.000000000 -0400 +@@ -22,7 +22,8 @@ D(9) KM_IRQ0, + D(10) KM_IRQ1, + D(11) KM_SOFTIRQ0, + D(12) KM_SOFTIRQ1, +-D(13) KM_TYPE_NR ++D(13) KM_CLEARPAGE, ++D(14) KM_TYPE_NR + }; + + #undef D +diff -urNp linux-2.6.29/arch/ia64/include/asm/pgtable.h linux-2.6.29/arch/ia64/include/asm/pgtable.h +--- linux-2.6.29/arch/ia64/include/asm/pgtable.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/ia64/include/asm/pgtable.h 2009-03-28 14:26:18.000000000 -0400 +@@ -143,6 +143,17 @@ + #define PAGE_READONLY __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) + #define PAGE_COPY __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) + #define PAGE_COPY_EXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX) ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++# define PAGE_SHARED_NOEXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RW) ++# define PAGE_READONLY_NOEXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) ++# define PAGE_COPY_NOEXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) ++#else ++# define PAGE_SHARED_NOEXEC PAGE_SHARED ++# define PAGE_READONLY_NOEXEC PAGE_READONLY ++# define PAGE_COPY_NOEXEC PAGE_COPY ++#endif ++ + #define PAGE_GATE __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_X_RX) + #define PAGE_KERNEL __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX) + #define PAGE_KERNELRX __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_RX) +diff -urNp linux-2.6.29/arch/ia64/kernel/module.c linux-2.6.29/arch/ia64/kernel/module.c +--- linux-2.6.29/arch/ia64/kernel/module.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/ia64/kernel/module.c 2009-03-28 14:26:18.000000000 -0400 +@@ -312,8 +312,7 @@ module_alloc 
(unsigned long size) + void + module_free (struct module *mod, void *module_region) + { +- if (mod && mod->arch.init_unw_table && +- module_region == mod->module_init) { ++ if (mod && mod->arch.init_unw_table && module_region == mod->module_init_rx) { + unw_remove_unwind_table(mod->arch.init_unw_table); + mod->arch.init_unw_table = NULL; + } +@@ -491,15 +490,39 @@ module_frob_arch_sections (Elf_Ehdr *ehd + } + + static inline int ++in_init_rx (const struct module *mod, uint64_t addr) ++{ ++ return addr - (uint64_t) mod->module_init_rx < mod->init_size_rx; ++} ++ ++static inline int ++in_init_rw (const struct module *mod, uint64_t addr) ++{ ++ return addr - (uint64_t) mod->module_init_rw < mod->init_size_rw; ++} ++ ++static inline int + in_init (const struct module *mod, uint64_t addr) + { +- return addr - (uint64_t) mod->module_init < mod->init_size; ++ return in_init_rx(mod, addr) || in_init_rw(mod, addr); ++} ++ ++static inline int ++in_core_rx (const struct module *mod, uint64_t addr) ++{ ++ return addr - (uint64_t) mod->module_core_rx < mod->core_size_rx; ++} ++ ++static inline int ++in_core_rw (const struct module *mod, uint64_t addr) ++{ ++ return addr - (uint64_t) mod->module_core_rw < mod->core_size_rw; + } + + static inline int + in_core (const struct module *mod, uint64_t addr) + { +- return addr - (uint64_t) mod->module_core < mod->core_size; ++ return in_core_rx(mod, addr) || in_core_rw(mod, addr); + } + + static inline int +@@ -683,7 +706,14 @@ do_reloc (struct module *mod, uint8_t r_ + break; + + case RV_BDREL: +- val -= (uint64_t) (in_init(mod, val) ? 
mod->module_init : mod->module_core); ++ if (in_init_rx(mod, val)) ++ val -= (uint64_t) mod->module_init_rx; ++ else if (in_init_rw(mod, val)) ++ val -= (uint64_t) mod->module_init_rw; ++ else if (in_core_rx(mod, val)) ++ val -= (uint64_t) mod->module_core_rx; ++ else if (in_core_rw(mod, val)) ++ val -= (uint64_t) mod->module_core_rw; + break; + + case RV_LTV: +@@ -817,15 +847,15 @@ apply_relocate_add (Elf64_Shdr *sechdrs, + * addresses have been selected... + */ + uint64_t gp; +- if (mod->core_size > MAX_LTOFF) ++ if (mod->core_size_rx + mod->core_size_rw > MAX_LTOFF) + /* + * This takes advantage of fact that SHF_ARCH_SMALL gets allocated + * at the end of the module. + */ +- gp = mod->core_size - MAX_LTOFF / 2; ++ gp = mod->core_size_rx + mod->core_size_rw - MAX_LTOFF / 2; + else +- gp = mod->core_size / 2; +- gp = (uint64_t) mod->module_core + ((gp + 7) & -8); ++ gp = (mod->core_size_rx + mod->core_size_rw) / 2; ++ gp = (uint64_t) mod->module_core_rx + ((gp + 7) & -8); + mod->arch.gp = gp; + DEBUGP("%s: placing gp at 0x%lx\n", __func__, gp); + } +diff -urNp linux-2.6.29/arch/ia64/kernel/sys_ia64.c linux-2.6.29/arch/ia64/kernel/sys_ia64.c +--- linux-2.6.29/arch/ia64/kernel/sys_ia64.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/ia64/kernel/sys_ia64.c 2009-03-28 14:26:18.000000000 -0400 +@@ -43,6 +43,13 @@ arch_get_unmapped_area (struct file *fil + if (REGION_NUMBER(addr) == RGN_HPAGE) + addr = 0; + #endif ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ if (mm->pax_flags & MF_PAX_RANDMMAP) ++ addr = mm->free_area_cache; ++ else ++#endif ++ + if (!addr) + addr = mm->free_area_cache; + +@@ -61,9 +68,9 @@ arch_get_unmapped_area (struct file *fil + for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { + /* At this point: (!vma || addr < vma->vm_end). 
*/ + if (TASK_SIZE - len < addr || RGN_MAP_LIMIT - len < REGION_OFFSET(addr)) { +- if (start_addr != TASK_UNMAPPED_BASE) { ++ if (start_addr != mm->mmap_base) { + /* Start a new search --- just in case we missed some holes. */ +- addr = TASK_UNMAPPED_BASE; ++ addr = mm->mmap_base; + goto full_search; + } + return -ENOMEM; +diff -urNp linux-2.6.29/arch/ia64/mm/fault.c linux-2.6.29/arch/ia64/mm/fault.c +--- linux-2.6.29/arch/ia64/mm/fault.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/ia64/mm/fault.c 2009-03-28 14:26:18.000000000 -0400 +@@ -72,6 +72,23 @@ mapped_kernel_page_is_present (unsigned + return pte_present(pte); + } + ++#ifdef CONFIG_PAX_PAGEEXEC ++void pax_report_insns(void *pc, void *sp) ++{ ++ unsigned long i; ++ ++ printk(KERN_ERR "PAX: bytes at PC: "); ++ for (i = 0; i < 8; i++) { ++ unsigned int c; ++ if (get_user(c, (unsigned int *)pc+i)) ++ printk(KERN_CONT "???????? "); ++ else ++ printk(KERN_CONT "%08x ", c); ++ } ++ printk("\n"); ++} ++#endif ++ + void __kprobes + ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs) + { +@@ -145,9 +162,23 @@ ia64_do_page_fault (unsigned long addres + mask = ( (((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT) + | (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT)); + +- if ((vma->vm_flags & mask) != mask) ++ if ((vma->vm_flags & mask) != mask) { ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ if (!(vma->vm_flags & VM_EXEC) && (mask & VM_EXEC)) { ++ if (!(mm->pax_flags & MF_PAX_PAGEEXEC) || address != regs->cr_iip) ++ goto bad_area; ++ ++ up_read(&mm->mmap_sem); ++ pax_report_fault(regs, (void *)regs->cr_iip, (void *)regs->r12); ++ do_group_exit(SIGKILL); ++ } ++#endif ++ + goto bad_area; + ++ } ++ + survive: + /* + * If for any reason at all we couldn't handle the fault, make +diff -urNp linux-2.6.29/arch/ia64/mm/init.c linux-2.6.29/arch/ia64/mm/init.c +--- linux-2.6.29/arch/ia64/mm/init.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/ia64/mm/init.c 2009-03-28 
14:26:18.000000000 -0400 +@@ -121,6 +121,19 @@ ia64_init_addr_space (void) + vma->vm_start = current->thread.rbs_bot & PAGE_MASK; + vma->vm_end = vma->vm_start + PAGE_SIZE; + vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT; ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ if (current->mm->pax_flags & MF_PAX_PAGEEXEC) { ++ vma->vm_flags &= ~VM_EXEC; ++ ++#ifdef CONFIG_PAX_MPROTECT ++ if (current->mm->pax_flags & MF_PAX_MPROTECT) ++ vma->vm_flags &= ~VM_MAYEXEC; ++#endif ++ ++ } ++#endif ++ + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); + down_write(&current->mm->mmap_sem); + if (insert_vm_struct(current->mm, vma)) { +diff -urNp linux-2.6.29/arch/m68k/include/asm/kmap_types_mm.h linux-2.6.29/arch/m68k/include/asm/kmap_types_mm.h +--- linux-2.6.29/arch/m68k/include/asm/kmap_types_mm.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/m68k/include/asm/kmap_types_mm.h 2009-03-28 14:26:18.000000000 -0400 +@@ -15,6 +15,7 @@ enum km_type { + KM_IRQ1, + KM_SOFTIRQ0, + KM_SOFTIRQ1, ++ KM_CLEARPAGE, + KM_TYPE_NR + }; + +diff -urNp linux-2.6.29/arch/m68k/include/asm/kmap_types_no.h linux-2.6.29/arch/m68k/include/asm/kmap_types_no.h +--- linux-2.6.29/arch/m68k/include/asm/kmap_types_no.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/m68k/include/asm/kmap_types_no.h 2009-03-28 14:26:18.000000000 -0400 +@@ -15,6 +15,7 @@ enum km_type { + KM_IRQ1, + KM_SOFTIRQ0, + KM_SOFTIRQ1, ++ KM_CLEARPAGE, + KM_TYPE_NR + }; + +diff -urNp linux-2.6.29/arch/mips/include/asm/elf.h linux-2.6.29/arch/mips/include/asm/elf.h +--- linux-2.6.29/arch/mips/include/asm/elf.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/mips/include/asm/elf.h 2009-03-28 14:26:18.000000000 -0400 +@@ -364,4 +364,11 @@ extern int dump_task_fpu(struct task_str + #define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) + #endif + ++#ifdef CONFIG_PAX_ASLR ++#define PAX_ELF_ET_DYN_BASE (test_thread_flag(TIF_32BIT_ADDR) ?
0x00400000UL : 0x00400000UL) ++ ++#define PAX_DELTA_MMAP_LEN (test_thread_flag(TIF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) ++#define PAX_DELTA_STACK_LEN (test_thread_flag(TIF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) ++#endif ++ + #endif /* _ASM_ELF_H */ +diff -urNp linux-2.6.29/arch/mips/include/asm/kmap_types.h linux-2.6.29/arch/mips/include/asm/kmap_types.h +--- linux-2.6.29/arch/mips/include/asm/kmap_types.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/mips/include/asm/kmap_types.h 2009-03-28 14:26:18.000000000 -0400 +@@ -22,7 +22,8 @@ D(9) KM_IRQ0, + D(10) KM_IRQ1, + D(11) KM_SOFTIRQ0, + D(12) KM_SOFTIRQ1, +-D(13) KM_TYPE_NR ++D(13) KM_CLEARPAGE, ++D(14) KM_TYPE_NR + }; + + #undef D +diff -urNp linux-2.6.29/arch/mips/include/asm/page.h linux-2.6.29/arch/mips/include/asm/page.h +--- linux-2.6.29/arch/mips/include/asm/page.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/mips/include/asm/page.h 2009-03-28 14:26:18.000000000 -0400 +@@ -82,7 +82,7 @@ extern void copy_user_highpage(struct pa + #ifdef CONFIG_CPU_MIPS32 + typedef struct { unsigned long pte_low, pte_high; } pte_t; + #define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32)) +- #define __pte(x) ({ pte_t __pte = {(x), ((unsigned long long)(x)) >> 32}; __pte; }) ++ #define __pte(x) ({ pte_t __pte = {(x), (x) >> 32}; __pte; }) + #else + typedef struct { unsigned long long pte; } pte_t; + #define pte_val(x) ((x).pte) +diff -urNp linux-2.6.29/arch/mips/include/asm/system.h linux-2.6.29/arch/mips/include/asm/system.h +--- linux-2.6.29/arch/mips/include/asm/system.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/mips/include/asm/system.h 2009-03-28 14:26:18.000000000 -0400 +@@ -217,6 +217,6 @@ extern void per_cpu_trap_init(void); + */ + #define __ARCH_WANT_UNLOCKED_CTXSW + +-extern unsigned long arch_align_stack(unsigned long sp); ++#define arch_align_stack(x) ((x) & ALMASK) + + #endif /* _ASM_SYSTEM_H */ +diff -urNp 
linux-2.6.29/arch/mips/kernel/binfmt_elfn32.c linux-2.6.29/arch/mips/kernel/binfmt_elfn32.c +--- linux-2.6.29/arch/mips/kernel/binfmt_elfn32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/mips/kernel/binfmt_elfn32.c 2009-03-28 14:26:18.000000000 -0400 +@@ -50,6 +50,13 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_N + #undef ELF_ET_DYN_BASE + #define ELF_ET_DYN_BASE (TASK32_SIZE / 3 * 2) + ++#ifdef CONFIG_PAX_ASLR ++#define PAX_ELF_ET_DYN_BASE (test_thread_flag(TIF_32BIT_ADDR) ? 0x00400000UL : 0x00400000UL) ++ ++#define PAX_DELTA_MMAP_LEN (test_thread_flag(TIF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) ++#define PAX_DELTA_STACK_LEN (test_thread_flag(TIF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) ++#endif ++ + #include + #include + #include +diff -urNp linux-2.6.29/arch/mips/kernel/binfmt_elfo32.c linux-2.6.29/arch/mips/kernel/binfmt_elfo32.c +--- linux-2.6.29/arch/mips/kernel/binfmt_elfo32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/mips/kernel/binfmt_elfo32.c 2009-03-28 14:26:18.000000000 -0400 +@@ -52,6 +52,13 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_N + #undef ELF_ET_DYN_BASE + #define ELF_ET_DYN_BASE (TASK32_SIZE / 3 * 2) + ++#ifdef CONFIG_PAX_ASLR ++#define PAX_ELF_ET_DYN_BASE (test_thread_flag(TIF_32BIT_ADDR) ? 0x00400000UL : 0x00400000UL) ++ ++#define PAX_DELTA_MMAP_LEN (test_thread_flag(TIF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) ++#define PAX_DELTA_STACK_LEN (test_thread_flag(TIF_32BIT_ADDR) ? 
27-PAGE_SHIFT : 36-PAGE_SHIFT) ++#endif ++ + #include + #include + #include +diff -urNp linux-2.6.29/arch/mips/kernel/process.c linux-2.6.29/arch/mips/kernel/process.c +--- linux-2.6.29/arch/mips/kernel/process.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/mips/kernel/process.c 2009-03-28 14:26:18.000000000 -0400 +@@ -457,15 +457,3 @@ unsigned long get_wchan(struct task_stru + out: + return pc; + } +- +-/* +- * Don't forget that the stack pointer must be aligned on a 8 bytes +- * boundary for 32-bits ABI and 16 bytes for 64-bits ABI. +- */ +-unsigned long arch_align_stack(unsigned long sp) +-{ +- if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) +- sp -= get_random_int() & ~PAGE_MASK; +- +- return sp & ALMASK; +-} +diff -urNp linux-2.6.29/arch/mips/kernel/syscall.c linux-2.6.29/arch/mips/kernel/syscall.c +--- linux-2.6.29/arch/mips/kernel/syscall.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/mips/kernel/syscall.c 2009-03-28 14:26:18.000000000 -0400 +@@ -99,6 +99,11 @@ unsigned long arch_get_unmapped_area(str + do_color_align = 0; + if (filp || (flags & MAP_SHARED)) + do_color_align = 1; ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ if (!(current->mm->pax_flags & MF_PAX_RANDMMAP)) ++#endif ++ + if (addr) { + if (do_color_align) + addr = COLOUR_ALIGN(addr, pgoff); +@@ -109,7 +114,7 @@ unsigned long arch_get_unmapped_area(str + (!vmm || addr + len <= vmm->vm_start)) + return addr; + } +- addr = TASK_UNMAPPED_BASE; ++ addr = current->mm->mmap_base; + if (do_color_align) + addr = COLOUR_ALIGN(addr, pgoff); + else +diff -urNp linux-2.6.29/arch/mips/mm/fault.c linux-2.6.29/arch/mips/mm/fault.c +--- linux-2.6.29/arch/mips/mm/fault.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/mips/mm/fault.c 2009-03-28 14:26:18.000000000 -0400 +@@ -26,6 +26,23 @@ + #include + #include /* For VMALLOC_END */ + ++#ifdef CONFIG_PAX_PAGEEXEC ++void pax_report_insns(void *pc) ++{ ++ unsigned long i; ++ ++ printk(KERN_ERR "PAX: bytes at PC: 
"); ++ for (i = 0; i < 5; i++) { ++ unsigned int c; ++ if (get_user(c, (unsigned int *)pc+i)) ++ printk(KERN_CONT "???????? "); ++ else ++ printk(KERN_CONT "%08x ", c); ++ } ++ printk("\n"); ++} ++#endif ++ + /* + * This routine handles page faults. It determines the address, + * and the problem, and then passes it off to one of the appropriate +diff -urNp linux-2.6.29/arch/parisc/include/asm/elf.h linux-2.6.29/arch/parisc/include/asm/elf.h +--- linux-2.6.29/arch/parisc/include/asm/elf.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/parisc/include/asm/elf.h 2009-03-28 14:26:18.000000000 -0400 +@@ -333,6 +333,13 @@ struct pt_regs; /* forward declaration.. + + #define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x01000000) + ++#ifdef CONFIG_PAX_ASLR ++#define PAX_ELF_ET_DYN_BASE 0x10000UL ++ ++#define PAX_DELTA_MMAP_LEN 16 ++#define PAX_DELTA_STACK_LEN 16 ++#endif ++ + /* This yields a mask that user programs can use to figure out what + instruction set this CPU supports. This could be done in user space, + but it's not easy, and we've already done it here. 
*/ +diff -urNp linux-2.6.29/arch/parisc/include/asm/kmap_types.h linux-2.6.29/arch/parisc/include/asm/kmap_types.h +--- linux-2.6.29/arch/parisc/include/asm/kmap_types.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/parisc/include/asm/kmap_types.h 2009-03-28 14:26:18.000000000 -0400 +@@ -22,7 +22,8 @@ D(9) KM_IRQ0, + D(10) KM_IRQ1, + D(11) KM_SOFTIRQ0, + D(12) KM_SOFTIRQ1, +-D(13) KM_TYPE_NR ++D(13) KM_CLEARPAGE, ++D(14) KM_TYPE_NR + }; + + #undef D +diff -urNp linux-2.6.29/arch/parisc/include/asm/pgtable.h linux-2.6.29/arch/parisc/include/asm/pgtable.h +--- linux-2.6.29/arch/parisc/include/asm/pgtable.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/parisc/include/asm/pgtable.h 2009-03-28 14:26:18.000000000 -0400 +@@ -202,6 +202,17 @@ + #define PAGE_EXECREAD __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_EXEC |_PAGE_ACCESSED) + #define PAGE_COPY PAGE_EXECREAD + #define PAGE_RWX __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_EXEC |_PAGE_ACCESSED) ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++# define PAGE_SHARED_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_ACCESSED) ++# define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_ACCESSED) ++# define PAGE_READONLY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_ACCESSED) ++#else ++# define PAGE_SHARED_NOEXEC PAGE_SHARED ++# define PAGE_COPY_NOEXEC PAGE_COPY ++# define PAGE_READONLY_NOEXEC PAGE_READONLY ++#endif ++ + #define PAGE_KERNEL __pgprot(_PAGE_KERNEL) + #define PAGE_KERNEL_RO __pgprot(_PAGE_KERNEL & ~_PAGE_WRITE) + #define PAGE_KERNEL_UNC __pgprot(_PAGE_KERNEL | _PAGE_NO_CACHE) +diff -urNp linux-2.6.29/arch/parisc/kernel/module.c linux-2.6.29/arch/parisc/kernel/module.c +--- linux-2.6.29/arch/parisc/kernel/module.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/parisc/kernel/module.c 2009-03-28 14:26:18.000000000 -0400 +@@ -93,16 +93,38 @@ + + /* three functions to determine where 
in the module core + * or init pieces the location is */ ++static inline int in_init_rx(struct module *me, void *loc) ++{ ++ return (loc >= me->module_init_rx && ++ loc < (me->module_init_rx + me->init_size_rx)); ++} ++ ++static inline int in_init_rw(struct module *me, void *loc) ++{ ++ return (loc >= me->module_init_rw && ++ loc < (me->module_init_rw + me->init_size_rw)); ++} ++ + static inline int in_init(struct module *me, void *loc) + { +- return (loc >= me->module_init && +- loc <= (me->module_init + me->init_size)); ++ return in_init_rx(me, loc) || in_init_rw(me, loc); ++} ++ ++static inline int in_core_rx(struct module *me, void *loc) ++{ ++ return (loc >= me->module_core_rx && ++ loc < (me->module_core_rx + me->core_size_rx)); ++} ++ ++static inline int in_core_rw(struct module *me, void *loc) ++{ ++ return (loc >= me->module_core_rw && ++ loc < (me->module_core_rw + me->core_size_rw)); + } + + static inline int in_core(struct module *me, void *loc) + { +- return (loc >= me->module_core && +- loc <= (me->module_core + me->core_size)); ++ return in_core_rx(me, loc) || in_core_rw(me, loc); + } + + static inline int in_local(struct module *me, void *loc) +@@ -340,13 +362,13 @@ int module_frob_arch_sections(CONST Elf_ + } + + /* align things a bit */ +- me->core_size = ALIGN(me->core_size, 16); +- me->arch.got_offset = me->core_size; +- me->core_size += gots * sizeof(struct got_entry); +- +- me->core_size = ALIGN(me->core_size, 16); +- me->arch.fdesc_offset = me->core_size; +- me->core_size += fdescs * sizeof(Elf_Fdesc); ++ me->core_size_rw = ALIGN(me->core_size_rw, 16); ++ me->arch.got_offset = me->core_size_rw; ++ me->core_size_rw += gots * sizeof(struct got_entry); ++ ++ me->core_size_rw = ALIGN(me->core_size_rw, 16); ++ me->arch.fdesc_offset = me->core_size_rw; ++ me->core_size_rw += fdescs * sizeof(Elf_Fdesc); + + me->arch.got_max = gots; + me->arch.fdesc_max = fdescs; +@@ -364,7 +386,7 @@ static Elf64_Word get_got(struct module + + BUG_ON(value == 0); + 
+- got = me->module_core + me->arch.got_offset; ++ got = me->module_core_rw + me->arch.got_offset; + for (i = 0; got[i].addr; i++) + if (got[i].addr == value) + goto out; +@@ -382,7 +404,7 @@ static Elf64_Word get_got(struct module + #ifdef CONFIG_64BIT + static Elf_Addr get_fdesc(struct module *me, unsigned long value) + { +- Elf_Fdesc *fdesc = me->module_core + me->arch.fdesc_offset; ++ Elf_Fdesc *fdesc = me->module_core_rw + me->arch.fdesc_offset; + + if (!value) { + printk(KERN_ERR "%s: zero OPD requested!\n", me->name); +@@ -400,7 +422,7 @@ static Elf_Addr get_fdesc(struct module + + /* Create new one */ + fdesc->addr = value; +- fdesc->gp = (Elf_Addr)me->module_core + me->arch.got_offset; ++ fdesc->gp = (Elf_Addr)me->module_core_rw + me->arch.got_offset; + return (Elf_Addr)fdesc; + } + #endif /* CONFIG_64BIT */ +@@ -816,7 +838,7 @@ register_unwind_table(struct module *me, + + table = (unsigned char *)sechdrs[me->arch.unwind_section].sh_addr; + end = table + sechdrs[me->arch.unwind_section].sh_size; +- gp = (Elf_Addr)me->module_core + me->arch.got_offset; ++ gp = (Elf_Addr)me->module_core_rw + me->arch.got_offset; + + DEBUGP("register_unwind_table(), sect = %d at 0x%p - 0x%p (gp=0x%lx)\n", + me->arch.unwind_section, table, end, gp); +diff -urNp linux-2.6.29/arch/parisc/kernel/sys_parisc.c linux-2.6.29/arch/parisc/kernel/sys_parisc.c +--- linux-2.6.29/arch/parisc/kernel/sys_parisc.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/parisc/kernel/sys_parisc.c 2009-03-28 14:26:18.000000000 -0400 +@@ -98,7 +98,7 @@ unsigned long arch_get_unmapped_area(str + if (flags & MAP_FIXED) + return addr; + if (!addr) +- addr = TASK_UNMAPPED_BASE; ++ addr = current->mm->mmap_base; + + if (filp) { + addr = get_shared_area(filp->f_mapping, addr, len, pgoff); +diff -urNp linux-2.6.29/arch/parisc/kernel/traps.c linux-2.6.29/arch/parisc/kernel/traps.c +--- linux-2.6.29/arch/parisc/kernel/traps.c 2009-03-23 19:12:14.000000000 -0400 ++++ 
linux-2.6.29/arch/parisc/kernel/traps.c 2009-03-28 14:26:18.000000000 -0400 +@@ -731,9 +731,7 @@ void handle_interruption(int code, struc + + down_read(&current->mm->mmap_sem); + vma = find_vma(current->mm,regs->iaoq[0]); +- if (vma && (regs->iaoq[0] >= vma->vm_start) +- && (vma->vm_flags & VM_EXEC)) { +- ++ if (vma && (regs->iaoq[0] >= vma->vm_start)) { + fault_address = regs->iaoq[0]; + fault_space = regs->iasq[0]; + +diff -urNp linux-2.6.29/arch/parisc/mm/fault.c linux-2.6.29/arch/parisc/mm/fault.c +--- linux-2.6.29/arch/parisc/mm/fault.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/parisc/mm/fault.c 2009-03-28 14:26:18.000000000 -0400 +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -53,7 +54,7 @@ DEFINE_PER_CPU(struct exception_data, ex + static unsigned long + parisc_acctyp(unsigned long code, unsigned int inst) + { +- if (code == 6 || code == 16) ++ if (code == 6 || code == 7 || code == 16) + return VM_EXEC; + + switch (inst & 0xf0000000) { +@@ -139,6 +140,116 @@ parisc_acctyp(unsigned long code, unsign + } + #endif + ++#ifdef CONFIG_PAX_PAGEEXEC ++/* ++ * PaX: decide what to do with offenders (instruction_pointer(regs) = fault address) ++ * ++ * returns 1 when task should be killed ++ * 2 when rt_sigreturn trampoline was detected ++ * 3 when unpatched PLT trampoline was detected ++ */ ++static int pax_handle_fetch_fault(struct pt_regs *regs) ++{ ++ ++#ifdef CONFIG_PAX_EMUPLT ++ int err; ++ ++ do { /* PaX: unpatched PLT emulation */ ++ unsigned int bl, depwi; ++ ++ err = get_user(bl, (unsigned int *)instruction_pointer(regs)); ++ err |= get_user(depwi, (unsigned int *)(instruction_pointer(regs)+4)); ++ ++ if (err) ++ break; ++ ++ if (bl == 0xEA9F1FDDU && depwi == 0xD6801C1EU) { ++ unsigned int ldw, bv, ldw2, addr = instruction_pointer(regs)-12; ++ ++ err = get_user(ldw, (unsigned int *)addr); ++ err |= get_user(bv, (unsigned int *)(addr+4)); ++ err |= get_user(ldw2, (unsigned int *)(addr+8)); ++ ++ if (err)
++ break; ++ ++ if (ldw == 0x0E801096U && ++ bv == 0xEAC0C000U && ++ ldw2 == 0x0E881095U) ++ { ++ unsigned int resolver, map; ++ ++ err = get_user(resolver, (unsigned int *)(instruction_pointer(regs)+8)); ++ err |= get_user(map, (unsigned int *)(instruction_pointer(regs)+12)); ++ if (err) ++ break; ++ ++ regs->gr[20] = instruction_pointer(regs)+8; ++ regs->gr[21] = map; ++ regs->gr[22] = resolver; ++ regs->iaoq[0] = resolver | 3UL; ++ regs->iaoq[1] = regs->iaoq[0] + 4; ++ return 3; ++ } ++ } ++ } while (0); ++#endif ++ ++#ifdef CONFIG_PAX_EMUTRAMP ++ ++#ifndef CONFIG_PAX_EMUSIGRT ++ if (!(current->mm->pax_flags & MF_PAX_EMUTRAMP)) ++ return 1; ++#endif ++ ++ do { /* PaX: rt_sigreturn emulation */ ++ unsigned int ldi1, ldi2, bel, nop; ++ ++ err = get_user(ldi1, (unsigned int *)instruction_pointer(regs)); ++ err |= get_user(ldi2, (unsigned int *)(instruction_pointer(regs)+4)); ++ err |= get_user(bel, (unsigned int *)(instruction_pointer(regs)+8)); ++ err |= get_user(nop, (unsigned int *)(instruction_pointer(regs)+12)); ++ ++ if (err) ++ break; ++ ++ if ((ldi1 == 0x34190000U || ldi1 == 0x34190002U) && ++ ldi2 == 0x3414015AU && ++ bel == 0xE4008200U && ++ nop == 0x08000240U) ++ { ++ regs->gr[25] = (ldi1 & 2) >> 1; ++ regs->gr[20] = __NR_rt_sigreturn; ++ regs->gr[31] = regs->iaoq[1] + 16; ++ regs->sr[0] = regs->iasq[1]; ++ regs->iaoq[0] = 0x100UL; ++ regs->iaoq[1] = regs->iaoq[0] + 4; ++ regs->iasq[0] = regs->sr[2]; ++ regs->iasq[1] = regs->sr[2]; ++ return 2; ++ } ++ } while (0); ++#endif ++ ++ return 1; ++} ++ ++void pax_report_insns(void *pc, void *sp) ++{ ++ unsigned long i; ++ ++ printk(KERN_ERR "PAX: bytes at PC: "); ++ for (i = 0; i < 5; i++) { ++ unsigned int c; ++ if (get_user(c, (unsigned int *)pc+i)) ++ printk(KERN_CONT "???????? 
"); ++ else ++ printk(KERN_CONT "%08x ", c); ++ } ++ printk("\n"); ++} ++#endif ++ + int fixup_exception(struct pt_regs *regs) + { + const struct exception_table_entry *fix; +@@ -193,8 +304,33 @@ good_area: + + acc_type = parisc_acctyp(code,regs->iir); + +- if ((vma->vm_flags & acc_type) != acc_type) ++ if ((vma->vm_flags & acc_type) != acc_type) { ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ if ((mm->pax_flags & MF_PAX_PAGEEXEC) && (acc_type & VM_EXEC) && ++ (address & ~3UL) == instruction_pointer(regs)) ++ { ++ up_read(&mm->mmap_sem); ++ switch (pax_handle_fetch_fault(regs)) { ++ ++#ifdef CONFIG_PAX_EMUPLT ++ case 3: ++ return; ++#endif ++ ++#ifdef CONFIG_PAX_EMUTRAMP ++ case 2: ++ return; ++#endif ++ ++ } ++ pax_report_fault(regs, (void *)instruction_pointer(regs), (void *)regs->gr[30]); ++ do_group_exit(SIGKILL); ++ } ++#endif ++ + goto bad_area; ++ } + + /* + * If for any reason at all we couldn't handle the fault, make +diff -urNp linux-2.6.29/arch/powerpc/include/asm/elf.h linux-2.6.29/arch/powerpc/include/asm/elf.h +--- linux-2.6.29/arch/powerpc/include/asm/elf.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/powerpc/include/asm/elf.h 2009-03-28 14:26:18.000000000 -0400 +@@ -180,6 +180,18 @@ typedef elf_fpreg_t elf_vsrreghalf_t32[E + + #define ELF_ET_DYN_BASE (0x20000000) + ++#ifdef CONFIG_PAX_ASLR ++#define PAX_ELF_ET_DYN_BASE (0x10000000UL) ++ ++#ifdef __powerpc64__ ++#define PAX_DELTA_MMAP_LEN (test_thread_flag(TIF_32BIT) ? 16 : 28) ++#define PAX_DELTA_STACK_LEN (test_thread_flag(TIF_32BIT) ? 16 : 28) ++#else ++#define PAX_DELTA_MMAP_LEN 15 ++#define PAX_DELTA_STACK_LEN 15 ++#endif ++#endif ++ + /* + * Our registers are always unsigned longs, whether we're a 32 bit + * process or 64 bit, on either a 64 bit or 32 bit kernel. 
+diff -urNp linux-2.6.29/arch/powerpc/include/asm/kmap_types.h linux-2.6.29/arch/powerpc/include/asm/kmap_types.h +--- linux-2.6.29/arch/powerpc/include/asm/kmap_types.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/powerpc/include/asm/kmap_types.h 2009-03-28 14:26:18.000000000 -0400 +@@ -26,6 +26,7 @@ enum km_type { + KM_SOFTIRQ1, + KM_PPC_SYNC_PAGE, + KM_PPC_SYNC_ICACHE, ++ KM_CLEARPAGE, + KM_TYPE_NR + }; + +diff -urNp linux-2.6.29/arch/powerpc/include/asm/page_64.h linux-2.6.29/arch/powerpc/include/asm/page_64.h +--- linux-2.6.29/arch/powerpc/include/asm/page_64.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/powerpc/include/asm/page_64.h 2009-03-28 14:26:18.000000000 -0400 +@@ -170,15 +170,18 @@ do { \ + * stack by default, so in the absense of a PT_GNU_STACK program header + * we turn execute permission off. + */ +-#define VM_STACK_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \ +- VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) ++#define VM_STACK_DEFAULT_FLAGS32 \ ++ (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ ++ VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + + #define VM_STACK_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + ++#ifndef CONFIG_PAX_PAGEEXEC + #define VM_STACK_DEFAULT_FLAGS \ + (test_thread_flag(TIF_32BIT) ? \ + VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64) ++#endif + + #include + +diff -urNp linux-2.6.29/arch/powerpc/include/asm/page.h linux-2.6.29/arch/powerpc/include/asm/page.h +--- linux-2.6.29/arch/powerpc/include/asm/page.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/powerpc/include/asm/page.h 2009-03-28 14:26:18.000000000 -0400 +@@ -114,8 +114,9 @@ extern phys_addr_t kernstart_addr; + * and needs to be executable. This means the whole heap ends + * up being executable. 
+ */ +-#define VM_DATA_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \ +- VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) ++#define VM_DATA_DEFAULT_FLAGS32 \ ++ (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ ++ VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + + #define VM_DATA_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +diff -urNp linux-2.6.29/arch/powerpc/kernel/module_32.c linux-2.6.29/arch/powerpc/kernel/module_32.c +--- linux-2.6.29/arch/powerpc/kernel/module_32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/powerpc/kernel/module_32.c 2009-03-28 14:26:18.000000000 -0400 +@@ -162,7 +162,7 @@ int module_frob_arch_sections(Elf32_Ehdr + me->arch.core_plt_section = i; + } + if (!me->arch.core_plt_section || !me->arch.init_plt_section) { +- printk("Module doesn't contain .plt or .init.plt sections.\n"); ++ printk("Module %s doesn't contain .plt or .init.plt sections.\n", me->name); + return -ENOEXEC; + } + +@@ -203,11 +203,16 @@ static uint32_t do_plt_call(void *locati + + DEBUGP("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location); + /* Init, or core PLT? */ +- if (location >= mod->module_core +- && location < mod->module_core + mod->core_size) ++ if ((location >= mod->module_core_rx && location < mod->module_core_rx + mod->core_size_rx) || ++ (location >= mod->module_core_rw && location < mod->module_core_rw + mod->core_size_rw)) + entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr; +- else ++ else if ((location >= mod->module_init_rx && location < mod->module_init_rx + mod->init_size_rx) || ++ (location >= mod->module_init_rw && location < mod->module_init_rw + mod->init_size_rw)) + entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr; ++ else { ++ printk(KERN_ERR "%s: invalid R_PPC_REL24 entry found\n", mod->name); ++ return ~0UL; ++ } + + /* Find this entry, or if that fails, the next avail. 
entry */ + while (entry->jump[0]) { +diff -urNp linux-2.6.29/arch/powerpc/kernel/signal_32.c linux-2.6.29/arch/powerpc/kernel/signal_32.c +--- linux-2.6.29/arch/powerpc/kernel/signal_32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/powerpc/kernel/signal_32.c 2009-03-28 14:26:18.000000000 -0400 +@@ -857,7 +857,7 @@ int handle_rt_signal32(unsigned long sig + /* Save user registers on the stack */ + frame = &rt_sf->uc.uc_mcontext; + addr = frame; +- if (vdso32_rt_sigtramp && current->mm->context.vdso_base) { ++ if (vdso32_rt_sigtramp && current->mm->context.vdso_base != ~0UL) { + if (save_user_regs(regs, frame, 0, 1)) + goto badframe; + regs->link = current->mm->context.vdso_base + vdso32_rt_sigtramp; +diff -urNp linux-2.6.29/arch/powerpc/kernel/signal_64.c linux-2.6.29/arch/powerpc/kernel/signal_64.c +--- linux-2.6.29/arch/powerpc/kernel/signal_64.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/powerpc/kernel/signal_64.c 2009-03-28 14:26:18.000000000 -0400 +@@ -429,7 +429,7 @@ int handle_rt_signal64(int signr, struct + current->thread.fpscr.val = 0; + + /* Set up to return from userspace. 
*/ +- if (vdso64_rt_sigtramp && current->mm->context.vdso_base) { ++ if (vdso64_rt_sigtramp && current->mm->context.vdso_base != ~0UL) { + regs->link = current->mm->context.vdso_base + vdso64_rt_sigtramp; + } else { + err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]); +diff -urNp linux-2.6.29/arch/powerpc/kernel/vdso.c linux-2.6.29/arch/powerpc/kernel/vdso.c +--- linux-2.6.29/arch/powerpc/kernel/vdso.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/powerpc/kernel/vdso.c 2009-03-28 14:26:18.000000000 -0400 +@@ -211,7 +211,7 @@ int arch_setup_additional_pages(struct l + vdso_base = VDSO32_MBASE; + #endif + +- current->mm->context.vdso_base = 0; ++ current->mm->context.vdso_base = ~0UL; + + /* vDSO has a problem and was disabled, just don't "enable" it for the + * process +@@ -228,7 +228,7 @@ int arch_setup_additional_pages(struct l + */ + down_write(&mm->mmap_sem); + vdso_base = get_unmapped_area(NULL, vdso_base, +- vdso_pages << PAGE_SHIFT, 0, 0); ++ vdso_pages << PAGE_SHIFT, 0, MAP_PRIVATE | MAP_EXECUTABLE); + if (IS_ERR_VALUE(vdso_base)) { + rc = vdso_base; + goto fail_mmapsem; +diff -urNp linux-2.6.29/arch/powerpc/mm/fault.c linux-2.6.29/arch/powerpc/mm/fault.c +--- linux-2.6.29/arch/powerpc/mm/fault.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/powerpc/mm/fault.c 2009-03-28 14:26:18.000000000 -0400 +@@ -29,6 +29,10 @@ + #include + #include + #include ++#include ++#include ++#include ++#include + + #include + #include +@@ -63,6 +67,363 @@ static inline int notify_page_fault(stru + } + #endif + ++#ifdef CONFIG_PAX_EMUSIGRT ++void pax_syscall_close(struct vm_area_struct *vma) ++{ ++ vma->vm_mm->call_syscall = 0UL; ++} ++ ++static int pax_syscall_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ++{ ++ unsigned int *kaddr; ++ ++ vmf->page = alloc_page(GFP_HIGHUSER); ++ if (!vmf->page) ++ return VM_FAULT_OOM; ++ ++ kaddr = kmap(vmf->page); ++ memset(kaddr, 0, PAGE_SIZE); ++ kaddr[0] = 0x44000002U; /* sc */ ++ 
__flush_dcache_icache(kaddr); ++ kunmap(vmf->page); ++ return VM_FAULT_MAJOR; ++} ++ ++static struct vm_operations_struct pax_vm_ops = { ++ .close = pax_syscall_close, ++ .fault = pax_syscall_fault ++}; ++ ++static int pax_insert_vma(struct vm_area_struct *vma, unsigned long addr) ++{ ++ int ret; ++ ++ vma->vm_mm = current->mm; ++ vma->vm_start = addr; ++ vma->vm_end = addr + PAGE_SIZE; ++ vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC; ++ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); ++ vma->vm_ops = &pax_vm_ops; ++ ++ ret = insert_vm_struct(current->mm, vma); ++ if (ret) ++ return ret; ++ ++ ++current->mm->total_vm; ++ return 0; ++} ++#endif ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++/* ++ * PaX: decide what to do with offenders (regs->nip = fault address) ++ * ++ * returns 1 when task should be killed ++ * 2 when patched GOT trampoline was detected ++ * 3 when patched PLT trampoline was detected ++ * 4 when unpatched PLT trampoline was detected ++ * 5 when sigreturn trampoline was detected ++ * 6 when rt_sigreturn trampoline was detected ++ */ ++static int pax_handle_fetch_fault(struct pt_regs *regs) ++{ ++ ++#if defined(CONFIG_PAX_EMUPLT) || defined(CONFIG_PAX_EMUSIGRT) ++ int err; ++#endif ++ ++#ifdef CONFIG_PAX_EMUPLT ++ do { /* PaX: patched GOT emulation */ ++ unsigned int blrl; ++ ++ err = get_user(blrl, (unsigned int *)regs->nip); ++ ++ if (!err && blrl == 0x4E800021U) { ++ unsigned long temp = regs->nip; ++ ++ regs->nip = regs->link & 0xFFFFFFFCUL; ++ regs->link = temp + 4UL; ++ return 2; ++ } ++ } while (0); ++ ++ do { /* PaX: patched PLT emulation #1 */ ++ unsigned int b; ++ ++ err = get_user(b, (unsigned int *)regs->nip); ++ ++ if (!err && (b & 0xFC000003U) == 0x48000000U) { ++ regs->nip += (((b | 0xFC000000UL) ^ 0x02000000UL) + 0x02000000UL); ++ return 3; ++ } ++ } while (0); ++ ++ do { /* PaX: unpatched PLT emulation #1 */ ++ unsigned int li, b; ++ ++ err = get_user(li, (unsigned int *)regs->nip); ++ err |= get_user(b, (unsigned int 
*)(regs->nip+4)); ++ ++ if (!err && (li & 0xFFFF0000U) == 0x39600000U && (b & 0xFC000003U) == 0x48000000U) { ++ unsigned int rlwinm, add, li2, addis2, mtctr, li3, addis3, bctr; ++ unsigned long addr = b | 0xFC000000UL; ++ ++ addr = regs->nip + 4 + ((addr ^ 0x02000000UL) + 0x02000000UL); ++ err = get_user(rlwinm, (unsigned int *)addr); ++ err |= get_user(add, (unsigned int *)(addr+4)); ++ err |= get_user(li2, (unsigned int *)(addr+8)); ++ err |= get_user(addis2, (unsigned int *)(addr+12)); ++ err |= get_user(mtctr, (unsigned int *)(addr+16)); ++ err |= get_user(li3, (unsigned int *)(addr+20)); ++ err |= get_user(addis3, (unsigned int *)(addr+24)); ++ err |= get_user(bctr, (unsigned int *)(addr+28)); ++ ++ if (err) ++ break; ++ ++ if (rlwinm == 0x556C083CU && ++ add == 0x7D6C5A14U && ++ (li2 & 0xFFFF0000U) == 0x39800000U && ++ (addis2 & 0xFFFF0000U) == 0x3D8C0000U && ++ mtctr == 0x7D8903A6U && ++ (li3 & 0xFFFF0000U) == 0x39800000U && ++ (addis3 & 0xFFFF0000U) == 0x3D8C0000U && ++ bctr == 0x4E800420U) ++ { ++ regs->gpr[PT_R11] = 3 * (((li | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); ++ regs->gpr[PT_R12] = (((li3 | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); ++ regs->gpr[PT_R12] += (addis3 & 0xFFFFU) << 16; ++ regs->ctr = (((li2 | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); ++ regs->ctr += (addis2 & 0xFFFFU) << 16; ++ regs->nip = regs->ctr; ++ return 4; ++ } ++ } ++ } while (0); ++ ++#if 0 ++ do { /* PaX: unpatched PLT emulation #2 */ ++ unsigned int lis, lwzu, b, bctr; ++ ++ err = get_user(lis, (unsigned int *)regs->nip); ++ err |= get_user(lwzu, (unsigned int *)(regs->nip+4)); ++ err |= get_user(b, (unsigned int *)(regs->nip+8)); ++ err |= get_user(bctr, (unsigned int *)(regs->nip+12)); ++ ++ if (err) ++ break; ++ ++ if ((lis & 0xFFFF0000U) == 0x39600000U && ++ (lwzu & 0xU) == 0xU && ++ (b & 0xFC000003U) == 0x48000000U && ++ bctr == 0x4E800420U) ++ { ++ unsigned int addis, addi, rlwinm, add, li2, addis2, mtctr, li3, addis3, bctr; ++ unsigned long addr = b | 
0xFC000000UL; ++ ++ addr = regs->nip + 12 + ((addr ^ 0x02000000UL) + 0x02000000UL); ++ err = get_user(addis, (unsigned int *)addr); ++ err |= get_user(addi, (unsigned int *)(addr+4)); ++ err |= get_user(rlwinm, (unsigned int *)(addr+8)); ++ err |= get_user(add, (unsigned int *)(addr+12)); ++ err |= get_user(li2, (unsigned int *)(addr+16)); ++ err |= get_user(addis2, (unsigned int *)(addr+20)); ++ err |= get_user(mtctr, (unsigned int *)(addr+24)); ++ err |= get_user(li3, (unsigned int *)(addr+28)); ++ err |= get_user(addis3, (unsigned int *)(addr+32)); ++ err |= get_user(bctr, (unsigned int *)(addr+36)); ++ ++ if (err) ++ break; ++ ++ if ((addis & 0xFFFF0000U) == 0x3D6B0000U && ++ (addi & 0xFFFF0000U) == 0x396B0000U && ++ rlwinm == 0x556C083CU && ++ add == 0x7D6C5A14U && ++ (li2 & 0xFFFF0000U) == 0x39800000U && ++ (addis2 & 0xFFFF0000U) == 0x3D8C0000U && ++ mtctr == 0x7D8903A6U && ++ (li3 & 0xFFFF0000U) == 0x39800000U && ++ (addis3 & 0xFFFF0000U) == 0x3D8C0000U && ++ bctr == 0x4E800420U) ++ { ++ regs->gpr[PT_R11] = 3 * (((li | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); ++ regs->gpr[PT_R12] = (((li3 | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); ++ regs->gpr[PT_R12] += (addis3 & 0xFFFFU) << 16; ++ regs->ctr = (((li2 | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); ++ regs->ctr += (addis2 & 0xFFFFU) << 16; ++ regs->nip = regs->ctr; ++ return 4; ++ } ++ } ++ } while (0); ++#endif ++ ++ do { /* PaX: unpatched PLT emulation #3 */ ++ unsigned int li, b; ++ ++ err = get_user(li, (unsigned int *)regs->nip); ++ err |= get_user(b, (unsigned int *)(regs->nip+4)); ++ ++ if (!err && (li & 0xFFFF0000U) == 0x39600000U && (b & 0xFC000003U) == 0x48000000U) { ++ unsigned int addis, lwz, mtctr, bctr; ++ unsigned long addr = b | 0xFC000000UL; ++ ++ addr = regs->nip + 4 + ((addr ^ 0x02000000UL) + 0x02000000UL); ++ err = get_user(addis, (unsigned int *)addr); ++ err |= get_user(lwz, (unsigned int *)(addr+4)); ++ err |= get_user(mtctr, (unsigned int *)(addr+8)); ++ err |= 
get_user(bctr, (unsigned int *)(addr+12)); ++ ++ if (err) ++ break; ++ ++ if ((addis & 0xFFFF0000U) == 0x3D6B0000U && ++ (lwz & 0xFFFF0000U) == 0x816B0000U && ++ mtctr == 0x7D6903A6U && ++ bctr == 0x4E800420U) ++ { ++ unsigned int r11; ++ ++ addr = (addis << 16) + (((li | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); ++ addr += (((lwz | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); ++ ++ err = get_user(r11, (unsigned int *)addr); ++ if (err) ++ break; ++ ++ regs->gpr[PT_R11] = r11; ++ regs->ctr = r11; ++ regs->nip = r11; ++ return 4; ++ } ++ } ++ } while (0); ++#endif ++ ++#ifdef CONFIG_PAX_EMUSIGRT ++ do { /* PaX: sigreturn emulation */ ++ unsigned int li, sc; ++ ++ err = get_user(li, (unsigned int *)regs->nip); ++ err |= get_user(sc, (unsigned int *)(regs->nip+4)); ++ ++ if (!err && li == 0x38000000U + __NR_sigreturn && sc == 0x44000002U) { ++ struct vm_area_struct *vma; ++ unsigned long call_syscall; ++ ++ down_read(&current->mm->mmap_sem); ++ call_syscall = current->mm->call_syscall; ++ up_read(&current->mm->mmap_sem); ++ if (likely(call_syscall)) ++ goto emulate; ++ ++ vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); ++ ++ down_write(&current->mm->mmap_sem); ++ if (current->mm->call_syscall) { ++ call_syscall = current->mm->call_syscall; ++ up_write(&current->mm->mmap_sem); ++ if (vma) ++ kmem_cache_free(vm_area_cachep, vma); ++ goto emulate; ++ } ++ ++ call_syscall = get_unmapped_area(NULL, 0UL, PAGE_SIZE, 0UL, MAP_PRIVATE); ++ if (!vma || (call_syscall & ~PAGE_MASK)) { ++ up_write(&current->mm->mmap_sem); ++ if (vma) ++ kmem_cache_free(vm_area_cachep, vma); ++ return 1; ++ } ++ ++ if (pax_insert_vma(vma, call_syscall)) { ++ up_write(&current->mm->mmap_sem); ++ kmem_cache_free(vm_area_cachep, vma); ++ return 1; ++ } ++ ++ current->mm->call_syscall = call_syscall; ++ up_write(&current->mm->mmap_sem); ++ ++emulate: ++ regs->gpr[PT_R0] = __NR_sigreturn; ++ regs->nip = call_syscall; ++ return 5; ++ } ++ } while (0); ++ ++ do { /* PaX: rt_sigreturn emulation */ ++ unsigned int li, sc; ++ ++ err =
get_user(li, (unsigned int *)regs->nip); ++ err |= get_user(sc, (unsigned int *)(regs->nip+4)); ++ ++ if (!err && li == 0x38000000U + __NR_rt_sigreturn && sc == 0x44000002U) { ++ struct vm_area_struct *vma; ++ unsigned int call_syscall; ++ ++ down_read(&current->mm->mmap_sem); ++ call_syscall = current->mm->call_syscall; ++ up_read(&current->mm->mmap_sem); ++ if (likely(call_syscall)) ++ goto rt_emulate; ++ ++ vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); ++ ++ down_write(&current->mm->mmap_sem); ++ if (current->mm->call_syscall) { ++ call_syscall = current->mm->call_syscall; ++ up_write(&current->mm->mmap_sem); ++ if (vma) ++ kmem_cache_free(vm_area_cachep, vma); ++ goto rt_emulate; ++ } ++ ++ call_syscall = get_unmapped_area(NULL, 0UL, PAGE_SIZE, 0UL, MAP_PRIVATE); ++ if (!vma || (call_syscall & ~PAGE_MASK)) { ++ up_write(&current->mm->mmap_sem); ++ if (vma) ++ kmem_cache_free(vm_area_cachep, vma); ++ return 1; ++ } ++ ++ if (pax_insert_vma(vma, call_syscall)) { ++ up_write(&current->mm->mmap_sem); ++ kmem_cache_free(vm_area_cachep, vma); ++ return 1; ++ } ++ ++ current->mm->call_syscall = call_syscall; ++ up_write(&current->mm->mmap_sem); ++ ++rt_emulate: ++ regs->gpr[PT_R0] = __NR_rt_sigreturn; ++ regs->nip = call_syscall; ++ return 6; ++ } ++ } while (0); ++#endif ++ ++ return 1; ++} ++ ++void pax_report_insns(void *pc, void *sp) ++{ ++ unsigned long i; ++ ++ printk(KERN_ERR "PAX: bytes at PC: "); ++ for (i = 0; i < 5; i++) { ++ unsigned int c; ++ if (get_user(c, (unsigned int *)pc+i)) ++ printk(KERN_CONT "???????? "); ++ else ++ printk(KERN_CONT "%08x ", c); ++ } ++ printk("\n"); ++} ++#endif ++ + /* + * Check whether the instruction at regs->nip is a store using + * an update addressing form which will update r1. +@@ -133,7 +494,7 @@ int __kprobes do_page_fault(struct pt_re + * indicate errors in DSISR but can validly be set in SRR1.
+ */ + if (trap == 0x400) +- error_code &= 0x48200000; ++ error_code &= 0x58200000; + else + is_write = error_code & DSISR_ISSTORE; + #else +@@ -339,6 +700,37 @@ bad_area: + bad_area_nosemaphore: + /* User mode accesses cause a SIGSEGV */ + if (user_mode(regs)) { ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ if (mm->pax_flags & MF_PAX_PAGEEXEC) { ++#ifdef CONFIG_PPC64 ++ if (is_exec && (error_code & DSISR_PROTFAULT)) { ++#else ++ if (is_exec && regs->nip == address) { ++#endif ++ switch (pax_handle_fetch_fault(regs)) { ++ ++#ifdef CONFIG_PAX_EMUPLT ++ case 2: ++ case 3: ++ case 4: ++ return 0; ++#endif ++ ++#ifdef CONFIG_PAX_EMUSIGRT ++ case 5: ++ case 6: ++ return 0; ++#endif ++ ++ } ++ ++ pax_report_fault(regs, (void *)regs->nip, (void *)regs->gpr[PT_R1]); ++ do_group_exit(SIGKILL); ++ } ++ } ++#endif ++ + _exception(SIGSEGV, regs, code, address); + return 0; + } +diff -urNp linux-2.6.29/arch/powerpc/mm/mmap.c linux-2.6.29/arch/powerpc/mm/mmap.c +--- linux-2.6.29/arch/powerpc/mm/mmap.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/powerpc/mm/mmap.c 2009-03-28 14:26:18.000000000 -0400 +@@ -75,10 +75,22 @@ void arch_pick_mmap_layout(struct mm_str + */ + if (mmap_is_legacy()) { + mm->mmap_base = TASK_UNMAPPED_BASE; ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ if (mm->pax_flags & MF_PAX_RANDMMAP) ++ mm->mmap_base += mm->delta_mmap; ++#endif ++ + mm->get_unmapped_area = arch_get_unmapped_area; + mm->unmap_area = arch_unmap_area; + } else { + mm->mmap_base = mmap_base(); ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ if (mm->pax_flags & MF_PAX_RANDMMAP) ++ mm->mmap_base -= mm->delta_mmap + mm->delta_stack; ++#endif ++ + mm->get_unmapped_area = arch_get_unmapped_area_topdown; + mm->unmap_area = arch_unmap_area_topdown; + } +diff -urNp linux-2.6.29/arch/s390/include/asm/kmap_types.h linux-2.6.29/arch/s390/include/asm/kmap_types.h +--- linux-2.6.29/arch/s390/include/asm/kmap_types.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/s390/include/asm/kmap_types.h 2009-03-28 
14:26:18.000000000 -0400 +@@ -16,6 +16,7 @@ enum km_type { + KM_IRQ1, + KM_SOFTIRQ0, + KM_SOFTIRQ1, ++ KM_CLEARPAGE, + KM_TYPE_NR + }; + +diff -urNp linux-2.6.29/arch/s390/kernel/module.c linux-2.6.29/arch/s390/kernel/module.c +--- linux-2.6.29/arch/s390/kernel/module.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/s390/kernel/module.c 2009-03-28 14:26:18.000000000 -0400 +@@ -166,11 +166,11 @@ module_frob_arch_sections(Elf_Ehdr *hdr, + + /* Increase core size by size of got & plt and set start + offsets for got and plt. */ +- me->core_size = ALIGN(me->core_size, 4); +- me->arch.got_offset = me->core_size; +- me->core_size += me->arch.got_size; +- me->arch.plt_offset = me->core_size; +- me->core_size += me->arch.plt_size; ++ me->core_size_rw = ALIGN(me->core_size_rw, 4); ++ me->arch.got_offset = me->core_size_rw; ++ me->core_size_rw += me->arch.got_size; ++ me->arch.plt_offset = me->core_size_rx; ++ me->core_size_rx += me->arch.plt_size; + return 0; + } + +@@ -256,7 +256,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base + if (info->got_initialized == 0) { + Elf_Addr *gotent; + +- gotent = me->module_core + me->arch.got_offset + ++ gotent = me->module_core_rw + me->arch.got_offset + + info->got_offset; + *gotent = val; + info->got_initialized = 1; +@@ -280,7 +280,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base + else if (r_type == R_390_GOTENT || + r_type == R_390_GOTPLTENT) + *(unsigned int *) loc = +- (val + (Elf_Addr) me->module_core - loc) >> 1; ++ (val + (Elf_Addr) me->module_core_rw - loc) >> 1; + else if (r_type == R_390_GOT64 || + r_type == R_390_GOTPLT64) + *(unsigned long *) loc = val; +@@ -294,7 +294,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base + case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. 
*/ + if (info->plt_initialized == 0) { + unsigned int *ip; +- ip = me->module_core + me->arch.plt_offset + ++ ip = me->module_core_rx + me->arch.plt_offset + + info->plt_offset; + #ifndef CONFIG_64BIT + ip[0] = 0x0d105810; /* basr 1,0; l 1,6(1); br 1 */ +@@ -316,7 +316,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base + val = me->arch.plt_offset - me->arch.got_offset + + info->plt_offset + rela->r_addend; + else +- val = (Elf_Addr) me->module_core + ++ val = (Elf_Addr) me->module_core_rx + + me->arch.plt_offset + info->plt_offset + + rela->r_addend - loc; + if (r_type == R_390_PLT16DBL) +@@ -336,7 +336,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base + case R_390_GOTOFF32: /* 32 bit offset to GOT. */ + case R_390_GOTOFF64: /* 64 bit offset to GOT. */ + val = val + rela->r_addend - +- ((Elf_Addr) me->module_core + me->arch.got_offset); ++ ((Elf_Addr) me->module_core_rw + me->arch.got_offset); + if (r_type == R_390_GOTOFF16) + *(unsigned short *) loc = val; + else if (r_type == R_390_GOTOFF32) +@@ -346,7 +346,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base + break; + case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */ + case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. 
*/ +- val = (Elf_Addr) me->module_core + me->arch.got_offset + ++ val = (Elf_Addr) me->module_core_rw + me->arch.got_offset + + rela->r_addend - loc; + if (r_type == R_390_GOTPC) + *(unsigned int *) loc = val; +diff -urNp linux-2.6.29/arch/sh/include/asm/kmap_types.h linux-2.6.29/arch/sh/include/asm/kmap_types.h +--- linux-2.6.29/arch/sh/include/asm/kmap_types.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/sh/include/asm/kmap_types.h 2009-03-28 14:26:18.000000000 -0400 +@@ -24,7 +24,8 @@ D(9) KM_IRQ0, + D(10) KM_IRQ1, + D(11) KM_SOFTIRQ0, + D(12) KM_SOFTIRQ1, +-D(13) KM_TYPE_NR ++D(13) KM_CLEARPAGE, ++D(14) KM_TYPE_NR + }; + + #undef D +diff -urNp linux-2.6.29/arch/sparc/include/asm/elf_32.h linux-2.6.29/arch/sparc/include/asm/elf_32.h +--- linux-2.6.29/arch/sparc/include/asm/elf_32.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/sparc/include/asm/elf_32.h 2009-03-28 14:26:18.000000000 -0400 +@@ -116,6 +116,13 @@ typedef struct { + + #define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE) + ++#ifdef CONFIG_PAX_ASLR ++#define PAX_ELF_ET_DYN_BASE 0x10000UL ++ ++#define PAX_DELTA_MMAP_LEN 16 ++#define PAX_DELTA_STACK_LEN 16 ++#endif ++ + /* This yields a mask that user programs can use to figure out what + instruction set this cpu supports. This can NOT be done in userspace + on Sparc. */ +diff -urNp linux-2.6.29/arch/sparc/include/asm/elf_64.h linux-2.6.29/arch/sparc/include/asm/elf_64.h +--- linux-2.6.29/arch/sparc/include/asm/elf_64.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/sparc/include/asm/elf_64.h 2009-03-28 14:26:18.000000000 -0400 +@@ -163,6 +163,12 @@ typedef struct { + #define ELF_ET_DYN_BASE 0x0000010000000000UL + #define COMPAT_ELF_ET_DYN_BASE 0x0000000070000000UL + ++#ifdef CONFIG_PAX_ASLR ++#define PAX_ELF_ET_DYN_BASE (test_thread_flag(TIF_32BIT) ? 0x10000UL : 0x100000UL) ++ ++#define PAX_DELTA_MMAP_LEN (test_thread_flag(TIF_32BIT) ? 14 : 28 ) ++#define PAX_DELTA_STACK_LEN (test_thread_flag(TIF_32BIT) ? 
15 : 29 ) ++#endif + + /* This yields a mask that user programs can use to figure out what + instruction set this cpu supports. */ +diff -urNp linux-2.6.29/arch/sparc/include/asm/kmap_types.h linux-2.6.29/arch/sparc/include/asm/kmap_types.h +--- linux-2.6.29/arch/sparc/include/asm/kmap_types.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/sparc/include/asm/kmap_types.h 2009-03-28 14:26:18.000000000 -0400 +@@ -19,6 +19,7 @@ enum km_type { + KM_IRQ1, + KM_SOFTIRQ0, + KM_SOFTIRQ1, ++ KM_CLEARPAGE, + KM_TYPE_NR + }; + +diff -urNp linux-2.6.29/arch/sparc/include/asm/pgtable_32.h linux-2.6.29/arch/sparc/include/asm/pgtable_32.h +--- linux-2.6.29/arch/sparc/include/asm/pgtable_32.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/sparc/include/asm/pgtable_32.h 2009-03-28 14:26:18.000000000 -0400 +@@ -43,6 +43,13 @@ BTFIXUPDEF_SIMM13(user_ptrs_per_pgd) + BTFIXUPDEF_INT(page_none) + BTFIXUPDEF_INT(page_copy) + BTFIXUPDEF_INT(page_readonly) ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++BTFIXUPDEF_INT(page_shared_noexec) ++BTFIXUPDEF_INT(page_copy_noexec) ++BTFIXUPDEF_INT(page_readonly_noexec) ++#endif ++ + BTFIXUPDEF_INT(page_kernel) + + #define PMD_SHIFT SUN4C_PMD_SHIFT +@@ -64,6 +71,16 @@ extern pgprot_t PAGE_SHARED; + #define PAGE_COPY __pgprot(BTFIXUP_INT(page_copy)) + #define PAGE_READONLY __pgprot(BTFIXUP_INT(page_readonly)) + ++#ifdef CONFIG_PAX_PAGEEXEC ++extern pgprot_t PAGE_SHARED_NOEXEC; ++# define PAGE_COPY_NOEXEC __pgprot(BTFIXUP_INT(page_copy_noexec)) ++# define PAGE_READONLY_NOEXEC __pgprot(BTFIXUP_INT(page_readonly_noexec)) ++#else ++# define PAGE_SHARED_NOEXEC PAGE_SHARED ++# define PAGE_COPY_NOEXEC PAGE_COPY ++# define PAGE_READONLY_NOEXEC PAGE_READONLY ++#endif ++ + extern unsigned long page_kernel; + + #ifdef MODULE +diff -urNp linux-2.6.29/arch/sparc/include/asm/pgtsrmmu.h linux-2.6.29/arch/sparc/include/asm/pgtsrmmu.h +--- linux-2.6.29/arch/sparc/include/asm/pgtsrmmu.h 2009-03-23 19:12:14.000000000 -0400 ++++ 
linux-2.6.29/arch/sparc/include/asm/pgtsrmmu.h 2009-03-28 14:26:18.000000000 -0400 +@@ -115,6 +115,13 @@ + SRMMU_EXEC | SRMMU_REF) + #define SRMMU_PAGE_RDONLY __pgprot(SRMMU_VALID | SRMMU_CACHE | \ + SRMMU_EXEC | SRMMU_REF) ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++#define SRMMU_PAGE_SHARED_NOEXEC __pgprot(SRMMU_VALID | SRMMU_CACHE | SRMMU_WRITE | SRMMU_REF) ++#define SRMMU_PAGE_COPY_NOEXEC __pgprot(SRMMU_VALID | SRMMU_CACHE | SRMMU_REF) ++#define SRMMU_PAGE_RDONLY_NOEXEC __pgprot(SRMMU_VALID | SRMMU_CACHE | SRMMU_REF) ++#endif ++ + #define SRMMU_PAGE_KERNEL __pgprot(SRMMU_VALID | SRMMU_CACHE | SRMMU_PRIV | \ + SRMMU_DIRTY | SRMMU_REF) + +diff -urNp linux-2.6.29/arch/sparc/kernel/Makefile linux-2.6.29/arch/sparc/kernel/Makefile +--- linux-2.6.29/arch/sparc/kernel/Makefile 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/sparc/kernel/Makefile 2009-03-28 14:26:18.000000000 -0400 +@@ -3,7 +3,7 @@ + # + + asflags-y := -ansi +-ccflags-y := -Werror ++#ccflags-y := -Werror + + extra-y := head_$(BITS).o + extra-y += init_task.o +diff -urNp linux-2.6.29/arch/sparc/kernel/sys_sparc_32.c linux-2.6.29/arch/sparc/kernel/sys_sparc_32.c +--- linux-2.6.29/arch/sparc/kernel/sys_sparc_32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/sparc/kernel/sys_sparc_32.c 2009-03-28 14:26:18.000000000 -0400 +@@ -56,7 +56,7 @@ unsigned long arch_get_unmapped_area(str + if (ARCH_SUN4C && len > 0x20000000) + return -ENOMEM; + if (!addr) +- addr = TASK_UNMAPPED_BASE; ++ addr = current->mm->mmap_base; + + if (flags & MAP_SHARED) + addr = COLOUR_ALIGN(addr); +diff -urNp linux-2.6.29/arch/sparc/kernel/sys_sparc_64.c linux-2.6.29/arch/sparc/kernel/sys_sparc_64.c +--- linux-2.6.29/arch/sparc/kernel/sys_sparc_64.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/sparc/kernel/sys_sparc_64.c 2009-03-28 14:26:18.000000000 -0400 +@@ -125,7 +125,7 @@ unsigned long arch_get_unmapped_area(str + /* We do not accept a shared mapping if it would violate + * cache aliasing constraints. 
+ */ +- if ((flags & MAP_SHARED) && ++ if ((filp || (flags & MAP_SHARED)) && + ((addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1))) + return -EINVAL; + return addr; +@@ -140,6 +140,10 @@ unsigned long arch_get_unmapped_area(str + if (filp || (flags & MAP_SHARED)) + do_color_align = 1; + ++#ifdef CONFIG_PAX_RANDMMAP ++ if (!(mm->pax_flags & MF_PAX_RANDMMAP)) ++#endif ++ + if (addr) { + if (do_color_align) + addr = COLOUR_ALIGN(addr, pgoff); +@@ -153,9 +157,9 @@ unsigned long arch_get_unmapped_area(str + } + + if (len > mm->cached_hole_size) { +- start_addr = addr = mm->free_area_cache; ++ start_addr = addr = mm->free_area_cache; + } else { +- start_addr = addr = TASK_UNMAPPED_BASE; ++ start_addr = addr = mm->mmap_base; + mm->cached_hole_size = 0; + } + +@@ -175,8 +179,8 @@ full_search: + vma = find_vma(mm, VA_EXCLUDE_END); + } + if (unlikely(task_size < addr)) { +- if (start_addr != TASK_UNMAPPED_BASE) { +- start_addr = addr = TASK_UNMAPPED_BASE; ++ if (start_addr != mm->mmap_base) { ++ start_addr = addr = mm->mmap_base; + mm->cached_hole_size = 0; + goto full_search; + } +@@ -216,7 +220,7 @@ arch_get_unmapped_area_topdown(struct fi + /* We do not accept a shared mapping if it would violate + * cache aliasing constraints. 
+ */ +- if ((flags & MAP_SHARED) && ++ if ((filp || (flags & MAP_SHARED)) && + ((addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1))) + return -EINVAL; + return addr; +@@ -380,6 +384,12 @@ void arch_pick_mmap_layout(struct mm_str + current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY || + sysctl_legacy_va_layout) { + mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ if (mm->pax_flags & MF_PAX_RANDMMAP) ++ mm->mmap_base += mm->delta_mmap; ++#endif ++ + mm->get_unmapped_area = arch_get_unmapped_area; + mm->unmap_area = arch_unmap_area; + } else { +@@ -394,6 +404,12 @@ void arch_pick_mmap_layout(struct mm_str + gap = (task_size / 6 * 5); + + mm->mmap_base = PAGE_ALIGN(task_size - gap - random_factor); ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ if (mm->pax_flags & MF_PAX_RANDMMAP) ++ mm->mmap_base -= mm->delta_mmap + mm->delta_stack; ++#endif ++ + mm->get_unmapped_area = arch_get_unmapped_area_topdown; + mm->unmap_area = arch_unmap_area_topdown; + } +diff -urNp linux-2.6.29/arch/sparc/Makefile linux-2.6.29/arch/sparc/Makefile +--- linux-2.6.29/arch/sparc/Makefile 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/sparc/Makefile 2009-03-28 14:26:18.000000000 -0400 +@@ -81,7 +81,7 @@ drivers-$(CONFIG_OPROFILE) += arch/sparc + # Export what is needed by arch/sparc/boot/Makefile + export VMLINUX_INIT VMLINUX_MAIN + VMLINUX_INIT := $(head-y) $(init-y) +-VMLINUX_MAIN := $(core-y) kernel/ mm/ fs/ ipc/ security/ crypto/ block/ ++VMLINUX_MAIN := $(core-y) kernel/ mm/ fs/ ipc/ security/ crypto/ block/ grsecurity/ + VMLINUX_MAIN += $(patsubst %/, %/lib.a, $(libs-y)) $(libs-y) + VMLINUX_MAIN += $(drivers-y) $(net-y) + +diff -urNp linux-2.6.29/arch/sparc/mm/fault_32.c linux-2.6.29/arch/sparc/mm/fault_32.c +--- linux-2.6.29/arch/sparc/mm/fault_32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/sparc/mm/fault_32.c 2009-03-28 14:26:18.000000000 -0400 +@@ -21,6 +21,9 @@ + #include + #include + #include ++#include ++#include 
++#include + + #include + #include +@@ -167,6 +170,249 @@ static unsigned long compute_si_addr(str + return safe_compute_effective_address(regs, insn); + } + ++#ifdef CONFIG_PAX_PAGEEXEC ++void pax_emuplt_close(struct vm_area_struct *vma) ++{ ++ vma->vm_mm->call_dl_resolve = 0UL; ++} ++ ++static int pax_emuplt_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ++{ ++ unsigned int *kaddr; ++ ++ vmf->page = alloc_page(GFP_HIGHUSER); ++ if (!vmf->page) ++ return VM_FAULT_OOM; ++ ++ kaddr = kmap(vmf->page); ++ memset(kaddr, 0, PAGE_SIZE); ++ kaddr[0] = 0x9DE3BFA8U; /* save */ ++ flush_dcache_page(vmf->page); ++ kunmap(vmf->page); ++ return VM_FAULT_MAJOR; ++} ++ ++static struct vm_operations_struct pax_vm_ops = { ++ .close = pax_emuplt_close, ++ .fault = pax_emuplt_fault ++}; ++ ++static int pax_insert_vma(struct vm_area_struct *vma, unsigned long addr) ++{ ++ int ret; ++ ++ vma->vm_mm = current->mm; ++ vma->vm_start = addr; ++ vma->vm_end = addr + PAGE_SIZE; ++ vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC; ++ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); ++ vma->vm_ops = &pax_vm_ops; ++ ++ ret = insert_vm_struct(current->mm, vma); ++ if (ret) ++ return ret; ++ ++ ++current->mm->total_vm; ++ return 0; ++} ++ ++/* ++ * PaX: decide what to do with offenders (regs->pc = fault address) ++ * ++ * returns 1 when task should be killed ++ * 2 when patched PLT trampoline was detected ++ * 3 when unpatched PLT trampoline was detected ++ */ ++static int pax_handle_fetch_fault(struct pt_regs *regs) ++{ ++ ++#ifdef CONFIG_PAX_EMUPLT ++ int err; ++ ++ do { /* PaX: patched PLT emulation #1 */ ++ unsigned int sethi1, sethi2, jmpl; ++ ++ err = get_user(sethi1, (unsigned int *)regs->pc); ++ err |= get_user(sethi2, (unsigned int *)(regs->pc+4)); ++ err |= get_user(jmpl, (unsigned int *)(regs->pc+8)); ++ ++ if (err) ++ break; ++ ++ if ((sethi1 & 0xFFC00000U) == 0x03000000U && ++ (sethi2 & 0xFFC00000U) == 0x03000000U && ++ (jmpl & 0xFFFFE000U) == 0x81C06000U) ++ 
{ ++ unsigned int addr; ++ ++ regs->u_regs[UREG_G1] = (sethi2 & 0x003FFFFFU) << 10; ++ addr = regs->u_regs[UREG_G1]; ++ addr += (((jmpl | 0xFFFFE000U) ^ 0x00001000U) + 0x00001000U); ++ regs->pc = addr; ++ regs->npc = addr+4; ++ return 2; ++ } ++ } while (0); ++ ++ { /* PaX: patched PLT emulation #2 */ ++ unsigned int ba; ++ ++ err = get_user(ba, (unsigned int *)regs->pc); ++ ++ if (!err && (ba & 0xFFC00000U) == 0x30800000U) { ++ unsigned int addr; ++ ++ addr = regs->pc + ((((ba | 0xFFC00000U) ^ 0x00200000U) + 0x00200000U) << 2); ++ regs->pc = addr; ++ regs->npc = addr+4; ++ return 2; ++ } ++ } ++ ++ do { /* PaX: patched PLT emulation #3 */ ++ unsigned int sethi, jmpl, nop; ++ ++ err = get_user(sethi, (unsigned int *)regs->pc); ++ err |= get_user(jmpl, (unsigned int *)(regs->pc+4)); ++ err |= get_user(nop, (unsigned int *)(regs->pc+8)); ++ ++ if (err) ++ break; ++ ++ if ((sethi & 0xFFC00000U) == 0x03000000U && ++ (jmpl & 0xFFFFE000U) == 0x81C06000U && ++ nop == 0x01000000U) ++ { ++ unsigned int addr; ++ ++ addr = (sethi & 0x003FFFFFU) << 10; ++ regs->u_regs[UREG_G1] = addr; ++ addr += (((jmpl | 0xFFFFE000U) ^ 0x00001000U) + 0x00001000U); ++ regs->pc = addr; ++ regs->npc = addr+4; ++ return 2; ++ } ++ } while (0); ++ ++ do { /* PaX: unpatched PLT emulation step 1 */ ++ unsigned int sethi, ba, nop; ++ ++ err = get_user(sethi, (unsigned int *)regs->pc); ++ err |= get_user(ba, (unsigned int *)(regs->pc+4)); ++ err |= get_user(nop, (unsigned int *)(regs->pc+8)); ++ ++ if (err) ++ break; ++ ++ if ((sethi & 0xFFC00000U) == 0x03000000U && ++ ((ba & 0xFFC00000U) == 0x30800000U || (ba & 0xFFF80000U) == 0x30680000U) && ++ nop == 0x01000000U) ++ { ++ unsigned int addr, save, call; ++ ++ if ((ba & 0xFFC00000U) == 0x30800000U) ++ addr = regs->pc + 4 + ((((ba | 0xFFC00000U) ^ 0x00200000U) + 0x00200000U) << 2); ++ else ++ addr = regs->pc + 4 + ((((ba | 0xFFF80000U) ^ 0x00040000U) + 0x00040000U) << 2); ++ ++ err = get_user(save, (unsigned int *)addr); ++ err |= get_user(call, 
(unsigned int *)(addr+4)); ++ err |= get_user(nop, (unsigned int *)(addr+8)); ++ if (err) ++ break; ++ ++ if (save == 0x9DE3BFA8U && ++ (call & 0xC0000000U) == 0x40000000U && ++ nop == 0x01000000U) ++ { ++ struct vm_area_struct *vma; ++ unsigned long call_dl_resolve; ++ ++ down_read(&current->mm->mmap_sem); ++ call_dl_resolve = current->mm->call_dl_resolve; ++ up_read(&current->mm->mmap_sem); ++ if (likely(call_dl_resolve)) ++ goto emulate; ++ ++ vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); ++ ++ down_write(&current->mm->mmap_sem); ++ if (current->mm->call_dl_resolve) { ++ call_dl_resolve = current->mm->call_dl_resolve; ++ up_write(&current->mm->mmap_sem); ++ if (vma) ++ kmem_cache_free(vm_area_cachep, vma); ++ goto emulate; ++ } ++ ++ call_dl_resolve = get_unmapped_area(NULL, 0UL, PAGE_SIZE, 0UL, MAP_PRIVATE); ++ if (!vma || (call_dl_resolve & ~PAGE_MASK)) { ++ up_write(&current->mm->mmap_sem); ++ if (vma) ++ kmem_cache_free(vm_area_cachep, vma); ++ return 1; ++ } ++ ++ if (pax_insert_vma(vma, call_dl_resolve)) { ++ up_write(&current->mm->mmap_sem); ++ kmem_cache_free(vm_area_cachep, vma); ++ return 1; ++ } ++ ++ current->mm->call_dl_resolve = call_dl_resolve; ++ up_write(&current->mm->mmap_sem); ++ ++emulate: ++ regs->u_regs[UREG_G1] = (sethi & 0x003FFFFFU) << 10; ++ regs->pc = call_dl_resolve; ++ regs->npc = addr+4; ++ return 3; ++ } ++ } ++ } while (0); ++ ++ do { /* PaX: unpatched PLT emulation step 2 */ ++ unsigned int save, call, nop; ++ ++ err = get_user(save, (unsigned int *)(regs->pc-4)); ++ err |= get_user(call, (unsigned int *)regs->pc); ++ err |= get_user(nop, (unsigned int *)(regs->pc+4)); ++ if (err) ++ break; ++ ++ if (save == 0x9DE3BFA8U && ++ (call & 0xC0000000U) == 0x40000000U && ++ nop == 0x01000000U) ++ { ++ unsigned int dl_resolve = regs->pc + ((((call | 0xC0000000U) ^ 0x20000000U) + 0x20000000U) << 2); ++ ++ regs->u_regs[UREG_RETPC] = regs->pc; ++ regs->pc = dl_resolve; ++ regs->npc = dl_resolve+4; ++ return 3; ++ } ++ } while (0); ++#endif ++ ++ return 1; ++} ++ ++void 
pax_report_insns(void *pc, void *sp) ++{ ++ unsigned long i; ++ ++ printk(KERN_ERR "PAX: bytes at PC: "); ++ for (i = 0; i < 5; i++) { ++ unsigned int c; ++ if (get_user(c, (unsigned int *)pc+i)) ++ printk(KERN_CONT "???????? "); ++ else ++ printk(KERN_CONT "%08x ", c); ++ } ++ printk("\n"); ++} ++#endif ++ + asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, + unsigned long address) + { +@@ -231,6 +477,24 @@ good_area: + if(!(vma->vm_flags & VM_WRITE)) + goto bad_area; + } else { ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ if ((mm->pax_flags & MF_PAX_PAGEEXEC) && text_fault && !(vma->vm_flags & VM_EXEC)) { ++ up_read(&mm->mmap_sem); ++ switch (pax_handle_fetch_fault(regs)) { ++ ++#ifdef CONFIG_PAX_EMUPLT ++ case 2: ++ case 3: ++ return; ++#endif ++ ++ } ++ pax_report_fault(regs, (void *)regs->pc, (void *)regs->u_regs[UREG_FP]); ++ do_group_exit(SIGKILL); ++ } ++#endif ++ + /* Allow reads even for write-only mappings */ + if(!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto bad_area; +diff -urNp linux-2.6.29/arch/sparc/mm/fault_64.c linux-2.6.29/arch/sparc/mm/fault_64.c +--- linux-2.6.29/arch/sparc/mm/fault_64.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/sparc/mm/fault_64.c 2009-03-28 14:26:18.000000000 -0400 +@@ -20,6 +20,9 @@ + #include + #include + #include ++#include ++#include ++#include + + #include + #include +@@ -249,6 +252,367 @@ static void noinline bogus_32bit_fault_a + show_regs(regs); + } + ++#ifdef CONFIG_PAX_PAGEEXEC ++#ifdef CONFIG_PAX_EMUPLT ++static void pax_emuplt_close(struct vm_area_struct *vma) ++{ ++ vma->vm_mm->call_dl_resolve = 0UL; ++} ++ ++static int pax_emuplt_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ++{ ++ unsigned int *kaddr; ++ ++ vmf->page = alloc_page(GFP_HIGHUSER); ++ if (!vmf->page) ++ return VM_FAULT_OOM; ++ ++ kaddr = kmap(vmf->page); ++ memset(kaddr, 0, PAGE_SIZE); ++ kaddr[0] = 0x9DE3BFA8U; /* save */ ++ flush_dcache_page(vmf->page); ++ kunmap(vmf->page); ++ return 
VM_FAULT_MAJOR; ++} ++ ++static struct vm_operations_struct pax_vm_ops = { ++ .close = pax_emuplt_close, ++ .fault = pax_emuplt_fault ++}; ++ ++static int pax_insert_vma(struct vm_area_struct *vma, unsigned long addr) ++{ ++ int ret; ++ ++ vma->vm_mm = current->mm; ++ vma->vm_start = addr; ++ vma->vm_end = addr + PAGE_SIZE; ++ vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC; ++ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); ++ vma->vm_ops = &pax_vm_ops; ++ ++ ret = insert_vm_struct(current->mm, vma); ++ if (ret) ++ return ret; ++ ++ ++current->mm->total_vm; ++ return 0; ++} ++#endif ++ ++/* ++ * PaX: decide what to do with offenders (regs->tpc = fault address) ++ * ++ * returns 1 when task should be killed ++ * 2 when patched PLT trampoline was detected ++ * 3 when unpatched PLT trampoline was detected ++ */ ++static int pax_handle_fetch_fault(struct pt_regs *regs) ++{ ++ ++#ifdef CONFIG_PAX_EMUPLT ++ int err; ++ ++ do { /* PaX: patched PLT emulation #1 */ ++ unsigned int sethi1, sethi2, jmpl; ++ ++ err = get_user(sethi1, (unsigned int *)regs->tpc); ++ err |= get_user(sethi2, (unsigned int *)(regs->tpc+4)); ++ err |= get_user(jmpl, (unsigned int *)(regs->tpc+8)); ++ ++ if (err) ++ break; ++ ++ if ((sethi1 & 0xFFC00000U) == 0x03000000U && ++ (sethi2 & 0xFFC00000U) == 0x03000000U && ++ (jmpl & 0xFFFFE000U) == 0x81C06000U) ++ { ++ unsigned long addr; ++ ++ regs->u_regs[UREG_G1] = (sethi2 & 0x003FFFFFU) << 10; ++ addr = regs->u_regs[UREG_G1]; ++ addr += (((jmpl | 0xFFFFFFFFFFFFE000UL) ^ 0x00001000UL) + 0x00001000UL); ++ regs->tpc = addr; ++ regs->tnpc = addr+4; ++ return 2; ++ } ++ } while (0); ++ ++ { /* PaX: patched PLT emulation #2 */ ++ unsigned int ba; ++ ++ err = get_user(ba, (unsigned int *)regs->tpc); ++ ++ if (!err && (ba & 0xFFC00000U) == 0x30800000U) { ++ unsigned long addr; ++ ++ addr = regs->tpc + ((((ba | 0xFFFFFFFFFFC00000UL) ^ 0x00200000UL) + 0x00200000UL) << 2); ++ regs->tpc = addr; ++ regs->tnpc = addr+4; ++ return 2; ++ } ++ } ++ ++ 
do { /* PaX: patched PLT emulation #3 */ ++ unsigned int sethi, jmpl, nop; ++ ++ err = get_user(sethi, (unsigned int *)regs->tpc); ++ err |= get_user(jmpl, (unsigned int *)(regs->tpc+4)); ++ err |= get_user(nop, (unsigned int *)(regs->tpc+8)); ++ ++ if (err) ++ break; ++ ++ if ((sethi & 0xFFC00000U) == 0x03000000U && ++ (jmpl & 0xFFFFE000U) == 0x81C06000U && ++ nop == 0x01000000U) ++ { ++ unsigned long addr; ++ ++ addr = (sethi & 0x003FFFFFU) << 10; ++ regs->u_regs[UREG_G1] = addr; ++ addr += (((jmpl | 0xFFFFFFFFFFFFE000UL) ^ 0x00001000UL) + 0x00001000UL); ++ regs->tpc = addr; ++ regs->tnpc = addr+4; ++ return 2; ++ } ++ } while (0); ++ ++ do { /* PaX: patched PLT emulation #4 */ ++ unsigned int mov1, call, mov2; ++ ++ err = get_user(mov1, (unsigned int *)regs->tpc); ++ err |= get_user(call, (unsigned int *)(regs->tpc+4)); ++ err |= get_user(mov2, (unsigned int *)(regs->tpc+8)); ++ ++ if (err) ++ break; ++ ++ if (mov1 == 0x8210000FU && ++ (call & 0xC0000000U) == 0x40000000U && ++ mov2 == 0x9E100001U) ++ { ++ unsigned long addr; ++ ++ regs->u_regs[UREG_G1] = regs->u_regs[UREG_RETPC]; ++ addr = regs->tpc + 4 + ((((call | 0xFFFFFFFFC0000000UL) ^ 0x20000000UL) + 0x20000000UL) << 2); ++ regs->tpc = addr; ++ regs->tnpc = addr+4; ++ return 2; ++ } ++ } while (0); ++ ++ do { /* PaX: patched PLT emulation #5 */ ++ unsigned int sethi1, sethi2, or1, or2, sllx, jmpl, nop; ++ ++ err = get_user(sethi1, (unsigned int *)regs->tpc); ++ err |= get_user(sethi2, (unsigned int *)(regs->tpc+4)); ++ err |= get_user(or1, (unsigned int *)(regs->tpc+8)); ++ err |= get_user(or2, (unsigned int *)(regs->tpc+12)); ++ err |= get_user(sllx, (unsigned int *)(regs->tpc+16)); ++ err |= get_user(jmpl, (unsigned int *)(regs->tpc+20)); ++ err |= get_user(nop, (unsigned int *)(regs->tpc+24)); ++ ++ if (err) ++ break; ++ ++ if ((sethi1 & 0xFFC00000U) == 0x03000000U && ++ (sethi2 & 0xFFC00000U) == 0x0B000000U && ++ (or1 & 0xFFFFE000U) == 0x82106000U && ++ (or2 & 0xFFFFE000U) == 0x8A116000U && ++ sllx == 
0x83287020 && ++ jmpl == 0x81C04005U && ++ nop == 0x01000000U) ++ { ++ unsigned long addr; ++ ++ regs->u_regs[UREG_G1] = ((sethi1 & 0x003FFFFFU) << 10) | (or1 & 0x000003FFU); ++ regs->u_regs[UREG_G1] <<= 32; ++ regs->u_regs[UREG_G5] = ((sethi2 & 0x003FFFFFU) << 10) | (or2 & 0x000003FFU); ++ addr = regs->u_regs[UREG_G1] + regs->u_regs[UREG_G5]; ++ regs->tpc = addr; ++ regs->tnpc = addr+4; ++ return 2; ++ } ++ } while (0); ++ ++ do { /* PaX: patched PLT emulation #6 */ ++ unsigned int sethi1, sethi2, sllx, or, jmpl, nop; ++ ++ err = get_user(sethi1, (unsigned int *)regs->tpc); ++ err |= get_user(sethi2, (unsigned int *)(regs->tpc+4)); ++ err |= get_user(sllx, (unsigned int *)(regs->tpc+8)); ++ err |= get_user(or, (unsigned int *)(regs->tpc+12)); ++ err |= get_user(jmpl, (unsigned int *)(regs->tpc+16)); ++ err |= get_user(nop, (unsigned int *)(regs->tpc+20)); ++ ++ if (err) ++ break; ++ ++ if ((sethi1 & 0xFFC00000U) == 0x03000000U && ++ (sethi2 & 0xFFC00000U) == 0x0B000000U && ++ sllx == 0x83287020 && ++ (or & 0xFFFFE000U) == 0x8A116000U && ++ jmpl == 0x81C04005U && ++ nop == 0x01000000U) ++ { ++ unsigned long addr; ++ ++ regs->u_regs[UREG_G1] = (sethi1 & 0x003FFFFFU) << 10; ++ regs->u_regs[UREG_G1] <<= 32; ++ regs->u_regs[UREG_G5] = ((sethi2 & 0x003FFFFFU) << 10) | (or & 0x3FFU); ++ addr = regs->u_regs[UREG_G1] + regs->u_regs[UREG_G5]; ++ regs->tpc = addr; ++ regs->tnpc = addr+4; ++ return 2; ++ } ++ } while (0); ++ ++ do { /* PaX: patched PLT emulation #7 */ ++ unsigned int sethi, ba, nop; ++ ++ err = get_user(sethi, (unsigned int *)regs->tpc); ++ err |= get_user(ba, (unsigned int *)(regs->tpc+4)); ++ err |= get_user(nop, (unsigned int *)(regs->tpc+8)); ++ ++ if (err) ++ break; ++ ++ if ((sethi & 0xFFC00000U) == 0x03000000U && ++ (ba & 0xFFF00000U) == 0x30600000U && ++ nop == 0x01000000U) ++ { ++ unsigned long addr; ++ ++ addr = (sethi & 0x003FFFFFU) << 10; ++ regs->u_regs[UREG_G1] = addr; ++ addr = regs->tpc + ((((ba | 0xFFFFFFFFFFF80000UL) ^ 0x00040000UL) + 
0x00040000UL) << 2); ++ regs->tpc = addr; ++ regs->tnpc = addr+4; ++ return 2; ++ } ++ } while (0); ++ ++ do { /* PaX: unpatched PLT emulation step 1 */ ++ unsigned int sethi, ba, nop; ++ ++ err = get_user(sethi, (unsigned int *)regs->tpc); ++ err |= get_user(ba, (unsigned int *)(regs->tpc+4)); ++ err |= get_user(nop, (unsigned int *)(regs->tpc+8)); ++ ++ if (err) ++ break; ++ ++ if ((sethi & 0xFFC00000U) == 0x03000000U && ++ ((ba & 0xFFC00000U) == 0x30800000U || (ba & 0xFFF80000U) == 0x30680000U) && ++ nop == 0x01000000U) ++ { ++ unsigned long addr; ++ unsigned int save, call; ++ ++ if ((ba & 0xFFC00000U) == 0x30800000U) ++ addr = regs->tpc + 4 + ((((ba | 0xFFFFFFFFFFC00000UL) ^ 0x00200000UL) + 0x00200000UL) << 2); ++ else ++ addr = regs->tpc + 4 + ((((ba | 0xFFFFFFFFFFF80000UL) ^ 0x00040000UL) + 0x00040000UL) << 2); ++ ++ err = get_user(save, (unsigned int *)addr); ++ err |= get_user(call, (unsigned int *)(addr+4)); ++ err |= get_user(nop, (unsigned int *)(addr+8)); ++ if (err) ++ break; ++ ++ if (save == 0x9DE3BFA8U && ++ (call & 0xC0000000U) == 0x40000000U && ++ nop == 0x01000000U) ++ { ++ struct vm_area_struct *vma; ++ unsigned long call_dl_resolve; ++ ++ down_read(&current->mm->mmap_sem); ++ call_dl_resolve = current->mm->call_dl_resolve; ++ up_read(&current->mm->mmap_sem); ++ if (likely(call_dl_resolve)) ++ goto emulate; ++ ++ vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); ++ ++ down_write(&current->mm->mmap_sem); ++ if (current->mm->call_dl_resolve) { ++ call_dl_resolve = current->mm->call_dl_resolve; ++ up_write(&current->mm->mmap_sem); ++ if (vma) ++ kmem_cache_free(vm_area_cachep, vma); ++ goto emulate; ++ } ++ ++ call_dl_resolve = get_unmapped_area(NULL, 0UL, PAGE_SIZE, 0UL, MAP_PRIVATE); ++ if (!vma || (call_dl_resolve & ~PAGE_MASK)) { ++ up_write(&current->mm->mmap_sem); ++ if (vma) ++ kmem_cache_free(vm_area_cachep, vma); ++ return 1; ++ } ++ ++ if (pax_insert_vma(vma, call_dl_resolve)) { ++ up_write(&current->mm->mmap_sem); ++ kmem_cache_free(vm_area_cachep, vma); ++ return 1; 
++ } ++ ++ current->mm->call_dl_resolve = call_dl_resolve; ++ up_write(&current->mm->mmap_sem); ++ ++emulate: ++ regs->u_regs[UREG_G1] = (sethi & 0x003FFFFFU) << 10; ++ regs->tpc = call_dl_resolve; ++ regs->tnpc = addr+4; ++ return 3; ++ } ++ } ++ } while (0); ++ ++ do { /* PaX: unpatched PLT emulation step 2 */ ++ unsigned int save, call, nop; ++ ++ err = get_user(save, (unsigned int *)(regs->tpc-4)); ++ err |= get_user(call, (unsigned int *)regs->tpc); ++ err |= get_user(nop, (unsigned int *)(regs->tpc+4)); ++ if (err) ++ break; ++ ++ if (save == 0x9DE3BFA8U && ++ (call & 0xC0000000U) == 0x40000000U && ++ nop == 0x01000000U) ++ { ++ unsigned long dl_resolve = regs->tpc + ((((call | 0xFFFFFFFFC0000000UL) ^ 0x20000000UL) + 0x20000000UL) << 2); ++ ++ regs->u_regs[UREG_RETPC] = regs->tpc; ++ regs->tpc = dl_resolve; ++ regs->tnpc = dl_resolve+4; ++ return 3; ++ } ++ } while (0); ++#endif ++ ++ return 1; ++} ++ ++void pax_report_insns(void *pc, void *sp) ++{ ++ unsigned long i; ++ ++ printk(KERN_ERR "PAX: bytes at PC: "); ++ for (i = 0; i < 5; i++) { ++ unsigned int c; ++ if (get_user(c, (unsigned int *)pc+i)) ++ printk(KERN_CONT "???????? 
"); ++ else ++ printk(KERN_CONT "%08x ", c); ++ } ++ printk("\n"); ++} ++#endif ++ + asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) + { + struct mm_struct *mm = current->mm; +@@ -315,6 +679,29 @@ asmlinkage void __kprobes do_sparc64_fau + if (!vma) + goto bad_area; + ++#ifdef CONFIG_PAX_PAGEEXEC ++ /* PaX: detect ITLB misses on non-exec pages */ ++ if ((mm->pax_flags & MF_PAX_PAGEEXEC) && vma->vm_start <= address && ++ !(vma->vm_flags & VM_EXEC) && (fault_code & FAULT_CODE_ITLB)) ++ { ++ if (address != regs->tpc) ++ goto good_area; ++ ++ up_read(&mm->mmap_sem); ++ switch (pax_handle_fetch_fault(regs)) { ++ ++#ifdef CONFIG_PAX_EMUPLT ++ case 2: ++ case 3: ++ return; ++#endif ++ ++ } ++ pax_report_fault(regs, (void *)regs->tpc, (void *)(regs->u_regs[UREG_FP] + STACK_BIAS)); ++ do_group_exit(SIGKILL); ++ } ++#endif ++ + /* Pure DTLB misses do not tell us whether the fault causing + * load/store/atomic was a write or not, it only says that there + * was no match. So in such a case we (carefully) read the +diff -urNp linux-2.6.29/arch/sparc/mm/init_32.c linux-2.6.29/arch/sparc/mm/init_32.c +--- linux-2.6.29/arch/sparc/mm/init_32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/sparc/mm/init_32.c 2009-03-28 14:26:18.000000000 -0400 +@@ -316,6 +316,9 @@ extern void device_scan(void); + pgprot_t PAGE_SHARED __read_mostly; + EXPORT_SYMBOL(PAGE_SHARED); + ++pgprot_t PAGE_SHARED_NOEXEC __read_mostly; ++EXPORT_SYMBOL(PAGE_SHARED_NOEXEC); ++ + void __init paging_init(void) + { + switch(sparc_cpu_model) { +@@ -341,17 +344,17 @@ void __init paging_init(void) + + /* Initialize the protection map with non-constant, MMU dependent values. 
*/ + protection_map[0] = PAGE_NONE; +- protection_map[1] = PAGE_READONLY; +- protection_map[2] = PAGE_COPY; +- protection_map[3] = PAGE_COPY; ++ protection_map[1] = PAGE_READONLY_NOEXEC; ++ protection_map[2] = PAGE_COPY_NOEXEC; ++ protection_map[3] = PAGE_COPY_NOEXEC; + protection_map[4] = PAGE_READONLY; + protection_map[5] = PAGE_READONLY; + protection_map[6] = PAGE_COPY; + protection_map[7] = PAGE_COPY; + protection_map[8] = PAGE_NONE; +- protection_map[9] = PAGE_READONLY; +- protection_map[10] = PAGE_SHARED; +- protection_map[11] = PAGE_SHARED; ++ protection_map[9] = PAGE_READONLY_NOEXEC; ++ protection_map[10] = PAGE_SHARED_NOEXEC; ++ protection_map[11] = PAGE_SHARED_NOEXEC; + protection_map[12] = PAGE_READONLY; + protection_map[13] = PAGE_READONLY; + protection_map[14] = PAGE_SHARED; +diff -urNp linux-2.6.29/arch/sparc/mm/Makefile linux-2.6.29/arch/sparc/mm/Makefile +--- linux-2.6.29/arch/sparc/mm/Makefile 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/sparc/mm/Makefile 2009-03-28 14:26:18.000000000 -0400 +@@ -2,7 +2,7 @@ + # + + asflags-y := -ansi +-ccflags-y := -Werror ++#ccflags-y := -Werror + + obj-$(CONFIG_SPARC64) += ultra.o tlb.o tsb.o + obj-y += fault_$(BITS).o +diff -urNp linux-2.6.29/arch/sparc/mm/srmmu.c linux-2.6.29/arch/sparc/mm/srmmu.c +--- linux-2.6.29/arch/sparc/mm/srmmu.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/sparc/mm/srmmu.c 2009-03-28 14:26:18.000000000 -0400 +@@ -2148,6 +2148,13 @@ void __init ld_mmu_srmmu(void) + PAGE_SHARED = pgprot_val(SRMMU_PAGE_SHARED); + BTFIXUPSET_INT(page_copy, pgprot_val(SRMMU_PAGE_COPY)); + BTFIXUPSET_INT(page_readonly, pgprot_val(SRMMU_PAGE_RDONLY)); ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ PAGE_SHARED_NOEXEC = pgprot_val(SRMMU_PAGE_SHARED_NOEXEC); ++ BTFIXUPSET_INT(page_copy_noexec, pgprot_val(SRMMU_PAGE_COPY_NOEXEC)); ++ BTFIXUPSET_INT(page_readonly_noexec, pgprot_val(SRMMU_PAGE_RDONLY_NOEXEC)); ++#endif ++ + BTFIXUPSET_INT(page_kernel, pgprot_val(SRMMU_PAGE_KERNEL)); + page_kernel = 
pgprot_val(SRMMU_PAGE_KERNEL); + +diff -urNp linux-2.6.29/arch/um/include/asm/kmap_types.h linux-2.6.29/arch/um/include/asm/kmap_types.h +--- linux-2.6.29/arch/um/include/asm/kmap_types.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/um/include/asm/kmap_types.h 2009-03-28 14:26:18.000000000 -0400 +@@ -23,6 +23,7 @@ enum km_type { + KM_IRQ1, + KM_SOFTIRQ0, + KM_SOFTIRQ1, ++ KM_CLEARPAGE, + KM_TYPE_NR + }; + +diff -urNp linux-2.6.29/arch/um/include/asm/page.h linux-2.6.29/arch/um/include/asm/page.h +--- linux-2.6.29/arch/um/include/asm/page.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/um/include/asm/page.h 2009-03-28 14:26:18.000000000 -0400 +@@ -14,6 +14,9 @@ + #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) + #define PAGE_MASK (~(PAGE_SIZE-1)) + ++#define ktla_ktva(addr) (addr) ++#define ktva_ktla(addr) (addr) ++ + #ifndef __ASSEMBLY__ + + struct page; +diff -urNp linux-2.6.29/arch/um/sys-i386/syscalls.c linux-2.6.29/arch/um/sys-i386/syscalls.c +--- linux-2.6.29/arch/um/sys-i386/syscalls.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/um/sys-i386/syscalls.c 2009-03-28 14:26:18.000000000 -0400 +@@ -11,6 +11,21 @@ + #include "asm/uaccess.h" + #include "asm/unistd.h" + ++int i386_mmap_check(unsigned long addr, unsigned long len, unsigned long flags) ++{ ++ unsigned long pax_task_size = TASK_SIZE; ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (current->mm->pax_flags & MF_PAX_SEGMEXEC) ++ pax_task_size = SEGMEXEC_TASK_SIZE; ++#endif ++ ++ if (len > pax_task_size || addr > pax_task_size - len) ++ return -EINVAL; ++ ++ return 0; ++} ++ + /* + * Perform the select(nd, in, out, ex, tv) and mmap() system + * calls. 
Linux/i386 didn't use to be able to handle more than +diff -urNp linux-2.6.29/arch/x86/boot/bitops.h linux-2.6.29/arch/x86/boot/bitops.h +--- linux-2.6.29/arch/x86/boot/bitops.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/boot/bitops.h 2009-03-28 14:26:18.000000000 -0400 +@@ -26,7 +26,7 @@ static inline int variable_test_bit(int + u8 v; + const u32 *p = (const u32 *)addr; + +- asm("btl %2,%1; setc %0" : "=qm" (v) : "m" (*p), "Ir" (nr)); ++ asm volatile("btl %2,%1; setc %0" : "=qm" (v) : "m" (*p), "Ir" (nr)); + return v; + } + +@@ -37,7 +37,7 @@ static inline int variable_test_bit(int + + static inline void set_bit(int nr, void *addr) + { +- asm("btsl %1,%0" : "+m" (*(u32 *)addr) : "Ir" (nr)); ++ asm volatile("btsl %1,%0" : "+m" (*(u32 *)addr) : "Ir" (nr)); + } + + #endif /* BOOT_BITOPS_H */ +diff -urNp linux-2.6.29/arch/x86/boot/boot.h linux-2.6.29/arch/x86/boot/boot.h +--- linux-2.6.29/arch/x86/boot/boot.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/boot/boot.h 2009-03-28 14:26:18.000000000 -0400 +@@ -80,7 +80,7 @@ static inline void io_delay(void) + static inline u16 ds(void) + { + u16 seg; +- asm("movw %%ds,%0" : "=rm" (seg)); ++ asm volatile("movw %%ds,%0" : "=rm" (seg)); + return seg; + } + +@@ -176,7 +176,7 @@ static inline void wrgs32(u32 v, addr_t + static inline int memcmp(const void *s1, const void *s2, size_t len) + { + u8 diff; +- asm("repe; cmpsb; setnz %0" ++ asm volatile("repe; cmpsb; setnz %0" + : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); + return diff; + } +diff -urNp linux-2.6.29/arch/x86/boot/compressed/head_32.S linux-2.6.29/arch/x86/boot/compressed/head_32.S +--- linux-2.6.29/arch/x86/boot/compressed/head_32.S 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/boot/compressed/head_32.S 2009-03-28 14:26:18.000000000 -0400 +@@ -70,7 +70,7 @@ startup_32: + addl $(CONFIG_PHYSICAL_ALIGN - 1), %ebx + andl $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebx + #else +- movl $LOAD_PHYSICAL_ADDR, %ebx ++ movl 
$____LOAD_PHYSICAL_ADDR, %ebx + #endif + + /* Replace the compressed data size with the uncompressed size */ +@@ -80,8 +80,8 @@ startup_32: + /* Add 8 bytes for every 32K input block */ + shrl $12, %eax + addl %eax, %ebx +- /* Add 32K + 18 bytes of extra slack */ +- addl $(32768 + 18), %ebx ++ /* Add 64K of extra slack */ ++ addl $65536, %ebx + /* Align on a 4K boundary */ + addl $4095, %ebx + andl $~4095, %ebx +@@ -105,7 +105,7 @@ startup_32: + addl $(CONFIG_PHYSICAL_ALIGN - 1), %ebp + andl $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebp + #else +- movl $LOAD_PHYSICAL_ADDR, %ebp ++ movl $____LOAD_PHYSICAL_ADDR, %ebp + #endif + + /* +@@ -160,16 +160,15 @@ relocated: + * and where it was actually loaded. + */ + movl %ebp, %ebx +- subl $LOAD_PHYSICAL_ADDR, %ebx ++ subl $____LOAD_PHYSICAL_ADDR, %ebx + jz 2f /* Nothing to be done if loaded at compiled addr. */ + /* + * Process relocations. + */ + + 1: subl $4, %edi +- movl 0(%edi), %ecx +- testl %ecx, %ecx +- jz 2f ++ movl (%edi), %ecx ++ jecxz 2f + addl %ebx, -__PAGE_OFFSET(%ebx, %ecx) + jmp 1b + 2: +diff -urNp linux-2.6.29/arch/x86/boot/compressed/misc.c linux-2.6.29/arch/x86/boot/compressed/misc.c +--- linux-2.6.29/arch/x86/boot/compressed/misc.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/boot/compressed/misc.c 2009-03-28 14:26:18.000000000 -0400 +@@ -373,7 +373,7 @@ static void parse_elf(void *output) + case PT_LOAD: + #ifdef CONFIG_RELOCATABLE + dest = output; +- dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR); ++ dest += (phdr->p_paddr - ____LOAD_PHYSICAL_ADDR); + #else + dest = (void *)(phdr->p_paddr); + #endif +@@ -425,7 +425,7 @@ asmlinkage void decompress_kernel(void * + if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff)) + error("Destination address too large"); + #ifndef CONFIG_RELOCATABLE +- if ((u32)output != LOAD_PHYSICAL_ADDR) ++ if ((u32)output != ____LOAD_PHYSICAL_ADDR) + error("Wrong destination address"); + #endif + #endif +diff -urNp linux-2.6.29/arch/x86/boot/compressed/relocs.c 
linux-2.6.29/arch/x86/boot/compressed/relocs.c +--- linux-2.6.29/arch/x86/boot/compressed/relocs.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/boot/compressed/relocs.c 2009-03-28 14:26:18.000000000 -0400 +@@ -10,8 +10,11 @@ + #define USE_BSD + #include + ++#include "../../../../include/linux/autoconf.h" ++ + #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + static Elf32_Ehdr ehdr; ++static Elf32_Phdr *phdr; + static unsigned long reloc_count, reloc_idx; + static unsigned long *relocs; + +@@ -245,6 +248,36 @@ static void read_ehdr(FILE *fp) + } + } + ++static void read_phdrs(FILE *fp) ++{ ++ int i; ++ ++ phdr = calloc(ehdr.e_phnum, sizeof(Elf32_Phdr)); ++ if (!phdr) { ++ die("Unable to allocate %d program headers\n", ++ ehdr.e_phnum); ++ } ++ if (fseek(fp, ehdr.e_phoff, SEEK_SET) < 0) { ++ die("Seek to %d failed: %s\n", ++ ehdr.e_phoff, strerror(errno)); ++ } ++ if (fread(phdr, sizeof(*phdr), ehdr.e_phnum, fp) != ehdr.e_phnum) { ++ die("Cannot read ELF program headers: %s\n", ++ strerror(errno)); ++ } ++ for(i = 0; i < ehdr.e_phnum; i++) { ++ phdr[i].p_type = elf32_to_cpu(phdr[i].p_type); ++ phdr[i].p_offset = elf32_to_cpu(phdr[i].p_offset); ++ phdr[i].p_vaddr = elf32_to_cpu(phdr[i].p_vaddr); ++ phdr[i].p_paddr = elf32_to_cpu(phdr[i].p_paddr); ++ phdr[i].p_filesz = elf32_to_cpu(phdr[i].p_filesz); ++ phdr[i].p_memsz = elf32_to_cpu(phdr[i].p_memsz); ++ phdr[i].p_flags = elf32_to_cpu(phdr[i].p_flags); ++ phdr[i].p_align = elf32_to_cpu(phdr[i].p_align); ++ } ++ ++} ++ + static void read_shdrs(FILE *fp) + { + int i; +@@ -341,6 +374,8 @@ static void read_symtabs(FILE *fp) + static void read_relocs(FILE *fp) + { + int i,j; ++ uint32_t base; ++ + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + if (sec->shdr.sh_type != SHT_REL) { +@@ -360,9 +395,18 @@ static void read_relocs(FILE *fp) + die("Cannot read symbol table: %s\n", + strerror(errno)); + } ++ base = 0; ++ for (j = 0; j < ehdr.e_phnum; j++) { ++ if (phdr[j].p_type != 
PT_LOAD ) ++ continue; ++ if (secs[sec->shdr.sh_info].shdr.sh_offset < phdr[j].p_offset || secs[sec->shdr.sh_info].shdr.sh_offset >= phdr[j].p_offset + phdr[j].p_filesz) ++ continue; ++ base = CONFIG_PAGE_OFFSET + phdr[j].p_paddr - phdr[j].p_vaddr; ++ break; ++ } + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) { + Elf32_Rel *rel = &sec->reltab[j]; +- rel->r_offset = elf32_to_cpu(rel->r_offset); ++ rel->r_offset = elf32_to_cpu(rel->r_offset) + base; + rel->r_info = elf32_to_cpu(rel->r_info); + } + } +@@ -504,6 +548,23 @@ static void walk_relocs(void (*visit)(El + if (sym->st_shndx == SHN_ABS) { + continue; + } ++ /* Don't relocate actual per-cpu variables, they are absolute indices, not addresses */ ++ if (!strcmp(sec_name(sym->st_shndx), ".data.percpu") && strncmp(sym_name(sym_strtab, sym), "__per_cpu_", 10)) ++ continue; ++#if defined(CONFIG_PAX_KERNEXEC) && defined(CONFIG_X86_32) ++ /* Don't relocate actual code, they are relocated implicitly by the base address of KERNEL_CS */ ++ if (!strcmp(sec_name(sym->st_shndx), ".init.text")) ++ continue; ++ if (!strcmp(sec_name(sym->st_shndx), ".exit.text")) ++ continue; ++ if (!strcmp(sec_name(sym->st_shndx), ".text.head")) { ++ if (strcmp(sym_name(sym_strtab, sym), "__init_end") && ++ strcmp(sym_name(sym_strtab, sym), "KERNEL_TEXT_OFFSET")) ++ continue; ++ } ++ if (!strcmp(sec_name(sym->st_shndx), ".text")) ++ continue; ++#endif + if (r_type == R_386_PC32) { + /* PC relative relocations don't need to be adjusted */ + } +@@ -631,6 +692,7 @@ int main(int argc, char **argv) + fname, strerror(errno)); + } + read_ehdr(fp); ++ read_phdrs(fp); + read_shdrs(fp); + read_strtabs(fp); + read_symtabs(fp); +diff -urNp linux-2.6.29/arch/x86/boot/cpucheck.c linux-2.6.29/arch/x86/boot/cpucheck.c +--- linux-2.6.29/arch/x86/boot/cpucheck.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/boot/cpucheck.c 2009-03-28 14:26:18.000000000 -0400 +@@ -74,7 +74,7 @@ static int has_fpu(void) + u16 fcw = -1, fsw = -1; + u32 
cr0; + +- asm("movl %%cr0,%0" : "=r" (cr0)); ++ asm volatile("movl %%cr0,%0" : "=r" (cr0)); + if (cr0 & (X86_CR0_EM|X86_CR0_TS)) { + cr0 &= ~(X86_CR0_EM|X86_CR0_TS); + asm volatile("movl %0,%%cr0" : : "r" (cr0)); +@@ -90,7 +90,7 @@ static int has_eflag(u32 mask) + { + u32 f0, f1; + +- asm("pushfl ; " ++ asm volatile("pushfl ; " + "pushfl ; " + "popl %0 ; " + "movl %0,%1 ; " +@@ -115,7 +115,7 @@ static void get_flags(void) + set_bit(X86_FEATURE_FPU, cpu.flags); + + if (has_eflag(X86_EFLAGS_ID)) { +- asm("cpuid" ++ asm volatile("cpuid" + : "=a" (max_intel_level), + "=b" (cpu_vendor[0]), + "=d" (cpu_vendor[1]), +@@ -124,7 +124,7 @@ static void get_flags(void) + + if (max_intel_level >= 0x00000001 && + max_intel_level <= 0x0000ffff) { +- asm("cpuid" ++ asm volatile("cpuid" + : "=a" (tfms), + "=c" (cpu.flags[4]), + "=d" (cpu.flags[0]) +@@ -136,7 +136,7 @@ static void get_flags(void) + cpu.model += ((tfms >> 16) & 0xf) << 4; + } + +- asm("cpuid" ++ asm volatile("cpuid" + : "=a" (max_amd_level) + : "a" (0x80000000) + : "ebx", "ecx", "edx"); +@@ -144,7 +144,7 @@ static void get_flags(void) + if (max_amd_level >= 0x80000001 && + max_amd_level <= 0x8000ffff) { + u32 eax = 0x80000001; +- asm("cpuid" ++ asm volatile("cpuid" + : "+a" (eax), + "=c" (cpu.flags[6]), + "=d" (cpu.flags[1]) +@@ -203,9 +203,9 @@ int check_cpu(int *cpu_level_ptr, int *r + u32 ecx = MSR_K7_HWCR; + u32 eax, edx; + +- asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); ++ asm volatile("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); + eax &= ~(1 << 15); +- asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); ++ asm volatile("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); + + get_flags(); /* Make sure it really did something */ + err = check_flags(); +@@ -218,9 +218,9 @@ int check_cpu(int *cpu_level_ptr, int *r + u32 ecx = MSR_VIA_FCR; + u32 eax, edx; + +- asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); ++ asm volatile("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); + eax |= (1<<1)|(1<<7); +- asm("wrmsr" : : "a" 
(eax), "d" (edx), "c" (ecx)); ++ asm volatile("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); + + set_bit(X86_FEATURE_CX8, cpu.flags); + err = check_flags(); +@@ -231,12 +231,12 @@ int check_cpu(int *cpu_level_ptr, int *r + u32 eax, edx; + u32 level = 1; + +- asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); +- asm("wrmsr" : : "a" (~0), "d" (edx), "c" (ecx)); +- asm("cpuid" ++ asm volatile("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); ++ asm volatile("wrmsr" : : "a" (~0), "d" (edx), "c" (ecx)); ++ asm volatile("cpuid" + : "+a" (level), "=d" (cpu.flags[0]) + : : "ecx", "ebx"); +- asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); ++ asm volatile("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); + + err = check_flags(); + } +diff -urNp linux-2.6.29/arch/x86/boot/edd.c linux-2.6.29/arch/x86/boot/edd.c +--- linux-2.6.29/arch/x86/boot/edd.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/boot/edd.c 2009-03-28 14:26:18.000000000 -0400 +@@ -81,7 +81,7 @@ static int get_edd_info(u8 devno, struct + ax = 0x4100; + bx = EDDMAGIC1; + dx = devno; +- asm("pushfl; stc; int $0x13; setc %%al; popfl" ++ asm volatile("pushfl; stc; int $0x13; setc %%al; popfl" + : "+a" (ax), "+b" (bx), "=c" (cx), "+d" (dx) + : : "esi", "edi"); + +@@ -100,7 +100,7 @@ static int get_edd_info(u8 devno, struct + ei->params.length = sizeof(ei->params); + ax = 0x4800; + dx = devno; +- asm("pushfl; int $0x13; popfl" ++ asm volatile("pushfl; int $0x13; popfl" + : "+a" (ax), "+d" (dx), "=m" (ei->params) + : "S" (&ei->params) + : "ebx", "ecx", "edi"); +@@ -111,7 +111,7 @@ static int get_edd_info(u8 devno, struct + ax = 0x0800; + dx = devno; + di = 0; +- asm("pushw %%es; " ++ asm volatile("pushw %%es; " + "movw %%di,%%es; " + "pushfl; stc; int $0x13; setc %%al; popfl; " + "popw %%es" +diff -urNp linux-2.6.29/arch/x86/boot/main.c linux-2.6.29/arch/x86/boot/main.c +--- linux-2.6.29/arch/x86/boot/main.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/boot/main.c 2009-03-28 
14:26:18.000000000 -0400 +@@ -78,7 +78,7 @@ static void query_ist(void) + if (cpu.level < 6) + return; + +- asm("int $0x15" ++ asm volatile("int $0x15" + : "=a" (boot_params.ist_info.signature), + "=b" (boot_params.ist_info.command), + "=c" (boot_params.ist_info.event), +diff -urNp linux-2.6.29/arch/x86/boot/mca.c linux-2.6.29/arch/x86/boot/mca.c +--- linux-2.6.29/arch/x86/boot/mca.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/boot/mca.c 2009-03-28 14:26:18.000000000 -0400 +@@ -19,7 +19,7 @@ int query_mca(void) + u8 err; + u16 es, bx, len; + +- asm("pushw %%es ; " ++ asm volatile("pushw %%es ; " + "int $0x15 ; " + "setc %0 ; " + "movw %%es, %1 ; " +diff -urNp linux-2.6.29/arch/x86/boot/memory.c linux-2.6.29/arch/x86/boot/memory.c +--- linux-2.6.29/arch/x86/boot/memory.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/boot/memory.c 2009-03-28 14:26:18.000000000 -0400 +@@ -30,7 +30,7 @@ static int detect_memory_e820(void) + /* Important: %edx is clobbered by some BIOSes, + so it must be either used for the error output + or explicitly marked clobbered. 
*/ +- asm("int $0x15; setc %0" ++ asm volatile("int $0x15; setc %0" + : "=d" (err), "+b" (next), "=a" (id), "+c" (size), + "=m" (*desc) + : "D" (desc), "d" (SMAP), "a" (0xe820)); +@@ -65,7 +65,7 @@ static int detect_memory_e801(void) + + bx = cx = dx = 0; + ax = 0xe801; +- asm("stc; int $0x15; setc %0" ++ asm volatile("stc; int $0x15; setc %0" + : "=m" (err), "+a" (ax), "+b" (bx), "+c" (cx), "+d" (dx)); + + if (err) +@@ -95,7 +95,7 @@ static int detect_memory_88(void) + u8 err; + + ax = 0x8800; +- asm("stc; int $0x15; setc %0" : "=bcdm" (err), "+a" (ax)); ++ asm volatile("stc; int $0x15; setc %0" : "=bcdm" (err), "+a" (ax)); + + boot_params.screen_info.ext_mem_k = ax; + +diff -urNp linux-2.6.29/arch/x86/boot/video.c linux-2.6.29/arch/x86/boot/video.c +--- linux-2.6.29/arch/x86/boot/video.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/boot/video.c 2009-03-28 14:26:18.000000000 -0400 +@@ -23,7 +23,7 @@ static void store_cursor_position(void) + + ax = 0x0300; + bx = 0; +- asm(INT10 ++ asm volatile(INT10 + : "=d" (curpos), "+a" (ax), "+b" (bx) + : : "ecx", "esi", "edi"); + +@@ -38,7 +38,7 @@ static void store_video_mode(void) + /* N.B.: the saving of the video page here is a bit silly, + since we pretty much assume page 0 everywhere. 
*/ + ax = 0x0f00; +- asm(INT10 ++ asm volatile(INT10 + : "+a" (ax), "=b" (page) + : : "ecx", "edx", "esi", "edi"); + +diff -urNp linux-2.6.29/arch/x86/boot/video-vesa.c linux-2.6.29/arch/x86/boot/video-vesa.c +--- linux-2.6.29/arch/x86/boot/video-vesa.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/boot/video-vesa.c 2009-03-28 14:26:18.000000000 -0400 +@@ -41,7 +41,7 @@ static int vesa_probe(void) + + ax = 0x4f00; + di = (size_t)&vginfo; +- asm(INT10 ++ asm volatile(INT10 + : "+a" (ax), "+D" (di), "=m" (vginfo) + : : "ebx", "ecx", "edx", "esi"); + +@@ -68,7 +68,7 @@ static int vesa_probe(void) + ax = 0x4f01; + cx = mode; + di = (size_t)&vminfo; +- asm(INT10 ++ asm volatile(INT10 + : "+a" (ax), "+c" (cx), "+D" (di), "=m" (vminfo) + : : "ebx", "edx", "esi"); + +@@ -120,7 +120,7 @@ static int vesa_set_mode(struct mode_inf + ax = 0x4f01; + cx = vesa_mode; + di = (size_t)&vminfo; +- asm(INT10 ++ asm volatile(INT10 + : "+a" (ax), "+c" (cx), "+D" (di), "=m" (vminfo) + : : "ebx", "edx", "esi"); + +@@ -202,19 +202,20 @@ static void vesa_dac_set_8bits(void) + /* Save the VESA protected mode info */ + static void vesa_store_pm_info(void) + { +- u16 ax, bx, di, es; ++ u16 ax, bx, cx, di, es; + + ax = 0x4f0a; +- bx = di = 0; +- asm("pushw %%es; "INT10"; movw %%es,%0; popw %%es" +- : "=d" (es), "+a" (ax), "+b" (bx), "+D" (di) +- : : "ecx", "esi"); ++ bx = cx = di = 0; ++ asm volatile("pushw %%es; "INT10"; movw %%es,%0; popw %%es" ++ : "=d" (es), "+a" (ax), "+b" (bx), "+c" (cx), "+D" (di) ++ : : "esi"); + + if (ax != 0x004f) + return; + + boot_params.screen_info.vesapm_seg = es; + boot_params.screen_info.vesapm_off = di; ++ boot_params.screen_info.vesapm_size = cx; + } + + /* +@@ -268,7 +269,7 @@ void vesa_store_edid(void) + /* Note: The VBE DDC spec is different from the main VESA spec; + we genuinely have to assume all registers are destroyed here. 
*/ + +- asm("pushw %%es; movw %2,%%es; "INT10"; popw %%es" ++ asm volatile("pushw %%es; movw %2,%%es; "INT10"; popw %%es" + : "+a" (ax), "+b" (bx), "+c" (cx), "+D" (di) + : : "esi", "edx"); + +@@ -283,7 +284,7 @@ void vesa_store_edid(void) + cx = 0; /* Controller 0 */ + dx = 0; /* EDID block number */ + di =(size_t) &boot_params.edid_info; /* (ES:)Pointer to block */ +- asm(INT10 ++ asm volatile(INT10 + : "+a" (ax), "+b" (bx), "+d" (dx), "=m" (boot_params.edid_info), + "+c" (cx), "+D" (di) + : : "esi"); +diff -urNp linux-2.6.29/arch/x86/boot/video-vga.c linux-2.6.29/arch/x86/boot/video-vga.c +--- linux-2.6.29/arch/x86/boot/video-vga.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/boot/video-vga.c 2009-03-28 14:26:18.000000000 -0400 +@@ -225,7 +225,7 @@ static int vga_probe(void) + }; + u8 vga_flag; + +- asm(INT10 ++ asm volatile(INT10 + : "=b" (ega_bx) + : "a" (0x1200), "b" (0x10) /* Check EGA/VGA */ + : "ecx", "edx", "esi", "edi"); +@@ -237,7 +237,7 @@ static int vga_probe(void) + /* If we have MDA/CGA/HGC then BL will be unchanged at 0x10 */ + if ((u8)ega_bx != 0x10) { + /* EGA/VGA */ +- asm(INT10 ++ asm volatile(INT10 + : "=a" (vga_flag) + : "a" (0x1a00) + : "ebx", "ecx", "edx", "esi", "edi"); +diff -urNp linux-2.6.29/arch/x86/boot/voyager.c linux-2.6.29/arch/x86/boot/voyager.c +--- linux-2.6.29/arch/x86/boot/voyager.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/boot/voyager.c 2009-03-28 14:26:18.000000000 -0400 +@@ -23,7 +23,7 @@ int query_voyager(void) + + data_ptr[0] = 0xff; /* Flag on config not found(?) 
*/ + +- asm("pushw %%es ; " ++ asm volatile("pushw %%es ; " + "int $0x15 ; " + "setc %0 ; " + "movw %%es, %1 ; " +diff -urNp linux-2.6.29/arch/x86/ia32/ia32_signal.c linux-2.6.29/arch/x86/ia32/ia32_signal.c +--- linux-2.6.29/arch/x86/ia32/ia32_signal.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/ia32/ia32_signal.c 2009-03-28 14:26:18.000000000 -0400 +@@ -489,6 +489,7 @@ int ia32_setup_rt_frame(int sig, struct + __NR_ia32_rt_sigreturn, + 0x80cd, + 0, ++ 0 + }; + + frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); +diff -urNp linux-2.6.29/arch/x86/include/asm/alternative.h linux-2.6.29/arch/x86/include/asm/alternative.h +--- linux-2.6.29/arch/x86/include/asm/alternative.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/alternative.h 2009-03-28 14:26:18.000000000 -0400 +@@ -96,7 +96,7 @@ const unsigned char *const *find_nop_tab + " .byte 662b-661b\n" /* sourcelen */ \ + " .byte 664f-663f\n" /* replacementlen */ \ + ".previous\n" \ +- ".section .altinstr_replacement,\"ax\"\n" \ ++ ".section .altinstr_replacement,\"a\"\n" \ + "663:\n\t" newinstr "\n664:\n" /* replacement */ \ + ".previous" :: "i" (feature) : "memory") + +@@ -120,7 +120,7 @@ const unsigned char *const *find_nop_tab + " .byte 662b-661b\n" /* sourcelen */ \ + " .byte 664f-663f\n" /* replacementlen */ \ + ".previous\n" \ +- ".section .altinstr_replacement,\"ax\"\n" \ ++ ".section .altinstr_replacement,\"a\"\n" \ + "663:\n\t" newinstr "\n664:\n" /* replacement */ \ + ".previous" :: "i" (feature), ##input) + +@@ -135,7 +135,7 @@ const unsigned char *const *find_nop_tab + " .byte 662b-661b\n" /* sourcelen */ \ + " .byte 664f-663f\n" /* replacementlen */ \ + ".previous\n" \ +- ".section .altinstr_replacement,\"ax\"\n" \ ++ ".section .altinstr_replacement,\"a\"\n" \ + "663:\n\t" newinstr "\n664:\n" /* replacement */ \ + ".previous" : output : [feat] "i" (feature), ##input) + +diff -urNp linux-2.6.29/arch/x86/include/asm/atomic_32.h 
linux-2.6.29/arch/x86/include/asm/atomic_32.h +--- linux-2.6.29/arch/x86/include/asm/atomic_32.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/atomic_32.h 2009-03-28 14:26:18.000000000 -0400 +@@ -39,7 +39,29 @@ + */ + static inline void atomic_add(int i, atomic_t *v) + { +- asm volatile(LOCK_PREFIX "addl %1,%0" ++ asm volatile(LOCK_PREFIX "addl %1,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ LOCK_PREFIX "subl %1,%0\n" ++ "into\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ ++ : "+m" (v->counter) ++ : "ir" (i)); ++} ++ ++/** ++ * atomic_add_unchecked - add integer to atomic variable ++ * @i: integer value to add ++ * @v: pointer of type atomic_t ++ * ++ * Atomically adds @i to @v. ++ */ ++static inline void atomic_add_unchecked(int i, atomic_t *v) ++{ ++ asm volatile(LOCK_PREFIX "addl %1,%0\n" + : "+m" (v->counter) + : "ir" (i)); + } +@@ -53,7 +75,15 @@ static inline void atomic_add(int i, ato + */ + static inline void atomic_sub(int i, atomic_t *v) + { +- asm volatile(LOCK_PREFIX "subl %1,%0" ++ asm volatile(LOCK_PREFIX "subl %1,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ LOCK_PREFIX "addl %1,%0\n" ++ "into\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ + : "+m" (v->counter) + : "ir" (i)); + } +@@ -71,7 +101,16 @@ static inline int atomic_sub_and_test(in + { + unsigned char c; + +- asm volatile(LOCK_PREFIX "subl %2,%0; sete %1" ++ asm volatile(LOCK_PREFIX "subl %2,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ LOCK_PREFIX "addl %2,%0\n" ++ "into\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ ++ "sete %1\n" + : "+m" (v->counter), "=qm" (c) + : "ir" (i) : "memory"); + return c; +@@ -85,7 +124,18 @@ static inline int atomic_sub_and_test(in + */ + static inline void atomic_inc(atomic_t *v) + { +- asm volatile(LOCK_PREFIX "incl %0" ++ asm volatile(LOCK_PREFIX "incl %0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "into\n0:\n" ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ LOCK_PREFIX "decl %0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ 
_ASM_EXTABLE(0b, 1b) ++#endif ++ + : "+m" (v->counter)); + } + +@@ -97,7 +147,18 @@ static inline void atomic_inc(atomic_t * + */ + static inline void atomic_dec(atomic_t *v) + { +- asm volatile(LOCK_PREFIX "decl %0" ++ asm volatile(LOCK_PREFIX "decl %0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "into\n0:\n" ++ ".pushsection .fixup,\"ax\"\n" ++ "1: \n" ++ LOCK_PREFIX "incl %0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + : "+m" (v->counter)); + } + +@@ -113,7 +174,19 @@ static inline int atomic_dec_and_test(at + { + unsigned char c; + +- asm volatile(LOCK_PREFIX "decl %0; sete %1" ++ asm volatile(LOCK_PREFIX "decl %0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "into\n0:\n" ++ ".pushsection .fixup,\"ax\"\n" ++ "1: \n" ++ LOCK_PREFIX "incl %0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ "sete %1\n" + : "+m" (v->counter), "=qm" (c) + : : "memory"); + return c != 0; +@@ -131,7 +204,19 @@ static inline int atomic_inc_and_test(at + { + unsigned char c; + +- asm volatile(LOCK_PREFIX "incl %0; sete %1" ++ asm volatile(LOCK_PREFIX "incl %0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "into\n0:\n" ++ ".pushsection .fixup,\"ax\"\n" ++ "1: \n" ++ LOCK_PREFIX "decl %0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ "sete %1\n" + : "+m" (v->counter), "=qm" (c) + : : "memory"); + return c != 0; +@@ -150,7 +235,16 @@ static inline int atomic_add_negative(in + { + unsigned char c; + +- asm volatile(LOCK_PREFIX "addl %2,%0; sets %1" ++ asm volatile(LOCK_PREFIX "addl %2,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ LOCK_PREFIX "subl %2,%0\n" ++ "into\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ ++ "sets %1\n" + : "+m" (v->counter), "=qm" (c) + : "ir" (i) : "memory"); + return c; +@@ -173,7 +267,15 @@ static inline int atomic_add_return(int + #endif + /* Modern 486+ processor */ + __i = i; +- asm volatile(LOCK_PREFIX "xaddl %0, %1" ++ asm volatile(LOCK_PREFIX "xaddl %0, %1\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ 
"jno 0f\n" ++ "movl %0, %1\n" ++ "into\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ + : "+r" (i), "+m" (v->counter) + : : "memory"); + return i + __i; +@@ -214,17 +316,28 @@ static inline int atomic_sub_return(int + */ + static inline int atomic_add_unless(atomic_t *v, int a, int u) + { +- int c, old; ++ int c, old, new; + c = atomic_read(v); + for (;;) { +- if (unlikely(c == (u))) ++ if (unlikely(c == u)) + break; +- old = atomic_cmpxchg((v), c, c + (a)); ++ ++ asm volatile("addl %2,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "into\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ ++ : "=r" (new) ++ : "0" (c), "ir" (a)); ++ ++ old = atomic_cmpxchg(v, c, new); + if (likely(old == c)) + break; + c = old; + } +- return c != (u); ++ return c != u; + } + + #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) +diff -urNp linux-2.6.29/arch/x86/include/asm/atomic_64.h linux-2.6.29/arch/x86/include/asm/atomic_64.h +--- linux-2.6.29/arch/x86/include/asm/atomic_64.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/atomic_64.h 2009-03-28 15:30:54.000000000 -0400 +@@ -38,7 +38,29 @@ + */ + static inline void atomic_add(int i, atomic_t *v) + { +- asm volatile(LOCK_PREFIX "addl %1,%0" ++ asm volatile(LOCK_PREFIX "addl %1,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ LOCK_PREFIX "subl %1,%0\n" ++ "int $4\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ ++ : "=m" (v->counter) ++ : "ir" (i), "m" (v->counter)); ++} ++ ++/** ++ * atomic_add_unchecked - add integer to atomic variable ++ * @i: integer value to add ++ * @v: pointer of type atomic_t ++ * ++ * Atomically adds @i to @v. 
++ */ ++static inline void atomic_add_unchecked(int i, atomic_t *v) ++{ ++ asm volatile(LOCK_PREFIX "addl %1,%0\n" + : "=m" (v->counter) + : "ir" (i), "m" (v->counter)); + } +@@ -52,7 +74,15 @@ static inline void atomic_add(int i, ato + */ + static inline void atomic_sub(int i, atomic_t *v) + { +- asm volatile(LOCK_PREFIX "subl %1,%0" ++ asm volatile(LOCK_PREFIX "subl %1,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ LOCK_PREFIX "addl %1,%0\n" ++ "int $4\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ + : "=m" (v->counter) + : "ir" (i), "m" (v->counter)); + } +@@ -70,7 +100,16 @@ static inline int atomic_sub_and_test(in + { + unsigned char c; + +- asm volatile(LOCK_PREFIX "subl %2,%0; sete %1" ++ asm volatile(LOCK_PREFIX "subl %2,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ LOCK_PREFIX "addl %2,%0\n" ++ "int $4\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ ++ "sete %1\n" + : "=m" (v->counter), "=qm" (c) + : "ir" (i), "m" (v->counter) : "memory"); + return c; +@@ -84,7 +123,19 @@ static inline int atomic_sub_and_test(in + */ + static inline void atomic_inc(atomic_t *v) + { +- asm volatile(LOCK_PREFIX "incl %0" ++ asm volatile(LOCK_PREFIX "incl %0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ "int $4\n0:\n" ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ LOCK_PREFIX "decl %0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + : "=m" (v->counter) + : "m" (v->counter)); + } +@@ -97,7 +148,19 @@ static inline void atomic_inc(atomic_t * + */ + static inline void atomic_dec(atomic_t *v) + { +- asm volatile(LOCK_PREFIX "decl %0" ++ asm volatile(LOCK_PREFIX "decl %0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ "int $4\n0:\n" ++ ".pushsection .fixup,\"ax\"\n" ++ "1: \n" ++ LOCK_PREFIX "incl %0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + : "=m" (v->counter) + : "m" (v->counter)); + } +@@ -114,7 +177,20 @@ static inline int atomic_dec_and_test(at + { + unsigned char c; + +- asm volatile(LOCK_PREFIX "decl 
%0; sete %1" ++ asm volatile(LOCK_PREFIX "decl %0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ "int $4\n0:\n" ++ ".pushsection .fixup,\"ax\"\n" ++ "1: \n" ++ LOCK_PREFIX "incl %0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ "sete %1\n" + : "=m" (v->counter), "=qm" (c) + : "m" (v->counter) : "memory"); + return c != 0; +@@ -132,7 +208,20 @@ static inline int atomic_inc_and_test(at + { + unsigned char c; + +- asm volatile(LOCK_PREFIX "incl %0; sete %1" ++ asm volatile(LOCK_PREFIX "incl %0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ "int $4\n0:\n" ++ ".pushsection .fixup,\"ax\"\n" ++ "1: \n" ++ LOCK_PREFIX "decl %0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ "sete %1\n" + : "=m" (v->counter), "=qm" (c) + : "m" (v->counter) : "memory"); + return c != 0; +@@ -151,7 +240,16 @@ static inline int atomic_add_negative(in + { + unsigned char c; + +- asm volatile(LOCK_PREFIX "addl %2,%0; sets %1" ++ asm volatile(LOCK_PREFIX "addl %2,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ LOCK_PREFIX "subl %2,%0\n" ++ "int $4\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ ++ "sets %1\n" + : "=m" (v->counter), "=qm" (c) + : "ir" (i), "m" (v->counter) : "memory"); + return c; +@@ -167,7 +265,15 @@ static inline int atomic_add_negative(in + static inline int atomic_add_return(int i, atomic_t *v) + { + int __i = i; +- asm volatile(LOCK_PREFIX "xaddl %0, %1" ++ asm volatile(LOCK_PREFIX "xaddl %0, %1\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ "movl %0, %1\n" ++ "int $4\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ + : "+r" (i), "+m" (v->counter) + : : "memory"); + return i + __i; +@@ -212,7 +318,15 @@ static inline int atomic_sub_return(int + */ + static inline void atomic64_add(long i, atomic64_t *v) + { +- asm volatile(LOCK_PREFIX "addq %1,%0" ++ asm volatile(LOCK_PREFIX "addq %1,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ LOCK_PREFIX "subq %1,%0\n" ++ "int $4\n0:\n" ++ _ASM_EXTABLE(0b, 0b) 
++#endif ++ + : "=m" (v->counter) + : "er" (i), "m" (v->counter)); + } +@@ -226,7 +340,15 @@ static inline void atomic64_add(long i, + */ + static inline void atomic64_sub(long i, atomic64_t *v) + { +- asm volatile(LOCK_PREFIX "subq %1,%0" ++ asm volatile(LOCK_PREFIX "subq %1,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ LOCK_PREFIX "addq %1,%0\n" ++ "int $4\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ + : "=m" (v->counter) + : "er" (i), "m" (v->counter)); + } +@@ -244,7 +366,16 @@ static inline int atomic64_sub_and_test( + { + unsigned char c; + +- asm volatile(LOCK_PREFIX "subq %2,%0; sete %1" ++ asm volatile(LOCK_PREFIX "subq %2,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ LOCK_PREFIX "addq %2,%0\n" ++ "int $4\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ ++ "sete %1\n" + : "=m" (v->counter), "=qm" (c) + : "er" (i), "m" (v->counter) : "memory"); + return c; +@@ -258,7 +389,19 @@ static inline int atomic64_sub_and_test( + */ + static inline void atomic64_inc(atomic64_t *v) + { +- asm volatile(LOCK_PREFIX "incq %0" ++ asm volatile(LOCK_PREFIX "incq %0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ "int $4\n0:\n" ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ LOCK_PREFIX "decq %0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + : "=m" (v->counter) + : "m" (v->counter)); + } +@@ -271,7 +414,19 @@ static inline void atomic64_inc(atomic64 + */ + static inline void atomic64_dec(atomic64_t *v) + { +- asm volatile(LOCK_PREFIX "decq %0" ++ asm volatile(LOCK_PREFIX "decq %0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ "int $4\n0:\n" ++ ".pushsection .fixup,\"ax\"\n" ++ "1: \n" ++ LOCK_PREFIX "incq %0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + : "=m" (v->counter) + : "m" (v->counter)); + } +@@ -288,7 +443,20 @@ static inline int atomic64_dec_and_test( + { + unsigned char c; + +- asm volatile(LOCK_PREFIX "decq %0; sete %1" ++ asm volatile(LOCK_PREFIX "decq %0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT 
++ "jno 0f\n" ++ "int $4\n0:\n" ++ ".pushsection .fixup,\"ax\"\n" ++ "1: \n" ++ LOCK_PREFIX "incq %0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ "sete %1\n" + : "=m" (v->counter), "=qm" (c) + : "m" (v->counter) : "memory"); + return c != 0; +@@ -306,7 +474,20 @@ static inline int atomic64_inc_and_test( + { + unsigned char c; + +- asm volatile(LOCK_PREFIX "incq %0; sete %1" ++ asm volatile(LOCK_PREFIX "incq %0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ "int $4\n0:\n" ++ ".pushsection .fixup,\"ax\"\n" ++ "1: \n" ++ LOCK_PREFIX "decq %0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ "sete %1\n" + : "=m" (v->counter), "=qm" (c) + : "m" (v->counter) : "memory"); + return c != 0; +@@ -325,7 +506,16 @@ static inline int atomic64_add_negative( + { + unsigned char c; + +- asm volatile(LOCK_PREFIX "addq %2,%0; sets %1" ++ asm volatile(LOCK_PREFIX "addq %2,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ LOCK_PREFIX "subq %2,%0\n" ++ "int $4\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ ++ "sets %1\n" + : "=m" (v->counter), "=qm" (c) + : "er" (i), "m" (v->counter) : "memory"); + return c; +@@ -341,7 +531,15 @@ static inline int atomic64_add_negative( + static inline long atomic64_add_return(long i, atomic64_t *v) + { + long __i = i; +- asm volatile(LOCK_PREFIX "xaddq %0, %1;" ++ asm volatile(LOCK_PREFIX "xaddq %0, %1\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ "movq %0, %1\n" ++ "int $4\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ + : "+r" (i), "+m" (v->counter) + : : "memory"); + return i + __i; +@@ -372,17 +570,29 @@ static inline long atomic64_sub_return(l + */ + static inline int atomic_add_unless(atomic_t *v, int a, int u) + { +- int c, old; ++ int c, old, new; + c = atomic_read(v); + for (;;) { +- if (unlikely(c == (u))) ++ if (unlikely(c == u)) + break; +- old = atomic_cmpxchg((v), c, c + (a)); ++ ++ asm volatile("addl %2,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ "int $4\n0:\n" ++ 
_ASM_EXTABLE(0b, 0b) ++#endif ++ ++ : "=r" (new) ++ : "0" (c), "ir" (a)); ++ ++ old = atomic_cmpxchg(v, c, new); + if (likely(old == c)) + break; + c = old; + } +- return c != (u); ++ return c != u; + } + + #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) +@@ -398,17 +608,29 @@ static inline int atomic_add_unless(atom + */ + static inline int atomic64_add_unless(atomic64_t *v, long a, long u) + { +- long c, old; ++ long c, old, new; + c = atomic64_read(v); + for (;;) { +- if (unlikely(c == (u))) ++ if (unlikely(c == u)) + break; +- old = atomic64_cmpxchg((v), c, c + (a)); ++ ++ asm volatile("addq %2,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ "int $4\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ ++ : "=r" (new) ++ : "0" (c), "er" (a)); ++ ++ old = atomic64_cmpxchg((v), c, new); + if (likely(old == c)) + break; + c = old; + } +- return c != (u); ++ return c != u; + } + + /** +diff -urNp linux-2.6.29/arch/x86/include/asm/boot.h linux-2.6.29/arch/x86/include/asm/boot.h +--- linux-2.6.29/arch/x86/include/asm/boot.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/boot.h 2009-03-28 14:26:18.000000000 -0400 +@@ -11,10 +11,15 @@ + #define ASK_VGA 0xfffd /* ask for it at bootup */ + + /* Physical address where kernel should be loaded. 
*/ +-#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ ++#define ____LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ + + (CONFIG_PHYSICAL_ALIGN - 1)) \ + & ~(CONFIG_PHYSICAL_ALIGN - 1)) + ++#ifndef __ASSEMBLY__ ++extern unsigned char __LOAD_PHYSICAL_ADDR[]; ++#define LOAD_PHYSICAL_ADDR ((unsigned long)__LOAD_PHYSICAL_ADDR) ++#endif ++ + #ifdef CONFIG_X86_64 + #define BOOT_HEAP_SIZE 0x7000 + #define BOOT_STACK_SIZE 0x4000 +diff -urNp linux-2.6.29/arch/x86/include/asm/cache.h linux-2.6.29/arch/x86/include/asm/cache.h +--- linux-2.6.29/arch/x86/include/asm/cache.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/cache.h 2009-03-28 14:26:18.000000000 -0400 +@@ -6,6 +6,7 @@ + #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + + #define __read_mostly __attribute__((__section__(".data.read_mostly"))) ++#define __read_only __attribute__((__section__(".data.read_only"))) + + #ifdef CONFIG_X86_VSMP + /* vSMP Internode cacheline shift */ +diff -urNp linux-2.6.29/arch/x86/include/asm/checksum_32.h linux-2.6.29/arch/x86/include/asm/checksum_32.h +--- linux-2.6.29/arch/x86/include/asm/checksum_32.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/checksum_32.h 2009-03-28 14:26:18.000000000 -0400 +@@ -31,6 +31,14 @@ asmlinkage __wsum csum_partial_copy_gene + int len, __wsum sum, + int *src_err_ptr, int *dst_err_ptr); + ++asmlinkage __wsum csum_partial_copy_generic_to_user(const void *src, void *dst, ++ int len, __wsum sum, ++ int *src_err_ptr, int *dst_err_ptr); ++ ++asmlinkage __wsum csum_partial_copy_generic_from_user(const void *src, void *dst, ++ int len, __wsum sum, ++ int *src_err_ptr, int *dst_err_ptr); ++ + /* + * Note: when you get a NULL pointer exception here this means someone + * passed in an incorrect kernel address to one of these functions. 
+@@ -50,7 +58,7 @@ static inline __wsum csum_partial_copy_f + int *err_ptr) + { + might_sleep(); +- return csum_partial_copy_generic((__force void *)src, dst, ++ return csum_partial_copy_generic_from_user((__force void *)src, dst, + len, sum, err_ptr, NULL); + } + +@@ -177,7 +185,7 @@ static inline __wsum csum_and_copy_to_us + { + might_sleep(); + if (access_ok(VERIFY_WRITE, dst, len)) +- return csum_partial_copy_generic(src, (__force void *)dst, ++ return csum_partial_copy_generic_to_user(src, (__force void *)dst, + len, sum, NULL, err_ptr); + + if (len) +diff -urNp linux-2.6.29/arch/x86/include/asm/desc.h linux-2.6.29/arch/x86/include/asm/desc.h +--- linux-2.6.29/arch/x86/include/asm/desc.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/desc.h 2009-03-28 14:26:18.000000000 -0400 +@@ -16,6 +16,7 @@ static inline void fill_ldt(struct desc_ + desc->base1 = (info->base_addr & 0x00ff0000) >> 16; + desc->type = (info->read_exec_only ^ 1) << 1; + desc->type |= info->contents << 2; ++ desc->type |= info->seg_not_present ^ 1; + desc->s = 1; + desc->dpl = 0x3; + desc->p = info->seg_not_present ^ 1; +@@ -32,16 +33,12 @@ static inline void fill_ldt(struct desc_ + } + + extern struct desc_ptr idt_descr; +-extern gate_desc idt_table[]; +- +-struct gdt_page { +- struct desc_struct gdt[GDT_ENTRIES]; +-} __attribute__((aligned(PAGE_SIZE))); +-DECLARE_PER_CPU(struct gdt_page, gdt_page); ++extern gate_desc idt_table[256]; + ++extern struct desc_struct cpu_gdt_table[NR_CPUS][PAGE_SIZE / sizeof(struct desc_struct)]; + static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) + { +- return per_cpu(gdt_page, cpu).gdt; ++ return cpu_gdt_table[cpu]; + } + + #ifdef CONFIG_X86_64 +@@ -115,19 +112,48 @@ static inline void paravirt_free_ldt(str + static inline void native_write_idt_entry(gate_desc *idt, int entry, + const gate_desc *gate) + { ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++ ++ pax_open_kernel(cr0); ++#endif ++ + 
memcpy(&idt[entry], gate, sizeof(*gate)); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + } + + static inline void native_write_ldt_entry(struct desc_struct *ldt, int entry, + const void *desc) + { ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++ ++ pax_open_kernel(cr0); ++#endif ++ + memcpy(&ldt[entry], desc, 8); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + } + + static inline void native_write_gdt_entry(struct desc_struct *gdt, int entry, + const void *desc, int type) + { + unsigned int size; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + switch (type) { + case DESC_TSS: + size = sizeof(tss_desc); +@@ -139,7 +165,17 @@ static inline void native_write_gdt_entr + size = sizeof(struct desc_struct); + break; + } ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + memcpy(&gdt[entry], desc, size); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + } + + static inline void pack_descriptor(struct desc_struct *desc, unsigned long base, +@@ -211,7 +247,19 @@ static inline void native_set_ldt(const + + static inline void native_load_tr_desc(void) + { ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++ ++ pax_open_kernel(cr0); ++#endif ++ + asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + } + + static inline void native_load_gdt(const struct desc_ptr *dtr) +@@ -246,8 +294,19 @@ static inline void native_load_tls(struc + unsigned int i; + struct desc_struct *gdt = get_cpu_gdt_table(cpu); + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++ ++ pax_open_kernel(cr0); ++#endif ++ + for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) + gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + } + + #define _LDT_empty(info) \ +@@ -379,6 +438,18 @@ static inline void set_system_intr_gate_ + _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, 
__KERNEL_CS); + } + ++#ifdef CONFIG_X86_32 ++static inline void set_user_cs(unsigned long base, unsigned long limit, int cpu) ++{ ++ struct desc_struct d; ++ ++ if (likely(limit)) ++ limit = (limit - 1UL) >> PAGE_SHIFT; ++ pack_descriptor(&d, base, limit, 0xFB, 0xC); ++ write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_DEFAULT_USER_CS, &d, DESCTYPE_S); ++} ++#endif ++ + #else + /* + * GET_DESC_BASE reads the descriptor base of the specified segment. +diff -urNp linux-2.6.29/arch/x86/include/asm/e820.h linux-2.6.29/arch/x86/include/asm/e820.h +--- linux-2.6.29/arch/x86/include/asm/e820.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/e820.h 2009-03-28 14:26:18.000000000 -0400 +@@ -135,7 +135,7 @@ extern char *memory_setup(void); + #define ISA_END_ADDRESS 0x100000 + #define is_ISA_range(s, e) ((s) >= ISA_START_ADDRESS && (e) < ISA_END_ADDRESS) + +-#define BIOS_BEGIN 0x000a0000 ++#define BIOS_BEGIN 0x000c0000 + #define BIOS_END 0x00100000 + + #ifdef __KERNEL__ +diff -urNp linux-2.6.29/arch/x86/include/asm/elf.h linux-2.6.29/arch/x86/include/asm/elf.h +--- linux-2.6.29/arch/x86/include/asm/elf.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/elf.h 2009-03-28 14:26:18.000000000 -0400 +@@ -252,7 +252,25 @@ extern int force_personality32; + the loader. We need to make sure that it is out of the way of the program + that it will "exec", and that there is sufficient room for the brk. */ + ++#ifdef CONFIG_PAX_SEGMEXEC ++#define ELF_ET_DYN_BASE ((current->mm->pax_flags & MF_PAX_SEGMEXEC) ? SEGMEXEC_TASK_SIZE/3*2 : TASK_SIZE/3*2) ++#else + #define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) ++#endif ++ ++#ifdef CONFIG_PAX_ASLR ++#ifdef CONFIG_X86_32 ++#define PAX_ELF_ET_DYN_BASE 0x10000000UL ++ ++#define PAX_DELTA_MMAP_LEN (current->mm->pax_flags & MF_PAX_SEGMEXEC ? 15 : 16) ++#define PAX_DELTA_STACK_LEN (current->mm->pax_flags & MF_PAX_SEGMEXEC ? 
15 : 16) ++#else ++#define PAX_ELF_ET_DYN_BASE 0x400000UL ++ ++#define PAX_DELTA_MMAP_LEN ((test_thread_flag(TIF_IA32)) ? 16 : 32) ++#define PAX_DELTA_STACK_LEN ((test_thread_flag(TIF_IA32)) ? 16 : 32) ++#endif ++#endif + + /* This yields a mask that user programs can use to figure out what + instruction set this CPU supports. This could be done in user space, +@@ -304,8 +322,7 @@ do { \ + #define ARCH_DLINFO \ + do { \ + if (vdso_enabled) \ +- NEW_AUX_ENT(AT_SYSINFO_EHDR, \ +- (unsigned long)current->mm->context.vdso); \ ++ NEW_AUX_ENT(AT_SYSINFO_EHDR, current->mm->context.vdso);\ + } while (0) + + #define AT_SYSINFO 32 +@@ -316,7 +333,7 @@ do { \ + + #endif /* !CONFIG_X86_32 */ + +-#define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso) ++#define VDSO_CURRENT_BASE (current->mm->context.vdso) + + #define VDSO_ENTRY \ + ((unsigned long)VDSO32_SYMBOL(VDSO_CURRENT_BASE, vsyscall)) +@@ -330,7 +347,4 @@ extern int arch_setup_additional_pages(s + extern int syscall32_setup_pages(struct linux_binprm *, int exstack); + #define compat_arch_setup_additional_pages syscall32_setup_pages + +-extern unsigned long arch_randomize_brk(struct mm_struct *mm); +-#define arch_randomize_brk arch_randomize_brk +- + #endif /* _ASM_X86_ELF_H */ +diff -urNp linux-2.6.29/arch/x86/include/asm/futex.h linux-2.6.29/arch/x86/include/asm/futex.h +--- linux-2.6.29/arch/x86/include/asm/futex.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/futex.h 2009-03-28 14:26:18.000000000 -0400 +@@ -11,6 +11,40 @@ + #include + #include + ++#ifdef CONFIG_X86_32 ++#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ ++ asm volatile( \ ++ "movw\t%w6, %%ds\n" \ ++ "1:\t" insn "\n" \ ++ "2:\tpushl\t%%ss\n" \ ++ "\tpopl\t%%ds\n" \ ++ "\t.section .fixup,\"ax\"\n" \ ++ "3:\tmov\t%3, %1\n" \ ++ "\tjmp\t2b\n" \ ++ "\t.previous\n" \ ++ _ASM_EXTABLE(1b, 3b) \ ++ : "=r" (oldval), "=r" (ret), "+m" (*uaddr) \ ++ : "i" (-EFAULT), "0" (oparg), "1" (0), "r" (__USER_DS)) ++ 
++#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \ ++ asm volatile("movw\t%w7, %%es\n" \ ++ "1:\tmovl\t%%es:%2, %0\n" \ ++ "\tmovl\t%0, %3\n" \ ++ "\t" insn "\n" \ ++ "2:\t" LOCK_PREFIX "cmpxchgl %3, %%es:%2\n"\ ++ "\tjnz\t1b\n" \ ++ "3:\tpushl\t%%ss\n" \ ++ "\tpopl\t%%es\n" \ ++ "\t.section .fixup,\"ax\"\n" \ ++ "4:\tmov\t%5, %1\n" \ ++ "\tjmp\t3b\n" \ ++ "\t.previous\n" \ ++ _ASM_EXTABLE(1b, 4b) \ ++ _ASM_EXTABLE(2b, 4b) \ ++ : "=&a" (oldval), "=&r" (ret), \ ++ "+m" (*uaddr), "=&r" (tem) \ ++ : "r" (oparg), "i" (-EFAULT), "1" (0), "r" (__USER_DS)) ++#else + #define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ + asm volatile("1:\t" insn "\n" \ + "2:\t.section .fixup,\"ax\"\n" \ +@@ -36,8 +70,9 @@ + : "=&a" (oldval), "=&r" (ret), \ + "+m" (*uaddr), "=&r" (tem) \ + : "r" (oparg), "i" (-EFAULT), "1" (0)) ++#endif + +-static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) ++static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) + { + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; +@@ -61,11 +96,20 @@ static inline int futex_atomic_op_inuser + + switch (op) { + case FUTEX_OP_SET: ++#ifdef CONFIG_X86_32 ++ __futex_atomic_op1("xchgl %0, %%ds:%2", ret, oldval, uaddr, oparg); ++#else + __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg); ++#endif + break; + case FUTEX_OP_ADD: ++#ifdef CONFIG_X86_32 ++ __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %%ds:%2", ret, oldval, ++ uaddr, oparg); ++#else + __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval, + uaddr, oparg); ++#endif + break; + case FUTEX_OP_OR: + __futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, oparg); +@@ -109,7 +153,7 @@ static inline int futex_atomic_op_inuser + return ret; + } + +-static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, ++static inline int futex_atomic_cmpxchg_inatomic(u32 __user *uaddr, int oldval, + int newval) + { + +@@ -122,14 +166,27 @@ static inline int 
futex_atomic_cmpxchg_i + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + +- asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %3, %1\n" ++ asm volatile( ++#ifdef CONFIG_X86_32 ++ "\tmovw %w5, %%ds\n" ++ "1:\t" LOCK_PREFIX "cmpxchgl %3, %1\n" ++ "2:\tpushl %%ss\n" ++ "\tpopl %%ds\n" ++ "\t.section .fixup, \"ax\"\n" ++#else ++ "1:\t" LOCK_PREFIX "cmpxchgl %3, %1\n" + "2:\t.section .fixup, \"ax\"\n" ++#endif + "3:\tmov %2, %0\n" + "\tjmp 2b\n" + "\t.previous\n" + _ASM_EXTABLE(1b, 3b) + : "=a" (oldval), "+m" (*uaddr) ++#ifdef CONFIG_X86_32 ++ : "i" (-EFAULT), "r" (newval), "0" (oldval), "r" (__USER_DS) ++#else + : "i" (-EFAULT), "r" (newval), "0" (oldval) ++#endif + : "memory" + ); + +diff -urNp linux-2.6.29/arch/x86/include/asm/i387.h linux-2.6.29/arch/x86/include/asm/i387.h +--- linux-2.6.29/arch/x86/include/asm/i387.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/i387.h 2009-03-28 14:26:18.000000000 -0400 +@@ -203,13 +203,8 @@ static inline void restore_fpu(struct ta + } + + /* We need a safe address that is cheap to find and that is already +- in L1 during context switch. The best choices are unfortunately +- different for UP and SMP */ +-#ifdef CONFIG_SMP +-#define safe_address (__per_cpu_offset[0]) +-#else +-#define safe_address (kstat_cpu(0).cpustat.user) +-#endif ++ in L1 during context switch. 
*/ ++#define safe_address (init_tss[smp_processor_id()].x86_tss.sp0) + + /* + * These must be called with preempt disabled +diff -urNp linux-2.6.29/arch/x86/include/asm/io_64.h linux-2.6.29/arch/x86/include/asm/io_64.h +--- linux-2.6.29/arch/x86/include/asm/io_64.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/io_64.h 2009-03-28 14:26:18.000000000 -0400 +@@ -158,6 +158,17 @@ static inline void *phys_to_virt(unsigne + } + #endif + ++#define ARCH_HAS_VALID_PHYS_ADDR_RANGE ++static inline int valid_phys_addr_range (unsigned long addr, size_t count) ++{ ++ return ((addr + count + PAGE_SIZE - 1) >> PAGE_SHIFT) < (1 << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) ? 1 : 0; ++} ++ ++static inline int valid_mmap_phys_addr_range (unsigned long pfn, size_t count) ++{ ++ return (pfn + (count >> PAGE_SHIFT)) < (1 << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) ? 1 : 0; ++} ++ + /* + * Change "struct page" to physical address. + */ +diff -urNp linux-2.6.29/arch/x86/include/asm/irqflags.h linux-2.6.29/arch/x86/include/asm/irqflags.h +--- linux-2.6.29/arch/x86/include/asm/irqflags.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/irqflags.h 2009-03-28 14:26:18.000000000 -0400 +@@ -141,6 +141,8 @@ static inline unsigned long __raw_local_ + #define INTERRUPT_RETURN iret + #define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit + #define GET_CR0_INTO_EAX movl %cr0, %eax ++#define GET_CR0_INTO_EDX movl %cr0, %edx ++#define SET_CR0_FROM_EDX movl %edx, %cr0 + #endif + + +diff -urNp linux-2.6.29/arch/x86/include/asm/kmap_types.h linux-2.6.29/arch/x86/include/asm/kmap_types.h +--- linux-2.6.29/arch/x86/include/asm/kmap_types.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/kmap_types.h 2009-03-28 14:26:18.000000000 -0400 +@@ -21,7 +21,8 @@ D(9) KM_IRQ0, + D(10) KM_IRQ1, + D(11) KM_SOFTIRQ0, + D(12) KM_SOFTIRQ1, +-D(13) KM_TYPE_NR ++D(13) KM_CLEARPAGE, ++D(14) KM_TYPE_NR + }; + + #undef D +diff -urNp 
linux-2.6.29/arch/x86/include/asm/kvm_host.h linux-2.6.29/arch/x86/include/asm/kvm_host.h +--- linux-2.6.29/arch/x86/include/asm/kvm_host.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/kvm_host.h 2009-03-28 14:26:18.000000000 -0400 +@@ -494,7 +494,7 @@ struct kvm_x86_ops { + int (*get_mt_mask_shift)(void); + }; + +-extern struct kvm_x86_ops *kvm_x86_ops; ++extern const struct kvm_x86_ops *kvm_x86_ops; + + int kvm_mmu_module_init(void); + void kvm_mmu_module_exit(void); +diff -urNp linux-2.6.29/arch/x86/include/asm/linkage.h linux-2.6.29/arch/x86/include/asm/linkage.h +--- linux-2.6.29/arch/x86/include/asm/linkage.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/linkage.h 2009-03-28 14:26:18.000000000 -0400 +@@ -7,6 +7,11 @@ + #ifdef CONFIG_X86_64 + #define __ALIGN .p2align 4,,15 + #define __ALIGN_STR ".p2align 4,,15" ++#else ++#ifdef CONFIG_X86_ALIGNMENT_16 ++#define __ALIGN .align 16,0x90 ++#define __ALIGN_STR ".align 16,0x90" ++#endif + #endif + + #ifdef CONFIG_X86_32 +@@ -52,11 +57,6 @@ + + #endif + +-#ifdef CONFIG_X86_ALIGNMENT_16 +-#define __ALIGN .align 16,0x90 +-#define __ALIGN_STR ".align 16,0x90" +-#endif +- + /* + * to check ENTRY_X86/END_X86 and + * KPROBE_ENTRY_X86/KPROBE_END_X86 +diff -urNp linux-2.6.29/arch/x86/include/asm/local.h linux-2.6.29/arch/x86/include/asm/local.h +--- linux-2.6.29/arch/x86/include/asm/local.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/local.h 2009-03-28 14:26:19.000000000 -0400 +@@ -18,26 +18,90 @@ typedef struct { + + static inline void local_inc(local_t *l) + { +- asm volatile(_ASM_INC "%0" ++ asm volatile(_ASM_INC "%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ _ASM_DEC "%0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + : "+m" (l->a.counter)); + } + + static inline void 
local_dec(local_t *l) + { +- asm volatile(_ASM_DEC "%0" ++ asm volatile(_ASM_DEC "%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ _ASM_INC "%0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + : "+m" (l->a.counter)); + } + + static inline void local_add(long i, local_t *l) + { +- asm volatile(_ASM_ADD "%1,%0" ++ asm volatile(_ASM_ADD "%1,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ _ASM_SUB "%1,%0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + : "+m" (l->a.counter) + : "ir" (i)); + } + + static inline void local_sub(long i, local_t *l) + { +- asm volatile(_ASM_SUB "%1,%0" ++ asm volatile(_ASM_SUB "%1,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ _ASM_ADD "%1,%0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + : "+m" (l->a.counter) + : "ir" (i)); + } +@@ -55,7 +119,24 @@ static inline int local_sub_and_test(lon + { + unsigned char c; + +- asm volatile(_ASM_SUB "%2,%0; sete %1" ++ asm volatile(_ASM_SUB "%2,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ _ASM_ADD "%2,%0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ "sete %1\n" + : "+m" (l->a.counter), "=qm" (c) + : "ir" (i) : "memory"); + return c; +@@ -73,7 +154,24 @@ static inline int local_dec_and_test(loc + { + unsigned char c; + +- asm volatile(_ASM_DEC "%0; sete %1" ++ asm volatile(_ASM_DEC "%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" 
++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ _ASM_INC "%0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ "sete %1\n" + : "+m" (l->a.counter), "=qm" (c) + : : "memory"); + return c != 0; +@@ -91,7 +189,24 @@ static inline int local_inc_and_test(loc + { + unsigned char c; + +- asm volatile(_ASM_INC "%0; sete %1" ++ asm volatile(_ASM_INC "%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ _ASM_DEC "%0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ "sete %1\n" + : "+m" (l->a.counter), "=qm" (c) + : : "memory"); + return c != 0; +@@ -110,7 +225,24 @@ static inline int local_add_negative(lon + { + unsigned char c; + +- asm volatile(_ASM_ADD "%2,%0; sets %1" ++ asm volatile(_ASM_ADD "%2,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ _ASM_SUB "%2,%0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ "sets %1\n" + : "+m" (l->a.counter), "=qm" (c) + : "ir" (i) : "memory"); + return c; +@@ -133,7 +265,23 @@ static inline long local_add_return(long + #endif + /* Modern 486+ processor */ + __i = i; +- asm volatile(_ASM_XADD "%0, %1;" ++ asm volatile(_ASM_XADD "%0, %1\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ _ASM_MOV "%0,%1\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + : "+r" (i), "+m" (l->a.counter) + : : "memory"); + return i + __i; +diff -urNp linux-2.6.29/arch/x86/include/asm/mach-default/apm.h linux-2.6.29/arch/x86/include/asm/mach-default/apm.h +--- linux-2.6.29/arch/x86/include/asm/mach-default/apm.h 2009-03-23 19:12:14.000000000 -0400 ++++ 
linux-2.6.29/arch/x86/include/asm/mach-default/apm.h 2009-03-28 14:26:19.000000000 -0400 +@@ -34,7 +34,7 @@ static inline void apm_bios_call_asm(u32 + __asm__ __volatile__(APM_DO_ZERO_SEGS + "pushl %%edi\n\t" + "pushl %%ebp\n\t" +- "lcall *%%cs:apm_bios_entry\n\t" ++ "lcall *%%ss:apm_bios_entry\n\t" + "setc %%al\n\t" + "popl %%ebp\n\t" + "popl %%edi\n\t" +@@ -58,7 +58,7 @@ static inline u8 apm_bios_call_simple_as + __asm__ __volatile__(APM_DO_ZERO_SEGS + "pushl %%edi\n\t" + "pushl %%ebp\n\t" +- "lcall *%%cs:apm_bios_entry\n\t" ++ "lcall *%%ss:apm_bios_entry\n\t" + "setc %%bl\n\t" + "popl %%ebp\n\t" + "popl %%edi\n\t" +diff -urNp linux-2.6.29/arch/x86/include/asm/mman.h linux-2.6.29/arch/x86/include/asm/mman.h +--- linux-2.6.29/arch/x86/include/asm/mman.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/mman.h 2009-03-28 14:26:19.000000000 -0400 +@@ -17,4 +17,14 @@ + #define MCL_CURRENT 1 /* lock all current mappings */ + #define MCL_FUTURE 2 /* lock all future mappings */ + ++#ifdef __KERNEL__ ++#ifndef __ASSEMBLY__ ++#ifdef CONFIG_X86_32 ++#define arch_mmap_check i386_mmap_check ++int i386_mmap_check(unsigned long addr, unsigned long len, ++ unsigned long flags); ++#endif ++#endif ++#endif ++ + #endif /* _ASM_X86_MMAN_H */ +diff -urNp linux-2.6.29/arch/x86/include/asm/mmu_context_32.h linux-2.6.29/arch/x86/include/asm/mmu_context_32.h +--- linux-2.6.29/arch/x86/include/asm/mmu_context_32.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/mmu_context_32.h 2009-03-28 14:26:19.000000000 -0400 +@@ -32,6 +32,22 @@ static inline void switch_mm(struct mm_s + */ + if (unlikely(prev->context.ldt != next->context.ldt)) + load_LDT_nolock(&next->context); ++ ++#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_SMP) ++ if (!nx_enabled) { ++ smp_mb__before_clear_bit(); ++ cpu_clear(cpu, prev->context.cpu_user_cs_mask); ++ smp_mb__after_clear_bit(); ++ cpu_set(cpu, next->context.cpu_user_cs_mask); ++ } ++#endif ++ ++#if 
defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) ++ if (unlikely(prev->context.user_cs_base != next->context.user_cs_base || ++ prev->context.user_cs_limit != next->context.user_cs_limit)) ++ set_user_cs(next->context.user_cs_base, next->context.user_cs_limit, cpu); ++#endif ++ + } + #ifdef CONFIG_SMP + else { +@@ -44,6 +60,19 @@ static inline void switch_mm(struct mm_s + */ + load_cr3(next->pgd); + load_LDT_nolock(&next->context); ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ if (!nx_enabled) ++ cpu_set(cpu, next->context.cpu_user_cs_mask); ++#endif ++ ++#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) ++#ifdef CONFIG_PAX_PAGEEXEC ++ if (!((next->pax_flags & MF_PAX_PAGEEXEC) && nx_enabled)) ++#endif ++ set_user_cs(next->context.user_cs_base, next->context.user_cs_limit, cpu); ++#endif ++ + } + } + #endif +diff -urNp linux-2.6.29/arch/x86/include/asm/mmu.h linux-2.6.29/arch/x86/include/asm/mmu.h +--- linux-2.6.29/arch/x86/include/asm/mmu.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/mmu.h 2009-03-28 14:26:19.000000000 -0400 +@@ -9,10 +9,23 @@ + * we put the segment information here. 
+ */ + typedef struct { +- void *ldt; ++ struct desc_struct *ldt; + int size; + struct mutex lock; +- void *vdso; ++ unsigned long vdso; ++ ++#ifdef CONFIG_X86_32 ++#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) ++ unsigned long user_cs_base; ++ unsigned long user_cs_limit; ++ ++#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_SMP) ++ cpumask_t cpu_user_cs_mask; ++#endif ++ ++#endif ++#endif ++ + } mm_context_t; + + #ifdef CONFIG_SMP +diff -urNp linux-2.6.29/arch/x86/include/asm/module.h linux-2.6.29/arch/x86/include/asm/module.h +--- linux-2.6.29/arch/x86/include/asm/module.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/module.h 2009-03-28 14:26:19.000000000 -0400 +@@ -74,7 +74,12 @@ struct mod_arch_specific {}; + # else + # define MODULE_STACKSIZE "" + # endif +-# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE ++# ifdef CONFIG_GRKERNSEC ++# define MODULE_GRSEC "GRSECURITY " ++# else ++# define MODULE_GRSEC "" ++# endif ++# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE MODULE_GRSEC + #endif + + #endif /* _ASM_X86_MODULE_H */ +diff -urNp linux-2.6.29/arch/x86/include/asm/page_32.h linux-2.6.29/arch/x86/include/asm/page_32.h +--- linux-2.6.29/arch/x86/include/asm/page_32.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/page_32.h 2009-03-28 14:26:19.000000000 -0400 +@@ -13,6 +13,23 @@ + */ + #define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) + ++#ifdef CONFIG_PAX_KERNEXEC ++#ifndef __ASSEMBLY__ ++extern unsigned char MODULES_VADDR[]; ++extern unsigned char MODULES_END[]; ++extern unsigned char KERNEL_TEXT_OFFSET[]; ++#define ktla_ktva(addr) (addr + (unsigned long)KERNEL_TEXT_OFFSET) ++#define ktva_ktla(addr) (addr - (unsigned long)KERNEL_TEXT_OFFSET) ++#endif ++#else ++#define ktla_ktva(addr) (addr) ++#define ktva_ktla(addr) (addr) ++#endif ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++#define CONFIG_ARCH_TRACK_EXEC_LIMIT 1 ++#endif ++ + #ifdef CONFIG_4KSTACKS + 
#define THREAD_ORDER 0 + #else +diff -urNp linux-2.6.29/arch/x86/include/asm/page_64.h linux-2.6.29/arch/x86/include/asm/page_64.h +--- linux-2.6.29/arch/x86/include/asm/page_64.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/page_64.h 2009-03-28 14:26:19.000000000 -0400 +@@ -49,6 +49,9 @@ + #define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) + #define __START_KERNEL_map _AC(0xffffffff80000000, UL) + ++#define ktla_ktva(addr) (addr) ++#define ktva_ktla(addr) (addr) ++ + /* See Documentation/x86_64/mm.txt for a description of the memory map. */ + #define __PHYSICAL_MASK_SHIFT 46 + #define __VIRTUAL_MASK_SHIFT 48 +@@ -101,5 +104,6 @@ extern void init_extra_mapping_wb(unsign + #define pfn_valid(pfn) ((pfn) < max_pfn) + #endif + ++#define nx_enabled (1) + + #endif /* _ASM_X86_PAGE_64_H */ +diff -urNp linux-2.6.29/arch/x86/include/asm/paravirt.h linux-2.6.29/arch/x86/include/asm/paravirt.h +--- linux-2.6.29/arch/x86/include/asm/paravirt.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/paravirt.h 2009-03-28 14:26:19.000000000 -0400 +@@ -1558,7 +1558,7 @@ static inline unsigned long __raw_local_ + #define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax + #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4) + #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4) +-#define PARA_INDIRECT(addr) *%cs:addr ++#define PARA_INDIRECT(addr) *%ss:addr + #endif + + #define INTERRUPT_RETURN \ +diff -urNp linux-2.6.29/arch/x86/include/asm/pda.h linux-2.6.29/arch/x86/include/asm/pda.h +--- linux-2.6.29/arch/x86/include/asm/pda.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/pda.h 2009-03-28 14:26:19.000000000 -0400 +@@ -16,11 +16,9 @@ struct x8664_pda { + unsigned long oldrsp; /* 24 user rsp for system call */ + int irqcount; /* 32 Irq nesting counter. 
Starts -1 */ + unsigned int cpunumber; /* 36 Logical CPU number */ +-#ifdef CONFIG_CC_STACKPROTECTOR + unsigned long stack_canary; /* 40 stack canary value */ + /* gcc-ABI: this canary MUST be at + offset 40!!! */ +-#endif + char *irqstackptr; + short nodenumber; /* number of current node (32k max) */ + short in_bootmem; /* pda lives in bootmem */ +diff -urNp linux-2.6.29/arch/x86/include/asm/percpu.h linux-2.6.29/arch/x86/include/asm/percpu.h +--- linux-2.6.29/arch/x86/include/asm/percpu.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/percpu.h 2009-03-28 14:26:19.000000000 -0400 +@@ -93,6 +93,12 @@ DECLARE_PER_CPU(struct x8664_pda, pda); + + #define __my_cpu_offset x86_read_percpu(this_cpu_off) + ++#include ++#include ++#define __per_cpu_offset __per_cpu_offset ++extern unsigned long __per_cpu_offset[NR_CPUS]; ++#define per_cpu_offset(x) (__per_cpu_offset[x] + (unsigned long)__per_cpu_start) ++ + /* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */ + #define __percpu_seg "%%fs:" + +diff -urNp linux-2.6.29/arch/x86/include/asm/pgalloc.h linux-2.6.29/arch/x86/include/asm/pgalloc.h +--- linux-2.6.29/arch/x86/include/asm/pgalloc.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/pgalloc.h 2009-03-28 14:26:19.000000000 -0400 +@@ -52,7 +52,7 @@ static inline void pmd_populate_kernel(s + pmd_t *pmd, pte_t *pte) + { + paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT); +- set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); ++ set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); + } + + static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, +diff -urNp linux-2.6.29/arch/x86/include/asm/pgtable-2level.h linux-2.6.29/arch/x86/include/asm/pgtable-2level.h +--- linux-2.6.29/arch/x86/include/asm/pgtable-2level.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/pgtable-2level.h 2009-03-28 14:26:19.000000000 -0400 +@@ -18,7 +18,19 @@ static inline void native_set_pte(pte_t + + static 
inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) + { ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++ ++ pax_open_kernel(cr0); ++#endif ++ + *pmdp = pmd; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + } + + static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) +diff -urNp linux-2.6.29/arch/x86/include/asm/pgtable_32.h linux-2.6.29/arch/x86/include/asm/pgtable_32.h +--- linux-2.6.29/arch/x86/include/asm/pgtable_32.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/pgtable_32.h 2009-03-28 14:26:19.000000000 -0400 +@@ -25,8 +25,6 @@ + struct mm_struct; + struct vm_area_struct; + +-extern pgd_t swapper_pg_dir[1024]; +- + static inline void pgtable_cache_init(void) { } + static inline void check_pgt_cache(void) { } + void paging_init(void); +@@ -46,6 +44,11 @@ extern void set_pmd_pfn(unsigned long, u + # include + #endif + ++extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; ++#ifdef CONFIG_X86_PAE ++extern pmd_t swapper_pm_dir[PTRS_PER_PGD][PTRS_PER_PMD]; ++#endif ++ + #define PGDIR_SIZE (1UL << PGDIR_SHIFT) + #define PGDIR_MASK (~(PGDIR_SIZE - 1)) + +@@ -83,7 +86,7 @@ extern void set_pmd_pfn(unsigned long, u + #undef TEST_ACCESS_OK + + /* The boot page tables (all created as a single array) */ +-extern unsigned long pg0[]; ++extern pte_t pg0[]; + + #define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE)) + +@@ -166,6 +169,9 @@ do { \ + + #endif /* !__ASSEMBLY__ */ + ++#define HAVE_ARCH_UNMAPPED_AREA ++#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN ++ + /* + * kern_addr_valid() is (1) for FLATMEM and (0) for + * SPARSEMEM and DISCONTIGMEM +diff -urNp linux-2.6.29/arch/x86/include/asm/pgtable-3level.h linux-2.6.29/arch/x86/include/asm/pgtable-3level.h +--- linux-2.6.29/arch/x86/include/asm/pgtable-3level.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/pgtable-3level.h 2009-03-28 14:26:19.000000000 -0400 +@@ -70,12 +70,36 @@ static inline void native_set_pte_atomic + + 
static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) + { ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++ ++ pax_open_kernel(cr0); ++#endif ++ + set_64bit((unsigned long long *)(pmdp), native_pmd_val(pmd)); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + } + + static inline void native_set_pud(pud_t *pudp, pud_t pud) + { ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++ ++ pax_open_kernel(cr0); ++#endif ++ + set_64bit((unsigned long long *)(pudp), native_pud_val(pud)); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + } + + /* +diff -urNp linux-2.6.29/arch/x86/include/asm/pgtable_64.h linux-2.6.29/arch/x86/include/asm/pgtable_64.h +--- linux-2.6.29/arch/x86/include/asm/pgtable_64.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/pgtable_64.h 2009-03-28 14:26:19.000000000 -0400 +@@ -15,9 +15,12 @@ + + extern pud_t level3_kernel_pgt[512]; + extern pud_t level3_ident_pgt[512]; ++extern pud_t level3_vmalloc_pgt[512]; ++extern pud_t level3_vmemmap_pgt[512]; + extern pmd_t level2_kernel_pgt[512]; + extern pmd_t level2_fixmap_pgt[512]; +-extern pmd_t level2_ident_pgt[512]; ++extern pmd_t level2_ident_pgt[512*4]; ++extern pte_t level1_fixmap_pgt[512]; + extern pgd_t init_level4_pgt[]; + + #define swapper_pg_dir init_level4_pgt +@@ -106,7 +109,19 @@ static inline pte_t native_ptep_get_and_ + + static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) + { ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++ ++ pax_open_kernel(cr0); ++#endif ++ + *pmdp = pmd; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + } + + static inline void native_pmd_clear(pmd_t *pmd) +@@ -158,17 +173,17 @@ static inline void native_pgd_clear(pgd_ + + static inline int pgd_bad(pgd_t pgd) + { +- return (pgd_val(pgd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; ++ return (pgd_val(pgd) & ~(PTE_PFN_MASK | _PAGE_USER | _PAGE_NX)) != _KERNPG_TABLE; + } + + static inline int 
pud_bad(pud_t pud) + { +- return (pud_val(pud) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; ++ return (pud_val(pud) & ~(PTE_PFN_MASK | _PAGE_USER | _PAGE_NX)) != _KERNPG_TABLE; + } + + static inline int pmd_bad(pmd_t pmd) + { +- return (pmd_val(pmd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; ++ return (pmd_val(pmd) & ~(PTE_PFN_MASK | _PAGE_USER | _PAGE_NX)) != _KERNPG_TABLE; + } + + #define pte_none(x) (!pte_val((x))) +diff -urNp linux-2.6.29/arch/x86/include/asm/pgtable.h linux-2.6.29/arch/x86/include/asm/pgtable.h +--- linux-2.6.29/arch/x86/include/asm/pgtable.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/pgtable.h 2009-03-28 14:26:19.000000000 -0400 +@@ -13,12 +13,11 @@ + #define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ + #define _PAGE_BIT_PAT 7 /* on 4KB pages */ + #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ +-#define _PAGE_BIT_UNUSED1 9 /* available for programmer */ ++#define _PAGE_BIT_SPECIAL 9 /* special mappings, no associated struct page */ + #define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */ + #define _PAGE_BIT_UNUSED3 11 + #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ +-#define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 +-#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 ++#define _PAGE_BIT_CPA_TEST _PAGE_BIT_SPECIAL + #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ + + /* If _PAGE_BIT_PRESENT is clear, we use these: */ +@@ -36,7 +35,6 @@ + #define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) + #define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) + #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) +-#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1) + #define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP) + #define _PAGE_UNUSED3 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3) + #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) +@@ -48,7 +46,7 @@ + #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) + #define _PAGE_NX (_AT(pteval_t, 1) 
<< _PAGE_BIT_NX) + #else +-#define _PAGE_NX (_AT(pteval_t, 0)) ++#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3) + #endif + + #define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE) +@@ -85,6 +83,9 @@ + #define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ + _PAGE_ACCESSED) + ++#define PAGE_READONLY_NOEXEC PAGE_READONLY ++#define PAGE_SHARED_NOEXEC PAGE_SHARED ++ + #define __PAGE_KERNEL_EXEC \ + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL) + #define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX) +@@ -96,7 +97,7 @@ + #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT) + #define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD) + #define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) +-#define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT) ++#define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_RO | _PAGE_PCD | _PAGE_PWT | _PAGE_USER) + #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) + #define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE) + #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) +@@ -155,7 +156,7 @@ + * bits are combined, this will alow user to access the high address mapped + * VDSO in the presence of CONFIG_COMPAT_VDSO + */ +-#define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */ ++#define PTE_IDENT_ATTR 0x063 /* PRESENT+RW+DIRTY+ACCESSED */ + #define PDE_IDENT_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */ + #define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ + #endif +@@ -183,10 +184,17 @@ extern unsigned long empty_zero_page[PAG + extern spinlock_t pgd_lock; + extern struct list_head pgd_list; + ++extern pteval_t __supported_pte_mask; ++ + /* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. 
+ */ ++static inline int pte_user(pte_t pte) ++{ ++ return pte_val(pte) & _PAGE_USER; ++} ++ + static inline int pte_dirty(pte_t pte) + { + return pte_flags(pte) & _PAGE_DIRTY; +@@ -255,9 +263,29 @@ static inline pte_t pte_wrprotect(pte_t + return __pte(pte_val(pte) & ~_PAGE_RW); + } + ++static inline pte_t pte_mkread(pte_t pte) ++{ ++ return __pte(pte_val(pte) | _PAGE_USER); ++} ++ + static inline pte_t pte_mkexec(pte_t pte) + { +- return __pte(pte_val(pte) & ~_PAGE_NX); ++#ifdef CONFIG_X86_PAE ++ if (__supported_pte_mask & _PAGE_NX) ++ return __pte(pte_val(pte) & ~(pteval_t)_PAGE_NX); ++ else ++#endif ++ return __pte(pte_val(pte) | _PAGE_USER); ++} ++ ++static inline pte_t pte_exprotect(pte_t pte) ++{ ++#ifdef CONFIG_X86_PAE ++ if (__supported_pte_mask & _PAGE_NX) ++ return __pte(pte_val(pte) | _PAGE_NX); ++ else ++#endif ++ return __pte(pte_val(pte) & ~_PAGE_USER); + } + + static inline pte_t pte_mkdirty(pte_t pte) +@@ -300,8 +328,6 @@ static inline pte_t pte_mkspecial(pte_t + return __pte(pte_val(pte) | _PAGE_SPECIAL); + } + +-extern pteval_t __supported_pte_mask; +- + /* + * Mask out unsupported bits in a present pgprot. Non-present pgprots + * can use those bits for other purposes, so leave them be. 
+@@ -601,7 +627,19 @@ static inline void ptep_set_wrprotect(st + */ + static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) + { +- memcpy(dst, src, count * sizeof(pgd_t)); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++ ++ pax_open_kernel(cr0); ++#endif ++ ++ memcpy(dst, src, count * sizeof(pgd_t)); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + } + + +diff -urNp linux-2.6.29/arch/x86/include/asm/processor.h linux-2.6.29/arch/x86/include/asm/processor.h +--- linux-2.6.29/arch/x86/include/asm/processor.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/processor.h 2009-03-28 14:26:19.000000000 -0400 +@@ -275,7 +275,7 @@ struct tss_struct { + + } ____cacheline_aligned; + +-DECLARE_PER_CPU(struct tss_struct, init_tss); ++extern struct tss_struct init_tss[NR_CPUS]; + + /* + * Save the original ist values for checking stack pointers during debugging +@@ -839,11 +839,20 @@ static inline void spin_lock_prefetch(co + * User space process size: 3GB (default). 
+ */ + #define TASK_SIZE PAGE_OFFSET ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++#define SEGMEXEC_TASK_SIZE (TASK_SIZE / 2) ++#endif ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++#define STACK_TOP ((current->mm->pax_flags & MF_PAX_SEGMEXEC)?SEGMEXEC_TASK_SIZE:TASK_SIZE) ++#else + #define STACK_TOP TASK_SIZE +-#define STACK_TOP_MAX STACK_TOP ++#endif ++#define STACK_TOP_MAX TASK_SIZE + + #define INIT_THREAD { \ +- .sp0 = sizeof(init_stack) + (long)&init_stack, \ ++ .sp0 = sizeof(init_stack) + (long)&init_stack - 8, \ + .vm86_info = NULL, \ + .sysenter_cs = __KERNEL_CS, \ + .io_bitmap_ptr = NULL, \ +@@ -858,7 +867,7 @@ static inline void spin_lock_prefetch(co + */ + #define INIT_TSS { \ + .x86_tss = { \ +- .sp0 = sizeof(init_stack) + (long)&init_stack, \ ++ .sp0 = sizeof(init_stack) + (long)&init_stack - 8, \ + .ss0 = __KERNEL_DS, \ + .ss1 = __KERNEL_CS, \ + .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \ +@@ -869,11 +878,7 @@ static inline void spin_lock_prefetch(co + extern unsigned long thread_saved_pc(struct task_struct *tsk); + + #define THREAD_SIZE_LONGS (THREAD_SIZE/sizeof(unsigned long)) +-#define KSTK_TOP(info) \ +-({ \ +- unsigned long *__ptr = (unsigned long *)(info); \ +- (unsigned long)(&__ptr[THREAD_SIZE_LONGS]); \ +-}) ++#define KSTK_TOP(info) ((info)->task.thread.sp0) + + /* + * The below -8 is to reserve 8 bytes on top of the ring0 stack. +@@ -888,7 +893,7 @@ extern unsigned long thread_saved_pc(str + #define task_pt_regs(task) \ + ({ \ + struct pt_regs *__regs__; \ +- __regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \ ++ __regs__ = (struct pt_regs *)((task)->thread.sp0); \ + __regs__ - 1; \ + }) + +@@ -904,7 +909,7 @@ extern unsigned long thread_saved_pc(str + * space during mmap's. + */ + #define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \ +- 0xc0000000 : 0xFFFFe000) ++ 0xc0000000 : 0xFFFFf000) + + #define TASK_SIZE (test_thread_flag(TIF_IA32) ? 
\ + IA32_PAGE_OFFSET : TASK_SIZE64) +@@ -941,6 +946,10 @@ extern void start_thread(struct pt_regs + */ + #define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3)) + ++#ifdef CONFIG_PAX_SEGMEXEC ++#define SEGMEXEC_TASK_UNMAPPED_BASE (PAGE_ALIGN(SEGMEXEC_TASK_SIZE / 3)) ++#endif ++ + #define KSTK_EIP(task) (task_pt_regs(task)->ip) + + /* Get/set a process' ability to use the timestamp counter instruction */ +diff -urNp linux-2.6.29/arch/x86/include/asm/ptrace.h linux-2.6.29/arch/x86/include/asm/ptrace.h +--- linux-2.6.29/arch/x86/include/asm/ptrace.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/ptrace.h 2009-03-28 14:26:19.000000000 -0400 +@@ -151,28 +151,29 @@ static inline unsigned long regs_return_ + } + + /* +- * user_mode_vm(regs) determines whether a register set came from user mode. ++ * user_mode(regs) determines whether a register set came from user mode. + * This is true if V8086 mode was enabled OR if the register set was from + * protected mode with RPL-3 CS value. This tricky test checks that with + * one comparison. Many places in the kernel can bypass this full check +- * if they have already ruled out V8086 mode, so user_mode(regs) can be used. ++ * if they have already ruled out V8086 mode, so user_mode_novm(regs) can ++ * be used. 
+ */ +-static inline int user_mode(struct pt_regs *regs) ++static inline int user_mode_novm(struct pt_regs *regs) + { + #ifdef CONFIG_X86_32 + return (regs->cs & SEGMENT_RPL_MASK) == USER_RPL; + #else +- return !!(regs->cs & 3); ++ return !!(regs->cs & SEGMENT_RPL_MASK); + #endif + } + +-static inline int user_mode_vm(struct pt_regs *regs) ++static inline int user_mode(struct pt_regs *regs) + { + #ifdef CONFIG_X86_32 + return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >= + USER_RPL; + #else +- return user_mode(regs); ++ return user_mode_novm(regs); + #endif + } + +diff -urNp linux-2.6.29/arch/x86/include/asm/reboot.h linux-2.6.29/arch/x86/include/asm/reboot.h +--- linux-2.6.29/arch/x86/include/asm/reboot.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/reboot.h 2009-03-28 14:26:19.000000000 -0400 +@@ -18,7 +18,7 @@ extern struct machine_ops machine_ops; + + void native_machine_crash_shutdown(struct pt_regs *regs); + void native_machine_shutdown(void); +-void machine_real_restart(const unsigned char *code, int length); ++void machine_real_restart(const unsigned char *code, unsigned int length); + + typedef void (*nmi_shootdown_cb)(int, struct die_args*); + void nmi_shootdown_cpus(nmi_shootdown_cb callback); +diff -urNp linux-2.6.29/arch/x86/include/asm/rwsem.h linux-2.6.29/arch/x86/include/asm/rwsem.h +--- linux-2.6.29/arch/x86/include/asm/rwsem.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/rwsem.h 2009-03-28 14:26:19.000000000 -0400 +@@ -106,10 +106,26 @@ static inline void __down_read(struct rw + { + asm volatile("# beginning down_read\n\t" + LOCK_PREFIX " incl (%%eax)\n\t" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ LOCK_PREFIX "decl (%%eax)\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + /* adds 0x00000001, returns the old value */ +- " 
jns 1f\n" ++ " jns 2f\n" + " call call_rwsem_down_read_failed\n" +- "1:\n\t" ++ "2:\n\t" + "# ending down_read\n\t" + : "+m" (sem->count) + : "a" (sem) +@@ -124,13 +140,29 @@ static inline int __down_read_trylock(st + __s32 result, tmp; + asm volatile("# beginning __down_read_trylock\n\t" + " movl %0,%1\n\t" +- "1:\n\t" ++ "2:\n\t" + " movl %1,%2\n\t" + " addl %3,%2\n\t" +- " jle 2f\n\t" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ "subl %3,%2\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ " jle 3f\n\t" + LOCK_PREFIX " cmpxchgl %2,%0\n\t" +- " jnz 1b\n\t" +- "2:\n\t" ++ " jnz 2b\n\t" ++ "3:\n\t" + "# ending __down_read_trylock\n\t" + : "+m" (sem->count), "=&a" (result), "=&r" (tmp) + : "i" (RWSEM_ACTIVE_READ_BIAS) +@@ -148,12 +180,28 @@ static inline void __down_write_nested(s + tmp = RWSEM_ACTIVE_WRITE_BIAS; + asm volatile("# beginning down_write\n\t" + LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ "movl %%edx,(%%eax)\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + /* subtract 0x0000ffff, returns the old value */ + " testl %%edx,%%edx\n\t" + /* was the count 0 before? 
*/ +- " jz 1f\n" ++ " jz 2f\n" + " call call_rwsem_down_write_failed\n" +- "1:\n" ++ "2:\n" + "# ending down_write" + : "+m" (sem->count), "=d" (tmp) + : "a" (sem), "1" (tmp) +@@ -186,10 +234,26 @@ static inline void __up_read(struct rw_s + __s32 tmp = -RWSEM_ACTIVE_READ_BIAS; + asm volatile("# beginning __up_read\n\t" + LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ "movl %%edx,(%%eax)\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + /* subtracts 1, returns the old value */ +- " jns 1f\n\t" ++ " jns 2f\n\t" + " call call_rwsem_wake\n" +- "1:\n" ++ "2:\n" + "# ending __up_read\n" + : "+m" (sem->count), "=d" (tmp) + : "a" (sem), "1" (tmp) +@@ -204,11 +268,27 @@ static inline void __up_write(struct rw_ + asm volatile("# beginning __up_write\n\t" + " movl %2,%%edx\n\t" + LOCK_PREFIX " xaddl %%edx,(%%eax)\n\t" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ "movl %%edx,(%%eax)\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + /* tries to transition + 0xffff0001 -> 0x00000000 */ +- " jz 1f\n" ++ " jz 2f\n" + " call call_rwsem_wake\n" +- "1:\n\t" ++ "2:\n\t" + "# ending __up_write\n" + : "+m" (sem->count) + : "a" (sem), "i" (-RWSEM_ACTIVE_WRITE_BIAS) +@@ -222,10 +302,26 @@ static inline void __downgrade_write(str + { + asm volatile("# beginning __downgrade_write\n\t" + LOCK_PREFIX " addl %2,(%%eax)\n\t" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ LOCK_PREFIX "subl %2,(%%eax)\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ +- " jns 1f\n\t" ++ " jns 2f\n\t" + " 
call call_rwsem_downgrade_wake\n" +- "1:\n\t" ++ "2:\n\t" + "# ending __downgrade_write\n" + : "+m" (sem->count) + : "a" (sem), "i" (-RWSEM_WAITING_BIAS) +@@ -237,7 +333,23 @@ static inline void __downgrade_write(str + */ + static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) + { +- asm volatile(LOCK_PREFIX "addl %1,%0" ++ asm volatile(LOCK_PREFIX "addl %1,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ LOCK_PREFIX "subl %1,%0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + : "+m" (sem->count) + : "ir" (delta)); + } +@@ -249,7 +361,23 @@ static inline int rwsem_atomic_update(in + { + int tmp = delta; + +- asm volatile(LOCK_PREFIX "xadd %0,%1" ++ asm volatile(LOCK_PREFIX "xadd %0,%1\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ "movl %0,%1\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + : "+r" (tmp), "+m" (sem->count) + : : "memory"); + +diff -urNp linux-2.6.29/arch/x86/include/asm/segment.h linux-2.6.29/arch/x86/include/asm/segment.h +--- linux-2.6.29/arch/x86/include/asm/segment.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/segment.h 2009-03-28 14:26:19.000000000 -0400 +@@ -88,13 +88,19 @@ + #define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14) + #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8) + +-#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE + 15) ++#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE + 15) + #ifdef CONFIG_SMP + #define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8) + #else + #define __KERNEL_PERCPU 0 + #endif + ++#define GDT_ENTRY_PCIBIOS_CS (GDT_ENTRY_KERNEL_BASE + 16) ++#define __PCIBIOS_CS (GDT_ENTRY_PCIBIOS_CS * 8) ++ ++#define GDT_ENTRY_PCIBIOS_DS (GDT_ENTRY_KERNEL_BASE + 17) ++#define __PCIBIOS_DS 
(GDT_ENTRY_PCIBIOS_DS * 8) ++ + #define GDT_ENTRY_DOUBLEFAULT_TSS 31 + + /* +@@ -132,7 +138,7 @@ + */ + + /* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */ +-#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8) ++#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xFFFCU) == PNP_CS32 || ((x) & 0xFFFCU) == PNP_CS16) + + + #else +diff -urNp linux-2.6.29/arch/x86/include/asm/spinlock.h linux-2.6.29/arch/x86/include/asm/spinlock.h +--- linux-2.6.29/arch/x86/include/asm/spinlock.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/spinlock.h 2009-03-28 14:26:19.000000000 -0400 +@@ -311,18 +311,50 @@ static inline int __raw_write_can_lock(r + static inline void __raw_read_lock(raw_rwlock_t *rw) + { + asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t" +- "jns 1f\n" +- "call __read_lock_failed\n\t" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" + "1:\n" ++ LOCK_PREFIX " addl $1,(%0)\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ "jns 2f\n" ++ "call __read_lock_failed\n\t" ++ "2:\n" + ::LOCK_PTR_REG (rw) : "memory"); + } + + static inline void __raw_write_lock(raw_rwlock_t *rw) + { + asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t" +- "jz 1f\n" +- "call __write_lock_failed\n\t" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" + "1:\n" ++ LOCK_PREFIX " addl %1,(%0)\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ "jz 2f\n" ++ "call __write_lock_failed\n\t" ++ "2:\n" + ::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory"); + } + +@@ -349,12 +381,45 @@ static inline int __raw_write_trylock(ra + + static inline void __raw_read_unlock(raw_rwlock_t *rw) + { +- asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory"); ++ asm volatile(LOCK_PREFIX "incl %0\n" ++ ++#ifdef 
CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ LOCK_PREFIX "decl %0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ :"+m" (rw->lock) : : "memory"); + } + + static inline void __raw_write_unlock(raw_rwlock_t *rw) + { +- asm volatile(LOCK_PREFIX "addl %1, %0" ++ asm volatile(LOCK_PREFIX "addl %1, %0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ LOCK_PREFIX "subl %1,%0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory"); + } + +diff -urNp linux-2.6.29/arch/x86/include/asm/system.h linux-2.6.29/arch/x86/include/asm/system.h +--- linux-2.6.29/arch/x86/include/asm/system.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/system.h 2009-03-28 14:26:19.000000000 -0400 +@@ -95,6 +95,8 @@ do { \ + ".globl thread_return\n" \ + "thread_return:\n\t" \ + "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ ++ "movq %P[task_canary](%%rsi),%%r8\n\t" \ ++ "movq %%r8,%%gs:%P[pda_canary]\n\t" \ + "movq %P[thread_info](%%rsi),%%r8\n\t" \ + LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ + "movq %%rax,%%rdi\n\t" \ +@@ -106,7 +108,9 @@ do { \ + [ti_flags] "i" (offsetof(struct thread_info, flags)), \ + [tif_fork] "i" (TIF_FORK), \ + [thread_info] "i" (offsetof(struct task_struct, stack)), \ +- [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ ++ [task_canary] "i" (offsetof(struct task_struct, stack_canary)), \ ++ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)), \ ++ [pda_canary] "i" (offsetof(struct x8664_pda, stack_canary))\ + : "memory", "cc" __EXTRA_CLOBBER) + #endif + +@@ -169,7 +173,7 @@ static inline unsigned long get_limit(un + { + unsigned long __limit; + asm("lsll %1,%0" : "=r" (__limit) : "r" (segment)); +- 
return __limit + 1; ++ return __limit; + } + + static inline void native_clts(void) +@@ -295,6 +299,21 @@ static inline void native_wbinvd(void) + + #define stts() write_cr0(read_cr0() | X86_CR0_TS) + ++#define pax_open_kernel(cr0) \ ++do { \ ++ typecheck(unsigned long, cr0); \ ++ preempt_disable(); \ ++ cr0 = read_cr0(); \ ++ write_cr0(cr0 & ~X86_CR0_WP); \ ++} while (0) ++ ++#define pax_close_kernel(cr0) \ ++do { \ ++ typecheck(unsigned long, cr0); \ ++ write_cr0(cr0); \ ++ preempt_enable_no_resched(); \ ++} while (0) ++ + #endif /* __KERNEL__ */ + + static inline void clflush(volatile void *__p) +@@ -309,7 +328,7 @@ void enable_hlt(void); + + void cpu_idle_wait(void); + +-extern unsigned long arch_align_stack(unsigned long sp); ++#define arch_align_stack(x) ((x) & ~0xfUL) + extern void free_init_pages(char *what, unsigned long begin, unsigned long end); + + void default_idle(void); +diff -urNp linux-2.6.29/arch/x86/include/asm/uaccess_64.h linux-2.6.29/arch/x86/include/asm/uaccess_64.h +--- linux-2.6.29/arch/x86/include/asm/uaccess_64.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/uaccess_64.h 2009-03-28 14:26:19.000000000 -0400 +@@ -10,6 +10,8 @@ + #include + #include + ++#define set_fs(x) (current_thread_info()->addr_limit = (x)) ++ + /* + * Copy To/From Userspace + */ +diff -urNp linux-2.6.29/arch/x86/include/asm/uaccess.h linux-2.6.29/arch/x86/include/asm/uaccess.h +--- linux-2.6.29/arch/x86/include/asm/uaccess.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/include/asm/uaccess.h 2009-03-28 14:26:19.000000000 -0400 +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + + #define VERIFY_READ 0 + #define VERIFY_WRITE 1 +@@ -29,7 +30,12 @@ + + #define get_ds() (KERNEL_DS) + #define get_fs() (current_thread_info()->addr_limit) ++#ifdef CONFIG_X86_32 ++void __set_fs(mm_segment_t x, int cpu); ++void set_fs(mm_segment_t x); ++#else + #define set_fs(x) (current_thread_info()->addr_limit = (x)) ++#endif + + 
#define segment_eq(a, b) ((a).seg == (b).seg) + +@@ -187,9 +193,12 @@ extern int __get_user_bad(void); + + #ifdef CONFIG_X86_32 + #define __put_user_u64(x, addr, err) \ +- asm volatile("1: movl %%eax,0(%2)\n" \ +- "2: movl %%edx,4(%2)\n" \ ++ asm volatile(" movw %w5,%%ds\n" \ ++ "1: movl %%eax,%%ds:0(%2)\n" \ ++ "2: movl %%edx,%%ds:4(%2)\n" \ + "3:\n" \ ++ " pushl %%ss\n" \ ++ " popl %%ds\n" \ + ".section .fixup,\"ax\"\n" \ + "4: movl %3,%0\n" \ + " jmp 3b\n" \ +@@ -197,7 +206,8 @@ extern int __get_user_bad(void); + _ASM_EXTABLE(1b, 4b) \ + _ASM_EXTABLE(2b, 4b) \ + : "=r" (err) \ +- : "A" (x), "r" (addr), "i" (-EFAULT), "0" (err)) ++ : "A" (x), "r" (addr), "i" (-EFAULT), "0" (err), \ ++ "r"(__USER_DS)) + + #define __put_user_x8(x, ptr, __ret_pu) \ + asm volatile("call __put_user_8" : "=a" (__ret_pu) \ +@@ -338,6 +348,22 @@ do { \ + } \ + } while (0) + ++#ifdef CONFIG_X86_32 ++#define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \ ++ asm volatile(" movw %w5,%%ds\n" \ ++ "1: mov"itype" %%ds:%2,%"rtype"1\n" \ ++ "2:\n" \ ++ " pushl %%ss\n" \ ++ " popl %%ds\n" \ ++ ".section .fixup,\"ax\"\n" \ ++ "3: movl %3,%0\n" \ ++ " xor"itype" %"rtype"1,%"rtype"1\n" \ ++ " jmp 2b\n" \ ++ ".previous\n" \ ++ _ASM_EXTABLE(1b, 3b) \ ++ : "=r" (err), ltype (x) \ ++ : "m" (__m(addr)), "i" (errret), "0" (err), "r"(__USER_DS)) ++#else + #define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \ + asm volatile("1: mov"itype" %2,%"rtype"1\n" \ + "2:\n" \ +@@ -349,6 +375,7 @@ do { \ + _ASM_EXTABLE(1b, 3b) \ + : "=r" (err), ltype(x) \ + : "m" (__m(addr)), "i" (errret), "0" (err)) ++#endif + + #define __put_user_nocheck(x, ptr, size) \ + ({ \ +@@ -375,6 +402,22 @@ struct __large_struct { unsigned long bu + * we do not write to any memory gcc knows about, so there are no + * aliasing issues. 
+ */ ++#ifdef CONFIG_X86_32 ++#define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \ ++ asm volatile(" movw %w5,%%ds\n" \ ++ "1: mov"itype" %"rtype"1,%%ds:%2\n" \ ++ "2:\n" \ ++ " pushl %%ss\n" \ ++ " popl %%ds\n" \ ++ ".section .fixup,\"ax\"\n" \ ++ "3: movl %3,%0\n" \ ++ " jmp 2b\n" \ ++ ".previous\n" \ ++ _ASM_EXTABLE(1b, 3b) \ ++ : "=r"(err) \ ++ : ltype (x), "m" (__m(addr)), "i" (errret), "0" (err),\ ++ "r"(__USER_DS)) ++#else + #define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \ + asm volatile("1: mov"itype" %"rtype"1,%2\n" \ + "2:\n" \ +@@ -385,6 +428,7 @@ struct __large_struct { unsigned long bu + _ASM_EXTABLE(1b, 3b) \ + : "=r"(err) \ + : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err)) ++#endif + /** + * __get_user: - Get a simple variable from user space, with less checking. + * @x: Variable to store result. +@@ -445,6 +489,7 @@ extern struct movsl_mask { + + #define ARCH_HAS_NOCACHE_UACCESS 1 + ++#define ARCH_HAS_SORT_EXTABLE + #ifdef CONFIG_X86_32 + # include "uaccess_32.h" + #else +diff -urNp linux-2.6.29/arch/x86/Kconfig linux-2.6.29/arch/x86/Kconfig +--- linux-2.6.29/arch/x86/Kconfig 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/Kconfig 2009-03-28 14:26:19.000000000 -0400 +@@ -993,7 +993,7 @@ config PAGE_OFFSET + hex + default 0xB0000000 if VMSPLIT_3G_OPT + default 0x80000000 if VMSPLIT_2G +- default 0x78000000 if VMSPLIT_2G_OPT ++ default 0x70000000 if VMSPLIT_2G_OPT + default 0x40000000 if VMSPLIT_1G + default 0xC0000000 + depends on X86_32 +@@ -1408,8 +1408,7 @@ config KEXEC_JUMP + config PHYSICAL_START + hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) + default "0x1000000" if X86_NUMAQ +- default "0x200000" if X86_64 +- default "0x100000" ++ default "0x200000" + help + This gives the physical address where the kernel is loaded. + +@@ -1501,9 +1500,9 @@ config HOTPLUG_CPU + Say N if you want to disable CPU hotplug. 
+ + config COMPAT_VDSO +- def_bool y ++ def_bool n + prompt "Compat VDSO support" +- depends on X86_32 || IA32_EMULATION ++ depends on (X86_32 || IA32_EMULATION) && !PAX_NOEXEC + help + Map the 32-bit VDSO to the predictable old-style address too. + ---help--- +diff -urNp linux-2.6.29/arch/x86/Kconfig.cpu linux-2.6.29/arch/x86/Kconfig.cpu +--- linux-2.6.29/arch/x86/Kconfig.cpu 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/Kconfig.cpu 2009-03-28 14:26:19.000000000 -0400 +@@ -333,7 +333,7 @@ config X86_PPRO_FENCE + + config X86_F00F_BUG + def_bool y +- depends on M586MMX || M586TSC || M586 || M486 || M386 ++ depends on (M586MMX || M586TSC || M586 || M486 || M386) && !PAX_KERNEXEC + + config X86_WP_WORKS_OK + def_bool y +@@ -353,7 +353,7 @@ config X86_POPAD_OK + + config X86_ALIGNMENT_16 + def_bool y +- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 ++ depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK8 || MK7 || MK6 || MCORE2 || MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 + + config X86_INTEL_USERCOPY + def_bool y +@@ -399,7 +399,7 @@ config X86_CMPXCHG64 + # generates cmov. 
+ config X86_CMOV + def_bool y +- depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64) ++ depends on (MK8 || MK7 || MCORE2 || MPSC || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64) + + config X86_MINIMUM_CPU_FAMILY + int +diff -urNp linux-2.6.29/arch/x86/Kconfig.debug linux-2.6.29/arch/x86/Kconfig.debug +--- linux-2.6.29/arch/x86/Kconfig.debug 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/Kconfig.debug 2009-03-28 14:26:19.000000000 -0400 +@@ -107,7 +107,7 @@ config X86_PTDUMP + config DEBUG_RODATA + bool "Write protect kernel read-only data structures" + default y +- depends on DEBUG_KERNEL ++ depends on DEBUG_KERNEL && BROKEN + help + Mark the kernel read-only data as write-protected in the pagetables, + in order to catch accidental (and incorrect) writes to such const +diff -urNp linux-2.6.29/arch/x86/kernel/acpi/boot.c linux-2.6.29/arch/x86/kernel/acpi/boot.c +--- linux-2.6.29/arch/x86/kernel/acpi/boot.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/acpi/boot.c 2009-03-28 14:26:19.000000000 -0400 +@@ -1705,7 +1705,7 @@ static struct dmi_system_id __initdata a + DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"), + }, + }, +- {} ++ { NULL, NULL, {{0, {0}}}, NULL} + }; + + /* +diff -urNp linux-2.6.29/arch/x86/kernel/acpi/realmode/wakeup.S linux-2.6.29/arch/x86/kernel/acpi/realmode/wakeup.S +--- linux-2.6.29/arch/x86/kernel/acpi/realmode/wakeup.S 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/acpi/realmode/wakeup.S 2009-03-28 14:26:19.000000000 -0400 +@@ -104,7 +104,7 @@ _start: + movl %eax, %ecx + orl %edx, %ecx + jz 1f +- movl $0xc0000080, %ecx ++ mov $MSR_EFER, %ecx + wrmsr + 1: + +diff -urNp linux-2.6.29/arch/x86/kernel/acpi/sleep.c linux-2.6.29/arch/x86/kernel/acpi/sleep.c +--- linux-2.6.29/arch/x86/kernel/acpi/sleep.c 2009-03-23 
19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/acpi/sleep.c 2009-03-28 14:26:19.000000000 -0400 +@@ -37,6 +37,10 @@ int acpi_save_state_mem(void) + { + struct wakeup_header *header; + ++#if defined(CONFIG_64BIT) && defined(CONFIG_SMP) && defined(CONFIG_PAX_KERNEXEC) ++ unsigned long cr0; ++#endif ++ + if (!acpi_realmode) { + printk(KERN_ERR "Could not allocate memory during boot, " + "S3 disabled\n"); +@@ -99,8 +103,18 @@ int acpi_save_state_mem(void) + header->trampoline_segment = setup_trampoline() >> 4; + #ifdef CONFIG_SMP + stack_start.sp = temp_stack + sizeof(temp_stack); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + early_gdt_descr.address = + (unsigned long)get_cpu_gdt_table(smp_processor_id()); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + #endif + initial_code = (unsigned long)wakeup_long64; + saved_magic = 0x123456789abcdef0; +diff -urNp linux-2.6.29/arch/x86/kernel/acpi/wakeup_32.S linux-2.6.29/arch/x86/kernel/acpi/wakeup_32.S +--- linux-2.6.29/arch/x86/kernel/acpi/wakeup_32.S 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/acpi/wakeup_32.S 2009-03-28 14:26:19.000000000 -0400 +@@ -30,13 +30,11 @@ wakeup_pmode_return: + # and restore the stack ... 
but you need gdt for this to work + movl saved_context_esp, %esp + +- movl %cs:saved_magic, %eax +- cmpl $0x12345678, %eax ++ cmpl $0x12345678, saved_magic + jne bogus_magic + + # jump to place where we left off +- movl saved_eip, %eax +- jmp *%eax ++ jmp *(saved_eip) + + bogus_magic: + jmp bogus_magic +diff -urNp linux-2.6.29/arch/x86/kernel/alternative.c linux-2.6.29/arch/x86/kernel/alternative.c +--- linux-2.6.29/arch/x86/kernel/alternative.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/alternative.c 2009-03-28 14:26:19.000000000 -0400 +@@ -393,7 +393,7 @@ void apply_paravirt(struct paravirt_patc + + BUG_ON(p->len > MAX_PATCH_LEN); + /* prep the buffer with the original instructions */ +- memcpy(insnbuf, p->instr, p->len); ++ memcpy(insnbuf, ktla_ktva(p->instr), p->len); + used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf, + (unsigned long)p->instr, p->len); + +@@ -473,11 +473,26 @@ void __init alternative_instructions(voi + * instructions. And on the local CPU you need to be protected again NMI or MCE + * handlers seeing an inconsistent instruction while you patch. 
+ */ +-void *text_poke_early(void *addr, const void *opcode, size_t len) ++void *__kprobes text_poke_early(void *addr, const void *opcode, size_t len) + { + unsigned long flags; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + local_irq_save(flags); +- memcpy(addr, opcode, len); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ ++ memcpy(ktla_ktva(addr), opcode, len); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + local_irq_restore(flags); + sync_core(); + /* Could also do a CLFLUSH here to speed up CPU recovery; but +@@ -498,33 +513,27 @@ void *text_poke_early(void *addr, const + */ + void *__kprobes text_poke(void *addr, const void *opcode, size_t len) + { +- unsigned long flags; +- char *vaddr; +- int nr_pages = 2; ++ unsigned char *vaddr = ktla_ktva(addr); + struct page *pages[2]; +- int i; ++ size_t i; ++ ++ if (!core_kernel_text((unsigned long)addr) + +- if (!core_kernel_text((unsigned long)addr)) { +- pages[0] = vmalloc_to_page(addr); +- pages[1] = vmalloc_to_page(addr + PAGE_SIZE); ++#if defined(CONFIG_X86_32) && defined(CONFIG_MODULES) && defined(CONFIG_PAX_KERNEXEC) ++ && (vaddr < MODULES_VADDR || MODULES_END < vaddr) ++#endif ++ ++ ) { ++ pages[0] = vmalloc_to_page(vaddr); ++ pages[1] = vmalloc_to_page(vaddr + PAGE_SIZE); + } else { +- pages[0] = virt_to_page(addr); ++ pages[0] = virt_to_page(vaddr); + WARN_ON(!PageReserved(pages[0])); +- pages[1] = virt_to_page(addr + PAGE_SIZE); ++ pages[1] = virt_to_page(vaddr + PAGE_SIZE); + } + BUG_ON(!pages[0]); +- if (!pages[1]) +- nr_pages = 1; +- vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); +- BUG_ON(!vaddr); +- local_irq_save(flags); +- memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len); +- local_irq_restore(flags); +- vunmap(vaddr); +- sync_core(); +- /* Could also do a CLFLUSH here to speed up CPU recovery; but +- that causes hangs on some VIA CPUs. 
*/ ++ text_poke_early(addr, opcode, len); + for (i = 0; i < len; i++) +- BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]); ++ BUG_ON((vaddr)[i] != ((unsigned char *)opcode)[i]); + return addr; + } +diff -urNp linux-2.6.29/arch/x86/kernel/apm_32.c linux-2.6.29/arch/x86/kernel/apm_32.c +--- linux-2.6.29/arch/x86/kernel/apm_32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/apm_32.c 2009-03-28 14:26:19.000000000 -0400 +@@ -403,7 +403,7 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitq + static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); + static struct apm_user *user_list; + static DEFINE_SPINLOCK(user_list_lock); +-static const struct desc_struct bad_bios_desc = { { { 0, 0x00409200 } } }; ++static const struct desc_struct bad_bios_desc = { { { 0, 0x00409300 } } }; + + static const char driver_version[] = "1.16ac"; /* no spaces */ + +@@ -598,19 +598,42 @@ static u8 apm_bios_call(u32 func, u32 eb + struct desc_struct save_desc_40; + struct desc_struct *gdt; + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + cpus = apm_save_cpus(); + + cpu = get_cpu(); + gdt = get_cpu_gdt_table(cpu); + save_desc_40 = gdt[0x40 / 8]; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + gdt[0x40 / 8] = bad_bios_desc; + ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + apm_irq_save(flags); + APM_DO_SAVE_SEGS; + apm_bios_call_asm(func, ebx_in, ecx_in, eax, ebx, ecx, edx, esi); + APM_DO_RESTORE_SEGS; + apm_irq_restore(flags); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + gdt[0x40 / 8] = save_desc_40; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + put_cpu(); + apm_restore_cpus(cpus); + +@@ -641,19 +664,42 @@ static u8 apm_bios_call_simple(u32 func, + struct desc_struct save_desc_40; + struct desc_struct *gdt; + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + cpus = apm_save_cpus(); + + cpu = get_cpu(); + gdt = get_cpu_gdt_table(cpu); + save_desc_40 = 
gdt[0x40 / 8]; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + gdt[0x40 / 8] = bad_bios_desc; + ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + apm_irq_save(flags); + APM_DO_SAVE_SEGS; + error = apm_bios_call_simple_asm(func, ebx_in, ecx_in, eax); + APM_DO_RESTORE_SEGS; + apm_irq_restore(flags); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + gdt[0x40 / 8] = save_desc_40; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + put_cpu(); + apm_restore_cpus(cpus); + return error; +@@ -925,7 +971,7 @@ recalc: + + static void apm_power_off(void) + { +- unsigned char po_bios_call[] = { ++ const unsigned char po_bios_call[] = { + 0xb8, 0x00, 0x10, /* movw $0x1000,ax */ + 0x8e, 0xd0, /* movw ax,ss */ + 0xbc, 0x00, 0xf0, /* movw $0xf000,sp */ +@@ -1876,7 +1922,10 @@ static const struct file_operations apm_ + static struct miscdevice apm_device = { + APM_MINOR_DEV, + "apm_bios", +- &apm_bios_fops ++ &apm_bios_fops, ++ {NULL, NULL}, ++ NULL, ++ NULL + }; + + +@@ -2197,7 +2246,7 @@ static struct dmi_system_id __initdata a + { DMI_MATCH(DMI_SYS_VENDOR, "IBM"), }, + }, + +- { } ++ { NULL, NULL, {DMI_MATCH(DMI_NONE, {0})}, NULL} + }; + + /* +@@ -2215,6 +2264,10 @@ static int __init apm_init(void) + struct desc_struct *gdt; + int err; + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + dmi_check_system(apm_dmi_table); + + if (apm_info.bios.version == 0 || paravirt_enabled() || machine_is_olpc()) { +@@ -2288,9 +2341,18 @@ static int __init apm_init(void) + * This is for buggy BIOS's that refer to (real mode) segment 0x40 + * even though they are called in protected mode. 
+ */ ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + set_base(bad_bios_desc, __va((unsigned long)0x40 << 4)); + _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4)); + ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + /* + * Set up the long jump entry point to the APM BIOS, which is called + * from inline assembly. +@@ -2309,6 +2371,11 @@ static int __init apm_init(void) + * code to that CPU. + */ + gdt = get_cpu_gdt_table(0); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + set_base(gdt[APM_CS >> 3], + __va((unsigned long)apm_info.bios.cseg << 4)); + set_base(gdt[APM_CS_16 >> 3], +@@ -2316,6 +2383,10 @@ static int __init apm_init(void) + set_base(gdt[APM_DS >> 3], + __va((unsigned long)apm_info.bios.dseg << 4)); + ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + proc_create("apm", 0, NULL, &apm_file_ops); + + kapmd_task = kthread_create(apm, NULL, "kapmd"); +diff -urNp linux-2.6.29/arch/x86/kernel/asm-offsets_32.c linux-2.6.29/arch/x86/kernel/asm-offsets_32.c +--- linux-2.6.29/arch/x86/kernel/asm-offsets_32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/asm-offsets_32.c 2009-03-28 14:26:19.000000000 -0400 +@@ -100,6 +100,7 @@ void foo(void) + DEFINE(PTRS_PER_PTE, PTRS_PER_PTE); + DEFINE(PTRS_PER_PMD, PTRS_PER_PMD); + DEFINE(PTRS_PER_PGD, PTRS_PER_PGD); ++ DEFINE(PERCPU_MODULE_RESERVE, PERCPU_MODULE_RESERVE); + + OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); + +@@ -113,6 +114,7 @@ void foo(void) + OFFSET(PV_CPU_iret, pv_cpu_ops, iret); + OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); + OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); ++ OFFSET(PV_CPU_write_cr0, pv_cpu_ops, write_cr0); + #endif + + #ifdef CONFIG_XEN +diff -urNp linux-2.6.29/arch/x86/kernel/asm-offsets_64.c linux-2.6.29/arch/x86/kernel/asm-offsets_64.c +--- linux-2.6.29/arch/x86/kernel/asm-offsets_64.c 2009-03-23 19:12:14.000000000 -0400 ++++ 
linux-2.6.29/arch/x86/kernel/asm-offsets_64.c 2009-03-28 14:26:19.000000000 -0400 +@@ -124,6 +124,7 @@ int main(void) + ENTRY(cr8); + BLANK(); + #undef ENTRY ++ DEFINE(TSS_size, sizeof(struct tss_struct)); + DEFINE(TSS_ist, offsetof(struct tss_struct, x86_tss.ist)); + BLANK(); + DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx)); +diff -urNp linux-2.6.29/arch/x86/kernel/cpu/common.c linux-2.6.29/arch/x86/kernel/cpu/common.c +--- linux-2.6.29/arch/x86/kernel/cpu/common.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/cpu/common.c 2009-03-28 14:26:19.000000000 -0400 +@@ -2,7 +2,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -62,59 +61,6 @@ cpumask_t cpu_sibling_setup_map; + + static struct cpu_dev *this_cpu __cpuinitdata; + +-#ifdef CONFIG_X86_64 +-/* We need valid kernel segments for data and code in long mode too +- * IRET will check the segment types kkeil 2000/10/28 +- * Also sysret mandates a special GDT layout +- */ +-/* The TLS descriptors are currently at a different place compared to i386. +- Hopefully nobody expects them at a fixed place (Wine?) 
*/ +-DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { +- [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, +- [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, +- [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, +- [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, +- [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, +- [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, +-} }; +-#else +-DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { +- [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, +- [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, +- [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, +- [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, +- /* +- * Segments used for calling PnP BIOS have byte granularity. +- * They code segments and data segments have fixed 64k limits, +- * the transfer segment sizes are set at run time. +- */ +- /* 32-bit code */ +- [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, +- /* 16-bit code */ +- [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, +- /* 16-bit data */ +- [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, +- /* 16-bit data */ +- [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, +- /* 16-bit data */ +- [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, +- /* +- * The APM segments have byte granularity and their bases +- * are set at run time. All have 64k limits. 
+- */ +- /* 32-bit code */ +- [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, +- /* 16-bit code */ +- [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, +- /* data */ +- [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, +- +- [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, +- [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, +-} }; +-#endif +-EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); +- + #ifdef CONFIG_X86_32 + static int cachesize_override __cpuinitdata = -1; + static int disable_x86_serial_nr __cpuinitdata = 1; +@@ -248,7 +194,7 @@ void switch_to_new_gdt(void) + { + struct desc_ptr gdt_descr; + +- gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); ++ gdt_descr.address = (unsigned long)get_cpu_gdt_table(smp_processor_id()); + gdt_descr.size = GDT_SIZE - 1; + load_gdt(&gdt_descr); + #ifdef CONFIG_X86_32 +@@ -708,6 +654,10 @@ static void __cpuinit identify_cpu(struc + * we do "generic changes." + */ + ++#if defined(CONFIG_PAX_SEGMEXEC) || defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF) ++ setup_clear_cpu_cap(X86_FEATURE_SEP); ++#endif ++ + /* If the model name is still unset, do table lookup. 
*/ + if (!c->x86_model_id[0]) { + char *p; +@@ -880,7 +830,7 @@ __setup("clearcpuid=", setup_disablecpui + struct x8664_pda **_cpu_pda __read_mostly; + EXPORT_SYMBOL(_cpu_pda); + +-struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; ++struct desc_ptr idt_descr __read_only = { 256 * 16 - 1, (unsigned long) idt_table }; + + static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; + +@@ -979,7 +929,7 @@ struct pt_regs * __cpuinit idle_regs(str + void __cpuinit cpu_init(void) + { + int cpu = stack_smp_processor_id(); +- struct tss_struct *t = &per_cpu(init_tss, cpu); ++ struct tss_struct *t = init_tss + cpu; + struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); + unsigned long v; + char *estacks = NULL; +@@ -1100,7 +1050,7 @@ void __cpuinit cpu_init(void) + { + int cpu = smp_processor_id(); + struct task_struct *curr = current; +- struct tss_struct *t = &per_cpu(init_tss, cpu); ++ struct tss_struct *t = init_tss + cpu; + struct thread_struct *thread = &curr->thread; + + if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) { +diff -urNp linux-2.6.29/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c linux-2.6.29/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +--- linux-2.6.29/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c 2009-03-28 14:26:19.000000000 -0400 +@@ -581,7 +581,7 @@ static const struct dmi_system_id sw_any + DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"), + }, + }, +- { } ++ { NULL, NULL, {DMI_MATCH(DMI_NONE, {0})}, NULL } + }; + #endif + +diff -urNp linux-2.6.29/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c linux-2.6.29/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +--- linux-2.6.29/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c 2009-03-28 14:26:19.000000000 -0400 +@@ -225,7 +225,7 @@ static struct cpu_model models[] = + { &cpu_ids[CPU_MP4HT_D0], 
NULL, 0, NULL }, + { &cpu_ids[CPU_MP4HT_E0], NULL, 0, NULL }, + +- { NULL, } ++ { NULL, NULL, 0, NULL} + }; + #undef _BANIAS + #undef BANIAS +diff -urNp linux-2.6.29/arch/x86/kernel/cpu/intel.c linux-2.6.29/arch/x86/kernel/cpu/intel.c +--- linux-2.6.29/arch/x86/kernel/cpu/intel.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/cpu/intel.c 2009-03-28 14:26:19.000000000 -0400 +@@ -94,7 +94,7 @@ static void __cpuinit trap_init_f00f_bug + * Update the IDT descriptor and reload the IDT so that + * it uses the read-only mapped virtual address. + */ +- idt_descr.address = fix_to_virt(FIX_F00F_IDT); ++ idt_descr.address = (struct desc_struct *)fix_to_virt(FIX_F00F_IDT); + load_idt(&idt_descr); + } + #endif +diff -urNp linux-2.6.29/arch/x86/kernel/cpu/mcheck/mce_64.c linux-2.6.29/arch/x86/kernel/cpu/mcheck/mce_64.c +--- linux-2.6.29/arch/x86/kernel/cpu/mcheck/mce_64.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/cpu/mcheck/mce_64.c 2009-03-28 14:26:19.000000000 -0400 +@@ -678,6 +678,7 @@ static struct miscdevice mce_log_device + MISC_MCELOG_MINOR, + "mcelog", + &mce_chrdev_ops, ++ {NULL, NULL}, NULL, NULL + }; + + static unsigned long old_cr4 __initdata; +diff -urNp linux-2.6.29/arch/x86/kernel/cpu/mtrr/generic.c linux-2.6.29/arch/x86/kernel/cpu/mtrr/generic.c +--- linux-2.6.29/arch/x86/kernel/cpu/mtrr/generic.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/cpu/mtrr/generic.c 2009-03-28 14:26:19.000000000 -0400 +@@ -23,14 +23,14 @@ static struct fixed_range_block fixed_ra + { MTRRfix64K_00000_MSR, 1 }, /* one 64k MTRR */ + { MTRRfix16K_80000_MSR, 2 }, /* two 16k MTRRs */ + { MTRRfix4K_C0000_MSR, 8 }, /* eight 4k MTRRs */ +- {} ++ { 0, 0 } + }; + + static unsigned long smp_changes_mask; + static int mtrr_state_set; + u64 mtrr_tom2; + +-struct mtrr_state_type mtrr_state = {}; ++struct mtrr_state_type mtrr_state; + EXPORT_SYMBOL_GPL(mtrr_state); + + static int __initdata mtrr_show; +diff -urNp 
linux-2.6.29/arch/x86/kernel/crash.c linux-2.6.29/arch/x86/kernel/crash.c +--- linux-2.6.29/arch/x86/kernel/crash.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/crash.c 2009-03-28 14:26:19.000000000 -0400 +@@ -43,7 +43,7 @@ static void kdump_nmi_callback(int cpu, + regs = args->regs; + + #ifdef CONFIG_X86_32 +- if (!user_mode_vm(regs)) { ++ if (!user_mode(regs)) { + crash_fixup_ss_esp(&fixed_regs, regs); + regs = &fixed_regs; + } +diff -urNp linux-2.6.29/arch/x86/kernel/doublefault_32.c linux-2.6.29/arch/x86/kernel/doublefault_32.c +--- linux-2.6.29/arch/x86/kernel/doublefault_32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/doublefault_32.c 2009-03-28 14:26:19.000000000 -0400 +@@ -11,7 +11,7 @@ + + #define DOUBLEFAULT_STACKSIZE (1024) + static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE]; +-#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE) ++#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE-2) + + #define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM) + +@@ -21,7 +21,7 @@ static void doublefault_fn(void) + unsigned long gdt, tss; + + store_gdt(&gdt_desc); +- gdt = gdt_desc.address; ++ gdt = (unsigned long)gdt_desc.address; + + printk(KERN_EMERG "PANIC: double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size); + +@@ -60,10 +60,10 @@ struct tss_struct doublefault_tss __cach + /* 0x2 bit is always set */ + .flags = X86_EFLAGS_SF | 0x2, + .sp = STACK_START, +- .es = __USER_DS, ++ .es = __KERNEL_DS, + .cs = __KERNEL_CS, + .ss = __KERNEL_DS, +- .ds = __USER_DS, ++ .ds = __KERNEL_DS, + .fs = __KERNEL_PERCPU, + + .__cr3 = __pa_nodebug(swapper_pg_dir), +diff -urNp linux-2.6.29/arch/x86/kernel/dumpstack_32.c linux-2.6.29/arch/x86/kernel/dumpstack_32.c +--- linux-2.6.29/arch/x86/kernel/dumpstack_32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/dumpstack_32.c 2009-03-28 14:26:19.000000000 -0400 +@@ -107,11 +107,12 @@ 
void show_registers(struct pt_regs *regs + * When in-kernel, we also print out the stack and code at the + * time of the fault.. + */ +- if (!user_mode_vm(regs)) { ++ if (!user_mode(regs)) { + unsigned int code_prologue = code_bytes * 43 / 64; + unsigned int code_len = code_bytes; + unsigned char c; + u8 *ip; ++ unsigned long cs_base = get_desc_base(&get_cpu_gdt_table(smp_processor_id())[(0xffff & regs->cs) >> 3]); + + printk(KERN_EMERG "Stack:\n"); + show_stack_log_lvl(NULL, regs, &regs->sp, +@@ -119,10 +120,10 @@ void show_registers(struct pt_regs *regs + + printk(KERN_EMERG "Code: "); + +- ip = (u8 *)regs->ip - code_prologue; ++ ip = (u8 *)regs->ip - code_prologue + cs_base; + if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { + /* try starting at IP */ +- ip = (u8 *)regs->ip; ++ ip = (u8 *)regs->ip + cs_base; + code_len = code_len - code_prologue + 1; + } + for (i = 0; i < code_len; i++, ip++) { +@@ -131,7 +132,7 @@ void show_registers(struct pt_regs *regs + printk(" Bad EIP value."); + break; + } +- if (ip == (u8 *)regs->ip) ++ if (ip == (u8 *)regs->ip + cs_base) + printk("<%02x> ", c); + else + printk("%02x ", c); +@@ -144,6 +145,7 @@ int is_valid_bugaddr(unsigned long ip) + { + unsigned short ud2; + ++ ip = ktla_ktva(ip); + if (ip < PAGE_OFFSET) + return 0; + if (probe_kernel_address((unsigned short *)ip, ud2)) +diff -urNp linux-2.6.29/arch/x86/kernel/dumpstack.c linux-2.6.29/arch/x86/kernel/dumpstack.c +--- linux-2.6.29/arch/x86/kernel/dumpstack.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/dumpstack.c 2009-03-28 14:26:19.000000000 -0400 +@@ -288,7 +288,7 @@ void die(const char *str, struct pt_regs + unsigned long flags = oops_begin(); + int sig = SIGSEGV; + +- if (!user_mode_vm(regs)) ++ if (!user_mode(regs)) + report_bug(regs->ip, regs); + + if (__die(str, regs, err)) +diff -urNp linux-2.6.29/arch/x86/kernel/efi_32.c linux-2.6.29/arch/x86/kernel/efi_32.c +--- linux-2.6.29/arch/x86/kernel/efi_32.c 2009-03-23
19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/efi_32.c 2009-03-28 14:26:19.000000000 -0400 +@@ -38,70 +38,38 @@ + */ + + static unsigned long efi_rt_eflags; +-static pgd_t efi_bak_pg_dir_pointer[2]; ++static pgd_t __initdata efi_bak_pg_dir_pointer[KERNEL_PGD_PTRS]; + +-void efi_call_phys_prelog(void) ++void __init efi_call_phys_prelog(void) + { +- unsigned long cr4; +- unsigned long temp; + struct desc_ptr gdt_descr; + + local_irq_save(efi_rt_eflags); + +- /* +- * If I don't have PAE, I should just duplicate two entries in page +- * directory. If I have PAE, I just need to duplicate one entry in +- * page directory. +- */ +- cr4 = read_cr4_safe(); + +- if (cr4 & X86_CR4_PAE) { +- efi_bak_pg_dir_pointer[0].pgd = +- swapper_pg_dir[pgd_index(0)].pgd; +- swapper_pg_dir[0].pgd = +- swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; +- } else { +- efi_bak_pg_dir_pointer[0].pgd = +- swapper_pg_dir[pgd_index(0)].pgd; +- efi_bak_pg_dir_pointer[1].pgd = +- swapper_pg_dir[pgd_index(0x400000)].pgd; +- swapper_pg_dir[pgd_index(0)].pgd = +- swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; +- temp = PAGE_OFFSET + 0x400000; +- swapper_pg_dir[pgd_index(0x400000)].pgd = +- swapper_pg_dir[pgd_index(temp)].pgd; +- } ++ clone_pgd_range(efi_bak_pg_dir_pointer, swapper_pg_dir, KERNEL_PGD_PTRS); ++ clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, ++ min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); + + /* + * After the lock is released, the original page table is restored. 
+ */ + __flush_tlb_all(); + +- gdt_descr.address = __pa(get_cpu_gdt_table(0)); ++ gdt_descr.address = (struct desc_struct *)__pa(get_cpu_gdt_table(0)); + gdt_descr.size = GDT_SIZE - 1; + load_gdt(&gdt_descr); + } + +-void efi_call_phys_epilog(void) ++void __init efi_call_phys_epilog(void) + { +- unsigned long cr4; + struct desc_ptr gdt_descr; + +- gdt_descr.address = (unsigned long)get_cpu_gdt_table(0); ++ gdt_descr.address = get_cpu_gdt_table(0); + gdt_descr.size = GDT_SIZE - 1; + load_gdt(&gdt_descr); + +- cr4 = read_cr4_safe(); +- +- if (cr4 & X86_CR4_PAE) { +- swapper_pg_dir[pgd_index(0)].pgd = +- efi_bak_pg_dir_pointer[0].pgd; +- } else { +- swapper_pg_dir[pgd_index(0)].pgd = +- efi_bak_pg_dir_pointer[0].pgd; +- swapper_pg_dir[pgd_index(0x400000)].pgd = +- efi_bak_pg_dir_pointer[1].pgd; +- } ++ clone_pgd_range(swapper_pg_dir, efi_bak_pg_dir_pointer, KERNEL_PGD_PTRS); + + /* + * After the lock is released, the original page table is restored. +diff -urNp linux-2.6.29/arch/x86/kernel/efi_stub_32.S linux-2.6.29/arch/x86/kernel/efi_stub_32.S +--- linux-2.6.29/arch/x86/kernel/efi_stub_32.S 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/efi_stub_32.S 2009-03-28 14:26:19.000000000 -0400 +@@ -6,6 +6,7 @@ + */ + + #include ++#include + #include + + /* +@@ -20,7 +21,7 @@ + * service functions will comply with gcc calling convention, too. + */ + +-.text ++__INIT + ENTRY(efi_call_phys) + /* + * 0. The function can only be called in Linux kernel. So CS has been +@@ -36,9 +37,7 @@ ENTRY(efi_call_phys) + * The mapping of lower virtual memory has been created in prelog and + * epilog. + */ +- movl $1f, %edx +- subl $__PAGE_OFFSET, %edx +- jmp *%edx ++ jmp 1f-__PAGE_OFFSET + 1: + + /* +@@ -47,14 +46,8 @@ ENTRY(efi_call_phys) + * parameter 2, ..., param n. To make things easy, we save the return + * address of efi_call_phys in a global variable. 
+ */ +- popl %edx +- movl %edx, saved_return_addr +- /* get the function pointer into ECX*/ +- popl %ecx +- movl %ecx, efi_rt_function_ptr +- movl $2f, %edx +- subl $__PAGE_OFFSET, %edx +- pushl %edx ++ popl (saved_return_addr) ++ popl (efi_rt_function_ptr) + + /* + * 3. Clear PG bit in %CR0. +@@ -73,9 +66,8 @@ ENTRY(efi_call_phys) + /* + * 5. Call the physical function. + */ +- jmp *%ecx ++ call *(efi_rt_function_ptr-__PAGE_OFFSET) + +-2: + /* + * 6. After EFI runtime service returns, control will return to + * following instruction. We'd better readjust stack pointer first. +@@ -88,34 +80,27 @@ ENTRY(efi_call_phys) + movl %cr0, %edx + orl $0x80000000, %edx + movl %edx, %cr0 +- jmp 1f +-1: ++ + /* + * 8. Now restore the virtual mode from flat mode by + * adding EIP with PAGE_OFFSET. + */ +- movl $1f, %edx +- jmp *%edx ++ jmp 1f+__PAGE_OFFSET + 1: + + /* + * 9. Balance the stack. And because EAX contain the return value, + * we'd better not clobber it. + */ +- leal efi_rt_function_ptr, %edx +- movl (%edx), %ecx +- pushl %ecx ++ pushl (efi_rt_function_ptr) + + /* +- * 10. Push the saved return address onto the stack and return. ++ * 10. Return to the saved return address. 
+ */ +- leal saved_return_addr, %edx +- movl (%edx), %ecx +- pushl %ecx +- ret ++ jmpl *(saved_return_addr) + .previous + +-.data ++__INITDATA + saved_return_addr: + .long 0 + efi_rt_function_ptr: +diff -urNp linux-2.6.29/arch/x86/kernel/entry_32.S linux-2.6.29/arch/x86/kernel/entry_32.S +--- linux-2.6.29/arch/x86/kernel/entry_32.S 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/entry_32.S 2009-03-28 14:26:19.000000000 -0400 +@@ -101,7 +101,7 @@ + #define resume_userspace_sig resume_userspace + #endif + +-#define SAVE_ALL \ ++#define __SAVE_ALL(_DS) \ + cld; \ + pushl %fs; \ + CFI_ADJUST_CFA_OFFSET 4;\ +@@ -133,12 +133,26 @@ + pushl %ebx; \ + CFI_ADJUST_CFA_OFFSET 4;\ + CFI_REL_OFFSET ebx, 0;\ +- movl $(__USER_DS), %edx; \ ++ movl $(_DS), %edx; \ + movl %edx, %ds; \ + movl %edx, %es; \ + movl $(__KERNEL_PERCPU), %edx; \ + movl %edx, %fs + ++#ifdef CONFIG_PAX_KERNEXEC ++#define SAVE_ALL \ ++ __SAVE_ALL(__KERNEL_DS); \ ++ GET_CR0_INTO_EDX; \ ++ movl %edx, %esi; \ ++ orl $X86_CR0_WP, %edx; \ ++ xorl %edx, %esi; \ ++ SET_CR0_FROM_EDX ++#elif defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF) ++#define SAVE_ALL __SAVE_ALL(__KERNEL_DS) ++#else ++#define SAVE_ALL __SAVE_ALL(__USER_DS) ++#endif ++ + #define RESTORE_INT_REGS \ + popl %ebx; \ + CFI_ADJUST_CFA_OFFSET -4;\ +@@ -229,6 +243,11 @@ ENTRY(ret_from_fork) + CFI_ADJUST_CFA_OFFSET 4 + popfl + CFI_ADJUST_CFA_OFFSET -4 ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ xorl %esi, %esi ++#endif ++ + jmp syscall_exit + CFI_ENDPROC + END(ret_from_fork) +@@ -252,7 +271,17 @@ check_userspace: + movb PT_CS(%esp), %al + andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax + cmpl $USER_RPL, %eax ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ jae resume_userspace ++ ++ GET_CR0_INTO_EDX ++ xorl %esi, %edx ++ SET_CR0_FROM_EDX ++ jmp resume_kernel ++#else + jb resume_kernel # not returning to v8086 or userspace ++#endif + + ENTRY(resume_userspace) + LOCKDEP_SYS_EXIT +@@ -314,10 +343,9 @@ 
sysenter_past_esp: + /*CFI_REL_OFFSET cs, 0*/ + /* + * Push current_thread_info()->sysenter_return to the stack. +- * A tiny bit of offset fixup is necessary - 4*4 means the 4 words +- * pushed above; +8 corresponds to copy_thread's esp0 setting. + */ +- pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) ++ GET_THREAD_INFO(%ebp) ++ pushl TI_sysenter_return(%ebp) + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET eip, 0 + +@@ -330,9 +358,19 @@ sysenter_past_esp: + * Load the potential sixth argument from user stack. + * Careful about security. + */ ++ movl PT_OLDESP(%esp),%ebp ++ ++#ifdef CONFIG_PAX_MEMORY_UDEREF ++ mov PT_OLDSS(%esp),%ds ++1: movl %ds:(%ebp),%ebp ++ push %ss ++ pop %ds ++#else + cmpl $__PAGE_OFFSET-3,%ebp + jae syscall_fault + 1: movl (%ebp),%ebp ++#endif ++ + movl %ebp,PT_EBP(%esp) + .section __ex_table,"a" + .align 4 +@@ -356,12 +394,23 @@ sysenter_do_call: + testw $_TIF_ALLWORK_MASK, %cx + jne sysexit_audit + sysenter_exit: ++ ++#ifdef CONFIG_PAX_RANDKSTACK ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ call pax_randomize_kstack ++ popl %eax ++ CFI_ADJUST_CFA_OFFSET -4 ++#endif ++ + /* if something modifies registers it must also disable sysexit */ + movl PT_EIP(%esp), %edx + movl PT_OLDESP(%esp), %ecx + xorl %ebp,%ebp + TRACE_IRQS_ON + 1: mov PT_FS(%esp), %fs ++2: mov PT_DS(%esp), %ds ++3: mov PT_ES(%esp), %es + ENABLE_INTERRUPTS_SYSEXIT + + #ifdef CONFIG_AUDITSYSCALL +@@ -404,11 +453,17 @@ sysexit_audit: + + CFI_ENDPROC + .pushsection .fixup,"ax" +-2: movl $0,PT_FS(%esp) ++4: movl $0,PT_FS(%esp) ++ jmp 1b ++5: movl $0,PT_DS(%esp) ++ jmp 1b ++6: movl $0,PT_ES(%esp) + jmp 1b + .section __ex_table,"a" + .align 4 +- .long 1b,2b ++ .long 1b,4b ++ .long 2b,5b ++ .long 3b,6b + .popsection + ENDPROC(ia32_sysenter_target) + +@@ -438,6 +493,10 @@ syscall_exit: + testw $_TIF_ALLWORK_MASK, %cx # current->work + jne syscall_exit_work + ++#ifdef CONFIG_PAX_RANDKSTACK ++ call pax_randomize_kstack ++#endif ++ + restore_all: + movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS 
and CS + # Warning: PT_OLDSS(%esp) contains the wrong/random values if we +@@ -531,25 +590,19 @@ work_resched: + + work_notifysig: # deal with pending signals and + # notify-resume requests ++ movl %esp, %eax + #ifdef CONFIG_VM86 + testl $X86_EFLAGS_VM, PT_EFLAGS(%esp) +- movl %esp, %eax +- jne work_notifysig_v86 # returning to kernel-space or ++ jz 1f # returning to kernel-space or + # vm86-space +- xorl %edx, %edx +- call do_notify_resume +- jmp resume_userspace_sig + +- ALIGN +-work_notifysig_v86: + pushl %ecx # save ti_flags for do_notify_resume + CFI_ADJUST_CFA_OFFSET 4 + call save_v86_state # %eax contains pt_regs pointer + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + movl %eax, %esp +-#else +- movl %esp, %eax ++1: + #endif + xorl %edx, %edx + call do_notify_resume +@@ -584,6 +637,10 @@ END(syscall_exit_work) + + RING0_INT_FRAME # can't unwind into user space anyway + syscall_fault: ++#ifdef CONFIG_PAX_MEMORY_UDEREF ++ push %ss ++ pop %ds ++#endif + GET_THREAD_INFO(%ebp) + movl $-EFAULT,PT_EAX(%esp) + jmp resume_userspace +@@ -595,17 +652,24 @@ syscall_badsys: + END(syscall_badsys) + CFI_ENDPROC + +-#define FIXUP_ESPFIX_STACK \ +- /* since we are on a wrong stack, we cant make it a C code :( */ \ +- PER_CPU(gdt_page, %ebx); \ +- GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ +- addl %esp, %eax; \ +- pushl $__KERNEL_DS; \ +- CFI_ADJUST_CFA_OFFSET 4; \ +- pushl %eax; \ +- CFI_ADJUST_CFA_OFFSET 4; \ +- lss (%esp), %esp; \ ++.macro FIXUP_ESPFIX_STACK ++ /* since we are on a wrong stack, we cant make it a C code :( */ ++#ifdef CONFIG_SMP ++ movl PER_CPU_VAR(cpu_number), %ebx; ++ shll $PAGE_SHIFT_asm, %ebx; ++ addl $cpu_gdt_table, %ebx; ++#else ++ movl $cpu_gdt_table, %ebx; ++#endif ++ GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); ++ addl %esp, %eax; ++ pushl $__KERNEL_DS; ++ CFI_ADJUST_CFA_OFFSET 4; ++ pushl %eax; ++ CFI_ADJUST_CFA_OFFSET 4; ++ lss (%esp), %esp; + CFI_ADJUST_CFA_OFFSET -8; ++.endm + #define UNWIND_ESPFIX_STACK \ + movl 
%ss, %eax; \ + /* see if on espfix stack */ \ +@@ -1052,7 +1116,6 @@ return_to_handler: + ret + #endif + +-.section .rodata,"a" + #include "syscall_table_32.S" + + syscall_table_size=(.-sys_call_table) +@@ -1106,12 +1169,21 @@ error_code: + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + /*CFI_REGISTER es, ecx*/ ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ GET_CR0_INTO_EDX ++ movl %edx, %esi ++ orl $X86_CR0_WP, %edx ++ xorl %edx, %esi ++ SET_CR0_FROM_EDX ++#endif ++ + movl PT_FS(%esp), %edi # get the function address + movl PT_ORIG_EAX(%esp), %edx # get the error code + movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart + mov %ecx, PT_FS(%esp) + /*CFI_REL_OFFSET fs, ES*/ +- movl $(__USER_DS), %ecx ++ movl $(__KERNEL_DS), %ecx + movl %ecx, %ds + movl %ecx, %es + TRACE_IRQS_OFF +@@ -1206,6 +1278,13 @@ nmi_stack_correct: + xorl %edx,%edx # zero error code + movl %esp,%eax # pt_regs pointer + call do_nmi ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ GET_CR0_INTO_EDX ++ xorl %esi, %edx ++ SET_CR0_FROM_EDX ++#endif ++ + jmp restore_nocheck_notrace + CFI_ENDPROC + +@@ -1246,6 +1325,13 @@ nmi_espfix_stack: + FIXUP_ESPFIX_STACK # %eax == %esp + xorl %edx,%edx # zero error code + call do_nmi ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ GET_CR0_INTO_EDX ++ xorl %esi, %edx ++ SET_CR0_FROM_EDX ++#endif ++ + RESTORE_REGS + lss 12+4(%esp), %esp # back to espfix stack + CFI_ADJUST_CFA_OFFSET -24 +diff -urNp linux-2.6.29/arch/x86/kernel/entry_64.S linux-2.6.29/arch/x86/kernel/entry_64.S +--- linux-2.6.29/arch/x86/kernel/entry_64.S 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/entry_64.S 2009-03-28 14:26:19.000000000 -0400 +@@ -1073,10 +1073,11 @@ ENTRY(\sym) + TRACE_IRQS_OFF + movq %rsp,%rdi /* pt_regs pointer */ + xorl %esi,%esi /* no error code */ +- movq %gs:pda_data_offset, %rbp +- subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) ++ imul $TSS_size, %gs:pda_cpunumber, %ebp ++ lea init_tss(%rbp), %rbp ++ subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) + call 
\do_sym +- addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) ++ addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) + jmp paranoid_exit /* %ebx: no swapgs flag */ + CFI_ENDPROC + END(\sym) +diff -urNp linux-2.6.29/arch/x86/kernel/ftrace.c linux-2.6.29/arch/x86/kernel/ftrace.c +--- linux-2.6.29/arch/x86/kernel/ftrace.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/ftrace.c 2009-03-28 14:26:19.000000000 -0400 +@@ -250,9 +250,9 @@ int ftrace_update_ftrace_func(ftrace_fun + unsigned char old[MCOUNT_INSN_SIZE], *new; + int ret; + +- memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE); ++ memcpy(old, (void *)ktla_ktva((unsigned long)ftrace_call), MCOUNT_INSN_SIZE); + new = ftrace_call_replace(ip, (unsigned long)func); +- ret = ftrace_modify_code(ip, old, new); ++ ret = ftrace_modify_code(ktla_ktva(ip), old, new); + + return ret; + } +diff -urNp linux-2.6.29/arch/x86/kernel/head32.c linux-2.6.29/arch/x86/kernel/head32.c +--- linux-2.6.29/arch/x86/kernel/head32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/head32.c 2009-03-28 14:26:19.000000000 -0400 +@@ -13,12 +13,13 @@ + #include + #include + #include ++#include + + void __init i386_start_kernel(void) + { + reserve_trampoline_memory(); + +- reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); ++ reserve_early(LOAD_PHYSICAL_ADDR, __pa_symbol(&_end), "TEXT DATA BSS"); + + #ifdef CONFIG_BLK_DEV_INITRD + /* Reserve INITRD */ +diff -urNp linux-2.6.29/arch/x86/kernel/head_32.S linux-2.6.29/arch/x86/kernel/head_32.S +--- linux-2.6.29/arch/x86/kernel/head_32.S 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/head_32.S 2009-03-28 14:26:19.000000000 -0400 +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + + /* Physical address */ + #define pa(X) ((X) - __PAGE_OFFSET) +@@ -64,17 +65,22 @@ LOW_PAGES = 1<<(32-PAGE_SHIFT_asm) + LOW_PAGES = LOW_PAGES + 0x1000000 + #endif + +-#if PTRS_PER_PMD > 1 +-PAGE_TABLE_SIZE = 
(LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD +-#else +-PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD) +-#endif ++PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PTE) + BOOTBITMAP_SIZE = LOW_PAGES / 8 + ALLOCATOR_SLOP = 4 + + INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm + + /* ++ * Real beginning of normal "text" segment ++ */ ++ENTRY(stext) ++ENTRY(_stext) ++ ++.section .text.startup,"ax",@progbits ++ ljmp $(__BOOT_CS),$phys_startup_32 ++ ++/* + * 32-bit kernel entrypoint; only used by the boot CPU. On entry, + * %esi points to the real-mode code as a 32-bit pointer. + * CS and DS must be 4 GB flat segments, but we don't depend on +@@ -82,6 +88,12 @@ INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + + * can. + */ + .section .text.head,"ax",@progbits ++ ++#ifdef CONFIG_PAX_KERNEXEC ++/* PaX: fill first page in .text with int3 to catch NULL derefs in kernel mode */ ++.fill 4096,1,0xcc ++#endif ++ + ENTRY(startup_32) + /* test KEEP_SEGMENTS flag to see if the bootloader is asking + us to not reload segments */ +@@ -99,6 +111,56 @@ ENTRY(startup_32) + movl %eax,%gs + 2: + ++ movl $pa(cpu_gdt_table),%edi ++ movl $__per_cpu_start,%eax ++ movw %ax,__KERNEL_PERCPU + 2(%edi) ++ rorl $16,%eax ++ movb %al,__KERNEL_PERCPU + 4(%edi) ++ movb %ah,__KERNEL_PERCPU + 7(%edi) ++ movl $__per_cpu_end + PERCPU_MODULE_RESERVE - 1,%eax ++ subl $__per_cpu_start,%eax ++ movw %ax,__KERNEL_PERCPU + 0(%edi) ++ ++#ifdef CONFIG_PAX_MEMORY_UDEREF ++ /* check for VMware */ ++ movl $0x564d5868,%eax ++ xorl %ebx,%ebx ++ movl $0xa,%ecx ++ movl $0x5658,%edx ++ in (%dx),%eax ++ cmpl $0x564d5868,%ebx ++ jz 2f ++ ++ movl $NR_CPUS,%ecx ++ movl $pa(cpu_gdt_table),%edi ++1: ++ movl $((((__PAGE_OFFSET-1) & 0xf0000000) >> 12) | 0x00c09700),GDT_ENTRY_KERNEL_DS * 8 + 4(%edi) ++ addl $PAGE_SIZE_asm,%edi ++ loop 1b ++2: ++#endif ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ movl $pa(boot_gdt),%edi ++ movl $KERNEL_TEXT_OFFSET,%eax ++ movw %ax,__BOOT_CS + 2(%edi) ++ rorl $16,%eax ++ movb %al,__BOOT_CS + 
4(%edi) ++ movb %ah,__BOOT_CS + 7(%edi) ++ rorl $16,%eax ++ ++ movl $NR_CPUS,%ecx ++ movl $pa(cpu_gdt_table),%edi ++1: ++ movw %ax,__KERNEL_CS + 2(%edi) ++ rorl $16,%eax ++ movb %al,__KERNEL_CS + 4(%edi) ++ movb %ah,__KERNEL_CS + 7(%edi) ++ rorl $16,%eax ++ addl $PAGE_SIZE_asm,%edi ++ loop 1b ++#endif ++ + /* + * Clear BSS first so that there are no surprises... + */ +@@ -142,9 +204,7 @@ ENTRY(startup_32) + cmpl $num_subarch_entries, %eax + jae bad_subarch + +- movl pa(subarch_entries)(,%eax,4), %eax +- subl $__PAGE_OFFSET, %eax +- jmp *%eax ++ jmp *pa(subarch_entries)(,%eax,4) + + bad_subarch: + WEAK(lguest_entry) +@@ -156,9 +216,9 @@ WEAK(xen_entry) + __INITDATA + + subarch_entries: +- .long default_entry /* normal x86/PC */ +- .long lguest_entry /* lguest hypervisor */ +- .long xen_entry /* Xen hypervisor */ ++ .long pa(default_entry) /* normal x86/PC */ ++ .long pa(lguest_entry) /* lguest hypervisor */ ++ .long pa(xen_entry) /* Xen hypervisor */ + num_subarch_entries = (. - subarch_entries) / 4 + .previous + #endif /* CONFIG_PARAVIRT */ +@@ -220,8 +280,7 @@ default_entry: + movl %eax, pa(max_pfn_mapped) + + /* Do early initialization of the fixmap area */ +- movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax +- movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8) ++ movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,pa(swapper_pg_pmd+0x1000*KPMDS-8) + #else /* Not PAE */ + + page_pde_offset = (__PAGE_OFFSET >> 20); +@@ -253,8 +312,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20); + movl %eax, pa(max_pfn_mapped) + + /* Do early initialization of the fixmap area */ +- movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax +- movl %eax,pa(swapper_pg_dir+0xffc) ++ movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,pa(swapper_pg_dir+0xffc) + #endif + jmp 3f + /* +@@ -318,13 +376,16 @@ ENTRY(startup_32_smp) + jnc 6f + + /* Setup EFER (Extended Feature Enable Register) */ +- movl $0xc0000080, %ecx ++ movl $MSR_EFER, %ecx + rdmsr + + btsl $11, %eax + /* Make changes effective */ + wrmsr + ++ btsl 
$_PAGE_BIT_NX-32,pa(__supported_pte_mask+4) ++ movl $1,pa(nx_enabled) ++ + 6: + + /* +@@ -350,9 +411,7 @@ ENTRY(startup_32_smp) + + #ifdef CONFIG_SMP + cmpb $0, ready +- jz 1f /* Initial CPU cleans BSS */ +- jmp checkCPUtype +-1: ++ jnz checkCPUtype /* Initial CPU cleans BSS */ + #endif /* CONFIG_SMP */ + + /* +@@ -429,12 +488,12 @@ is386: movl $2,%ecx # set MP + ljmp $(__KERNEL_CS),$1f + 1: movl $(__KERNEL_DS),%eax # reload all the segment registers + movl %eax,%ss # after changing gdt. +- movl %eax,%fs # gets reset once there's real percpu +- +- movl $(__USER_DS),%eax # DS/ES contains default USER segment + movl %eax,%ds + movl %eax,%es + ++ movl $(__KERNEL_PERCPU), %eax ++ movl %eax,%fs # set this cpu's percpu ++ + xorl %eax,%eax # Clear GS and LDT + movl %eax,%gs + lldt %ax +@@ -444,12 +503,6 @@ is386: movl $2,%ecx # set MP + #ifdef CONFIG_SMP + movb ready, %cl + movb $1, ready +- cmpb $0,%cl # the first CPU calls start_kernel +- je 1f +- movl $(__KERNEL_PERCPU), %eax +- movl %eax,%fs # set this cpu's percpu +- movl (stack_start), %esp +-1: + #endif /* CONFIG_SMP */ + jmp *(initial_code) + +@@ -535,15 +588,15 @@ early_page_fault: + jmp early_fault + + early_fault: +- cld + #ifdef CONFIG_PRINTK ++ cmpl $2,%ss:early_recursion_flag ++ je hlt_loop ++ incl %ss:early_recursion_flag ++ cld + pusha + movl $(__KERNEL_DS),%eax + movl %eax,%ds + movl %eax,%es +- cmpl $2,early_recursion_flag +- je hlt_loop +- incl early_recursion_flag + movl %cr2,%eax + pushl %eax + pushl %edx /* trapno */ +@@ -553,8 +606,8 @@ early_fault: + #else + call printk + #endif +-#endif + call dump_stack ++#endif + hlt_loop: + hlt + jmp hlt_loop +@@ -562,8 +615,11 @@ hlt_loop: + /* This is the default interrupt "handler" :-) */ + ALIGN + ignore_int: +- cld + #ifdef CONFIG_PRINTK ++ cmpl $2,%ss:early_recursion_flag ++ je hlt_loop ++ incl %ss:early_recursion_flag ++ cld + pushl %eax + pushl %ecx + pushl %edx +@@ -572,9 +628,6 @@ ignore_int: + movl $(__KERNEL_DS),%eax + movl %eax,%ds + movl %eax,%es 
+- cmpl $2,early_recursion_flag +- je hlt_loop +- incl early_recursion_flag + pushl 16(%esp) + pushl 24(%esp) + pushl 32(%esp) +@@ -599,36 +652,41 @@ ignore_int: + ENTRY(initial_code) + .long i386_start_kernel + +-.section .text +-/* +- * Real beginning of normal "text" segment +- */ +-ENTRY(stext) +-ENTRY(_stext) +- + /* + * BSS section + */ +-.section ".bss.page_aligned","wa" +- .align PAGE_SIZE_asm + #ifdef CONFIG_X86_PAE ++.section .swapper_pg_pmd,"a",@progbits + swapper_pg_pmd: + .fill 1024*KPMDS,4,0 + #else ++.section .swapper_pg_dir,"a",@progbits + ENTRY(swapper_pg_dir) + .fill 1024,4,0 + #endif + swapper_pg_fixmap: + .fill 1024,4,0 ++ ++.section .empty_zero_page,"a",@progbits + ENTRY(empty_zero_page) + .fill 4096,1,0 ++ ++/* ++ * The IDT has to be page-aligned to simplify the Pentium ++ * F0 0F bug workaround.. We have a special link segment ++ * for this. ++ */ ++.section .idt,"a",@progbits ++ENTRY(idt_table) ++ .fill 256,8,0 ++ + /* + * This starts the data section. + */ ++.data ++ + #ifdef CONFIG_X86_PAE +-.section ".data.page_aligned","wa" +- /* Page-aligned for the benefit of paravirt? 
*/ +- .align PAGE_SIZE_asm ++.section .swapper_pg_dir,"a",@progbits + ENTRY(swapper_pg_dir) + .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */ + # if KPMDS == 3 +@@ -651,11 +709,12 @@ ENTRY(swapper_pg_dir) + + .data + ENTRY(stack_start) +- .long init_thread_union+THREAD_SIZE ++ .long init_thread_union+THREAD_SIZE-8 + .long __BOOT_DS + + ready: .byte 0 + ++.section .rodata,"a",@progbits + early_recursion_flag: + .long 0 + +@@ -691,7 +750,7 @@ fault_msg: + .word 0 # 32 bit align gdt_desc.address + boot_gdt_descr: + .word __BOOT_DS+7 +- .long boot_gdt - __PAGE_OFFSET ++ .long pa(boot_gdt) + + .word 0 # 32-bit align idt_desc.address + idt_descr: +@@ -702,7 +761,7 @@ idt_descr: + .word 0 # 32 bit align gdt_desc.address + ENTRY(early_gdt_descr) + .word GDT_ENTRIES*8-1 +- .long per_cpu__gdt_page /* Overwritten for secondary CPUs */ ++ .long cpu_gdt_table /* Overwritten for secondary CPUs */ + + /* + * The boot_gdt must mirror the equivalent in setup.S and is +@@ -711,5 +770,59 @@ ENTRY(early_gdt_descr) + .align L1_CACHE_BYTES + ENTRY(boot_gdt) + .fill GDT_ENTRY_BOOT_CS,8,0 +- .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ +- .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ ++ .quad 0x00cf9b000000ffff /* kernel 4GB code at 0x00000000 */ ++ .quad 0x00cf93000000ffff /* kernel 4GB data at 0x00000000 */ ++ ++ .align PAGE_SIZE_asm ++ENTRY(cpu_gdt_table) ++ .rept NR_CPUS ++ .quad 0x0000000000000000 /* NULL descriptor */ ++ .quad 0x0000000000000000 /* 0x0b reserved */ ++ .quad 0x0000000000000000 /* 0x13 reserved */ ++ .quad 0x0000000000000000 /* 0x1b reserved */ ++ .quad 0x0000000000000000 /* 0x20 unused */ ++ .quad 0x0000000000000000 /* 0x28 unused */ ++ .quad 0x0000000000000000 /* 0x33 TLS entry 1 */ ++ .quad 0x0000000000000000 /* 0x3b TLS entry 2 */ ++ .quad 0x0000000000000000 /* 0x43 TLS entry 3 */ ++ .quad 0x0000000000000000 /* 0x4b reserved */ ++ .quad 0x0000000000000000 /* 0x53 reserved */ ++ .quad 0x0000000000000000 /* 0x5b 
reserved */ ++ ++ .quad 0x00cf9b000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ ++ .quad 0x00cf93000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ ++ .quad 0x00cffb000000ffff /* 0x73 user 4GB code at 0x00000000 */ ++ .quad 0x00cff3000000ffff /* 0x7b user 4GB data at 0x00000000 */ ++ ++ .quad 0x0000000000000000 /* 0x80 TSS descriptor */ ++ .quad 0x0000000000000000 /* 0x88 LDT descriptor */ ++ ++ /* ++ * Segments used for calling PnP BIOS have byte granularity. ++ * The code segments and data segments have fixed 64k limits, ++ * the transfer segment sizes are set at run time. ++ */ ++ .quad 0x00409b000000ffff /* 0x90 32-bit code */ ++ .quad 0x00009b000000ffff /* 0x98 16-bit code */ ++ .quad 0x000093000000ffff /* 0xa0 16-bit data */ ++ .quad 0x0000930000000000 /* 0xa8 16-bit data */ ++ .quad 0x0000930000000000 /* 0xb0 16-bit data */ ++ ++ /* ++ * The APM segments have byte granularity and their bases ++ * are set at run time. All have 64k limits. ++ */ ++ .quad 0x00409b000000ffff /* 0xb8 APM CS code */ ++ .quad 0x00009b000000ffff /* 0xc0 APM CS 16 code (16 bit) */ ++ .quad 0x004093000000ffff /* 0xc8 APM DS data */ ++ ++ .quad 0x00c0930000000000 /* 0xd0 - ESPFIX SS */ ++ .quad 0x0040930000000000 /* 0xd8 - PERCPU */ ++ .quad 0x0000000000000000 /* 0xe0 - PCIBIOS_CS */ ++ .quad 0x0000000000000000 /* 0xe8 - PCIBIOS_DS */ ++ .quad 0x0000000000000000 /* 0xf0 - unused */ ++ .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */ ++ ++ /* Be sure this is zeroed to avoid false validations in Xen */ ++ .fill PAGE_SIZE_asm - GDT_SIZE,1,0 ++ .endr +diff -urNp linux-2.6.29/arch/x86/kernel/head64.c linux-2.6.29/arch/x86/kernel/head64.c +--- linux-2.6.29/arch/x86/kernel/head64.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/head64.c 2009-03-28 14:26:19.000000000 -0400 +@@ -94,6 +94,8 @@ void __init x86_64_start_kernel(char * r + /* clear bss before set_intr_gate with early_idt_handler */ + clear_bss(); + ++ x86_64_init_pda(); ++ + /* Make 
NULL pointers segfault */ + zap_identity_mappings(); + +@@ -112,8 +114,6 @@ void __init x86_64_start_kernel(char * r + if (console_loglevel == 10) + early_printk("Kernel alive\n"); + +- x86_64_init_pda(); +- + x86_64_start_reservations(real_mode_data); + } + +diff -urNp linux-2.6.29/arch/x86/kernel/head_64.S linux-2.6.29/arch/x86/kernel/head_64.S +--- linux-2.6.29/arch/x86/kernel/head_64.S 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/head_64.S 2009-03-28 14:26:19.000000000 -0400 +@@ -38,6 +38,10 @@ L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET + L3_PAGE_OFFSET = pud_index(__PAGE_OFFSET) + L4_START_KERNEL = pgd_index(__START_KERNEL_map) + L3_START_KERNEL = pud_index(__START_KERNEL_map) ++L4_VMALLOC_START = pgd_index(VMALLOC_START) ++L3_VMALLOC_START = pud_index(VMALLOC_START) ++L4_VMEMMAP_START = pgd_index(VMEMMAP_START) ++L3_VMEMMAP_START = pud_index(VMEMMAP_START) + + .text + .section .text.head +@@ -85,35 +89,22 @@ startup_64: + */ + addq %rbp, init_level4_pgt + 0(%rip) + addq %rbp, init_level4_pgt + (L4_PAGE_OFFSET*8)(%rip) ++ addq %rbp, init_level4_pgt + (L4_VMALLOC_START*8)(%rip) ++ addq %rbp, init_level4_pgt + (L4_VMEMMAP_START*8)(%rip) + addq %rbp, init_level4_pgt + (L4_START_KERNEL*8)(%rip) + + addq %rbp, level3_ident_pgt + 0(%rip) ++ addq %rbp, level3_ident_pgt + 8(%rip) ++ addq %rbp, level3_ident_pgt + 16(%rip) ++ addq %rbp, level3_ident_pgt + 24(%rip) + +- addq %rbp, level3_kernel_pgt + (510*8)(%rip) +- addq %rbp, level3_kernel_pgt + (511*8)(%rip) ++ addq %rbp, level3_vmemmap_pgt + (L3_VMEMMAP_START*8)(%rip) + +- addq %rbp, level2_fixmap_pgt + (506*8)(%rip) ++ addq %rbp, level3_kernel_pgt + (L3_START_KERNEL*8)(%rip) ++ addq %rbp, level3_kernel_pgt + (L3_START_KERNEL*8+8)(%rip) + +- /* Add an Identity mapping if I am above 1G */ +- leaq _text(%rip), %rdi +- andq $PMD_PAGE_MASK, %rdi +- +- movq %rdi, %rax +- shrq $PUD_SHIFT, %rax +- andq $(PTRS_PER_PUD - 1), %rax +- jz ident_complete +- +- leaq (level2_spare_pgt - __START_KERNEL_map + 
_KERNPG_TABLE)(%rbp), %rdx +- leaq level3_ident_pgt(%rip), %rbx +- movq %rdx, 0(%rbx, %rax, 8) +- +- movq %rdi, %rax +- shrq $PMD_SHIFT, %rax +- andq $(PTRS_PER_PMD - 1), %rax +- leaq __PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx +- leaq level2_spare_pgt(%rip), %rbx +- movq %rdx, 0(%rbx, %rax, 8) +-ident_complete: ++ addq %rbp, level2_fixmap_pgt + (506*8)(%rip) ++ addq %rbp, level2_fixmap_pgt + (507*8)(%rip) + + /* + * Fixup the kernel text+data virtual addresses. Note that +@@ -187,6 +178,10 @@ ENTRY(secondary_startup_64) + btl $20,%edi /* No Execute supported? */ + jnc 1f + btsl $_EFER_NX, %eax ++ leaq init_level4_pgt(%rip), %rdi ++ btsq $_PAGE_BIT_NX, 8*L4_PAGE_OFFSET(%rdi) ++ btsq $_PAGE_BIT_NX, 8*L4_VMALLOC_START(%rdi) ++ btsq $_PAGE_BIT_NX, 8*L4_VMEMMAP_START(%rdi) + 1: wrmsr /* Make changes effective */ + + /* Setup cr0 */ +@@ -257,16 +252,16 @@ ENTRY(secondary_startup_64) + .align 8 + ENTRY(initial_code) + .quad x86_64_start_kernel +- __FINITDATA + + ENTRY(stack_start) + .quad init_thread_union+THREAD_SIZE-8 + .word 0 ++ __FINITDATA + + bad_address: + jmp bad_address + +- .section ".init.text","ax" ++ __INIT + #ifdef CONFIG_EARLY_PRINTK + .globl early_idt_handlers + early_idt_handlers: +@@ -311,18 +306,23 @@ ENTRY(early_idt_handler) + #endif /* EARLY_PRINTK */ + 1: hlt + jmp 1b ++ .previous + + #ifdef CONFIG_EARLY_PRINTK ++ __INITDATA + early_recursion_flag: + .long 0 ++ .previous + ++ .section .rodata,"a",@progbits + early_idt_msg: + .asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n" + early_idt_ripmsg: + .asciz "RIP %s\n" +-#endif /* CONFIG_EARLY_PRINTK */ + .previous ++#endif /* CONFIG_EARLY_PRINTK */ + ++ .section .rodata,"a",@progbits + .balign PAGE_SIZE + + #define NEXT_PAGE(name) \ +@@ -347,13 +347,27 @@ NEXT_PAGE(init_level4_pgt) + .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE + .org init_level4_pgt + L4_PAGE_OFFSET*8, 0 + .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE ++ .org init_level4_pgt + 
L4_VMALLOC_START*8, 0 ++ .quad level3_vmalloc_pgt - __START_KERNEL_map + _KERNPG_TABLE ++ .org init_level4_pgt + L4_VMEMMAP_START*8, 0 ++ .quad level3_vmemmap_pgt - __START_KERNEL_map + _KERNPG_TABLE + .org init_level4_pgt + L4_START_KERNEL*8, 0 + /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ + .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE + + NEXT_PAGE(level3_ident_pgt) + .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE +- .fill 511,8,0 ++ .quad level2_ident_pgt + PAGE_SIZE - __START_KERNEL_map + _KERNPG_TABLE ++ .quad level2_ident_pgt + 2*PAGE_SIZE - __START_KERNEL_map + _KERNPG_TABLE ++ .quad level2_ident_pgt + 3*PAGE_SIZE - __START_KERNEL_map + _KERNPG_TABLE ++ .fill 508,8,0 ++ ++NEXT_PAGE(level3_vmalloc_pgt) ++ .fill 512,8,0 ++ ++NEXT_PAGE(level3_vmemmap_pgt) ++ .fill L3_VMEMMAP_START,8,0 ++ .quad level2_vmemmap_pgt - __START_KERNEL_map + _KERNPG_TABLE + + NEXT_PAGE(level3_kernel_pgt) + .fill L3_START_KERNEL,8,0 +@@ -361,20 +375,27 @@ NEXT_PAGE(level3_kernel_pgt) + .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE + .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE + ++NEXT_PAGE(level2_vmemmap_pgt) ++ .fill 512,8,0 ++ + NEXT_PAGE(level2_fixmap_pgt) + .fill 506,8,0 + .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE +- /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */ +- .fill 5,8,0 ++ .quad level1_vsyscall_pgt - __START_KERNEL_map + _PAGE_TABLE ++ /* 6MB reserved for vsyscalls + a 2MB hole = 3 + 1 entries */ ++ .fill 4,8,0 + + NEXT_PAGE(level1_fixmap_pgt) + .fill 512,8,0 + +-NEXT_PAGE(level2_ident_pgt) +- /* Since I easily can, map the first 1G. ++NEXT_PAGE(level1_vsyscall_pgt) ++ .fill 512,8,0 ++ ++ /* Since I easily can, map the first 4G. + * Don't set NX because code runs from these pages. 
+ */ +- PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) ++NEXT_PAGE(level2_ident_pgt) ++ PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, 4*PTRS_PER_PMD) + + NEXT_PAGE(level2_kernel_pgt) + /* +@@ -387,32 +408,48 @@ NEXT_PAGE(level2_kernel_pgt) + * If you want to increase this then increase MODULES_VADDR + * too.) + */ +- PMDS(0, __PAGE_KERNEL_LARGE_EXEC, +- KERNEL_IMAGE_SIZE/PMD_SIZE) +- +-NEXT_PAGE(level2_spare_pgt) +- .fill 512, 8, 0 ++ PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE/PMD_SIZE) + + #undef PMDS + #undef NEXT_PAGE + +- .data ++ .align PAGE_SIZE ++ENTRY(cpu_gdt_table) ++ .rept NR_CPUS ++ .quad 0x0000000000000000 /* NULL descriptor */ ++ .quad 0x00cf9b000000ffff /* __KERNEL32_CS */ ++ .quad 0x00af9b000000ffff /* __KERNEL_CS */ ++ .quad 0x00cf93000000ffff /* __KERNEL_DS */ ++ .quad 0x00cffb000000ffff /* __USER32_CS */ ++ .quad 0x00cff3000000ffff /* __USER_DS, __USER32_DS */ ++ .quad 0x00affb000000ffff /* __USER_CS */ ++ .quad 0x0 /* unused */ ++ .quad 0,0 /* TSS */ ++ .quad 0,0 /* LDT */ ++ .quad 0,0,0 /* three TLS descriptors */ ++ .quad 0x0000f40000000000 /* node/CPU stored in limit */ ++ /* asm/segment.h:GDT_ENTRIES must match this */ ++ ++ /* zero the remaining page */ ++ .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0 ++ .endr ++ + .align 16 + .globl early_gdt_descr + early_gdt_descr: + .word GDT_ENTRIES*8-1 +- .quad per_cpu__gdt_page ++ .quad cpu_gdt_table + + ENTRY(phys_base) + /* This must match the first entry in level2_kernel_pgt */ + .quad 0x0000000000000000 + + #include "../../x86/xen/xen-head.S" +- +- .section .bss, "aw", @nobits ++ ++ .section .rodata,"a",@progbits + .align L1_CACHE_BYTES + ENTRY(idt_table) +- .skip 256 * 16 ++ .fill 512,8,0 + + .section .bss.page_aligned, "aw", @nobits + .align PAGE_SIZE +diff -urNp linux-2.6.29/arch/x86/kernel/i386_ksyms_32.c linux-2.6.29/arch/x86/kernel/i386_ksyms_32.c +--- linux-2.6.29/arch/x86/kernel/i386_ksyms_32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/i386_ksyms_32.c 
2009-03-28 14:26:19.000000000 -0400 +@@ -10,8 +10,12 @@ + EXPORT_SYMBOL(mcount); + #endif + ++EXPORT_SYMBOL_GPL(cpu_gdt_table); ++ + /* Networking helper routines. */ + EXPORT_SYMBOL(csum_partial_copy_generic); ++EXPORT_SYMBOL(csum_partial_copy_generic_to_user); ++EXPORT_SYMBOL(csum_partial_copy_generic_from_user); + + EXPORT_SYMBOL(__get_user_1); + EXPORT_SYMBOL(__get_user_2); +@@ -26,3 +30,7 @@ EXPORT_SYMBOL(strstr); + + EXPORT_SYMBOL(csum_partial); + EXPORT_SYMBOL(empty_zero_page); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++EXPORT_SYMBOL(KERNEL_TEXT_OFFSET); ++#endif +diff -urNp linux-2.6.29/arch/x86/kernel/init_task.c linux-2.6.29/arch/x86/kernel/init_task.c +--- linux-2.6.29/arch/x86/kernel/init_task.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/init_task.c 2009-03-28 14:26:19.000000000 -0400 +@@ -40,5 +40,5 @@ EXPORT_SYMBOL(init_task); + * section. Since TSS's are completely CPU-local, we want them + * on exact cacheline boundaries, to eliminate cacheline ping-pong. + */ +-DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; +- ++struct tss_struct init_tss[NR_CPUS] ____cacheline_internodealigned_in_smp = { [0 ... 
NR_CPUS-1] = INIT_TSS }; ++EXPORT_SYMBOL(init_tss); +diff -urNp linux-2.6.29/arch/x86/kernel/ioport.c linux-2.6.29/arch/x86/kernel/ioport.c +--- linux-2.6.29/arch/x86/kernel/ioport.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/ioport.c 2009-03-28 14:26:19.000000000 -0400 +@@ -6,6 +6,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -41,6 +42,12 @@ asmlinkage long sys_ioperm(unsigned long + + if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) + return -EINVAL; ++#ifdef CONFIG_GRKERNSEC_IO ++ if (turn_on) { ++ gr_handle_ioperm(); ++ return -EPERM; ++ } ++#endif + if (turn_on && !capable(CAP_SYS_RAWIO)) + return -EPERM; + +@@ -67,7 +74,7 @@ asmlinkage long sys_ioperm(unsigned long + * because the ->io_bitmap_max value must match the bitmap + * contents: + */ +- tss = &per_cpu(init_tss, get_cpu()); ++ tss = init_tss + get_cpu(); + + set_bitmap(t->io_bitmap_ptr, from, num, !turn_on); + +@@ -122,8 +129,13 @@ static int do_iopl(unsigned int level, s + return -EINVAL; + /* Trying to gain more privileges? 
*/ + if (level > old) { ++#ifdef CONFIG_GRKERNSEC_IO ++ gr_handle_iopl(); ++ return -EPERM; ++#else + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; ++#endif + } + regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12); + +diff -urNp linux-2.6.29/arch/x86/kernel/irq_32.c linux-2.6.29/arch/x86/kernel/irq_32.c +--- linux-2.6.29/arch/x86/kernel/irq_32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/irq_32.c 2009-03-28 14:26:19.000000000 -0400 +@@ -93,7 +93,7 @@ execute_on_irq_stack(int overflow, struc + return 0; + + /* build the stack frame on the IRQ stack */ +- isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); ++ isp = (u32 *) ((char *)irqctx + sizeof(*irqctx) - 8); + irqctx->tinfo.task = curctx->tinfo.task; + irqctx->tinfo.previous_esp = current_stack_pointer; + +@@ -174,7 +174,7 @@ asmlinkage void do_softirq(void) + irqctx->tinfo.previous_esp = current_stack_pointer; + + /* build the stack frame on the softirq stack */ +- isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); ++ isp = (u32 *) ((char *)irqctx + sizeof(*irqctx) - 8); + + call_on_stack(__do_softirq, isp); + /* +diff -urNp linux-2.6.29/arch/x86/kernel/kprobes.c linux-2.6.29/arch/x86/kernel/kprobes.c +--- linux-2.6.29/arch/x86/kernel/kprobes.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/kprobes.c 2009-03-28 14:26:19.000000000 -0400 +@@ -166,9 +166,24 @@ static void __kprobes set_jmp_op(void *f + char op; + s32 raddr; + } __attribute__((packed)) * jop; +- jop = (struct __arch_jmp_op *)from; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ ++ jop = (struct __arch_jmp_op *)(ktla_ktva(from)); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + jop->raddr = (s32)((long)(to) - ((long)(from) + 5)); + jop->op = RELATIVEJUMP_INSTRUCTION; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + } + + /* +@@ -345,16 +360,29 @@ static void __kprobes fix_riprel(struct + + static void __kprobes 
arch_copy_kprobe(struct kprobe *p) + { +- memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ ++ memcpy(p->ainsn.insn, ktla_ktva(p->addr), MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif + + fix_riprel(p); + +- if (can_boost(p->addr)) ++ if (can_boost(ktla_ktva(p->addr))) + p->ainsn.boostable = 0; + else + p->ainsn.boostable = -1; + +- p->opcode = *p->addr; ++ p->opcode = *(ktla_ktva(p->addr)); + } + + int __kprobes arch_prepare_kprobe(struct kprobe *p) +@@ -432,7 +460,7 @@ static void __kprobes prepare_singlestep + if (p->opcode == BREAKPOINT_INSTRUCTION) + regs->ip = (unsigned long)p->addr; + else +- regs->ip = (unsigned long)p->ainsn.insn; ++ regs->ip = ktva_ktla((unsigned long)p->ainsn.insn); + } + + void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, +@@ -453,7 +481,7 @@ static void __kprobes setup_singlestep(s + if (p->ainsn.boostable == 1 && !p->post_handler) { + /* Boost up -- we can execute copied instructions directly */ + reset_current_kprobe(); +- regs->ip = (unsigned long)p->ainsn.insn; ++ regs->ip = ktva_ktla((unsigned long)p->ainsn.insn); + preempt_enable_no_resched(); + return; + } +@@ -523,7 +551,7 @@ static int __kprobes kprobe_handler(stru + struct kprobe_ctlblk *kcb; + + addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); +- if (*addr != BREAKPOINT_INSTRUCTION) { ++ if (*(kprobe_opcode_t *)ktla_ktva((unsigned long)addr) != BREAKPOINT_INSTRUCTION) { + /* + * The breakpoint instruction was removed right + * after we hit it. 
Another cpu has removed +@@ -774,7 +802,7 @@ static void __kprobes resume_execution(s + struct pt_regs *regs, struct kprobe_ctlblk *kcb) + { + unsigned long *tos = stack_addr(regs); +- unsigned long copy_ip = (unsigned long)p->ainsn.insn; ++ unsigned long copy_ip = ktva_ktla((unsigned long)p->ainsn.insn); + unsigned long orig_ip = (unsigned long)p->addr; + kprobe_opcode_t *insn = p->ainsn.insn; + +@@ -957,7 +985,7 @@ int __kprobes kprobe_exceptions_notify(s + struct die_args *args = data; + int ret = NOTIFY_DONE; + +- if (args->regs && user_mode_vm(args->regs)) ++ if (args->regs && user_mode(args->regs)) + return ret; + + switch (val) { +diff -urNp linux-2.6.29/arch/x86/kernel/ldt.c linux-2.6.29/arch/x86/kernel/ldt.c +--- linux-2.6.29/arch/x86/kernel/ldt.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/ldt.c 2009-03-28 14:26:19.000000000 -0400 +@@ -66,13 +66,13 @@ static int alloc_ldt(mm_context_t *pc, i + if (reload) { + #ifdef CONFIG_SMP + preempt_disable(); +- load_LDT(pc); ++ load_LDT_nolock(pc); + if (!cpus_equal(current->mm->cpu_vm_mask, + cpumask_of_cpu(smp_processor_id()))) + smp_call_function(flush_ldt, current->mm, 1); + preempt_enable(); + #else +- load_LDT(pc); ++ load_LDT_nolock(pc); + #endif + } + if (oldsize) { +@@ -94,7 +94,7 @@ static inline int copy_ldt(mm_context_t + return err; + + for (i = 0; i < old->size; i++) +- write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); ++ write_ldt_entry(new->ldt, i, old->ldt + i); + return 0; + } + +@@ -115,6 +115,24 @@ int init_new_context(struct task_struct + retval = copy_ldt(&mm->context, &old_mm->context); + mutex_unlock(&old_mm->context.lock); + } ++ ++ if (tsk == current) { ++ mm->context.vdso = ~0UL; ++ ++#ifdef CONFIG_X86_32 ++#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) ++ mm->context.user_cs_base = 0UL; ++ mm->context.user_cs_limit = ~0UL; ++ ++#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_SMP) ++ cpus_clear(mm->context.cpu_user_cs_mask); 
++#endif ++ ++#endif ++#endif ++ ++ } ++ + return retval; + } + +@@ -229,6 +247,13 @@ static int write_ldt(void __user *ptr, u + } + } + ++#ifdef CONFIG_PAX_SEGMEXEC ++ if ((mm->pax_flags & MF_PAX_SEGMEXEC) && (ldt_info.contents & MODIFY_LDT_CONTENTS_CODE)) { ++ error = -EINVAL; ++ goto out_unlock; ++ } ++#endif ++ + fill_ldt(&ldt, &ldt_info); + if (oldmode) + ldt.avl = 0; +diff -urNp linux-2.6.29/arch/x86/kernel/machine_kexec_32.c linux-2.6.29/arch/x86/kernel/machine_kexec_32.c +--- linux-2.6.29/arch/x86/kernel/machine_kexec_32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/machine_kexec_32.c 2009-03-28 14:26:19.000000000 -0400 +@@ -26,7 +26,7 @@ + #include + #include + +-static void set_idt(void *newidt, __u16 limit) ++static void set_idt(struct desc_struct *newidt, __u16 limit) + { + struct desc_ptr curidt; + +@@ -38,7 +38,7 @@ static void set_idt(void *newidt, __u16 + } + + +-static void set_gdt(void *newgdt, __u16 limit) ++static void set_gdt(struct desc_struct *newgdt, __u16 limit) + { + struct desc_ptr curgdt; + +@@ -216,7 +216,7 @@ void machine_kexec(struct kimage *image) + } + + control_page = page_address(image->control_code_page); +- memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); ++ memcpy(control_page, (void *)ktla_ktva((unsigned long)relocate_kernel), KEXEC_CONTROL_CODE_MAX_SIZE); + + relocate_kernel_ptr = control_page; + page_list[PA_CONTROL_PAGE] = __pa(control_page); +diff -urNp linux-2.6.29/arch/x86/kernel/module_32.c linux-2.6.29/arch/x86/kernel/module_32.c +--- linux-2.6.29/arch/x86/kernel/module_32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/module_32.c 2009-03-28 14:26:19.000000000 -0400 +@@ -23,6 +23,9 @@ + #include + #include + ++#include ++#include ++ + #if 0 + #define DEBUGP printk + #else +@@ -33,9 +36,31 @@ void *module_alloc(unsigned long size) + { + if (size == 0) + return NULL; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | 
__GFP_ZERO, PAGE_KERNEL); ++#else + return vmalloc_exec(size); ++#endif ++ + } + ++#ifdef CONFIG_PAX_KERNEXEC ++void *module_alloc_exec(unsigned long size) ++{ ++ struct vm_struct *area; ++ ++ if (size == 0) ++ return NULL; ++ ++ area = __get_vm_area(size, VM_ALLOC, (unsigned long)&MODULES_VADDR, (unsigned long)&MODULES_END); ++ if (area) ++ return area->addr; ++ ++ return NULL; ++} ++EXPORT_SYMBOL(module_alloc_exec); ++#endif + + /* Free memory returned from module_alloc */ + void module_free(struct module *mod, void *module_region) +@@ -45,6 +70,45 @@ void module_free(struct module *mod, voi + table entries. */ + } + ++#ifdef CONFIG_PAX_KERNEXEC ++void module_free_exec(struct module *mod, void *module_region) ++{ ++ struct vm_struct **p, *tmp; ++ ++ if (!module_region) ++ return; ++ ++ if ((PAGE_SIZE-1) & (unsigned long)module_region) { ++ printk(KERN_ERR "Trying to module_free_exec() bad address (%p)\n", module_region); ++ WARN_ON(1); ++ return; ++ } ++ ++ write_lock(&vmlist_lock); ++ for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) ++ if (tmp->addr == module_region) ++ break; ++ ++ if (tmp) { ++ unsigned long cr0; ++ ++ pax_open_kernel(cr0); ++ memset(tmp->addr, 0xCC, tmp->size); ++ pax_close_kernel(cr0); ++ ++ *p = tmp->next; ++ kfree(tmp); ++ } ++ write_unlock(&vmlist_lock); ++ ++ if (!tmp) { ++ printk(KERN_ERR "Trying to module_free_exec() nonexistent vm area (%p)\n", ++ module_region); ++ WARN_ON(1); ++ } ++} ++#endif ++ + /* We don't need anything special. 
*/ + int module_frob_arch_sections(Elf_Ehdr *hdr, + Elf_Shdr *sechdrs, +@@ -63,14 +127,20 @@ int apply_relocate(Elf32_Shdr *sechdrs, + unsigned int i; + Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr; + Elf32_Sym *sym; +- uint32_t *location; ++ uint32_t *plocation, location; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif + + DEBUGP("Applying relocate section %u to %u\n", relsec, + sechdrs[relsec].sh_info); + for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { + /* This is where to make the change */ +- location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr +- + rel[i].r_offset; ++ plocation = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + rel[i].r_offset; ++ location = (uint32_t)plocation; ++ if (sechdrs[sechdrs[relsec].sh_info].sh_flags & SHF_EXECINSTR) ++ plocation = ktla_ktva((void *)plocation); + /* This is the symbol it is referring to. Note that all + undefined symbols have been resolved. */ + sym = (Elf32_Sym *)sechdrs[symindex].sh_addr +@@ -78,12 +148,32 @@ int apply_relocate(Elf32_Shdr *sechdrs, + + switch (ELF32_R_TYPE(rel[i].r_info)) { + case R_386_32: ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + /* We add the value into the location given */ +- *location += sym->st_value; ++ *plocation += sym->st_value; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + break; + case R_386_PC32: ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + /* Add the value, subtract its postition */ +- *location += sym->st_value - (uint32_t)location; ++ *plocation += sym->st_value - location; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + break; + default: + printk(KERN_ERR "module %s: Unknown relocation: %u\n", +diff -urNp linux-2.6.29/arch/x86/kernel/module_64.c linux-2.6.29/arch/x86/kernel/module_64.c +--- linux-2.6.29/arch/x86/kernel/module_64.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/module_64.c 2009-03-28 
14:26:19.000000000 -0400 +@@ -40,7 +40,7 @@ void module_free(struct module *mod, voi + table entries. */ + } + +-void *module_alloc(unsigned long size) ++static void *__module_alloc(unsigned long size, pgprot_t prot) + { + struct vm_struct *area; + +@@ -54,8 +54,31 @@ void *module_alloc(unsigned long size) + if (!area) + return NULL; + +- return __vmalloc_area(area, GFP_KERNEL, PAGE_KERNEL_EXEC); ++ return __vmalloc_area(area, GFP_KERNEL | __GFP_ZERO, prot); ++} ++ ++#ifdef CONFIG_PAX_KERNEXEC ++void *module_alloc(unsigned long size) ++{ ++ return __module_alloc(size, PAGE_KERNEL); ++} ++ ++void module_free_exec(struct module *mod, void *module_region) ++{ ++ module_free(mod, module_region); ++} ++ ++void *module_alloc_exec(unsigned long size) ++{ ++ return __module_alloc(size, PAGE_KERNEL_RX); + } ++#else ++void *module_alloc(unsigned long size) ++{ ++ return __module_alloc(size, PAGE_KERNEL_EXEC); ++} ++#endif ++ + #endif + + /* We don't need anything special. */ +@@ -77,7 +100,11 @@ int apply_relocate_add(Elf64_Shdr *sechd + Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr; + Elf64_Sym *sym; + void *loc; +- u64 val; ++ u64 val; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif + + DEBUGP("Applying relocate section %u to %u\n", relsec, + sechdrs[relsec].sh_info); +@@ -101,21 +128,61 @@ int apply_relocate_add(Elf64_Shdr *sechd + case R_X86_64_NONE: + break; + case R_X86_64_64: ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + *(u64 *)loc = val; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + break; + case R_X86_64_32: ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + *(u32 *)loc = val; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + if (val != *(u32 *)loc) + goto overflow; + break; + case R_X86_64_32S: ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + *(s32 *)loc = val; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + 
if ((s64)val != *(s32 *)loc) + goto overflow; + break; + case R_X86_64_PC32: + val -= (u64)loc; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + *(u32 *)loc = val; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + #if 0 + if ((s64)val != *(s32 *)loc) + goto overflow; +diff -urNp linux-2.6.29/arch/x86/kernel/paravirt.c linux-2.6.29/arch/x86/kernel/paravirt.c +--- linux-2.6.29/arch/x86/kernel/paravirt.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/paravirt.c 2009-03-28 14:26:19.000000000 -0400 +@@ -44,7 +44,7 @@ void _paravirt_nop(void) + { + } + +-static void __init default_banner(void) ++static void default_banner(void) + { + printk(KERN_INFO "Booting paravirtualized kernel on %s\n", + pv_info.name); +@@ -164,7 +164,7 @@ unsigned paravirt_patch_insns(void *insn + if (insn_len > len || start == NULL) + insn_len = len; + else +- memcpy(insnbuf, start, insn_len); ++ memcpy(insnbuf, ktla_ktva(start), insn_len); + + return insn_len; + } +@@ -294,21 +294,21 @@ void arch_flush_lazy_cpu_mode(void) + preempt_enable(); + } + +-struct pv_info pv_info = { ++struct pv_info pv_info __read_only = { + .name = "bare hardware", + .paravirt_enabled = 0, + .kernel_rpl = 0, + .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ + }; + +-struct pv_init_ops pv_init_ops = { ++struct pv_init_ops pv_init_ops __read_only = { + .patch = native_patch, + .banner = default_banner, + .arch_setup = paravirt_nop, + .memory_setup = machine_specific_memory_setup, + }; + +-struct pv_time_ops pv_time_ops = { ++struct pv_time_ops pv_time_ops __read_only = { + .time_init = hpet_time_init, + .get_wallclock = native_get_wallclock, + .set_wallclock = native_set_wallclock, +@@ -316,7 +316,7 @@ struct pv_time_ops pv_time_ops = { + .get_tsc_khz = native_calibrate_tsc, + }; + +-struct pv_irq_ops pv_irq_ops = { ++struct pv_irq_ops pv_irq_ops __read_only = { + .init_IRQ = native_init_IRQ, + .save_fl = native_save_fl, + .restore_fl 
= native_restore_fl, +@@ -329,7 +329,7 @@ struct pv_irq_ops pv_irq_ops = { + #endif + }; + +-struct pv_cpu_ops pv_cpu_ops = { ++struct pv_cpu_ops pv_cpu_ops __read_only = { + .cpuid = native_cpuid, + .get_debugreg = native_get_debugreg, + .set_debugreg = native_set_debugreg, +@@ -391,7 +391,7 @@ struct pv_cpu_ops pv_cpu_ops = { + }, + }; + +-struct pv_apic_ops pv_apic_ops = { ++struct pv_apic_ops pv_apic_ops __read_only = { + #ifdef CONFIG_X86_LOCAL_APIC + .setup_boot_clock = setup_boot_APIC_clock, + .setup_secondary_clock = setup_secondary_APIC_clock, +@@ -399,7 +399,7 @@ struct pv_apic_ops pv_apic_ops = { + #endif + }; + +-struct pv_mmu_ops pv_mmu_ops = { ++struct pv_mmu_ops pv_mmu_ops __read_only = { + #ifndef CONFIG_X86_64 + .pagetable_setup_start = native_pagetable_setup_start, + .pagetable_setup_done = native_pagetable_setup_done, +diff -urNp linux-2.6.29/arch/x86/kernel/paravirt-spinlocks.c linux-2.6.29/arch/x86/kernel/paravirt-spinlocks.c +--- linux-2.6.29/arch/x86/kernel/paravirt-spinlocks.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/paravirt-spinlocks.c 2009-03-28 14:26:19.000000000 -0400 +@@ -13,7 +13,7 @@ default_spin_lock_flags(raw_spinlock_t * + __raw_spin_lock(lock); + } + +-struct pv_lock_ops pv_lock_ops = { ++struct pv_lock_ops pv_lock_ops __read_only = { + #ifdef CONFIG_SMP + .spin_is_locked = __ticket_spin_is_locked, + .spin_is_contended = __ticket_spin_is_contended, +diff -urNp linux-2.6.29/arch/x86/kernel/process_32.c linux-2.6.29/arch/x86/kernel/process_32.c +--- linux-2.6.29/arch/x86/kernel/process_32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/process_32.c 2009-03-28 14:26:19.000000000 -0400 +@@ -66,8 +66,10 @@ asmlinkage void ret_from_fork(void) __as + DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; + EXPORT_PER_CPU_SYMBOL(current_task); + ++#ifdef CONFIG_SMP + DEFINE_PER_CPU(int, cpu_number); + EXPORT_PER_CPU_SYMBOL(cpu_number); ++#endif + + /* + * Return saved PC of a 
blocked thread. +@@ -75,6 +77,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_number); + unsigned long thread_saved_pc(struct task_struct *tsk) + { + return ((unsigned long *)tsk->thread.sp)[3]; ++//XXX return tsk->thread.eip; + } + + #ifndef CONFIG_SMP +@@ -129,7 +132,7 @@ void __show_regs(struct pt_regs *regs, i + unsigned short ss, gs; + const char *board; + +- if (user_mode_vm(regs)) { ++ if (user_mode(regs)) { + sp = regs->sp; + ss = regs->ss & 0xffff; + savesegment(gs, gs); +@@ -210,8 +213,8 @@ int kernel_thread(int (*fn)(void *), voi + regs.bx = (unsigned long) fn; + regs.dx = (unsigned long) arg; + +- regs.ds = __USER_DS; +- regs.es = __USER_DS; ++ regs.ds = __KERNEL_DS; ++ regs.es = __KERNEL_DS; + regs.fs = __KERNEL_PERCPU; + regs.orig_ax = -1; + regs.ip = (unsigned long) kernel_thread_helper; +@@ -233,7 +236,7 @@ void exit_thread(void) + struct task_struct *tsk = current; + struct thread_struct *t = &tsk->thread; + int cpu = get_cpu(); +- struct tss_struct *tss = &per_cpu(init_tss, cpu); ++ struct tss_struct *tss = init_tss + cpu; + + kfree(t->io_bitmap_ptr); + t->io_bitmap_ptr = NULL; +@@ -256,6 +259,7 @@ void flush_thread(void) + { + struct task_struct *tsk = current; + ++ loadsegment(gs, 0); + tsk->thread.debugreg0 = 0; + tsk->thread.debugreg1 = 0; + tsk->thread.debugreg2 = 0; +@@ -295,7 +299,7 @@ int copy_thread(int nr, unsigned long cl + struct task_struct *tsk; + int err; + +- childregs = task_pt_regs(p); ++ childregs = task_stack_page(p) + THREAD_SIZE - sizeof(struct pt_regs) - 8; + *childregs = *regs; + childregs->ax = 0; + childregs->sp = sp; +@@ -324,6 +328,7 @@ int copy_thread(int nr, unsigned long cl + * Set a new TLS for the child thread? + */ + if (clone_flags & CLONE_SETTLS) ++//XXX needs set_fs()? 
+ err = do_set_thread_area(p, -1, + (struct user_desc __user *)childregs->si, 0); + +@@ -514,7 +519,7 @@ __switch_to(struct task_struct *prev_p, + struct thread_struct *prev = &prev_p->thread, + *next = &next_p->thread; + int cpu = smp_processor_id(); +- struct tss_struct *tss = &per_cpu(init_tss, cpu); ++ struct tss_struct *tss = init_tss + cpu; + + /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ + +@@ -542,6 +547,11 @@ __switch_to(struct task_struct *prev_p, + */ + savesegment(gs, prev->gs); + ++#ifdef CONFIG_PAX_MEMORY_UDEREF ++ if (!segment_eq(task_thread_info(prev_p)->addr_limit, task_thread_info(next_p)->addr_limit)) ++ __set_fs(task_thread_info(next_p)->addr_limit, cpu); ++#endif ++ + /* + * Load the per-thread Thread-Local Storage descriptor. + */ +@@ -680,15 +690,27 @@ unsigned long get_wchan(struct task_stru + return 0; + } + +-unsigned long arch_align_stack(unsigned long sp) ++#ifdef CONFIG_PAX_RANDKSTACK ++asmlinkage void pax_randomize_kstack(void) + { +- if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) +- sp -= get_random_int() % 8192; +- return sp & ~0xf; +-} ++ struct thread_struct *thread = &current->thread; ++ unsigned long time; + +-unsigned long arch_randomize_brk(struct mm_struct *mm) +-{ +- unsigned long range_end = mm->brk + 0x02000000; +- return randomize_range(mm->brk, range_end, 0) ? 
: mm->brk; ++ if (!randomize_va_space) ++ return; ++ ++ rdtscl(time); ++ ++ /* P4 seems to return a 0 LSB, ignore it */ ++#ifdef CONFIG_MPENTIUM4 ++ time &= 0x1EUL; ++ time <<= 2; ++#else ++ time &= 0xFUL; ++ time <<= 3; ++#endif ++ ++ thread->sp0 ^= time; ++ load_sp0(init_tss + smp_processor_id(), thread); + } ++#endif +diff -urNp linux-2.6.29/arch/x86/kernel/process_64.c linux-2.6.29/arch/x86/kernel/process_64.c +--- linux-2.6.29/arch/x86/kernel/process_64.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/process_64.c 2009-03-28 14:26:19.000000000 -0400 +@@ -112,6 +112,8 @@ static inline void play_dead(void) + void cpu_idle(void) + { + current_thread_info()->status |= TS_POLLING; ++ current->stack_canary = pax_get_random_long(); ++ write_pda(stack_canary, current->stack_canary); + /* endless idle loop with no priority at all */ + while (1) { + tick_nohz_stop_sched_tick(1); +@@ -230,7 +232,7 @@ void exit_thread(void) + struct thread_struct *t = &me->thread; + + if (me->thread.io_bitmap_ptr) { +- struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); ++ struct tss_struct *tss = init_tss + get_cpu(); + + kfree(t->io_bitmap_ptr); + t->io_bitmap_ptr = NULL; +@@ -537,7 +539,7 @@ __switch_to(struct task_struct *prev_p, + struct thread_struct *prev = &prev_p->thread; + struct thread_struct *next = &next_p->thread; + int cpu = smp_processor_id(); +- struct tss_struct *tss = &per_cpu(init_tss, cpu); ++ struct tss_struct *tss = init_tss + cpu; + unsigned fsindex, gsindex; + + /* we're going to use this soon, after a few expensive things */ +@@ -626,7 +628,6 @@ __switch_to(struct task_struct *prev_p, + (unsigned long)task_stack_page(next_p) + + THREAD_SIZE - PDA_STACKOFFSET); + #ifdef CONFIG_CC_STACKPROTECTOR +- write_pda(stack_canary, next_p->stack_canary); + /* + * Build time only check to make sure the stack_canary is at + * offset 40 in the pda; this is a gcc ABI requirement +@@ -725,12 +726,11 @@ unsigned long get_wchan(struct task_stru + if (!p 
|| p == current || p->state == TASK_RUNNING) + return 0; + stack = (unsigned long)task_stack_page(p); +- if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE) ++ if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE-8-sizeof(u64)) + return 0; + fp = *(u64 *)(p->thread.sp); + do { +- if (fp < (unsigned long)stack || +- fp >= (unsigned long)stack+THREAD_SIZE) ++ if (fp < stack || fp > stack+THREAD_SIZE-8-sizeof(u64)) + return 0; + ip = *(u64 *)(fp+8); + if (!in_sched_functions(ip)) +@@ -839,16 +839,3 @@ long sys_arch_prctl(int code, unsigned l + { + return do_arch_prctl(current, code, addr); + } +- +-unsigned long arch_align_stack(unsigned long sp) +-{ +- if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) +- sp -= get_random_int() % 8192; +- return sp & ~0xf; +-} +- +-unsigned long arch_randomize_brk(struct mm_struct *mm) +-{ +- unsigned long range_end = mm->brk + 0x02000000; +- return randomize_range(mm->brk, range_end, 0) ? : mm->brk; +-} +diff -urNp linux-2.6.29/arch/x86/kernel/ptrace.c linux-2.6.29/arch/x86/kernel/ptrace.c +--- linux-2.6.29/arch/x86/kernel/ptrace.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/ptrace.c 2009-03-28 14:26:19.000000000 -0400 +@@ -1378,7 +1378,7 @@ void send_sigtrap(struct task_struct *ts + info.si_code = si_code; + + /* User-mode ip? */ +- info.si_addr = user_mode_vm(regs) ? (void __user *) regs->ip : NULL; ++ info.si_addr = user_mode(regs) ? 
(void __user *) regs->ip : NULL; + + /* Send us the fake SIGTRAP */ + force_sig_info(SIGTRAP, &info, tsk); +diff -urNp linux-2.6.29/arch/x86/kernel/reboot.c linux-2.6.29/arch/x86/kernel/reboot.c +--- linux-2.6.29/arch/x86/kernel/reboot.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/reboot.c 2009-03-28 14:26:19.000000000 -0400 +@@ -32,7 +32,7 @@ void (*pm_power_off)(void); + EXPORT_SYMBOL(pm_power_off); + + static const struct desc_ptr no_idt = {}; +-static int reboot_mode; ++static unsigned short reboot_mode; + enum reboot_type reboot_type = BOOT_KBD; + int reboot_force; + +@@ -225,7 +225,7 @@ static struct dmi_system_id __initdata r + DMI_MATCH(DMI_PRODUCT_NAME, "Dell XPS710"), + }, + }, +- { } ++ { NULL, NULL, {{0, {0}}}, NULL} + }; + + static int __init reboot_init(void) +@@ -241,12 +241,12 @@ core_initcall(reboot_init); + controller to pulse the CPU reset line, which is more thorough, but + doesn't work with at least one type of 486 motherboard. It is easy + to stop this code working; hence the copious comments. */ +-static const unsigned long long +-real_mode_gdt_entries [3] = ++static struct desc_struct ++real_mode_gdt_entries [3] __read_only = + { +- 0x0000000000000000ULL, /* Null descriptor */ +- 0x00009b000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ +- 0x000093000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ ++ {{{0x00000000, 0x00000000}}}, /* Null descriptor */ ++ {{{0x0000ffff, 0x00009b00}}}, /* 16-bit real-mode 64k code at 0x00000000 */ ++ {{{0x0100ffff, 0x00009300}}} /* 16-bit real-mode 64k data at 0x00000100 */ + }; + + static const struct desc_ptr +@@ -295,7 +295,7 @@ static const unsigned char jump_to_bios + * specified by the code and length parameters. + * We assume that length will aways be less that 100! 
+ */ +-void machine_real_restart(const unsigned char *code, int length) ++void machine_real_restart(const unsigned char *code, unsigned int length) + { + local_irq_disable(); + +@@ -315,8 +315,8 @@ void machine_real_restart(const unsigned + /* Remap the kernel at virtual address zero, as well as offset zero + from the kernel segment. This assumes the kernel segment starts at + virtual address PAGE_OFFSET. */ +- memcpy(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, +- sizeof(swapper_pg_dir [0]) * KERNEL_PGD_PTRS); ++ clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, ++ min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); + + /* + * Use `swapper_pg_dir' as our page directory. +@@ -328,16 +328,15 @@ void machine_real_restart(const unsigned + boot)". This seems like a fairly standard thing that gets set by + REBOOT.COM programs, and the previous reset routine did this + too. */ +- *((unsigned short *)0x472) = reboot_mode; ++ *(unsigned short *)(__va(0x472)) = reboot_mode; + + /* For the switch to real mode, copy some code to low memory. It has + to be in the first 64k because it is running in 16-bit mode, and it + has to have the same physical and virtual address, because it turns + off paging. Copy it near the end of the first page, out of the way + of BIOS variables. */ +- memcpy((void *)(0x1000 - sizeof(real_mode_switch) - 100), +- real_mode_switch, sizeof (real_mode_switch)); +- memcpy((void *)(0x1000 - 100), code, length); ++ memcpy(__va(0x1000 - sizeof (real_mode_switch) - 100), real_mode_switch, sizeof (real_mode_switch)); ++ memcpy(__va(0x1000 - 100), code, length); + + /* Set up the IDT for real mode. 
*/ + load_idt(&real_mode_idt); +diff -urNp linux-2.6.29/arch/x86/kernel/setup.c linux-2.6.29/arch/x86/kernel/setup.c +--- linux-2.6.29/arch/x86/kernel/setup.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/setup.c 2009-03-28 14:26:19.000000000 -0400 +@@ -712,8 +712,8 @@ void __init setup_arch(char **cmdline_p) + + if (!boot_params.hdr.root_flags) + root_mountflags &= ~MS_RDONLY; +- init_mm.start_code = (unsigned long) _text; +- init_mm.end_code = (unsigned long) _etext; ++ init_mm.start_code = ktla_ktva((unsigned long) _text); ++ init_mm.end_code = ktla_ktva((unsigned long) _etext); + init_mm.end_data = (unsigned long) _edata; + #ifdef CONFIG_X86_32 + init_mm.brk = init_pg_tables_end + PAGE_OFFSET; +@@ -721,9 +721,9 @@ void __init setup_arch(char **cmdline_p) + init_mm.brk = (unsigned long) &_end; + #endif + +- code_resource.start = virt_to_phys(_text); +- code_resource.end = virt_to_phys(_etext)-1; +- data_resource.start = virt_to_phys(_etext); ++ code_resource.start = virt_to_phys(ktla_ktva(_text)); ++ code_resource.end = virt_to_phys(ktla_ktva(_etext))-1; ++ data_resource.start = virt_to_phys(_data); + data_resource.end = virt_to_phys(_edata)-1; + bss_resource.start = virt_to_phys(&__bss_start); + bss_resource.end = virt_to_phys(&__bss_stop)-1; +diff -urNp linux-2.6.29/arch/x86/kernel/setup_percpu.c linux-2.6.29/arch/x86/kernel/setup_percpu.c +--- linux-2.6.29/arch/x86/kernel/setup_percpu.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/setup_percpu.c 2009-03-28 14:26:19.000000000 -0400 +@@ -197,7 +197,11 @@ void __init setup_per_cpu_areas(void) + cpu, node, __pa(ptr)); + } + #endif ++#ifdef CONFIG_X86_32 ++ __per_cpu_offset[cpu] = ptr - __per_cpu_start; ++#else + per_cpu_offset(cpu) = ptr - __per_cpu_start; ++#endif + memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + } + +diff -urNp linux-2.6.29/arch/x86/kernel/signal.c linux-2.6.29/arch/x86/kernel/signal.c +--- linux-2.6.29/arch/x86/kernel/signal.c 
2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/signal.c 2009-03-28 14:26:19.000000000 -0400 +@@ -287,9 +287,9 @@ __setup_frame(int sig, struct k_sigactio + } + + if (current->mm->context.vdso) +- restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn); ++ restorer = (void __user *)VDSO32_SYMBOL(current->mm->context.vdso, sigreturn); + else +- restorer = &frame->retcode; ++ restorer = (void __user *)&frame->retcode; + if (ka->sa.sa_flags & SA_RESTORER) + restorer = ka->sa.sa_restorer; + +@@ -360,7 +360,7 @@ static int __setup_rt_frame(int sig, str + return -EFAULT; + + /* Set up to return from userspace. */ +- restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); ++ restorer = (void __user *)VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); + if (ka->sa.sa_flags & SA_RESTORER) + restorer = ka->sa.sa_restorer; + err |= __put_user(restorer, &frame->pretcode); +@@ -811,7 +811,7 @@ static void do_signal(struct pt_regs *re + * X86_32: vm86 regs switched out by assembly code before reaching + * here, so testing against kernel CS suffices. 
+ */ +- if (!user_mode(regs)) ++ if (!user_mode_novm(regs)) + return; + + if (current_thread_info()->status & TS_RESTORE_SIGMASK) +diff -urNp linux-2.6.29/arch/x86/kernel/smpboot.c linux-2.6.29/arch/x86/kernel/smpboot.c +--- linux-2.6.29/arch/x86/kernel/smpboot.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/smpboot.c 2009-03-28 14:26:19.000000000 -0400 +@@ -806,6 +806,11 @@ static int __cpuinit do_boot_cpu(int api + .cpu = cpu, + .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), + }; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + INIT_WORK(&c_idle.work, do_fork_idle); + + #ifdef CONFIG_X86_64 +@@ -856,7 +861,17 @@ do_rest: + cpu_pda(cpu)->pcurrent = c_idle.idle; + clear_tsk_thread_flag(c_idle.idle, TIF_FORK); + #endif ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + initial_code = (unsigned long)start_secondary; + stack_start.sp = (void *) c_idle.idle->thread.sp; + +diff -urNp linux-2.6.29/arch/x86/kernel/smpcommon.c linux-2.6.29/arch/x86/kernel/smpcommon.c +--- linux-2.6.29/arch/x86/kernel/smpcommon.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/smpcommon.c 2009-03-28 14:26:19.000000000 -0400 +@@ -3,9 +3,10 @@ + */ + #include + #include ++#include + + #ifdef CONFIG_X86_32 +-DEFINE_PER_CPU(unsigned long, this_cpu_off); ++DEFINE_PER_CPU(unsigned long, this_cpu_off) = (unsigned long)__per_cpu_start; + EXPORT_PER_CPU_SYMBOL(this_cpu_off); + + /* +@@ -15,16 +16,19 @@ EXPORT_PER_CPU_SYMBOL(this_cpu_off); + */ + __cpuinit void init_gdt(int cpu) + { +- struct desc_struct gdt; ++ struct desc_struct d, *gdt = get_cpu_gdt_table(cpu); ++ unsigned long base, limit; + +- pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF, +- 0x2 | DESCTYPE_S, 0x8); +- gdt.s = 1; ++ base = per_cpu_offset(cpu); ++ limit = PERCPU_ENOUGH_ROOM - 1; ++ if (limit < 
64*1024) ++ pack_descriptor(&d, base, limit, 0x80 | DESCTYPE_S | 0x3, 0x4); ++ else ++ pack_descriptor(&d, base, limit >> PAGE_SHIFT, 0x80 | DESCTYPE_S | 0x3, 0xC); + +- write_gdt_entry(get_cpu_gdt_table(cpu), +- GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); ++ write_gdt_entry(gdt, GDT_ENTRY_PERCPU, &d, DESCTYPE_S); + +- per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; ++ per_cpu(this_cpu_off, cpu) = base; + per_cpu(cpu_number, cpu) = cpu; + } + #endif +diff -urNp linux-2.6.29/arch/x86/kernel/step.c linux-2.6.29/arch/x86/kernel/step.c +--- linux-2.6.29/arch/x86/kernel/step.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/step.c 2009-03-28 14:26:19.000000000 -0400 +@@ -23,22 +23,20 @@ unsigned long convert_ip_to_linear(struc + * and APM bios ones we just ignore here. + */ + if ((seg & SEGMENT_TI_MASK) == SEGMENT_LDT) { +- u32 *desc; ++ struct desc_struct *desc; + unsigned long base; + +- seg &= ~7UL; ++ seg >>= 3; + + mutex_lock(&child->mm->context.lock); +- if (unlikely((seg >> 3) >= child->mm->context.size)) +- addr = -1L; /* bogus selector, access would fault */ ++ if (unlikely(seg >= child->mm->context.size)) ++ addr = -EINVAL; + else { +- desc = child->mm->context.ldt + seg; +- base = ((desc[0] >> 16) | +- ((desc[1] & 0xff) << 16) | +- (desc[1] & 0xff000000)); ++ desc = &child->mm->context.ldt[seg]; ++ base = (desc->a >> 16) | ((desc->b & 0xff) << 16) | (desc->b & 0xff000000); + + /* 16-bit code segment? */ +- if (!((desc[1] >> 22) & 1)) ++ if (!((desc->b >> 22) & 1)) + addr &= 0xffff; + addr += base; + } +@@ -54,6 +52,9 @@ static int is_setting_trap_flag(struct t + unsigned char opcode[15]; + unsigned long addr = convert_ip_to_linear(child, regs); + ++ if (addr == -EINVAL) ++ return 0; ++ + copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); + for (i = 0; i < copied; i++) { + switch (opcode[i]) { +@@ -75,7 +76,7 @@ static int is_setting_trap_flag(struct t + + #ifdef CONFIG_X86_64 + case 0x40 ... 
0x4f: +- if (regs->cs != __USER_CS) ++ if ((regs->cs & 0xffff) != __USER_CS) + /* 32-bit mode: register increment */ + return 0; + /* 64-bit mode: REX prefix */ +diff -urNp linux-2.6.29/arch/x86/kernel/syscall_table_32.S linux-2.6.29/arch/x86/kernel/syscall_table_32.S +--- linux-2.6.29/arch/x86/kernel/syscall_table_32.S 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/syscall_table_32.S 2009-03-28 14:26:19.000000000 -0400 +@@ -1,3 +1,4 @@ ++.section .rodata,"a",@progbits + ENTRY(sys_call_table) + .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ + .long sys_exit +diff -urNp linux-2.6.29/arch/x86/kernel/sys_i386_32.c linux-2.6.29/arch/x86/kernel/sys_i386_32.c +--- linux-2.6.29/arch/x86/kernel/sys_i386_32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/sys_i386_32.c 2009-03-28 14:26:19.000000000 -0400 +@@ -24,6 +24,21 @@ + + #include + ++int i386_mmap_check(unsigned long addr, unsigned long len, unsigned long flags) ++{ ++ unsigned long pax_task_size = TASK_SIZE; ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (current->mm->pax_flags & MF_PAX_SEGMEXEC) ++ pax_task_size = SEGMEXEC_TASK_SIZE; ++#endif ++ ++ if (len > pax_task_size || addr > pax_task_size - len) ++ return -EINVAL; ++ ++ return 0; ++} ++ + asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) +@@ -83,6 +98,205 @@ out: + return err; + } + ++unsigned long ++arch_get_unmapped_area(struct file *filp, unsigned long addr, ++ unsigned long len, unsigned long pgoff, unsigned long flags) ++{ ++ struct mm_struct *mm = current->mm; ++ struct vm_area_struct *vma; ++ unsigned long start_addr, pax_task_size = TASK_SIZE; ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (mm->pax_flags & MF_PAX_SEGMEXEC) ++ pax_task_size = SEGMEXEC_TASK_SIZE; ++#endif ++ ++ if (len > pax_task_size) ++ return -ENOMEM; ++ ++ if (flags & MAP_FIXED) ++ return addr; ++ ++#ifdef 
CONFIG_PAX_RANDMMAP ++ if (!(mm->pax_flags & MF_PAX_RANDMMAP)) ++#endif ++ ++ if (addr) { ++ addr = PAGE_ALIGN(addr); ++ vma = find_vma(mm, addr); ++ if (pax_task_size - len >= addr && ++ (!vma || addr + len <= vma->vm_start)) ++ return addr; ++ } ++ if (len > mm->cached_hole_size) { ++ start_addr = addr = mm->free_area_cache; ++ } else { ++ start_addr = addr = mm->mmap_base; ++ mm->cached_hole_size = 0; ++ } ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ if (!nx_enabled && (mm->pax_flags & MF_PAX_PAGEEXEC) && (flags & MAP_EXECUTABLE) && start_addr >= mm->mmap_base) { ++ start_addr = 0x00110000UL; ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ if (mm->pax_flags & MF_PAX_RANDMMAP) ++ start_addr += mm->delta_mmap & 0x03FFF000UL; ++#endif ++ ++ if (mm->start_brk <= start_addr && start_addr < mm->mmap_base) ++ start_addr = addr = mm->mmap_base; ++ else ++ addr = start_addr; ++ } ++#endif ++ ++full_search: ++ for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { ++ /* At this point: (!vma || addr < vma->vm_end). */ ++ if (pax_task_size - len < addr) { ++ /* ++ * Start a new search - just in case we missed ++ * some holes. 
++ */ ++ if (start_addr != mm->mmap_base) { ++ start_addr = addr = mm->mmap_base; ++ mm->cached_hole_size = 0; ++ goto full_search; ++ } ++ return -ENOMEM; ++ } ++ if (!vma || addr + len <= vma->vm_start) { ++ /* ++ * Remember the place where we stopped the search: ++ */ ++ mm->free_area_cache = addr + len; ++ return addr; ++ } ++ if (addr + mm->cached_hole_size < vma->vm_start) ++ mm->cached_hole_size = vma->vm_start - addr; ++ addr = vma->vm_end; ++ if (mm->start_brk <= addr && addr < mm->mmap_base) { ++ start_addr = addr = mm->mmap_base; ++ mm->cached_hole_size = 0; ++ goto full_search; ++ } ++ } ++} ++ ++unsigned long ++arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, ++ const unsigned long len, const unsigned long pgoff, ++ const unsigned long flags) ++{ ++ struct vm_area_struct *vma; ++ struct mm_struct *mm = current->mm; ++ unsigned long base = mm->mmap_base, addr = addr0, pax_task_size = TASK_SIZE; ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (mm->pax_flags & MF_PAX_SEGMEXEC) ++ pax_task_size = SEGMEXEC_TASK_SIZE; ++#endif ++ ++ /* requested length too big for entire address space */ ++ if (len > pax_task_size) ++ return -ENOMEM; ++ ++ if (flags & MAP_FIXED) ++ return addr; ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ if (!nx_enabled && (mm->pax_flags & MF_PAX_PAGEEXEC) && (flags & MAP_EXECUTABLE)) ++ goto bottomup; ++#endif ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ if (!(mm->pax_flags & MF_PAX_RANDMMAP)) ++#endif ++ ++ /* requesting a specific address */ ++ if (addr) { ++ addr = PAGE_ALIGN(addr); ++ vma = find_vma(mm, addr); ++ if (pax_task_size - len >= addr && ++ (!vma || addr + len <= vma->vm_start)) ++ return addr; ++ } ++ ++ /* check if free_area_cache is useful for us */ ++ if (len <= mm->cached_hole_size) { ++ mm->cached_hole_size = 0; ++ mm->free_area_cache = mm->mmap_base; ++ } ++ ++ /* either no address requested or can't fit in requested address hole */ ++ addr = mm->free_area_cache; ++ ++ /* make sure it can fit in the remaining address 
space */ ++ if (addr > len) { ++ vma = find_vma(mm, addr-len); ++ if (!vma || addr <= vma->vm_start) ++ /* remember the address as a hint for next time */ ++ return (mm->free_area_cache = addr-len); ++ } ++ ++ if (mm->mmap_base < len) ++ goto bottomup; ++ ++ addr = mm->mmap_base-len; ++ ++ do { ++ /* ++ * Lookup failure means no vma is above this address, ++ * else if new region fits below vma->vm_start, ++ * return with success: ++ */ ++ vma = find_vma(mm, addr); ++ if (!vma || addr+len <= vma->vm_start) ++ /* remember the address as a hint for next time */ ++ return (mm->free_area_cache = addr); ++ ++ /* remember the largest hole we saw so far */ ++ if (addr + mm->cached_hole_size < vma->vm_start) ++ mm->cached_hole_size = vma->vm_start - addr; ++ ++ /* try just below the current vma->vm_start */ ++ addr = vma->vm_start-len; ++ } while (len < vma->vm_start); ++ ++bottomup: ++ /* ++ * A failed mmap() very likely causes application failure, ++ * so fall back to the bottom-up function here. This scenario ++ * can happen with large stack limits and large mmap() ++ * allocations. 
++ */ ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (mm->pax_flags & MF_PAX_SEGMEXEC) ++ mm->mmap_base = SEGMEXEC_TASK_UNMAPPED_BASE; ++ else ++#endif ++ ++ mm->mmap_base = TASK_UNMAPPED_BASE; ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ if (mm->pax_flags & MF_PAX_RANDMMAP) ++ mm->mmap_base += mm->delta_mmap; ++#endif ++ ++ mm->free_area_cache = mm->mmap_base; ++ mm->cached_hole_size = ~0UL; ++ addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); ++ /* ++ * Restore the topdown base: ++ */ ++ mm->mmap_base = base; ++ mm->free_area_cache = base; ++ mm->cached_hole_size = ~0UL; ++ ++ return addr; ++} + + struct sel_arg_struct { + unsigned long n; +diff -urNp linux-2.6.29/arch/x86/kernel/sys_x86_64.c linux-2.6.29/arch/x86/kernel/sys_x86_64.c +--- linux-2.6.29/arch/x86/kernel/sys_x86_64.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/sys_x86_64.c 2009-03-28 14:26:19.000000000 -0400 +@@ -47,8 +47,8 @@ out: + return error; + } + +-static void find_start_end(unsigned long flags, unsigned long *begin, +- unsigned long *end) ++static void find_start_end(struct mm_struct *mm, unsigned long flags, ++ unsigned long *begin, unsigned long *end) + { + if (!test_thread_flag(TIF_IA32) && (flags & MAP_32BIT)) { + unsigned long new_begin; +@@ -67,7 +67,7 @@ static void find_start_end(unsigned long + *begin = new_begin; + } + } else { +- *begin = TASK_UNMAPPED_BASE; ++ *begin = mm->mmap_base; + *end = TASK_SIZE; + } + } +@@ -84,11 +84,15 @@ arch_get_unmapped_area(struct file *filp + if (flags & MAP_FIXED) + return addr; + +- find_start_end(flags, &begin, &end); ++ find_start_end(mm, flags, &begin, &end); + + if (len > end) + return -ENOMEM; + ++#ifdef CONFIG_PAX_RANDMMAP ++ if (!(mm->pax_flags & MF_PAX_RANDMMAP)) ++#endif ++ + if (addr) { + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); +@@ -143,7 +147,7 @@ arch_get_unmapped_area_topdown(struct fi + { + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; +- unsigned long addr = addr0; ++ unsigned 
long base = mm->mmap_base, addr = addr0; + + /* requested length too big for entire address space */ + if (len > TASK_SIZE) +@@ -156,6 +160,10 @@ arch_get_unmapped_area_topdown(struct fi + if (!test_thread_flag(TIF_IA32) && (flags & MAP_32BIT)) + goto bottomup; + ++#ifdef CONFIG_PAX_RANDMMAP ++ if (!(mm->pax_flags & MF_PAX_RANDMMAP)) ++#endif ++ + /* requesting a specific address */ + if (addr) { + addr = PAGE_ALIGN(addr); +@@ -213,13 +221,21 @@ bottomup: + * can happen with large stack limits and large mmap() + * allocations. + */ ++ mm->mmap_base = TASK_UNMAPPED_BASE; ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ if (mm->pax_flags & MF_PAX_RANDMMAP) ++ mm->mmap_base += mm->delta_mmap; ++#endif ++ ++ mm->free_area_cache = mm->mmap_base; + mm->cached_hole_size = ~0UL; +- mm->free_area_cache = TASK_UNMAPPED_BASE; + addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); + /* + * Restore the topdown base: + */ +- mm->free_area_cache = mm->mmap_base; ++ mm->mmap_base = base; ++ mm->free_area_cache = base; + mm->cached_hole_size = ~0UL; + + return addr; +diff -urNp linux-2.6.29/arch/x86/kernel/time_32.c linux-2.6.29/arch/x86/kernel/time_32.c +--- linux-2.6.29/arch/x86/kernel/time_32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/time_32.c 2009-03-28 14:26:19.000000000 -0400 +@@ -47,22 +47,32 @@ unsigned long profile_pc(struct pt_regs + unsigned long pc = instruction_pointer(regs); + + #ifdef CONFIG_SMP +- if (!user_mode_vm(regs) && in_lock_functions(pc)) { ++ if (!user_mode(regs) && in_lock_functions(pc)) { + #ifdef CONFIG_FRAME_POINTER +- return *(unsigned long *)(regs->bp + sizeof(long)); ++ return ktla_ktva(*(unsigned long *)(regs->bp + sizeof(long))); + #else + unsigned long *sp = (unsigned long *)®s->sp; + + /* Return address is either directly at stack pointer + or above a saved flags. Eflags has bits 22-31 zero, + kernel addresses don't. 
*/ ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ return ktla_ktva(sp[0]); ++#else + if (sp[0] >> 22) + return sp[0]; + if (sp[1] >> 22) + return sp[1]; + #endif ++ ++#endif + } + #endif ++ ++ if (!user_mode(regs)) ++ pc = ktla_ktva(pc); ++ + return pc; + } + EXPORT_SYMBOL(profile_pc); +diff -urNp linux-2.6.29/arch/x86/kernel/time_64.c linux-2.6.29/arch/x86/kernel/time_64.c +--- linux-2.6.29/arch/x86/kernel/time_64.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/time_64.c 2009-03-28 14:26:19.000000000 -0400 +@@ -34,7 +34,7 @@ unsigned long profile_pc(struct pt_regs + /* Assume the lock function has either no stack frame or a copy + of flags from PUSHF + Eflags always has bits 22 and up cleared unlike kernel addresses. */ +- if (!user_mode_vm(regs) && in_lock_functions(pc)) { ++ if (!user_mode(regs) && in_lock_functions(pc)) { + #ifdef CONFIG_FRAME_POINTER + return *(unsigned long *)(regs->bp + sizeof(long)); + #else +diff -urNp linux-2.6.29/arch/x86/kernel/tlb_32.c linux-2.6.29/arch/x86/kernel/tlb_32.c +--- linux-2.6.29/arch/x86/kernel/tlb_32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/tlb_32.c 2009-03-28 14:26:19.000000000 -0400 +@@ -5,7 +5,7 @@ + #include + + DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) +- ____cacheline_aligned = { &init_mm, 0, }; ++ ____cacheline_aligned = { &init_mm, 0, {0} }; + + /* must come after the send_IPI functions above for inlining */ + #include +diff -urNp linux-2.6.29/arch/x86/kernel/tls.c linux-2.6.29/arch/x86/kernel/tls.c +--- linux-2.6.29/arch/x86/kernel/tls.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/tls.c 2009-03-28 14:26:19.000000000 -0400 +@@ -85,6 +85,11 @@ int do_set_thread_area(struct task_struc + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) + return -EINVAL; + ++#ifdef CONFIG_PAX_SEGMEXEC ++ if ((p->mm->pax_flags & MF_PAX_SEGMEXEC) && (info.contents & MODIFY_LDT_CONTENTS_CODE)) ++ return -EINVAL; ++#endif ++ + set_tls_desc(p, idx, &info, 1); + 
+ return 0; +diff -urNp linux-2.6.29/arch/x86/kernel/traps.c linux-2.6.29/arch/x86/kernel/traps.c +--- linux-2.6.29/arch/x86/kernel/traps.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/traps.c 2009-03-28 14:26:19.000000000 -0400 +@@ -71,14 +71,6 @@ asmlinkage int system_call(void); + + /* Do we ignore FPU interrupts ? */ + char ignore_fpu_irq; +- +-/* +- * The IDT has to be page-aligned to simplify the Pentium +- * F0 0F bug workaround.. We have a special link segment +- * for this. +- */ +-gate_desc idt_table[256] +- __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; + #endif + + DECLARE_BITMAP(used_vectors, NR_VECTORS); +@@ -116,7 +108,7 @@ static inline void preempt_conditional_c + static inline void + die_if_kernel(const char *str, struct pt_regs *regs, long err) + { +- if (!user_mode_vm(regs)) ++ if (!user_mode(regs)) + die(str, regs, err); + } + +@@ -133,7 +125,7 @@ static int lazy_iobitmap_copy(void) + int cpu; + + cpu = get_cpu(); +- tss = &per_cpu(init_tss, cpu); ++ tss = init_tss + cpu; + thread = ¤t->thread; + + if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && +@@ -169,7 +161,7 @@ do_trap(int trapnr, int signr, char *str + struct task_struct *tsk = current; + + #ifdef CONFIG_X86_32 +- if (regs->flags & X86_VM_MASK) { ++ if (v8086_mode(regs)) { + /* + * traps 0, 1, 3, 4, and 5 should be forwarded to vm86. + * On nmi (interrupt 2), do_trap should not be called. 
+@@ -180,7 +172,7 @@ do_trap(int trapnr, int signr, char *str + } + #endif + +- if (!user_mode(regs)) ++ if (!user_mode_novm(regs)) + goto kernel_trap; + + #ifdef CONFIG_X86_32 +@@ -222,6 +214,12 @@ kernel_trap: + tsk->thread.trap_no = trapnr; + die(str, regs, error_code); + } ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ if (trapnr == 4) ++ pax_report_refcount_overflow(regs); ++#endif ++ + return; + + #ifdef CONFIG_X86_32 +@@ -315,14 +313,30 @@ do_general_protection(struct pt_regs *re + return; + } + +- if (regs->flags & X86_VM_MASK) ++ if (v8086_mode(regs)) + goto gp_in_vm86; + #endif + + tsk = current; +- if (!user_mode(regs)) ++ if (!user_mode_novm(regs)) + goto gp_in_kernel; + ++#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_PAGEEXEC) ++ if (!nx_enabled && tsk->mm && (tsk->mm->pax_flags & MF_PAX_PAGEEXEC)) { ++ struct mm_struct *mm = tsk->mm; ++ unsigned long limit; ++ ++ down_write(&mm->mmap_sem); ++ limit = mm->context.user_cs_limit; ++ if (limit < TASK_SIZE) { ++ track_exec_limit(mm, limit, TASK_SIZE, VM_EXEC); ++ up_write(&mm->mmap_sem); ++ return; ++ } ++ up_write(&mm->mmap_sem); ++ } ++#endif ++ + tsk->thread.error_code = error_code; + tsk->thread.trap_no = 13; + +@@ -355,6 +369,13 @@ gp_in_kernel: + if (notify_die(DIE_GPF, "general protection fault", regs, + error_code, 13, SIGSEGV) == NOTIFY_STOP) + return; ++ ++#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) ++ if ((regs->cs & 0xFFFF) == __KERNEL_CS) ++ die("PAX: suspicious general protection fault", regs, error_code); ++ else ++#endif ++ + die("general protection fault", regs, error_code); + } + +@@ -601,7 +622,7 @@ dotraplinkage void __kprobes do_debug(st + } + + #ifdef CONFIG_X86_32 +- if (regs->flags & X86_VM_MASK) ++ if (v8086_mode(regs)) + goto debug_vm86; + #endif + +@@ -613,7 +634,7 @@ dotraplinkage void __kprobes do_debug(st + * kernel space (but re-enable TF when returning to user mode). 
+ */ + if (condition & DR_STEP) { +- if (!user_mode(regs)) ++ if (!user_mode_novm(regs)) + goto clear_TF_reenable; + } + +@@ -800,7 +821,7 @@ do_simd_coprocessor_error(struct pt_regs + * Handle strange cache flush from user space exception + * in all other cases. This is undocumented behaviour. + */ +- if (regs->flags & X86_VM_MASK) { ++ if (v8086_mode(regs)) { + handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code); + return; + } +@@ -829,19 +850,14 @@ do_spurious_interrupt_bug(struct pt_regs + #ifdef CONFIG_X86_32 + unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) + { +- struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id()); + unsigned long base = (kesp - uesp) & -THREAD_SIZE; + unsigned long new_kesp = kesp - base; + unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; +- __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS]; ++ struct desc_struct ss; + + /* Set up base for espfix segment */ +- desc &= 0x00f0ff0000000000ULL; +- desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) | +- ((((__u64)base) << 32) & 0xff00000000000000ULL) | +- ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) | +- (lim_pages & 0xffff); +- *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc; ++ pack_descriptor(&ss, base, lim_pages, 0x93, 0xC); ++ write_gdt_entry(get_cpu_gdt_table(smp_processor_id()), GDT_ENTRY_ESPFIX_SS, &ss, DESCTYPE_S); + + return new_kesp; + } +diff -urNp linux-2.6.29/arch/x86/kernel/tsc.c linux-2.6.29/arch/x86/kernel/tsc.c +--- linux-2.6.29/arch/x86/kernel/tsc.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/tsc.c 2009-03-28 14:26:19.000000000 -0400 +@@ -765,7 +765,7 @@ static struct dmi_system_id __initdata b + DMI_MATCH(DMI_BOARD_NAME, "2635FA0"), + }, + }, +- {} ++ { NULL, NULL, {{0, {0}}}, NULL} + }; + + static void __init check_system_tsc_reliable(void) +diff -urNp linux-2.6.29/arch/x86/kernel/vm86_32.c linux-2.6.29/arch/x86/kernel/vm86_32.c +--- linux-2.6.29/arch/x86/kernel/vm86_32.c 
2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/vm86_32.c 2009-03-28 14:26:19.000000000 -0400 +@@ -148,7 +148,7 @@ struct pt_regs *save_v86_state(struct ke + do_exit(SIGSEGV); + } + +- tss = &per_cpu(init_tss, get_cpu()); ++ tss = init_tss + get_cpu(); + current->thread.sp0 = current->thread.saved_sp0; + current->thread.sysenter_cs = __KERNEL_CS; + load_sp0(tss, ¤t->thread); +@@ -325,7 +325,7 @@ static void do_sys_vm86(struct kernel_vm + tsk->thread.saved_fs = info->regs32->fs; + savesegment(gs, tsk->thread.saved_gs); + +- tss = &per_cpu(init_tss, get_cpu()); ++ tss = init_tss + get_cpu(); + tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; + if (cpu_has_sep) + tsk->thread.sysenter_cs = 0; +diff -urNp linux-2.6.29/arch/x86/kernel/vmi_32.c linux-2.6.29/arch/x86/kernel/vmi_32.c +--- linux-2.6.29/arch/x86/kernel/vmi_32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/vmi_32.c 2009-03-28 14:26:19.000000000 -0400 +@@ -102,18 +102,43 @@ static unsigned patch_internal(int call, + { + u64 reloc; + struct vmi_relocation_info *const rel = (struct vmi_relocation_info *)&reloc; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + reloc = call_vrom_long_func(vmi_rom, get_reloc, call); + switch(rel->type) { + case VMI_RELOCATION_CALL_REL: + BUG_ON(len < 5); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + *(char *)insnbuf = MNEM_CALL; + patch_offset(insnbuf, ip, (unsigned long)rel->eip); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + return 5; + + case VMI_RELOCATION_JUMP_REL: + BUG_ON(len < 5); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + *(char *)insnbuf = MNEM_JMP; + patch_offset(insnbuf, ip, (unsigned long)rel->eip); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + return 5; + + case VMI_RELOCATION_NOP: +@@ -409,13 +434,13 @@ static void vmi_set_pud(pud_t *pudp, pud + + static void vmi_pte_clear(struct mm_struct 
*mm, unsigned long addr, pte_t *ptep) + { +- const pte_t pte = { .pte = 0 }; ++ const pte_t pte = __pte(0ULL); + vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); + } + + static void vmi_pmd_clear(pmd_t *pmd) + { +- const pte_t pte = { .pte = 0 }; ++ const pte_t pte = __pte(0ULL); + vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD); + } + #endif +@@ -443,8 +468,8 @@ vmi_startup_ipi_hook(int phys_apicid, un + ap.ss = __KERNEL_DS; + ap.esp = (unsigned long) start_esp; + +- ap.ds = __USER_DS; +- ap.es = __USER_DS; ++ ap.ds = __KERNEL_DS; ++ ap.es = __KERNEL_DS; + ap.fs = __KERNEL_PERCPU; + ap.gs = 0; + +@@ -639,12 +664,20 @@ static inline int __init activate_vmi(vo + u64 reloc; + const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc; + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + if (call_vrom_func(vmi_rom, vmi_init) != 0) { + printk(KERN_ERR "VMI ROM failed to initialize!"); + return 0; + } + savesegment(cs, kernel_cs); + ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + pv_info.paravirt_enabled = 1; + pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK; + pv_info.name = "vmi"; +@@ -835,6 +868,10 @@ static inline int __init activate_vmi(vo + + para_fill(pv_irq_ops.safe_halt, Halt); + ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + /* + * Alternative instruction rewriting doesn't happen soon enough + * to convert VMI_IRET to a call instead of a jump; so we have +diff -urNp linux-2.6.29/arch/x86/kernel/vmlinux_32.lds.S linux-2.6.29/arch/x86/kernel/vmlinux_32.lds.S +--- linux-2.6.29/arch/x86/kernel/vmlinux_32.lds.S 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/vmlinux_32.lds.S 2009-03-28 14:26:19.000000000 -0400 +@@ -15,6 +15,20 @@ + #include + #include + #include ++#include ++ ++#ifdef CONFIG_X86_PAE ++#define PMD_SHIFT 21 ++#else ++#define PMD_SHIFT 22 ++#endif ++#define PMD_SIZE (1 << PMD_SHIFT) ++ ++#ifdef CONFIG_PAX_KERNEXEC ++#define 
__KERNEL_TEXT_OFFSET (__PAGE_OFFSET + (((____LOAD_PHYSICAL_ADDR + 2*(PMD_SIZE - 1)) - 1) & ~(PMD_SIZE - 1))) ++#else ++#define __KERNEL_TEXT_OFFSET 0 ++#endif + + OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") + OUTPUT_ARCH(i386) +@@ -22,82 +36,23 @@ ENTRY(phys_startup_32) + jiffies = jiffies_64; + + PHDRS { +- text PT_LOAD FLAGS(5); /* R_E */ +- data PT_LOAD FLAGS(7); /* RWE */ +- note PT_NOTE FLAGS(0); /* ___ */ ++ initdata PT_LOAD FLAGS(6); /* RW_ */ ++ percpu PT_LOAD FLAGS(6); /* RW_ */ ++ inittext PT_LOAD FLAGS(5); /* R_E */ ++ text PT_LOAD FLAGS(5); /* R_E */ ++ rodata PT_LOAD FLAGS(4); /* R__ */ ++ data PT_LOAD FLAGS(6); /* RW_ */ ++ note PT_NOTE FLAGS(0); /* ___ */ + } + SECTIONS + { +- . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; +- phys_startup_32 = startup_32 - LOAD_OFFSET; +- +- .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) { +- _text = .; /* Text and read-only data */ +- *(.text.head) +- } :text = 0x9090 +- +- /* read-only */ +- .text : AT(ADDR(.text) - LOAD_OFFSET) { +- . = ALIGN(PAGE_SIZE); /* not really needed, already page aligned */ +- *(.text.page_aligned) +- TEXT_TEXT +- SCHED_TEXT +- LOCK_TEXT +- KPROBES_TEXT +- IRQENTRY_TEXT +- *(.fixup) +- *(.gnu.warning) +- _etext = .; /* End of text section */ +- } :text = 0x9090 +- +- NOTES :text :note ++ . = LOAD_OFFSET + ____LOAD_PHYSICAL_ADDR; + +- . = ALIGN(16); /* Exception table */ +- __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { +- __start___ex_table = .; +- *(__ex_table) +- __stop___ex_table = .; +- } :text = 0x9090 +- +- RODATA +- +- /* writeable */ +- . = ALIGN(PAGE_SIZE); +- .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ +- DATA_DATA +- CONSTRUCTORS +- } :data +- +- . = ALIGN(PAGE_SIZE); +- .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { +- __nosave_begin = .; +- *(.data.nosave) +- . = ALIGN(PAGE_SIZE); +- __nosave_end = .; +- } +- +- . = ALIGN(PAGE_SIZE); +- .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { +- *(.data.page_aligned) +- *(.data.idt) +- } +- +- . 
= ALIGN(32); +- .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { +- *(.data.cacheline_aligned) +- } +- +- /* rarely changed data like cpu maps */ +- . = ALIGN(32); +- .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { +- *(.data.read_mostly) +- _edata = .; /* End of data section */ +- } +- +- . = ALIGN(THREAD_SIZE); /* init_task */ +- .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { +- *(.data.init_task) +- } ++ .text.startup : AT(ADDR(.text.startup) - LOAD_OFFSET) { ++ __LOAD_PHYSICAL_ADDR = . - LOAD_OFFSET; ++ phys_startup_32 = startup_32 - LOAD_OFFSET + __KERNEL_TEXT_OFFSET; ++ *(.text.startup) ++ } :initdata + + /* might get freed after init */ + . = ALIGN(PAGE_SIZE); +@@ -115,14 +70,8 @@ SECTIONS + . = ALIGN(PAGE_SIZE); + + /* will be freed after init */ +- . = ALIGN(PAGE_SIZE); /* Init code and data */ +- .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { +- __init_begin = .; +- _sinittext = .; +- INIT_TEXT +- _einittext = .; +- } + .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { ++ __init_begin = .; + INIT_DATA + } + . = ALIGN(16); +@@ -162,11 +111,6 @@ SECTIONS + *(.parainstructions) + __parainstructions_end = .; + } +- /* .exit.text is discard at runtime, not link time, to deal with references +- from .altinstructions and .eh_frame */ +- .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { +- EXIT_TEXT +- } + .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { + EXIT_DATA + } +@@ -179,18 +123,139 @@ SECTIONS + } + #endif + . = ALIGN(PAGE_SIZE); +- .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { +- __per_cpu_start = .; +- *(.data.percpu.page_aligned) ++ per_cpu_start = .; ++ .data.percpu (0) : AT(ADDR(.data.percpu) - LOAD_OFFSET + per_cpu_start) { ++ __per_cpu_start = . + per_cpu_start; ++ LONG(0) + *(.data.percpu) + *(.data.percpu.shared_aligned) +- __per_cpu_end = .; +- } ++ . = ALIGN(PAGE_SIZE); ++ *(.data.percpu.page_aligned) ++ __per_cpu_end = . + per_cpu_start; ++ } :percpu ++ . 
+= per_cpu_start; + . = ALIGN(PAGE_SIZE); + /* freed after init ends here */ + ++ . = ALIGN(PAGE_SIZE); /* Init code and data */ ++ .init.text (. - __KERNEL_TEXT_OFFSET) : AT(ADDR(.init.text) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { ++ _sinittext = .; ++ INIT_TEXT ++ _einittext = .; ++ } :inittext ++ ++ /* .exit.text is discard at runtime, not link time, to deal with references ++ from .altinstructions and .eh_frame */ ++ .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { ++ EXIT_TEXT ++ } ++ ++ .filler : AT(ADDR(.filler) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { ++ BYTE(0) ++ . = ALIGN(2*PMD_SIZE) - 1; ++ } ++ ++ /* freed after init ends here */ ++ ++ .text.head : AT(ADDR(.text.head) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { ++ __init_end = . + __KERNEL_TEXT_OFFSET; ++ KERNEL_TEXT_OFFSET = . + __KERNEL_TEXT_OFFSET; ++ _text = .; /* Text and read-only data */ ++ *(.text.head) ++ } :text = 0x9090 ++ ++ /* read-only */ ++ .text : AT(ADDR(.text) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { ++ . = ALIGN(PAGE_SIZE); /* not really needed, already page aligned */ ++ *(.text.page_aligned) ++ TEXT_TEXT ++ SCHED_TEXT ++ LOCK_TEXT ++ KPROBES_TEXT ++ IRQENTRY_TEXT ++ *(.fixup) ++ *(.gnu.warning) ++ _etext = .; /* End of text section */ ++ } :text = 0x9090 ++ ++ . += __KERNEL_TEXT_OFFSET; ++ ++ . = ALIGN(4096); ++ NOTES :rodata :note ++ ++ . = ALIGN(16); /* Exception table */ ++ __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { ++ __start___ex_table = .; ++ *(__ex_table) ++ __stop___ex_table = .; ++ } :rodata ++ ++ RO_DATA(PAGE_SIZE) ++ ++ . = ALIGN(PAGE_SIZE); ++ .rodata.page_aligned : AT(ADDR(.rodata.page_aligned) - LOAD_OFFSET) { ++ *(.idt) ++ . = ALIGN(PAGE_SIZE); ++ *(.empty_zero_page) ++ *(.swapper_pg_pmd) ++ *(.swapper_pg_dir) ++ ++#if defined(CONFIG_PAX_KERNEXEC) && !defined(CONFIG_MODULES) ++ . = ALIGN(PMD_SIZE); ++#endif ++ ++ } ++ ++#if defined(CONFIG_PAX_KERNEXEC) && defined(CONFIG_MODULES) ++ . 
= ALIGN(PAGE_SIZE); ++ .module.text : AT(ADDR(.module.text) - LOAD_OFFSET) { ++ MODULES_VADDR = .; ++ BYTE(0) ++ . += (6 * 1024 * 1024); ++ . = ALIGN(PMD_SIZE); ++ MODULES_END = . - 1; ++ } ++#endif ++ ++ /* writeable */ ++ . = ALIGN(PAGE_SIZE); ++ .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ ++ _data = .; ++ DATA_DATA ++ CONSTRUCTORS ++ } :data ++ ++ . = ALIGN(PAGE_SIZE); ++ .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { ++ __nosave_begin = .; ++ *(.data.nosave) ++ . = ALIGN(PAGE_SIZE); ++ __nosave_end = .; ++ } ++ ++ . = ALIGN(PAGE_SIZE); ++ .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { ++ *(.data.page_aligned) ++ } ++ ++ . = ALIGN(32); ++ .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { ++ *(.data.cacheline_aligned) ++ } ++ ++ /* rarely changed data like cpu maps */ ++ . = ALIGN(32); ++ .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { ++ *(.data.read_mostly) ++ _edata = .; /* End of data section */ ++ } ++ ++ . = ALIGN(THREAD_SIZE); /* init_task */ ++ .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { ++ *(.data.init_task) ++ } ++ + .bss : AT(ADDR(.bss) - LOAD_OFFSET) { +- __init_end = .; + __bss_start = .; /* BSS */ + *(.bss.page_aligned) + *(.bss) +diff -urNp linux-2.6.29/arch/x86/kernel/vmlinux_64.lds.S linux-2.6.29/arch/x86/kernel/vmlinux_64.lds.S +--- linux-2.6.29/arch/x86/kernel/vmlinux_64.lds.S 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/vmlinux_64.lds.S 2009-03-28 14:26:19.000000000 -0400 +@@ -16,7 +16,7 @@ jiffies_64 = jiffies; + _proxy_pda = 1; + PHDRS { + text PT_LOAD FLAGS(5); /* R_E */ +- data PT_LOAD FLAGS(7); /* RWE */ ++ data PT_LOAD FLAGS(6); /* RW_ */ + user PT_LOAD FLAGS(7); /* RWE */ + data.init PT_LOAD FLAGS(7); /* RWE */ + note PT_NOTE FLAGS(0); /* ___ */ +@@ -50,17 +50,20 @@ SECTIONS + __stop___ex_table = .; + } :text = 0x9090 + +- RODATA ++ RO_DATA(PAGE_SIZE) + ++#ifdef CONFIG_PAX_KERNEXEC ++ . 
= ALIGN(2*1024*1024); /* Align data segment to PMD size boundary */ ++#else + . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */ ++#endif + /* Data */ ++ _data = .; + .data : AT(ADDR(.data) - LOAD_OFFSET) { + DATA_DATA + CONSTRUCTORS + } :data + +- _edata = .; /* End of data section */ +- + . = ALIGN(PAGE_SIZE); + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { +@@ -71,9 +74,27 @@ SECTIONS + *(.data.read_mostly) + } + ++ . = ALIGN(THREAD_SIZE); /* init_task */ ++ .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { ++ *(.data.init_task) ++ } ++ ++ . = ALIGN(PAGE_SIZE); ++ .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { ++ *(.data.page_aligned) ++ } ++ ++ . = ALIGN(PAGE_SIZE); ++ __nosave_begin = .; ++ .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } ++ . = ALIGN(PAGE_SIZE); ++ __nosave_end = .; ++ ++ _edata = .; /* End of data section */ ++ + #define VSYSCALL_ADDR (-10*1024*1024) +-#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + SIZEOF(.data.read_mostly) + 4095) & ~(4095)) +-#define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + SIZEOF(.data.read_mostly) + 4095) & ~(4095)) ++#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data_nosave) + SIZEOF(.data_nosave) + 4095) & ~(4095)) ++#define VSYSCALL_VIRT_ADDR ((ADDR(.data_nosave) + SIZEOF(.data_nosave) + 4095) & ~(4095)) + + #define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR) + #define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) +@@ -121,23 +142,13 @@ SECTIONS + #undef VVIRT_OFFSET + #undef VVIRT + +- . = ALIGN(THREAD_SIZE); /* init_task */ +- .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { +- *(.data.init_task) +- }:data.init +- +- . = ALIGN(PAGE_SIZE); +- .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { +- *(.data.page_aligned) +- } +- + /* might get freed after init */ + . 
= ALIGN(PAGE_SIZE); + __smp_alt_begin = .; + __smp_locks = .; + .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { + *(.smp_locks) +- } ++ } :data.init + __smp_locks_end = .; + . = ALIGN(PAGE_SIZE); + __smp_alt_end = .; +@@ -213,16 +224,11 @@ SECTIONS + . = ALIGN(PAGE_SIZE); + __init_end = .; + +- . = ALIGN(PAGE_SIZE); +- __nosave_begin = .; +- .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } +- . = ALIGN(PAGE_SIZE); +- __nosave_end = .; +- + __bss_start = .; /* BSS */ + .bss : AT(ADDR(.bss) - LOAD_OFFSET) { + *(.bss.page_aligned) + *(.bss) ++ . = ALIGN(2*1024*1024); + } + __bss_stop = .; + +diff -urNp linux-2.6.29/arch/x86/kernel/vsyscall_64.c linux-2.6.29/arch/x86/kernel/vsyscall_64.c +--- linux-2.6.29/arch/x86/kernel/vsyscall_64.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kernel/vsyscall_64.c 2009-03-28 14:26:19.000000000 -0400 +@@ -248,13 +248,13 @@ static ctl_table kernel_table2[] = { + .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = vsyscall_sysctl_change }, +- {} ++ { 0, NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL } + }; + + static ctl_table kernel_root_table2[] = { + { .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555, + .child = kernel_table2 }, +- {} ++ { 0, NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL } + }; + #endif + +diff -urNp linux-2.6.29/arch/x86/kvm/svm.c linux-2.6.29/arch/x86/kvm/svm.c +--- linux-2.6.29/arch/x86/kvm/svm.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kvm/svm.c 2009-03-28 14:26:19.000000000 -0400 +@@ -1509,7 +1509,19 @@ static void reload_tss(struct kvm_vcpu * + int cpu = raw_smp_processor_id(); + + struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++ ++ pax_open_kernel(cr0); ++#endif ++ + svm_data->tss_desc->type = 9; /* available 32/64-bit TSS */ ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + load_TR_desc(); + 
} + +@@ -1920,7 +1932,7 @@ static int svm_get_mt_mask_shift(void) + return 0; + } + +-static struct kvm_x86_ops svm_x86_ops = { ++static const struct kvm_x86_ops svm_x86_ops = { + .cpu_has_kvm_support = has_svm, + .disabled_by_bios = is_disabled, + .hardware_setup = svm_hardware_setup, +diff -urNp linux-2.6.29/arch/x86/kvm/vmx.c linux-2.6.29/arch/x86/kvm/vmx.c +--- linux-2.6.29/arch/x86/kvm/vmx.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kvm/vmx.c 2009-03-28 14:26:19.000000000 -0400 +@@ -497,9 +497,23 @@ static void reload_tss(void) + struct descriptor_table gdt; + struct desc_struct *descs; + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + kvm_get_gdt(&gdt); + descs = (void *)gdt.base; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + load_TR_desc(); + } + +@@ -2188,7 +2202,7 @@ static int vmx_vcpu_setup(struct vcpu_vm + vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */ + + asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); +- vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ ++ vmcs_writel(HOST_RIP, ktla_ktva(kvm_vmx_return)); /* 22.2.5 */ + vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); +@@ -3385,6 +3399,12 @@ static void vmx_vcpu_run(struct kvm_vcpu + "jmp .Lkvm_vmx_return \n\t" + ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t" + ".Lkvm_vmx_return: " ++ ++#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) ++ "ljmp %[cs],$.Lkvm_vmx_return2\n\t" ++ ".Lkvm_vmx_return2: " ++#endif ++ + /* Save guest registers, load host registers, keep flags */ + "xchg %0, (%%"R"sp) \n\t" + "mov %%"R"ax, %c[rax](%0) \n\t" +@@ -3431,6 +3451,11 @@ static void vmx_vcpu_run(struct kvm_vcpu + [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])), + #endif + [cr2]"i"(offsetof(struct vcpu_vmx, 
vcpu.arch.cr2)) ++ ++#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) ++ ,[cs]"i"(__KERNEL_CS) ++#endif ++ + : "cc", "memory" + , R"bx", R"di", R"si" + #ifdef CONFIG_X86_64 +@@ -3447,7 +3472,7 @@ static void vmx_vcpu_run(struct kvm_vcpu + + vmx_update_window_states(vcpu); + +- asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); ++ asm("mov %0, %%ds; mov %0, %%es" : : "r"(__KERNEL_DS)); + vmx->launched = 1; + + intr_info = vmcs_read32(VM_EXIT_INTR_INFO); +@@ -3576,7 +3601,7 @@ static int vmx_get_mt_mask_shift(void) + return VMX_EPT_MT_EPTE_SHIFT; + } + +-static struct kvm_x86_ops vmx_x86_ops = { ++static const struct kvm_x86_ops vmx_x86_ops = { + .cpu_has_kvm_support = cpu_has_kvm_support, + .disabled_by_bios = vmx_disabled_by_bios, + .hardware_setup = hardware_setup, +diff -urNp linux-2.6.29/arch/x86/kvm/x86.c linux-2.6.29/arch/x86/kvm/x86.c +--- linux-2.6.29/arch/x86/kvm/x86.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/kvm/x86.c 2009-03-28 14:26:19.000000000 -0400 +@@ -70,44 +70,44 @@ static u64 __read_mostly efer_reserved_b + static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries); + +-struct kvm_x86_ops *kvm_x86_ops; ++const struct kvm_x86_ops *kvm_x86_ops; + EXPORT_SYMBOL_GPL(kvm_x86_ops); + + struct kvm_stats_debugfs_item debugfs_entries[] = { +- { "pf_fixed", VCPU_STAT(pf_fixed) }, +- { "pf_guest", VCPU_STAT(pf_guest) }, +- { "tlb_flush", VCPU_STAT(tlb_flush) }, +- { "invlpg", VCPU_STAT(invlpg) }, +- { "exits", VCPU_STAT(exits) }, +- { "io_exits", VCPU_STAT(io_exits) }, +- { "mmio_exits", VCPU_STAT(mmio_exits) }, +- { "signal_exits", VCPU_STAT(signal_exits) }, +- { "irq_window", VCPU_STAT(irq_window_exits) }, +- { "nmi_window", VCPU_STAT(nmi_window_exits) }, +- { "halt_exits", VCPU_STAT(halt_exits) }, +- { "halt_wakeup", VCPU_STAT(halt_wakeup) }, +- { "hypercalls", VCPU_STAT(hypercalls) }, +- { "request_irq", VCPU_STAT(request_irq_exits) }, +- { "request_nmi", 
VCPU_STAT(request_nmi_exits) }, +- { "irq_exits", VCPU_STAT(irq_exits) }, +- { "host_state_reload", VCPU_STAT(host_state_reload) }, +- { "efer_reload", VCPU_STAT(efer_reload) }, +- { "fpu_reload", VCPU_STAT(fpu_reload) }, +- { "insn_emulation", VCPU_STAT(insn_emulation) }, +- { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, +- { "irq_injections", VCPU_STAT(irq_injections) }, +- { "nmi_injections", VCPU_STAT(nmi_injections) }, +- { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, +- { "mmu_pte_write", VM_STAT(mmu_pte_write) }, +- { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, +- { "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) }, +- { "mmu_flooded", VM_STAT(mmu_flooded) }, +- { "mmu_recycled", VM_STAT(mmu_recycled) }, +- { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, +- { "mmu_unsync", VM_STAT(mmu_unsync) }, +- { "mmu_unsync_global", VM_STAT(mmu_unsync_global) }, +- { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, +- { "largepages", VM_STAT(lpages) }, ++ { "pf_fixed", VCPU_STAT(pf_fixed), NULL }, ++ { "pf_guest", VCPU_STAT(pf_guest), NULL }, ++ { "tlb_flush", VCPU_STAT(tlb_flush), NULL }, ++ { "invlpg", VCPU_STAT(invlpg), NULL }, ++ { "exits", VCPU_STAT(exits), NULL }, ++ { "io_exits", VCPU_STAT(io_exits), NULL }, ++ { "mmio_exits", VCPU_STAT(mmio_exits), NULL }, ++ { "signal_exits", VCPU_STAT(signal_exits), NULL }, ++ { "irq_window", VCPU_STAT(irq_window_exits), NULL }, ++ { "nmi_window", VCPU_STAT(nmi_window_exits), NULL }, ++ { "halt_exits", VCPU_STAT(halt_exits), NULL }, ++ { "halt_wakeup", VCPU_STAT(halt_wakeup), NULL }, ++ { "hypercalls", VCPU_STAT(hypercalls), NULL }, ++ { "request_irq", VCPU_STAT(request_irq_exits), NULL }, ++ { "request_nmi", VCPU_STAT(request_nmi_exits), NULL }, ++ { "irq_exits", VCPU_STAT(irq_exits), NULL }, ++ { "host_state_reload", VCPU_STAT(host_state_reload), NULL }, ++ { "efer_reload", VCPU_STAT(efer_reload), NULL }, ++ { "fpu_reload", VCPU_STAT(fpu_reload), NULL }, ++ { "insn_emulation", VCPU_STAT(insn_emulation), NULL }, 
++ { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail), NULL }, ++ { "irq_injections", VCPU_STAT(irq_injections), NULL }, ++ { "nmi_injections", VCPU_STAT(nmi_injections), NULL }, ++ { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped), NULL }, ++ { "mmu_pte_write", VM_STAT(mmu_pte_write), NULL }, ++ { "mmu_pte_updated", VM_STAT(mmu_pte_updated), NULL }, ++ { "mmu_pde_zapped", VM_STAT(mmu_pde_zapped), NULL }, ++ { "mmu_flooded", VM_STAT(mmu_flooded), NULL }, ++ { "mmu_recycled", VM_STAT(mmu_recycled), NULL }, ++ { "mmu_cache_miss", VM_STAT(mmu_cache_miss), NULL }, ++ { "mmu_unsync", VM_STAT(mmu_unsync), NULL }, ++ { "mmu_unsync_global", VM_STAT(mmu_unsync_global), NULL }, ++ { "remote_tlb_flush", VM_STAT(remote_tlb_flush), NULL }, ++ { "largepages", VM_STAT(lpages), NULL }, + { NULL } + }; + +@@ -1368,7 +1368,7 @@ static int kvm_vcpu_ioctl_set_lapic(stru + static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, + struct kvm_interrupt *irq) + { +- if (irq->irq < 0 || irq->irq >= 256) ++ if (irq->irq >= 256) + return -EINVAL; + if (irqchip_in_kernel(vcpu->kvm)) + return -ENXIO; +@@ -2587,10 +2587,10 @@ int kvm_emulate_pio_string(struct kvm_vc + } + EXPORT_SYMBOL_GPL(kvm_emulate_pio_string); + +-int kvm_arch_init(void *opaque) ++int kvm_arch_init(const void *opaque) + { + int r; +- struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; ++ const struct kvm_x86_ops *ops = (const struct kvm_x86_ops *)opaque; + + if (kvm_x86_ops) { + printk(KERN_ERR "kvm: already loaded the other module\n"); +diff -urNp linux-2.6.29/arch/x86/lib/checksum_32.S linux-2.6.29/arch/x86/lib/checksum_32.S +--- linux-2.6.29/arch/x86/lib/checksum_32.S 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/lib/checksum_32.S 2009-03-28 14:26:19.000000000 -0400 +@@ -28,7 +28,8 @@ + #include + #include + #include +- ++#include ++ + /* + * computes a partial checksum, e.g. 
for TCP/UDP fragments + */ +@@ -304,9 +305,22 @@ unsigned int csum_partial_copy_generic ( + + #define ARGBASE 16 + #define FP 12 +- +-ENTRY(csum_partial_copy_generic) ++ ++ENTRY(csum_partial_copy_generic_to_user) + CFI_STARTPROC ++ pushl $(__USER_DS) ++ CFI_ADJUST_CFA_OFFSET 4 ++ popl %es ++ CFI_ADJUST_CFA_OFFSET -4 ++ jmp csum_partial_copy_generic ++ ++ENTRY(csum_partial_copy_generic_from_user) ++ pushl $(__USER_DS) ++ CFI_ADJUST_CFA_OFFSET 4 ++ popl %ds ++ CFI_ADJUST_CFA_OFFSET -4 ++ ++ENTRY(csum_partial_copy_generic) + subl $4,%esp + CFI_ADJUST_CFA_OFFSET 4 + pushl %edi +@@ -331,7 +345,7 @@ ENTRY(csum_partial_copy_generic) + jmp 4f + SRC(1: movw (%esi), %bx ) + addl $2, %esi +-DST( movw %bx, (%edi) ) ++DST( movw %bx, %es:(%edi) ) + addl $2, %edi + addw %bx, %ax + adcl $0, %eax +@@ -343,30 +357,30 @@ DST( movw %bx, (%edi) ) + SRC(1: movl (%esi), %ebx ) + SRC( movl 4(%esi), %edx ) + adcl %ebx, %eax +-DST( movl %ebx, (%edi) ) ++DST( movl %ebx, %es:(%edi) ) + adcl %edx, %eax +-DST( movl %edx, 4(%edi) ) ++DST( movl %edx, %es:4(%edi) ) + + SRC( movl 8(%esi), %ebx ) + SRC( movl 12(%esi), %edx ) + adcl %ebx, %eax +-DST( movl %ebx, 8(%edi) ) ++DST( movl %ebx, %es:8(%edi) ) + adcl %edx, %eax +-DST( movl %edx, 12(%edi) ) ++DST( movl %edx, %es:12(%edi) ) + + SRC( movl 16(%esi), %ebx ) + SRC( movl 20(%esi), %edx ) + adcl %ebx, %eax +-DST( movl %ebx, 16(%edi) ) ++DST( movl %ebx, %es:16(%edi) ) + adcl %edx, %eax +-DST( movl %edx, 20(%edi) ) ++DST( movl %edx, %es:20(%edi) ) + + SRC( movl 24(%esi), %ebx ) + SRC( movl 28(%esi), %edx ) + adcl %ebx, %eax +-DST( movl %ebx, 24(%edi) ) ++DST( movl %ebx, %es:24(%edi) ) + adcl %edx, %eax +-DST( movl %edx, 28(%edi) ) ++DST( movl %edx, %es:28(%edi) ) + + lea 32(%esi), %esi + lea 32(%edi), %edi +@@ -380,7 +394,7 @@ DST( movl %edx, 28(%edi) ) + shrl $2, %edx # This clears CF + SRC(3: movl (%esi), %ebx ) + adcl %ebx, %eax +-DST( movl %ebx, (%edi) ) ++DST( movl %ebx, %es:(%edi) ) + lea 4(%esi), %esi + lea 4(%edi), %edi + dec %edx +@@ -392,12 
+406,12 @@ DST( movl %ebx, (%edi) ) + jb 5f + SRC( movw (%esi), %cx ) + leal 2(%esi), %esi +-DST( movw %cx, (%edi) ) ++DST( movw %cx, %es:(%edi) ) + leal 2(%edi), %edi + je 6f + shll $16,%ecx + SRC(5: movb (%esi), %cl ) +-DST( movb %cl, (%edi) ) ++DST( movb %cl, %es:(%edi) ) + 6: addl %ecx, %eax + adcl $0, %eax + 7: +@@ -408,7 +422,7 @@ DST( movb %cl, (%edi) ) + + 6001: + movl ARGBASE+20(%esp), %ebx # src_err_ptr +- movl $-EFAULT, (%ebx) ++ movl $-EFAULT, %ss:(%ebx) + + # zero the complete destination - computing the rest + # is too much work +@@ -421,11 +435,19 @@ DST( movb %cl, (%edi) ) + + 6002: + movl ARGBASE+24(%esp), %ebx # dst_err_ptr +- movl $-EFAULT,(%ebx) ++ movl $-EFAULT,%ss:(%ebx) + jmp 5000b + + .previous + ++ pushl %ss ++ CFI_ADJUST_CFA_OFFSET 4 ++ popl %ds ++ CFI_ADJUST_CFA_OFFSET -4 ++ pushl %ss ++ CFI_ADJUST_CFA_OFFSET 4 ++ popl %es ++ CFI_ADJUST_CFA_OFFSET -4 + popl %ebx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ebx +@@ -439,26 +461,41 @@ DST( movb %cl, (%edi) ) + CFI_ADJUST_CFA_OFFSET -4 + ret + CFI_ENDPROC +-ENDPROC(csum_partial_copy_generic) ++ENDPROC(csum_partial_copy_generic_to_user) + + #else + + /* Version for PentiumII/PPro */ + + #define ROUND1(x) \ ++ nop; nop; nop; \ + SRC(movl x(%esi), %ebx ) ; \ + addl %ebx, %eax ; \ +- DST(movl %ebx, x(%edi) ) ; ++ DST(movl %ebx, %es:x(%edi)) ; + + #define ROUND(x) \ ++ nop; nop; nop; \ + SRC(movl x(%esi), %ebx ) ; \ + adcl %ebx, %eax ; \ +- DST(movl %ebx, x(%edi) ) ; ++ DST(movl %ebx, %es:x(%edi)) ; + + #define ARGBASE 12 +- +-ENTRY(csum_partial_copy_generic) ++ ++ENTRY(csum_partial_copy_generic_to_user) + CFI_STARTPROC ++ pushl $(__USER_DS) ++ CFI_ADJUST_CFA_OFFSET 4 ++ popl %es ++ CFI_ADJUST_CFA_OFFSET -4 ++ jmp csum_partial_copy_generic ++ ++ENTRY(csum_partial_copy_generic_from_user) ++ pushl $(__USER_DS) ++ CFI_ADJUST_CFA_OFFSET 4 ++ popl %ds ++ CFI_ADJUST_CFA_OFFSET -4 ++ ++ENTRY(csum_partial_copy_generic) + pushl %ebx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebx, 0 +@@ -482,7 +519,7 @@ 
ENTRY(csum_partial_copy_generic) + subl %ebx, %edi + lea -1(%esi),%edx + andl $-32,%edx +- lea 3f(%ebx,%ebx), %ebx ++ lea 3f(%ebx,%ebx,2), %ebx + testl %esi, %esi + jmp *%ebx + 1: addl $64,%esi +@@ -503,19 +540,19 @@ ENTRY(csum_partial_copy_generic) + jb 5f + SRC( movw (%esi), %dx ) + leal 2(%esi), %esi +-DST( movw %dx, (%edi) ) ++DST( movw %dx, %es:(%edi) ) + leal 2(%edi), %edi + je 6f + shll $16,%edx + 5: + SRC( movb (%esi), %dl ) +-DST( movb %dl, (%edi) ) ++DST( movb %dl, %es:(%edi) ) + 6: addl %edx, %eax + adcl $0, %eax + 7: + .section .fixup, "ax" + 6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr +- movl $-EFAULT, (%ebx) ++ movl $-EFAULT, %ss:(%ebx) + # zero the complete destination (computing the rest is too much work) + movl ARGBASE+8(%esp),%edi # dst + movl ARGBASE+12(%esp),%ecx # len +@@ -523,10 +560,18 @@ DST( movb %dl, (%edi) ) + rep; stosb + jmp 7b + 6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr +- movl $-EFAULT, (%ebx) ++ movl $-EFAULT, %ss:(%ebx) + jmp 7b + .previous + ++ pushl %ss ++ CFI_ADJUST_CFA_OFFSET 4 ++ popl %ds ++ CFI_ADJUST_CFA_OFFSET -4 ++ pushl %ss ++ CFI_ADJUST_CFA_OFFSET 4 ++ popl %es ++ CFI_ADJUST_CFA_OFFSET -4 + popl %esi + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE esi +@@ -538,7 +583,7 @@ DST( movb %dl, (%edi) ) + CFI_RESTORE ebx + ret + CFI_ENDPROC +-ENDPROC(csum_partial_copy_generic) ++ENDPROC(csum_partial_copy_generic_to_user) + + #undef ROUND + #undef ROUND1 +diff -urNp linux-2.6.29/arch/x86/lib/clear_page_64.S linux-2.6.29/arch/x86/lib/clear_page_64.S +--- linux-2.6.29/arch/x86/lib/clear_page_64.S 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/lib/clear_page_64.S 2009-03-28 14:26:19.000000000 -0400 +@@ -44,7 +44,7 @@ ENDPROC(clear_page) + + #include + +- .section .altinstr_replacement,"ax" ++ .section .altinstr_replacement,"a" + 1: .byte 0xeb /* jmp */ + .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */ + 2: +diff -urNp linux-2.6.29/arch/x86/lib/copy_page_64.S linux-2.6.29/arch/x86/lib/copy_page_64.S +--- 
linux-2.6.29/arch/x86/lib/copy_page_64.S 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/lib/copy_page_64.S 2009-03-28 14:26:19.000000000 -0400 +@@ -104,7 +104,7 @@ ENDPROC(copy_page) + + #include + +- .section .altinstr_replacement,"ax" ++ .section .altinstr_replacement,"a" + 1: .byte 0xeb /* jmp */ + .byte (copy_page_c - copy_page) - (2f - 1b) /* offset */ + 2: +diff -urNp linux-2.6.29/arch/x86/lib/copy_user_64.S linux-2.6.29/arch/x86/lib/copy_user_64.S +--- linux-2.6.29/arch/x86/lib/copy_user_64.S 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/lib/copy_user_64.S 2009-03-28 14:26:19.000000000 -0400 +@@ -21,7 +21,7 @@ + .byte 0xe9 /* 32bit jump */ + .long \orig-1f /* by default jump to orig */ + 1: +- .section .altinstr_replacement,"ax" ++ .section .altinstr_replacement,"a" + 2: .byte 0xe9 /* near jump with 32bit immediate */ + .long \alt-1b /* offset */ /* or alternatively to alt */ + .previous +@@ -106,6 +106,8 @@ ENDPROC(__copy_from_user_inatomic) + ENTRY(bad_from_user) + bad_from_user: + CFI_STARTPROC ++ testl %edx,%edx ++ js bad_to_user + movl %edx,%ecx + xorl %eax,%eax + rep +diff -urNp linux-2.6.29/arch/x86/lib/getuser.S linux-2.6.29/arch/x86/lib/getuser.S +--- linux-2.6.29/arch/x86/lib/getuser.S 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/lib/getuser.S 2009-03-28 14:26:19.000000000 -0400 +@@ -33,6 +33,7 @@ + #include + #include + #include ++#include + + .text + ENTRY(__get_user_1) +@@ -40,7 +41,19 @@ ENTRY(__get_user_1) + GET_THREAD_INFO(%_ASM_DX) + cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + jae bad_get_user ++ ++#ifdef CONFIG_X86_32 ++ pushl $(__USER_DS) ++ popl %ds ++#endif ++ + 1: movzb (%_ASM_AX),%edx ++ ++#ifdef CONFIG_X86_32 ++ pushl %ss ++ pop %ds ++#endif ++ + xor %eax,%eax + ret + CFI_ENDPROC +@@ -53,7 +66,19 @@ ENTRY(__get_user_2) + GET_THREAD_INFO(%_ASM_DX) + cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + jae bad_get_user ++ ++#ifdef CONFIG_X86_32 ++ pushl $(__USER_DS) ++ popl %ds ++#endif ++ + 2: movzwl 
-1(%_ASM_AX),%edx ++ ++#ifdef CONFIG_X86_32 ++ pushl %ss ++ pop %ds ++#endif ++ + xor %eax,%eax + ret + CFI_ENDPROC +@@ -66,7 +91,19 @@ ENTRY(__get_user_4) + GET_THREAD_INFO(%_ASM_DX) + cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + jae bad_get_user ++ ++#ifdef CONFIG_X86_32 ++ pushl $(__USER_DS) ++ popl %ds ++#endif ++ + 3: mov -3(%_ASM_AX),%edx ++ ++#ifdef CONFIG_X86_32 ++ pushl %ss ++ pop %ds ++#endif ++ + xor %eax,%eax + ret + CFI_ENDPROC +@@ -89,6 +126,12 @@ ENDPROC(__get_user_8) + + bad_get_user: + CFI_STARTPROC ++ ++#ifdef CONFIG_X86_32 ++ pushl %ss ++ pop %ds ++#endif ++ + xor %edx,%edx + mov $(-EFAULT),%_ASM_AX + ret +diff -urNp linux-2.6.29/arch/x86/lib/memcpy_64.S linux-2.6.29/arch/x86/lib/memcpy_64.S +--- linux-2.6.29/arch/x86/lib/memcpy_64.S 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/lib/memcpy_64.S 2009-03-28 14:26:19.000000000 -0400 +@@ -114,7 +114,7 @@ ENDPROC(__memcpy) + /* Some CPUs run faster using the string copy instructions. + It is also a lot simpler. Use this when possible */ + +- .section .altinstr_replacement,"ax" ++ .section .altinstr_replacement,"a" + 1: .byte 0xeb /* jmp */ + .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */ + 2: +diff -urNp linux-2.6.29/arch/x86/lib/memset_64.S linux-2.6.29/arch/x86/lib/memset_64.S +--- linux-2.6.29/arch/x86/lib/memset_64.S 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/lib/memset_64.S 2009-03-28 14:26:19.000000000 -0400 +@@ -118,7 +118,7 @@ ENDPROC(__memset) + + #include + +- .section .altinstr_replacement,"ax" ++ .section .altinstr_replacement,"a" + 1: .byte 0xeb /* jmp */ + .byte (memset_c - memset) - (2f - 1b) /* offset */ + 2: +diff -urNp linux-2.6.29/arch/x86/lib/mmx_32.c linux-2.6.29/arch/x86/lib/mmx_32.c +--- linux-2.6.29/arch/x86/lib/mmx_32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/lib/mmx_32.c 2009-03-28 14:26:19.000000000 -0400 +@@ -29,6 +29,7 @@ void *_mmx_memcpy(void *to, const void * + { + void *p; + int i; ++ unsigned long cr0; + + if 
(unlikely(in_interrupt())) + return __memcpy(to, from, len); +@@ -39,44 +40,72 @@ void *_mmx_memcpy(void *to, const void * + kernel_fpu_begin(); + + __asm__ __volatile__ ( +- "1: prefetch (%0)\n" /* This set is 28 bytes */ +- " prefetch 64(%0)\n" +- " prefetch 128(%0)\n" +- " prefetch 192(%0)\n" +- " prefetch 256(%0)\n" ++ "1: prefetch (%1)\n" /* This set is 28 bytes */ ++ " prefetch 64(%1)\n" ++ " prefetch 128(%1)\n" ++ " prefetch 192(%1)\n" ++ " prefetch 256(%1)\n" + "2: \n" + ".section .fixup, \"ax\"\n" +- "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ ++ "3: \n" ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ " movl %%cr0, %0\n" ++ " movl %0, %%eax\n" ++ " andl $0xFFFEFFFF, %%eax\n" ++ " movl %%eax, %%cr0\n" ++#endif ++ ++ " movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ " movl %0, %%cr0\n" ++#endif ++ + " jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) +- : : "r" (from)); ++ : "=&r" (cr0) : "r" (from) : "ax"); + + for ( ; i > 5; i--) { + __asm__ __volatile__ ( +- "1: prefetch 320(%0)\n" +- "2: movq (%0), %%mm0\n" +- " movq 8(%0), %%mm1\n" +- " movq 16(%0), %%mm2\n" +- " movq 24(%0), %%mm3\n" +- " movq %%mm0, (%1)\n" +- " movq %%mm1, 8(%1)\n" +- " movq %%mm2, 16(%1)\n" +- " movq %%mm3, 24(%1)\n" +- " movq 32(%0), %%mm0\n" +- " movq 40(%0), %%mm1\n" +- " movq 48(%0), %%mm2\n" +- " movq 56(%0), %%mm3\n" +- " movq %%mm0, 32(%1)\n" +- " movq %%mm1, 40(%1)\n" +- " movq %%mm2, 48(%1)\n" +- " movq %%mm3, 56(%1)\n" ++ "1: prefetch 320(%1)\n" ++ "2: movq (%1), %%mm0\n" ++ " movq 8(%1), %%mm1\n" ++ " movq 16(%1), %%mm2\n" ++ " movq 24(%1), %%mm3\n" ++ " movq %%mm0, (%2)\n" ++ " movq %%mm1, 8(%2)\n" ++ " movq %%mm2, 16(%2)\n" ++ " movq %%mm3, 24(%2)\n" ++ " movq 32(%1), %%mm0\n" ++ " movq 40(%1), %%mm1\n" ++ " movq 48(%1), %%mm2\n" ++ " movq 56(%1), %%mm3\n" ++ " movq %%mm0, 32(%2)\n" ++ " movq %%mm1, 40(%2)\n" ++ " movq %%mm2, 48(%2)\n" ++ " movq %%mm3, 56(%2)\n" + ".section .fixup, \"ax\"\n" +- "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ ++ "3:\n" ++ 
++#ifdef CONFIG_PAX_KERNEXEC ++ " movl %%cr0, %0\n" ++ " movl %0, %%eax\n" ++ " andl $0xFFFEFFFF, %%eax\n" ++ " movl %%eax, %%cr0\n" ++#endif ++ ++ " movw $0x05EB, 1b\n" /* jmp on 5 bytes */ ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ " movl %0, %%cr0\n" ++#endif ++ + " jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) +- : : "r" (from), "r" (to) : "memory"); ++ : "=&r" (cr0) : "r" (from), "r" (to) : "memory", "ax"); + + from += 64; + to += 64; +@@ -158,6 +187,7 @@ static void fast_clear_page(void *page) + static void fast_copy_page(void *to, void *from) + { + int i; ++ unsigned long cr0; + + kernel_fpu_begin(); + +@@ -166,42 +196,70 @@ static void fast_copy_page(void *to, voi + * but that is for later. -AV + */ + __asm__ __volatile__( +- "1: prefetch (%0)\n" +- " prefetch 64(%0)\n" +- " prefetch 128(%0)\n" +- " prefetch 192(%0)\n" +- " prefetch 256(%0)\n" ++ "1: prefetch (%1)\n" ++ " prefetch 64(%1)\n" ++ " prefetch 128(%1)\n" ++ " prefetch 192(%1)\n" ++ " prefetch 256(%1)\n" + "2: \n" + ".section .fixup, \"ax\"\n" +- "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ ++ "3: \n" ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ " movl %%cr0, %0\n" ++ " movl %0, %%eax\n" ++ " andl $0xFFFEFFFF, %%eax\n" ++ " movl %%eax, %%cr0\n" ++#endif ++ ++ " movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ " movl %0, %%cr0\n" ++#endif ++ + " jmp 2b\n" + ".previous\n" +- _ASM_EXTABLE(1b, 3b) : : "r" (from)); ++ _ASM_EXTABLE(1b, 3b) : "=&r" (cr0) : "r" (from) : "ax"); + + for (i = 0; i < (4096-320)/64; i++) { + __asm__ __volatile__ ( +- "1: prefetch 320(%0)\n" +- "2: movq (%0), %%mm0\n" +- " movntq %%mm0, (%1)\n" +- " movq 8(%0), %%mm1\n" +- " movntq %%mm1, 8(%1)\n" +- " movq 16(%0), %%mm2\n" +- " movntq %%mm2, 16(%1)\n" +- " movq 24(%0), %%mm3\n" +- " movntq %%mm3, 24(%1)\n" +- " movq 32(%0), %%mm4\n" +- " movntq %%mm4, 32(%1)\n" +- " movq 40(%0), %%mm5\n" +- " movntq %%mm5, 40(%1)\n" +- " movq 48(%0), %%mm6\n" +- " movntq %%mm6, 48(%1)\n" +- " movq 56(%0), %%mm7\n" +- " movntq 
%%mm7, 56(%1)\n" ++ "1: prefetch 320(%1)\n" ++ "2: movq (%1), %%mm0\n" ++ " movntq %%mm0, (%2)\n" ++ " movq 8(%1), %%mm1\n" ++ " movntq %%mm1, 8(%2)\n" ++ " movq 16(%1), %%mm2\n" ++ " movntq %%mm2, 16(%2)\n" ++ " movq 24(%1), %%mm3\n" ++ " movntq %%mm3, 24(%2)\n" ++ " movq 32(%1), %%mm4\n" ++ " movntq %%mm4, 32(%2)\n" ++ " movq 40(%1), %%mm5\n" ++ " movntq %%mm5, 40(%2)\n" ++ " movq 48(%1), %%mm6\n" ++ " movntq %%mm6, 48(%2)\n" ++ " movq 56(%1), %%mm7\n" ++ " movntq %%mm7, 56(%2)\n" + ".section .fixup, \"ax\"\n" +- "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ ++ "3:\n" ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ " movl %%cr0, %0\n" ++ " movl %0, %%eax\n" ++ " andl $0xFFFEFFFF, %%eax\n" ++ " movl %%eax, %%cr0\n" ++#endif ++ ++ " movw $0x05EB, 1b\n" /* jmp on 5 bytes */ ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ " movl %0, %%cr0\n" ++#endif ++ + " jmp 2b\n" + ".previous\n" +- _ASM_EXTABLE(1b, 3b) : : "r" (from), "r" (to) : "memory"); ++ _ASM_EXTABLE(1b, 3b) : "=&r" (cr0) : "r" (from), "r" (to) : "memory", "ax"); + + from += 64; + to += 64; +@@ -280,47 +338,76 @@ static void fast_clear_page(void *page) + static void fast_copy_page(void *to, void *from) + { + int i; ++ unsigned long cr0; + + kernel_fpu_begin(); + + __asm__ __volatile__ ( +- "1: prefetch (%0)\n" +- " prefetch 64(%0)\n" +- " prefetch 128(%0)\n" +- " prefetch 192(%0)\n" +- " prefetch 256(%0)\n" ++ "1: prefetch (%1)\n" ++ " prefetch 64(%1)\n" ++ " prefetch 128(%1)\n" ++ " prefetch 192(%1)\n" ++ " prefetch 256(%1)\n" + "2: \n" + ".section .fixup, \"ax\"\n" +- "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ ++ "3: \n" ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ " movl %%cr0, %0\n" ++ " movl %0, %%eax\n" ++ " andl $0xFFFEFFFF, %%eax\n" ++ " movl %%eax, %%cr0\n" ++#endif ++ ++ " movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ " movl %0, %%cr0\n" ++#endif ++ + " jmp 2b\n" + ".previous\n" +- _ASM_EXTABLE(1b, 3b) : : "r" (from)); ++ _ASM_EXTABLE(1b, 3b) : "=&r" (cr0) : "r" (from) : "ax"); + + for (i = 0; i < 
4096/64; i++) { + __asm__ __volatile__ ( +- "1: prefetch 320(%0)\n" +- "2: movq (%0), %%mm0\n" +- " movq 8(%0), %%mm1\n" +- " movq 16(%0), %%mm2\n" +- " movq 24(%0), %%mm3\n" +- " movq %%mm0, (%1)\n" +- " movq %%mm1, 8(%1)\n" +- " movq %%mm2, 16(%1)\n" +- " movq %%mm3, 24(%1)\n" +- " movq 32(%0), %%mm0\n" +- " movq 40(%0), %%mm1\n" +- " movq 48(%0), %%mm2\n" +- " movq 56(%0), %%mm3\n" +- " movq %%mm0, 32(%1)\n" +- " movq %%mm1, 40(%1)\n" +- " movq %%mm2, 48(%1)\n" +- " movq %%mm3, 56(%1)\n" ++ "1: prefetch 320(%1)\n" ++ "2: movq (%1), %%mm0\n" ++ " movq 8(%1), %%mm1\n" ++ " movq 16(%1), %%mm2\n" ++ " movq 24(%1), %%mm3\n" ++ " movq %%mm0, (%2)\n" ++ " movq %%mm1, 8(%2)\n" ++ " movq %%mm2, 16(%2)\n" ++ " movq %%mm3, 24(%2)\n" ++ " movq 32(%1), %%mm0\n" ++ " movq 40(%1), %%mm1\n" ++ " movq 48(%1), %%mm2\n" ++ " movq 56(%1), %%mm3\n" ++ " movq %%mm0, 32(%2)\n" ++ " movq %%mm1, 40(%2)\n" ++ " movq %%mm2, 48(%2)\n" ++ " movq %%mm3, 56(%2)\n" + ".section .fixup, \"ax\"\n" +- "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ ++ "3:\n" ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ " movl %%cr0, %0\n" ++ " movl %0, %%eax\n" ++ " andl $0xFFFEFFFF, %%eax\n" ++ " movl %%eax, %%cr0\n" ++#endif ++ ++ " movw $0x05EB, 1b\n" /* jmp on 5 bytes */ ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ " movl %0, %%cr0\n" ++#endif ++ + " jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) +- : : "r" (from), "r" (to) : "memory"); ++ : "=&r" (cr0) : "r" (from), "r" (to) : "memory", "ax"); + + from += 64; + to += 64; +diff -urNp linux-2.6.29/arch/x86/lib/putuser.S linux-2.6.29/arch/x86/lib/putuser.S +--- linux-2.6.29/arch/x86/lib/putuser.S 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/lib/putuser.S 2009-03-28 14:26:19.000000000 -0400 +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + + + /* +@@ -39,7 +40,19 @@ ENTRY(__put_user_1) + ENTER + cmp TI_addr_limit(%_ASM_BX),%_ASM_CX + jae bad_put_user ++ ++#ifdef CONFIG_X86_32 ++ pushl $(__USER_DS) ++ popl %ds ++#endif ++ + 1: movb %al,(%_ASM_CX) ++ 
++#ifdef CONFIG_X86_32 ++ pushl %ss ++ popl %ds ++#endif ++ + xor %eax,%eax + EXIT + ENDPROC(__put_user_1) +@@ -50,7 +63,19 @@ ENTRY(__put_user_2) + sub $1,%_ASM_BX + cmp %_ASM_BX,%_ASM_CX + jae bad_put_user ++ ++#ifdef CONFIG_X86_32 ++ pushl $(__USER_DS) ++ popl %ds ++#endif ++ + 2: movw %ax,(%_ASM_CX) ++ ++#ifdef CONFIG_X86_32 ++ pushl %ss ++ popl %ds ++#endif ++ + xor %eax,%eax + EXIT + ENDPROC(__put_user_2) +@@ -61,7 +86,19 @@ ENTRY(__put_user_4) + sub $3,%_ASM_BX + cmp %_ASM_BX,%_ASM_CX + jae bad_put_user ++ ++#ifdef CONFIG_X86_32 ++ pushl $(__USER_DS) ++ popl %ds ++#endif ++ + 3: movl %eax,(%_ASM_CX) ++ ++#ifdef CONFIG_X86_32 ++ pushl %ss ++ popl %ds ++#endif ++ + xor %eax,%eax + EXIT + ENDPROC(__put_user_4) +@@ -72,16 +109,34 @@ ENTRY(__put_user_8) + sub $7,%_ASM_BX + cmp %_ASM_BX,%_ASM_CX + jae bad_put_user ++ ++#ifdef CONFIG_X86_32 ++ pushl $(__USER_DS) ++ popl %ds ++#endif ++ + 4: mov %_ASM_AX,(%_ASM_CX) + #ifdef CONFIG_X86_32 + 5: movl %edx,4(%_ASM_CX) + #endif ++ ++#ifdef CONFIG_X86_32 ++ pushl %ss ++ popl %ds ++#endif ++ + xor %eax,%eax + EXIT + ENDPROC(__put_user_8) + + bad_put_user: + CFI_STARTPROC ++ ++#ifdef CONFIG_X86_32 ++ pushl %ss ++ popl %ds ++#endif ++ + movl $-EFAULT,%eax + EXIT + END(bad_put_user) +diff -urNp linux-2.6.29/arch/x86/lib/usercopy_32.c linux-2.6.29/arch/x86/lib/usercopy_32.c +--- linux-2.6.29/arch/x86/lib/usercopy_32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/lib/usercopy_32.c 2009-03-28 14:26:19.000000000 -0400 +@@ -36,31 +36,38 @@ static inline int __movsl_is_ok(unsigned + * Copy a null terminated string from userspace. 
+ */ + +-#define __do_strncpy_from_user(dst, src, count, res) \ +-do { \ +- int __d0, __d1, __d2; \ +- might_fault(); \ +- __asm__ __volatile__( \ +- " testl %1,%1\n" \ +- " jz 2f\n" \ +- "0: lodsb\n" \ +- " stosb\n" \ +- " testb %%al,%%al\n" \ +- " jz 1f\n" \ +- " decl %1\n" \ +- " jnz 0b\n" \ +- "1: subl %1,%0\n" \ +- "2:\n" \ +- ".section .fixup,\"ax\"\n" \ +- "3: movl %5,%0\n" \ +- " jmp 2b\n" \ +- ".previous\n" \ +- _ASM_EXTABLE(0b,3b) \ +- : "=&d"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \ +- "=&D" (__d2) \ +- : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ +- : "memory"); \ +-} while (0) ++static long __do_strncpy_from_user(char *dst, const char __user *src, long count) ++{ ++ int __d0, __d1, __d2; ++ long res = -EFAULT; ++ ++ might_fault(); ++ __asm__ __volatile__( ++ " movw %w10,%%ds\n" ++ " testl %1,%1\n" ++ " jz 2f\n" ++ "0: lodsb\n" ++ " stosb\n" ++ " testb %%al,%%al\n" ++ " jz 1f\n" ++ " decl %1\n" ++ " jnz 0b\n" ++ "1: subl %1,%0\n" ++ "2:\n" ++ " pushl %%ss\n" ++ " popl %%ds\n" ++ ".section .fixup,\"ax\"\n" ++ "3: movl %5,%0\n" ++ " jmp 2b\n" ++ ".previous\n" ++ _ASM_EXTABLE(0b,3b) ++ : "=&d"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), ++ "=&D" (__d2) ++ : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst), ++ "r"(__USER_DS) ++ : "memory"); ++ return res; ++} + + /** + * __strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking. 
+@@ -85,9 +92,7 @@ do { \ + long + __strncpy_from_user(char *dst, const char __user *src, long count) + { +- long res; +- __do_strncpy_from_user(dst, src, count, res); +- return res; ++ return __do_strncpy_from_user(dst, src, count); + } + EXPORT_SYMBOL(__strncpy_from_user); + +@@ -114,7 +119,7 @@ strncpy_from_user(char *dst, const char + { + long res = -EFAULT; + if (access_ok(VERIFY_READ, src, 1)) +- __do_strncpy_from_user(dst, src, count, res); ++ res = __do_strncpy_from_user(dst, src, count); + return res; + } + EXPORT_SYMBOL(strncpy_from_user); +@@ -123,24 +128,30 @@ EXPORT_SYMBOL(strncpy_from_user); + * Zero Userspace + */ + +-#define __do_clear_user(addr,size) \ +-do { \ +- int __d0; \ +- might_fault(); \ +- __asm__ __volatile__( \ +- "0: rep; stosl\n" \ +- " movl %2,%0\n" \ +- "1: rep; stosb\n" \ +- "2:\n" \ +- ".section .fixup,\"ax\"\n" \ +- "3: lea 0(%2,%0,4),%0\n" \ +- " jmp 2b\n" \ +- ".previous\n" \ +- _ASM_EXTABLE(0b,3b) \ +- _ASM_EXTABLE(1b,2b) \ +- : "=&c"(size), "=&D" (__d0) \ +- : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \ +-} while (0) ++static unsigned long __do_clear_user(void __user *addr, unsigned long size) ++{ ++ int __d0; ++ ++ might_fault(); ++ __asm__ __volatile__( ++ " movw %w6,%%es\n" ++ "0: rep; stosl\n" ++ " movl %2,%0\n" ++ "1: rep; stosb\n" ++ "2:\n" ++ " pushl %%ss\n" ++ " popl %%es\n" ++ ".section .fixup,\"ax\"\n" ++ "3: lea 0(%2,%0,4),%0\n" ++ " jmp 2b\n" ++ ".previous\n" ++ _ASM_EXTABLE(0b,3b) ++ _ASM_EXTABLE(1b,2b) ++ : "=&c"(size), "=&D" (__d0) ++ : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0), ++ "r"(__USER_DS)); ++ return size; ++} + + /** + * clear_user: - Zero a block of memory in user space. 
+@@ -157,7 +168,7 @@ clear_user(void __user *to, unsigned lon + { + might_fault(); + if (access_ok(VERIFY_WRITE, to, n)) +- __do_clear_user(to, n); ++ n = __do_clear_user(to, n); + return n; + } + EXPORT_SYMBOL(clear_user); +@@ -176,8 +187,7 @@ EXPORT_SYMBOL(clear_user); + unsigned long + __clear_user(void __user *to, unsigned long n) + { +- __do_clear_user(to, n); +- return n; ++ return __do_clear_user(to, n); + } + EXPORT_SYMBOL(__clear_user); + +@@ -200,14 +210,17 @@ long strnlen_user(const char __user *s, + might_fault(); + + __asm__ __volatile__( ++ " movw %w8,%%es\n" + " testl %0, %0\n" + " jz 3f\n" +- " andl %0,%%ecx\n" ++ " movl %0,%%ecx\n" + "0: repne; scasb\n" + " setne %%al\n" + " subl %%ecx,%0\n" + " addl %0,%%eax\n" + "1:\n" ++ " pushl %%ss\n" ++ " popl %%es\n" + ".section .fixup,\"ax\"\n" + "2: xorl %%eax,%%eax\n" + " jmp 1b\n" +@@ -219,7 +232,7 @@ long strnlen_user(const char __user *s, + " .long 0b,2b\n" + ".previous" + :"=&r" (n), "=&D" (s), "=&a" (res), "=&c" (tmp) +- :"0" (n), "1" (s), "2" (0), "3" (mask) ++ :"0" (n), "1" (s), "2" (0), "3" (mask), "r" (__USER_DS) + :"cc"); + return res & mask; + } +@@ -227,10 +240,11 @@ EXPORT_SYMBOL(strnlen_user); + + #ifdef CONFIG_X86_INTEL_USERCOPY + static unsigned long +-__copy_user_intel(void __user *to, const void *from, unsigned long size) ++__generic_copy_to_user_intel(void __user *to, const void *from, unsigned long size) + { + int d0, d1; + __asm__ __volatile__( ++ " movw %w6, %%es\n" + " .align 2,0x90\n" + "1: movl 32(%4), %%eax\n" + " cmpl $67, %0\n" +@@ -239,36 +253,36 @@ __copy_user_intel(void __user *to, const + " .align 2,0x90\n" + "3: movl 0(%4), %%eax\n" + "4: movl 4(%4), %%edx\n" +- "5: movl %%eax, 0(%3)\n" +- "6: movl %%edx, 4(%3)\n" ++ "5: movl %%eax, %%es:0(%3)\n" ++ "6: movl %%edx, %%es:4(%3)\n" + "7: movl 8(%4), %%eax\n" + "8: movl 12(%4),%%edx\n" +- "9: movl %%eax, 8(%3)\n" +- "10: movl %%edx, 12(%3)\n" ++ "9: movl %%eax, %%es:8(%3)\n" ++ "10: movl %%edx, %%es:12(%3)\n" + "11: movl 
16(%4), %%eax\n" + "12: movl 20(%4), %%edx\n" +- "13: movl %%eax, 16(%3)\n" +- "14: movl %%edx, 20(%3)\n" ++ "13: movl %%eax, %%es:16(%3)\n" ++ "14: movl %%edx, %%es:20(%3)\n" + "15: movl 24(%4), %%eax\n" + "16: movl 28(%4), %%edx\n" +- "17: movl %%eax, 24(%3)\n" +- "18: movl %%edx, 28(%3)\n" ++ "17: movl %%eax, %%es:24(%3)\n" ++ "18: movl %%edx, %%es:28(%3)\n" + "19: movl 32(%4), %%eax\n" + "20: movl 36(%4), %%edx\n" +- "21: movl %%eax, 32(%3)\n" +- "22: movl %%edx, 36(%3)\n" ++ "21: movl %%eax, %%es:32(%3)\n" ++ "22: movl %%edx, %%es:36(%3)\n" + "23: movl 40(%4), %%eax\n" + "24: movl 44(%4), %%edx\n" +- "25: movl %%eax, 40(%3)\n" +- "26: movl %%edx, 44(%3)\n" ++ "25: movl %%eax, %%es:40(%3)\n" ++ "26: movl %%edx, %%es:44(%3)\n" + "27: movl 48(%4), %%eax\n" + "28: movl 52(%4), %%edx\n" +- "29: movl %%eax, 48(%3)\n" +- "30: movl %%edx, 52(%3)\n" ++ "29: movl %%eax, %%es:48(%3)\n" ++ "30: movl %%edx, %%es:52(%3)\n" + "31: movl 56(%4), %%eax\n" + "32: movl 60(%4), %%edx\n" +- "33: movl %%eax, 56(%3)\n" +- "34: movl %%edx, 60(%3)\n" ++ "33: movl %%eax, %%es:56(%3)\n" ++ "34: movl %%edx, %%es:60(%3)\n" + " addl $-64, %0\n" + " addl $64, %4\n" + " addl $64, %3\n" +@@ -282,6 +296,8 @@ __copy_user_intel(void __user *to, const + "36: movl %%eax, %0\n" + "37: rep; movsb\n" + "100:\n" ++ " pushl %%ss\n" ++ " popl %%es\n" + ".section .fixup,\"ax\"\n" + "101: lea 0(%%eax,%0,4),%0\n" + " jmp 100b\n" +@@ -328,7 +344,117 @@ __copy_user_intel(void __user *to, const + " .long 99b,101b\n" + ".previous" + : "=&c"(size), "=&D" (d0), "=&S" (d1) +- : "1"(to), "2"(from), "0"(size) ++ : "1"(to), "2"(from), "0"(size), "r"(__USER_DS) ++ : "eax", "edx", "memory"); ++ return size; ++} ++ ++static unsigned long ++__generic_copy_from_user_intel(void *to, const void __user *from, unsigned long size) ++{ ++ int d0, d1; ++ __asm__ __volatile__( ++ " movw %w6, %%ds\n" ++ " .align 2,0x90\n" ++ "1: movl 32(%4), %%eax\n" ++ " cmpl $67, %0\n" ++ " jbe 3f\n" ++ "2: movl 64(%4), %%eax\n" ++ " .align 
2,0x90\n" ++ "3: movl 0(%4), %%eax\n" ++ "4: movl 4(%4), %%edx\n" ++ "5: movl %%eax, %%es:0(%3)\n" ++ "6: movl %%edx, %%es:4(%3)\n" ++ "7: movl 8(%4), %%eax\n" ++ "8: movl 12(%4),%%edx\n" ++ "9: movl %%eax, %%es:8(%3)\n" ++ "10: movl %%edx, %%es:12(%3)\n" ++ "11: movl 16(%4), %%eax\n" ++ "12: movl 20(%4), %%edx\n" ++ "13: movl %%eax, %%es:16(%3)\n" ++ "14: movl %%edx, %%es:20(%3)\n" ++ "15: movl 24(%4), %%eax\n" ++ "16: movl 28(%4), %%edx\n" ++ "17: movl %%eax, %%es:24(%3)\n" ++ "18: movl %%edx, %%es:28(%3)\n" ++ "19: movl 32(%4), %%eax\n" ++ "20: movl 36(%4), %%edx\n" ++ "21: movl %%eax, %%es:32(%3)\n" ++ "22: movl %%edx, %%es:36(%3)\n" ++ "23: movl 40(%4), %%eax\n" ++ "24: movl 44(%4), %%edx\n" ++ "25: movl %%eax, %%es:40(%3)\n" ++ "26: movl %%edx, %%es:44(%3)\n" ++ "27: movl 48(%4), %%eax\n" ++ "28: movl 52(%4), %%edx\n" ++ "29: movl %%eax, %%es:48(%3)\n" ++ "30: movl %%edx, %%es:52(%3)\n" ++ "31: movl 56(%4), %%eax\n" ++ "32: movl 60(%4), %%edx\n" ++ "33: movl %%eax, %%es:56(%3)\n" ++ "34: movl %%edx, %%es:60(%3)\n" ++ " addl $-64, %0\n" ++ " addl $64, %4\n" ++ " addl $64, %3\n" ++ " cmpl $63, %0\n" ++ " ja 1b\n" ++ "35: movl %0, %%eax\n" ++ " shrl $2, %0\n" ++ " andl $3, %%eax\n" ++ " cld\n" ++ "99: rep; movsl\n" ++ "36: movl %%eax, %0\n" ++ "37: rep; movsb\n" ++ "100:\n" ++ " pushl %%ss\n" ++ " popl %%ds\n" ++ ".section .fixup,\"ax\"\n" ++ "101: lea 0(%%eax,%0,4),%0\n" ++ " jmp 100b\n" ++ ".previous\n" ++ ".section __ex_table,\"a\"\n" ++ " .align 4\n" ++ " .long 1b,100b\n" ++ " .long 2b,100b\n" ++ " .long 3b,100b\n" ++ " .long 4b,100b\n" ++ " .long 5b,100b\n" ++ " .long 6b,100b\n" ++ " .long 7b,100b\n" ++ " .long 8b,100b\n" ++ " .long 9b,100b\n" ++ " .long 10b,100b\n" ++ " .long 11b,100b\n" ++ " .long 12b,100b\n" ++ " .long 13b,100b\n" ++ " .long 14b,100b\n" ++ " .long 15b,100b\n" ++ " .long 16b,100b\n" ++ " .long 17b,100b\n" ++ " .long 18b,100b\n" ++ " .long 19b,100b\n" ++ " .long 20b,100b\n" ++ " .long 21b,100b\n" ++ " .long 22b,100b\n" ++ " .long 
23b,100b\n" ++ " .long 24b,100b\n" ++ " .long 25b,100b\n" ++ " .long 26b,100b\n" ++ " .long 27b,100b\n" ++ " .long 28b,100b\n" ++ " .long 29b,100b\n" ++ " .long 30b,100b\n" ++ " .long 31b,100b\n" ++ " .long 32b,100b\n" ++ " .long 33b,100b\n" ++ " .long 34b,100b\n" ++ " .long 35b,100b\n" ++ " .long 36b,100b\n" ++ " .long 37b,100b\n" ++ " .long 99b,101b\n" ++ ".previous" ++ : "=&c"(size), "=&D" (d0), "=&S" (d1) ++ : "1"(to), "2"(from), "0"(size), "r"(__USER_DS) + : "eax", "edx", "memory"); + return size; + } +@@ -338,6 +464,7 @@ __copy_user_zeroing_intel(void *to, cons + { + int d0, d1; + __asm__ __volatile__( ++ " movw %w6, %%ds\n" + " .align 2,0x90\n" + "0: movl 32(%4), %%eax\n" + " cmpl $67, %0\n" +@@ -346,36 +473,36 @@ __copy_user_zeroing_intel(void *to, cons + " .align 2,0x90\n" + "2: movl 0(%4), %%eax\n" + "21: movl 4(%4), %%edx\n" +- " movl %%eax, 0(%3)\n" +- " movl %%edx, 4(%3)\n" ++ " movl %%eax, %%es:0(%3)\n" ++ " movl %%edx, %%es:4(%3)\n" + "3: movl 8(%4), %%eax\n" + "31: movl 12(%4),%%edx\n" +- " movl %%eax, 8(%3)\n" +- " movl %%edx, 12(%3)\n" ++ " movl %%eax, %%es:8(%3)\n" ++ " movl %%edx, %%es:12(%3)\n" + "4: movl 16(%4), %%eax\n" + "41: movl 20(%4), %%edx\n" +- " movl %%eax, 16(%3)\n" +- " movl %%edx, 20(%3)\n" ++ " movl %%eax, %%es:16(%3)\n" ++ " movl %%edx, %%es:20(%3)\n" + "10: movl 24(%4), %%eax\n" + "51: movl 28(%4), %%edx\n" +- " movl %%eax, 24(%3)\n" +- " movl %%edx, 28(%3)\n" ++ " movl %%eax, %%es:24(%3)\n" ++ " movl %%edx, %%es:28(%3)\n" + "11: movl 32(%4), %%eax\n" + "61: movl 36(%4), %%edx\n" +- " movl %%eax, 32(%3)\n" +- " movl %%edx, 36(%3)\n" ++ " movl %%eax, %%es:32(%3)\n" ++ " movl %%edx, %%es:36(%3)\n" + "12: movl 40(%4), %%eax\n" + "71: movl 44(%4), %%edx\n" +- " movl %%eax, 40(%3)\n" +- " movl %%edx, 44(%3)\n" ++ " movl %%eax, %%es:40(%3)\n" ++ " movl %%edx, %%es:44(%3)\n" + "13: movl 48(%4), %%eax\n" + "81: movl 52(%4), %%edx\n" +- " movl %%eax, 48(%3)\n" +- " movl %%edx, 52(%3)\n" ++ " movl %%eax, %%es:48(%3)\n" ++ " movl %%edx, 
%%es:52(%3)\n" + "14: movl 56(%4), %%eax\n" + "91: movl 60(%4), %%edx\n" +- " movl %%eax, 56(%3)\n" +- " movl %%edx, 60(%3)\n" ++ " movl %%eax, %%es:56(%3)\n" ++ " movl %%edx, %%es:60(%3)\n" + " addl $-64, %0\n" + " addl $64, %4\n" + " addl $64, %3\n" +@@ -389,6 +516,8 @@ __copy_user_zeroing_intel(void *to, cons + " movl %%eax,%0\n" + "7: rep; movsb\n" + "8:\n" ++ " pushl %%ss\n" ++ " popl %%ds\n" + ".section .fixup,\"ax\"\n" + "9: lea 0(%%eax,%0,4),%0\n" + "16: pushl %0\n" +@@ -423,7 +552,7 @@ __copy_user_zeroing_intel(void *to, cons + " .long 7b,16b\n" + ".previous" + : "=&c"(size), "=&D" (d0), "=&S" (d1) +- : "1"(to), "2"(from), "0"(size) ++ : "1"(to), "2"(from), "0"(size), "r"(__USER_DS) + : "eax", "edx", "memory"); + return size; + } +@@ -439,6 +568,7 @@ static unsigned long __copy_user_zeroing + int d0, d1; + + __asm__ __volatile__( ++ " movw %w6, %%ds\n" + " .align 2,0x90\n" + "0: movl 32(%4), %%eax\n" + " cmpl $67, %0\n" +@@ -447,36 +577,36 @@ static unsigned long __copy_user_zeroing + " .align 2,0x90\n" + "2: movl 0(%4), %%eax\n" + "21: movl 4(%4), %%edx\n" +- " movnti %%eax, 0(%3)\n" +- " movnti %%edx, 4(%3)\n" ++ " movnti %%eax, %%es:0(%3)\n" ++ " movnti %%edx, %%es:4(%3)\n" + "3: movl 8(%4), %%eax\n" + "31: movl 12(%4),%%edx\n" +- " movnti %%eax, 8(%3)\n" +- " movnti %%edx, 12(%3)\n" ++ " movnti %%eax, %%es:8(%3)\n" ++ " movnti %%edx, %%es:12(%3)\n" + "4: movl 16(%4), %%eax\n" + "41: movl 20(%4), %%edx\n" +- " movnti %%eax, 16(%3)\n" +- " movnti %%edx, 20(%3)\n" ++ " movnti %%eax, %%es:16(%3)\n" ++ " movnti %%edx, %%es:20(%3)\n" + "10: movl 24(%4), %%eax\n" + "51: movl 28(%4), %%edx\n" +- " movnti %%eax, 24(%3)\n" +- " movnti %%edx, 28(%3)\n" ++ " movnti %%eax, %%es:24(%3)\n" ++ " movnti %%edx, %%es:28(%3)\n" + "11: movl 32(%4), %%eax\n" + "61: movl 36(%4), %%edx\n" +- " movnti %%eax, 32(%3)\n" +- " movnti %%edx, 36(%3)\n" ++ " movnti %%eax, %%es:32(%3)\n" ++ " movnti %%edx, %%es:36(%3)\n" + "12: movl 40(%4), %%eax\n" + "71: movl 44(%4), %%edx\n" +- " 
movnti %%eax, 40(%3)\n" +- " movnti %%edx, 44(%3)\n" ++ " movnti %%eax, %%es:40(%3)\n" ++ " movnti %%edx, %%es:44(%3)\n" + "13: movl 48(%4), %%eax\n" + "81: movl 52(%4), %%edx\n" +- " movnti %%eax, 48(%3)\n" +- " movnti %%edx, 52(%3)\n" ++ " movnti %%eax, %%es:48(%3)\n" ++ " movnti %%edx, %%es:52(%3)\n" + "14: movl 56(%4), %%eax\n" + "91: movl 60(%4), %%edx\n" +- " movnti %%eax, 56(%3)\n" +- " movnti %%edx, 60(%3)\n" ++ " movnti %%eax, %%es:56(%3)\n" ++ " movnti %%edx, %%es:60(%3)\n" + " addl $-64, %0\n" + " addl $64, %4\n" + " addl $64, %3\n" +@@ -491,6 +621,8 @@ static unsigned long __copy_user_zeroing + " movl %%eax,%0\n" + "7: rep; movsb\n" + "8:\n" ++ " pushl %%ss\n" ++ " popl %%ds\n" + ".section .fixup,\"ax\"\n" + "9: lea 0(%%eax,%0,4),%0\n" + "16: pushl %0\n" +@@ -525,7 +657,7 @@ static unsigned long __copy_user_zeroing + " .long 7b,16b\n" + ".previous" + : "=&c"(size), "=&D" (d0), "=&S" (d1) +- : "1"(to), "2"(from), "0"(size) ++ : "1"(to), "2"(from), "0"(size), "r"(__USER_DS) + : "eax", "edx", "memory"); + return size; + } +@@ -536,6 +668,7 @@ static unsigned long __copy_user_intel_n + int d0, d1; + + __asm__ __volatile__( ++ " movw %w6, %%ds\n" + " .align 2,0x90\n" + "0: movl 32(%4), %%eax\n" + " cmpl $67, %0\n" +@@ -544,36 +677,36 @@ static unsigned long __copy_user_intel_n + " .align 2,0x90\n" + "2: movl 0(%4), %%eax\n" + "21: movl 4(%4), %%edx\n" +- " movnti %%eax, 0(%3)\n" +- " movnti %%edx, 4(%3)\n" ++ " movnti %%eax, %%es:0(%3)\n" ++ " movnti %%edx, %%es:4(%3)\n" + "3: movl 8(%4), %%eax\n" + "31: movl 12(%4),%%edx\n" +- " movnti %%eax, 8(%3)\n" +- " movnti %%edx, 12(%3)\n" ++ " movnti %%eax, %%es:8(%3)\n" ++ " movnti %%edx, %%es:12(%3)\n" + "4: movl 16(%4), %%eax\n" + "41: movl 20(%4), %%edx\n" +- " movnti %%eax, 16(%3)\n" +- " movnti %%edx, 20(%3)\n" ++ " movnti %%eax, %%es:16(%3)\n" ++ " movnti %%edx, %%es:20(%3)\n" + "10: movl 24(%4), %%eax\n" + "51: movl 28(%4), %%edx\n" +- " movnti %%eax, 24(%3)\n" +- " movnti %%edx, 28(%3)\n" ++ " movnti %%eax, 
%%es:24(%3)\n" ++ " movnti %%edx, %%es:28(%3)\n" + "11: movl 32(%4), %%eax\n" + "61: movl 36(%4), %%edx\n" +- " movnti %%eax, 32(%3)\n" +- " movnti %%edx, 36(%3)\n" ++ " movnti %%eax, %%es:32(%3)\n" ++ " movnti %%edx, %%es:36(%3)\n" + "12: movl 40(%4), %%eax\n" + "71: movl 44(%4), %%edx\n" +- " movnti %%eax, 40(%3)\n" +- " movnti %%edx, 44(%3)\n" ++ " movnti %%eax, %%es:40(%3)\n" ++ " movnti %%edx, %%es:44(%3)\n" + "13: movl 48(%4), %%eax\n" + "81: movl 52(%4), %%edx\n" +- " movnti %%eax, 48(%3)\n" +- " movnti %%edx, 52(%3)\n" ++ " movnti %%eax, %%es:48(%3)\n" ++ " movnti %%edx, %%es:52(%3)\n" + "14: movl 56(%4), %%eax\n" + "91: movl 60(%4), %%edx\n" +- " movnti %%eax, 56(%3)\n" +- " movnti %%edx, 60(%3)\n" ++ " movnti %%eax, %%es:56(%3)\n" ++ " movnti %%edx, %%es:60(%3)\n" + " addl $-64, %0\n" + " addl $64, %4\n" + " addl $64, %3\n" +@@ -588,6 +721,8 @@ static unsigned long __copy_user_intel_n + " movl %%eax,%0\n" + "7: rep; movsb\n" + "8:\n" ++ " pushl %%ss\n" ++ " popl %%ds\n" + ".section .fixup,\"ax\"\n" + "9: lea 0(%%eax,%0,4),%0\n" + "16: jmp 8b\n" +@@ -616,7 +751,7 @@ static unsigned long __copy_user_intel_n + " .long 7b,16b\n" + ".previous" + : "=&c"(size), "=&D" (d0), "=&S" (d1) +- : "1"(to), "2"(from), "0"(size) ++ : "1"(to), "2"(from), "0"(size), "r"(__USER_DS) + : "eax", "edx", "memory"); + return size; + } +@@ -629,90 +764,146 @@ static unsigned long __copy_user_intel_n + */ + unsigned long __copy_user_zeroing_intel(void *to, const void __user *from, + unsigned long size); +-unsigned long __copy_user_intel(void __user *to, const void *from, ++unsigned long __generic_copy_to_user_intel(void __user *to, const void *from, ++ unsigned long size); ++unsigned long __generic_copy_from_user_intel(void *to, const void __user *from, + unsigned long size); + unsigned long __copy_user_zeroing_intel_nocache(void *to, + const void __user *from, unsigned long size); + #endif /* CONFIG_X86_INTEL_USERCOPY */ + + /* Generic arbitrary sized copy. 
*/ +-#define __copy_user(to, from, size) \ +-do { \ +- int __d0, __d1, __d2; \ +- __asm__ __volatile__( \ +- " cmp $7,%0\n" \ +- " jbe 1f\n" \ +- " movl %1,%0\n" \ +- " negl %0\n" \ +- " andl $7,%0\n" \ +- " subl %0,%3\n" \ +- "4: rep; movsb\n" \ +- " movl %3,%0\n" \ +- " shrl $2,%0\n" \ +- " andl $3,%3\n" \ +- " .align 2,0x90\n" \ +- "0: rep; movsl\n" \ +- " movl %3,%0\n" \ +- "1: rep; movsb\n" \ +- "2:\n" \ +- ".section .fixup,\"ax\"\n" \ +- "5: addl %3,%0\n" \ +- " jmp 2b\n" \ +- "3: lea 0(%3,%0,4),%0\n" \ +- " jmp 2b\n" \ +- ".previous\n" \ +- ".section __ex_table,\"a\"\n" \ +- " .align 4\n" \ +- " .long 4b,5b\n" \ +- " .long 0b,3b\n" \ +- " .long 1b,2b\n" \ +- ".previous" \ +- : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ +- : "3"(size), "0"(size), "1"(to), "2"(from) \ +- : "memory"); \ +-} while (0) +- +-#define __copy_user_zeroing(to, from, size) \ +-do { \ +- int __d0, __d1, __d2; \ +- __asm__ __volatile__( \ +- " cmp $7,%0\n" \ +- " jbe 1f\n" \ +- " movl %1,%0\n" \ +- " negl %0\n" \ +- " andl $7,%0\n" \ +- " subl %0,%3\n" \ +- "4: rep; movsb\n" \ +- " movl %3,%0\n" \ +- " shrl $2,%0\n" \ +- " andl $3,%3\n" \ +- " .align 2,0x90\n" \ +- "0: rep; movsl\n" \ +- " movl %3,%0\n" \ +- "1: rep; movsb\n" \ +- "2:\n" \ +- ".section .fixup,\"ax\"\n" \ +- "5: addl %3,%0\n" \ +- " jmp 6f\n" \ +- "3: lea 0(%3,%0,4),%0\n" \ +- "6: pushl %0\n" \ +- " pushl %%eax\n" \ +- " xorl %%eax,%%eax\n" \ +- " rep; stosb\n" \ +- " popl %%eax\n" \ +- " popl %0\n" \ +- " jmp 2b\n" \ +- ".previous\n" \ +- ".section __ex_table,\"a\"\n" \ +- " .align 4\n" \ +- " .long 4b,5b\n" \ +- " .long 0b,3b\n" \ +- " .long 1b,6b\n" \ +- ".previous" \ +- : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ +- : "3"(size), "0"(size), "1"(to), "2"(from) \ +- : "memory"); \ +-} while (0) ++static unsigned long ++__generic_copy_to_user(void __user *to, const void *from, unsigned long size) ++{ ++ int __d0, __d1, __d2; ++ ++ __asm__ __volatile__( ++ " movw %w8,%%es\n" ++ " cmp $7,%0\n" ++ " 
jbe 1f\n" ++ " movl %1,%0\n" ++ " negl %0\n" ++ " andl $7,%0\n" ++ " subl %0,%3\n" ++ "4: rep; movsb\n" ++ " movl %3,%0\n" ++ " shrl $2,%0\n" ++ " andl $3,%3\n" ++ " .align 2,0x90\n" ++ "0: rep; movsl\n" ++ " movl %3,%0\n" ++ "1: rep; movsb\n" ++ "2:\n" ++ " pushl %%ss\n" ++ " popl %%es\n" ++ ".section .fixup,\"ax\"\n" ++ "5: addl %3,%0\n" ++ " jmp 2b\n" ++ "3: lea 0(%3,%0,4),%0\n" ++ " jmp 2b\n" ++ ".previous\n" ++ ".section __ex_table,\"a\"\n" ++ " .align 4\n" ++ " .long 4b,5b\n" ++ " .long 0b,3b\n" ++ " .long 1b,2b\n" ++ ".previous" ++ : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) ++ : "3"(size), "0"(size), "1"(to), "2"(from), "r"(__USER_DS) ++ : "memory"); ++ return size; ++} ++ ++static unsigned long ++__generic_copy_from_user(void *to, const void __user *from, unsigned long size) ++{ ++ int __d0, __d1, __d2; ++ ++ __asm__ __volatile__( ++ " movw %w8,%%ds\n" ++ " cmp $7,%0\n" ++ " jbe 1f\n" ++ " movl %1,%0\n" ++ " negl %0\n" ++ " andl $7,%0\n" ++ " subl %0,%3\n" ++ "4: rep; movsb\n" ++ " movl %3,%0\n" ++ " shrl $2,%0\n" ++ " andl $3,%3\n" ++ " .align 2,0x90\n" ++ "0: rep; movsl\n" ++ " movl %3,%0\n" ++ "1: rep; movsb\n" ++ "2:\n" ++ " pushl %%ss\n" ++ " popl %%ds\n" ++ ".section .fixup,\"ax\"\n" ++ "5: addl %3,%0\n" ++ " jmp 2b\n" ++ "3: lea 0(%3,%0,4),%0\n" ++ " jmp 2b\n" ++ ".previous\n" ++ ".section __ex_table,\"a\"\n" ++ " .align 4\n" ++ " .long 4b,5b\n" ++ " .long 0b,3b\n" ++ " .long 1b,2b\n" ++ ".previous" ++ : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) ++ : "3"(size), "0"(size), "1"(to), "2"(from), "r"(__USER_DS) ++ : "memory"); ++ return size; ++} ++ ++static unsigned long ++__copy_user_zeroing(void *to, const void __user *from, unsigned long size) ++{ ++ int __d0, __d1, __d2; ++ ++ __asm__ __volatile__( ++ " movw %w8,%%ds\n" ++ " cmp $7,%0\n" ++ " jbe 1f\n" ++ " movl %1,%0\n" ++ " negl %0\n" ++ " andl $7,%0\n" ++ " subl %0,%3\n" ++ "4: rep; movsb\n" ++ " movl %3,%0\n" ++ " shrl $2,%0\n" ++ " andl $3,%3\n" ++ " .align 2,0x90\n" ++ "0: 
rep; movsl\n" ++ " movl %3,%0\n" ++ "1: rep; movsb\n" ++ "2:\n" ++ " pushl %%ss\n" ++ " popl %%ds\n" ++ ".section .fixup,\"ax\"\n" ++ "5: addl %3,%0\n" ++ " jmp 6f\n" ++ "3: lea 0(%3,%0,4),%0\n" ++ "6: pushl %0\n" ++ " pushl %%eax\n" ++ " xorl %%eax,%%eax\n" ++ " rep; stosb\n" ++ " popl %%eax\n" ++ " popl %0\n" ++ " jmp 2b\n" ++ ".previous\n" ++ ".section __ex_table,\"a\"\n" ++ " .align 4\n" ++ " .long 4b,5b\n" ++ " .long 0b,3b\n" ++ " .long 1b,6b\n" ++ ".previous" ++ : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) ++ : "3"(size), "0"(size), "1"(to), "2"(from), "r"(__USER_DS) ++ : "memory"); ++ return size; ++} + + unsigned long __copy_to_user_ll(void __user *to, const void *from, + unsigned long n) +@@ -775,9 +966,9 @@ survive: + } + #endif + if (movsl_is_ok(to, from, n)) +- __copy_user(to, from, n); ++ n = __generic_copy_to_user(to, from, n); + else +- n = __copy_user_intel(to, from, n); ++ n = __generic_copy_to_user_intel(to, from, n); + return n; + } + EXPORT_SYMBOL(__copy_to_user_ll); +@@ -786,7 +977,7 @@ unsigned long __copy_from_user_ll(void * + unsigned long n) + { + if (movsl_is_ok(to, from, n)) +- __copy_user_zeroing(to, from, n); ++ n = __copy_user_zeroing(to, from, n); + else + n = __copy_user_zeroing_intel(to, from, n); + return n; +@@ -797,10 +988,9 @@ unsigned long __copy_from_user_ll_nozero + unsigned long n) + { + if (movsl_is_ok(to, from, n)) +- __copy_user(to, from, n); ++ n = __generic_copy_from_user(to, from, n); + else +- n = __copy_user_intel((void __user *)to, +- (const void *)from, n); ++ n = __generic_copy_from_user_intel(to, from, n); + return n; + } + EXPORT_SYMBOL(__copy_from_user_ll_nozero); +@@ -812,9 +1002,9 @@ unsigned long __copy_from_user_ll_nocach + if (n > 64 && cpu_has_xmm2) + n = __copy_user_zeroing_intel_nocache(to, from, n); + else +- __copy_user_zeroing(to, from, n); ++ n = __copy_user_zeroing(to, from, n); + #else +- __copy_user_zeroing(to, from, n); ++ n = __copy_user_zeroing(to, from, n); + #endif + return n; + } 
+@@ -827,9 +1017,9 @@ unsigned long __copy_from_user_ll_nocach + if (n > 64 && cpu_has_xmm2) + n = __copy_user_intel_nocache(to, from, n); + else +- __copy_user(to, from, n); ++ n = __generic_copy_from_user(to, from, n); + #else +- __copy_user(to, from, n); ++ n = __generic_copy_from_user(to, from, n); + #endif + return n; + } +@@ -878,8 +1068,35 @@ copy_from_user(void *to, const void __us + { + if (access_ok(VERIFY_READ, from, n)) + n = __copy_from_user(to, from, n); +- else ++ else if ((long)n > 0) + memset(to, 0, n); + return n; + } + EXPORT_SYMBOL(copy_from_user); ++ ++#ifdef CONFIG_PAX_MEMORY_UDEREF ++void __set_fs(mm_segment_t x, int cpu) ++{ ++ unsigned long limit = x.seg; ++ struct desc_struct d; ++ ++ current_thread_info()->addr_limit = x; ++ if (likely(limit)) ++ limit = (limit - 1UL) >> PAGE_SHIFT; ++ pack_descriptor(&d, 0UL, limit, 0xF3, 0xC); ++ write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_DEFAULT_USER_DS, &d, DESCTYPE_S); ++} ++ ++void set_fs(mm_segment_t x) ++{ ++ __set_fs(x, get_cpu()); ++ put_cpu_no_resched(); ++} ++#else ++void set_fs(mm_segment_t x) ++{ ++ current_thread_info()->addr_limit = x; ++} ++#endif ++ ++EXPORT_SYMBOL(set_fs); +diff -urNp linux-2.6.29/arch/x86/mach-voyager/voyager_basic.c linux-2.6.29/arch/x86/mach-voyager/voyager_basic.c +--- linux-2.6.29/arch/x86/mach-voyager/voyager_basic.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/mach-voyager/voyager_basic.c 2009-03-28 14:26:19.000000000 -0400 +@@ -123,7 +123,7 @@ int __init voyager_memory_detect(int reg + __u8 cmos[4]; + ClickMap_t *map; + unsigned long map_addr; +- unsigned long old; ++ pte_t old; + + if (region >= CLICK_ENTRIES) { + printk("Voyager: Illegal ClickMap region %d\n", region); +@@ -138,7 +138,7 @@ int __init voyager_memory_detect(int reg + + /* steal page 0 for this */ + old = pg0[0]; +- pg0[0] = ((map_addr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT); ++ pg0[0] = __pte((map_addr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT); + local_flush_tlb(); + /* 
now clear everything out but page 0 */ + map = (ClickMap_t *) (map_addr & (~PAGE_MASK)); +diff -urNp linux-2.6.29/arch/x86/mach-voyager/voyager_smp.c linux-2.6.29/arch/x86/mach-voyager/voyager_smp.c +--- linux-2.6.29/arch/x86/mach-voyager/voyager_smp.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/mach-voyager/voyager_smp.c 2009-03-28 14:26:19.000000000 -0400 +@@ -511,6 +511,10 @@ static void __init do_boot_cpu(__u8 cpu) + __u32 *hijack_vector; + __u32 start_phys_address = setup_trampoline(); + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + /* There's a clever trick to this: The linux trampoline is + * compiled to begin at absolute location zero, so make the + * address zero but have the data segment selector compensate +@@ -530,7 +534,17 @@ static void __init do_boot_cpu(__u8 cpu) + + init_gdt(cpu); + per_cpu(current_task, cpu) = idle; +- early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ ++ early_gdt_descr.address = get_cpu_gdt_table(cpu); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + irq_ctx_init(cpu); + + /* Note: Don't modify initial ss override */ +@@ -1144,7 +1158,7 @@ void smp_local_timer_interrupt(void) + per_cpu(prof_counter, cpu); + } + +- update_process_times(user_mode_vm(get_irq_regs())); ++ update_process_times(user_mode(get_irq_regs())); + } + + if (((1 << cpu) & voyager_extended_vic_processors) == 0) +diff -urNp linux-2.6.29/arch/x86/Makefile linux-2.6.29/arch/x86/Makefile +--- linux-2.6.29/arch/x86/Makefile 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/Makefile 2009-03-28 14:26:19.000000000 -0400 +@@ -232,3 +232,12 @@ endef + CLEAN_FILES += arch/x86/boot/fdimage \ + arch/x86/boot/image.iso \ + arch/x86/boot/mtools.conf ++ ++define OLD_LD ++ ++*** ${VERSION}.${PATCHLEVEL} PaX kernels no longer build correctly with old versions of binutils. 
++*** Please upgrade your binutils to 2.18 or newer ++endef ++ ++archprepare: ++ $(if $(LDFLAGS_BUILD_ID),,$(error $(OLD_LD))) +diff -urNp linux-2.6.29/arch/x86/mm/extable.c linux-2.6.29/arch/x86/mm/extable.c +--- linux-2.6.29/arch/x86/mm/extable.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/mm/extable.c 2009-03-28 14:26:19.000000000 -0400 +@@ -1,14 +1,62 @@ + #include + #include ++#include + #include + ++/* ++ * The exception table needs to be sorted so that the binary ++ * search that we use to find entries in it works properly. ++ * This is used both for the kernel exception table and for ++ * the exception tables of modules that get loaded. ++ */ ++static int cmp_ex(const void *a, const void *b) ++{ ++ const struct exception_table_entry *x = a, *y = b; ++ ++ /* avoid overflow */ ++ if (x->insn > y->insn) ++ return 1; ++ if (x->insn < y->insn) ++ return -1; ++ return 0; ++} ++ ++static void swap_ex(void *a, void *b, int size) ++{ ++ struct exception_table_entry t, *x = a, *y = b; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ ++ t = *x; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ ++ *x = *y; ++ *y = t; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ ++} ++ ++void sort_extable(struct exception_table_entry *start, ++ struct exception_table_entry *finish) ++{ ++ sort(start, finish - start, sizeof(struct exception_table_entry), ++ cmp_ex, swap_ex); ++} + + int fixup_exception(struct pt_regs *regs) + { + const struct exception_table_entry *fixup; + + #ifdef CONFIG_PNPBIOS +- if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) { ++ if (unlikely(!v8086_mode(regs) && SEGMENT_IS_PNP_CODE(regs->cs))) { + extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp; + extern u32 pnp_bios_is_utter_crap; + pnp_bios_is_utter_crap = 1; +diff -urNp linux-2.6.29/arch/x86/mm/fault.c linux-2.6.29/arch/x86/mm/fault.c +--- linux-2.6.29/arch/x86/mm/fault.c 2009-03-23 19:12:14.000000000 -0400 ++++ 
linux-2.6.29/arch/x86/mm/fault.c 2009-03-28 14:26:19.000000000 -0400 +@@ -26,6 +26,8 @@ + #include + #include + #include ++#include ++#include + + #include + #include +@@ -67,7 +69,7 @@ static inline int notify_page_fault(stru + int ret = 0; + + /* kprobe_running() needs smp_processor_id() */ +- if (!user_mode_vm(regs)) { ++ if (!user_mode(regs)) { + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, 14)) + ret = 1; +@@ -265,6 +267,30 @@ bad: + #endif + } + ++#ifdef CONFIG_PAX_EMUTRAMP ++static int pax_handle_fetch_fault(struct pt_regs *regs); ++#endif ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++static inline pmd_t * pax_get_pmd(struct mm_struct *mm, unsigned long address) ++{ ++ pgd_t *pgd; ++ pud_t *pud; ++ pmd_t *pmd; ++ ++ pgd = pgd_offset(mm, address); ++ if (!pgd_present(*pgd)) ++ return NULL; ++ pud = pud_offset(pgd, address); ++ if (!pud_present(*pud)) ++ return NULL; ++ pmd = pmd_offset(pud, address); ++ if (!pmd_present(*pmd)) ++ return NULL; ++ return pmd; ++} ++#endif ++ + #ifdef CONFIG_X86_32 + static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) + { +@@ -351,7 +377,7 @@ static int is_errata93(struct pt_regs *r + static int is_errata100(struct pt_regs *regs, unsigned long address) + { + #ifdef CONFIG_X86_64 +- if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) && ++ if ((regs->cs == __USER32_CS || (regs->cs & SEGMENT_LDT)) && + (address >> 32)) + return 1; + #endif +@@ -386,14 +412,31 @@ static void show_fault_oops(struct pt_re + #endif + + #ifdef CONFIG_X86_PAE +- if (error_code & PF_INSTR) { ++ if (nx_enabled && (error_code & PF_INSTR)) { + unsigned int level; + pte_t *pte = lookup_address(address, &level); + + if (pte && pte_present(*pte) && !pte_exec(*pte)) + printk(KERN_CRIT "kernel tried to execute " + "NX-protected page - exploit attempt? 
" +- "(uid: %d)\n", current_uid()); ++ "(uid: %d, task: %s, pid: %d)\n", ++ current_uid(), current->comm, task_pid_nr(current)); ++ } ++#endif ++ ++#ifdef CONFIG_PAX_KERNEXEC ++#ifdef CONFIG_MODULES ++ if (init_mm.start_code <= address && address < (unsigned long)MODULES_END) ++#else ++ if (init_mm.start_code <= address && address < init_mm.end_code) ++#endif ++ { ++ if (current->signal->curr_ip) ++ printk(KERN_ERR "PAX: From %u.%u.%u.%u: %s:%d, uid/euid: %u/%u, attempted to modify kernel code\n", ++ NIPQUAD(current->signal->curr_ip), current->comm, task_pid_nr(current), current_uid(), current_euid()); ++ else ++ printk(KERN_ERR "PAX: %s:%d, uid/euid: %u/%u, attempted to modify kernel code\n", ++ current->comm, task_pid_nr(current), current_uid(), current_euid()); + } + #endif + +@@ -586,7 +629,6 @@ void __kprobes do_page_fault(struct pt_r + struct task_struct *tsk; + struct mm_struct *mm; + struct vm_area_struct *vma; +- unsigned long address; + int write, si_code; + int fault; + #ifdef CONFIG_X86_64 +@@ -594,13 +636,20 @@ void __kprobes do_page_fault(struct pt_r + int sig; + #endif + ++#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_PAGEEXEC) ++ pte_t *pte; ++ pmd_t *pmd; ++ spinlock_t *ptl; ++ unsigned char pte_mask; ++#endif ++ ++ /* get the address */ ++ const unsigned long address = read_cr2(); ++ + tsk = current; + mm = tsk->mm; + prefetchw(&mm->mmap_sem); + +- /* get the address */ +- address = read_cr2(); +- + si_code = SEGV_MAPERR; + + if (unlikely(kmmio_fault(regs, address))) +@@ -653,7 +702,7 @@ void __kprobes do_page_fault(struct pt_r + * User-mode registers count as a user access even for any + * potential system fault or CPU buglet. + */ +- if (user_mode_vm(regs)) { ++ if (user_mode(regs)) { + local_irq_enable(); + error_code |= PF_USER; + } else if (regs->flags & X86_EFLAGS_IF) +@@ -669,7 +718,7 @@ void __kprobes do_page_fault(struct pt_r + * atomic region then we must not take the fault. 
+ */ + if (unlikely(in_atomic() || !mm)) +- goto bad_area_nosemaphore; ++ goto bad_area_nopax; + + /* + * When running in the kernel we expect faults to occur only to +@@ -690,10 +739,104 @@ void __kprobes do_page_fault(struct pt_r + if (!down_read_trylock(&mm->mmap_sem)) { + if ((error_code & PF_USER) == 0 && + !search_exception_tables(regs->ip)) +- goto bad_area_nosemaphore; ++ goto bad_area_nopax; + down_read(&mm->mmap_sem); + } + ++#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_PAGEEXEC) ++ if (nx_enabled || (error_code & (PF_PROT|PF_USER)) != (PF_PROT|PF_USER) || v8086_mode(regs) || ++ !(mm->pax_flags & MF_PAX_PAGEEXEC)) ++ goto not_pax_fault; ++ ++ /* PaX: it's our fault, let's handle it if we can */ ++ ++ /* PaX: take a look at read faults before acquiring any locks */ ++ if (unlikely(!(error_code & PF_WRITE) && (regs->ip == address))) { ++ /* instruction fetch attempt from a protected page in user mode */ ++ up_read(&mm->mmap_sem); ++ ++#ifdef CONFIG_PAX_EMUTRAMP ++ switch (pax_handle_fetch_fault(regs)) { ++ case 2: ++ return; ++ } ++#endif ++ ++ pax_report_fault(regs, (void *)regs->ip, (void *)regs->sp); ++ do_group_exit(SIGKILL); ++ } ++ ++ pmd = pax_get_pmd(mm, address); ++ if (unlikely(!pmd)) ++ goto not_pax_fault; ++ ++ pte = pte_offset_map_lock(mm, pmd, address, &ptl); ++ if (unlikely(!(pte_val(*pte) & _PAGE_PRESENT) || pte_user(*pte))) { ++ pte_unmap_unlock(pte, ptl); ++ goto not_pax_fault; ++ } ++ ++ if (unlikely((error_code & PF_WRITE) && !pte_write(*pte))) { ++ /* write attempt to a protected page in user mode */ ++ pte_unmap_unlock(pte, ptl); ++ goto not_pax_fault; ++ } ++ ++#ifdef CONFIG_SMP ++ if (likely(address > get_limit(regs->cs) && cpu_isset(smp_processor_id(), mm->context.cpu_user_cs_mask))) ++#else ++ if (likely(address > get_limit(regs->cs))) ++#endif ++ { ++ set_pte(pte, pte_mkread(*pte)); ++ __flush_tlb_one(address); ++ pte_unmap_unlock(pte, ptl); ++ up_read(&mm->mmap_sem); ++ return; ++ } ++ ++ pte_mask = _PAGE_ACCESSED | 
_PAGE_USER | ((error_code & PF_WRITE) << (_PAGE_BIT_DIRTY-1)); ++ ++ /* ++ * PaX: fill DTLB with user rights and retry ++ */ ++ __asm__ __volatile__ ( ++#ifdef CONFIG_PAX_MEMORY_UDEREF ++ "movw %w4,%%es\n" ++#endif ++ "orb %2,(%1)\n" ++#if defined(CONFIG_M586) || defined(CONFIG_M586TSC) ++/* ++ * PaX: let this uncommented 'invlpg' remind us on the behaviour of Intel's ++ * (and AMD's) TLBs. namely, they do not cache PTEs that would raise *any* ++ * page fault when examined during a TLB load attempt. this is true not only ++ * for PTEs holding a non-present entry but also present entries that will ++ * raise a page fault (such as those set up by PaX, or the copy-on-write ++ * mechanism). in effect it means that we do *not* need to flush the TLBs ++ * for our target pages since their PTEs are simply not in the TLBs at all. ++ ++ * the best thing in omitting it is that we gain around 15-20% speed in the ++ * fast path of the page fault handler and can get rid of tracing since we ++ * can no longer flush unintended entries. ++ */ ++ "invlpg (%0)\n" ++#endif ++ "testb $0,%%es:(%0)\n" ++ "xorb %3,(%1)\n" ++#ifdef CONFIG_PAX_MEMORY_UDEREF ++ "pushl %%ss\n" ++ "popl %%es\n" ++#endif ++ : ++ : "r" (address), "r" (pte), "q" (pte_mask), "i" (_PAGE_USER), "r" (__USER_DS) ++ : "memory", "cc"); ++ pte_unmap_unlock(pte, ptl); ++ up_read(&mm->mmap_sem); ++ return; ++ ++not_pax_fault: ++#endif ++ + vma = find_vma(mm, address); + if (!vma) + goto bad_area; +@@ -701,16 +844,20 @@ void __kprobes do_page_fault(struct pt_r + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; +- if (error_code & PF_USER) { +- /* +- * Accessing the stack below %sp is always a bug. +- * The large cushion allows instructions like enter +- * and pusha to work. ("enter $65535,$31" pushes +- * 32 pointers and then decrements %sp by 65535.) +- */ +- if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp) +- goto bad_area; +- } ++ /* ++ * Accessing the stack below %sp is always a bug. 
++ * The large cushion allows instructions like enter ++ * and pusha to work. ("enter $65535,$31" pushes ++ * 32 pointers and then decrements %sp by 65535.) ++ */ ++ if (address + 65536 + 32 * sizeof(unsigned long) < task_pt_regs(tsk)->sp) ++ goto bad_area; ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if ((mm->pax_flags & MF_PAX_SEGMEXEC) && vma->vm_end - SEGMEXEC_TASK_SIZE - 1 < address - SEGMEXEC_TASK_SIZE - 1) ++ goto bad_area; ++#endif ++ + if (expand_stack(vma, address)) + goto bad_area; + /* +@@ -720,6 +867,8 @@ void __kprobes do_page_fault(struct pt_r + good_area: + si_code = SEGV_ACCERR; + write = 0; ++ if (nx_enabled && (error_code & PF_INSTR) && !(vma->vm_flags & VM_EXEC)) ++ goto bad_area; + switch (error_code & (PF_PROT|PF_WRITE)) { + default: /* 3: write, present */ + /* fall through */ +@@ -774,6 +923,54 @@ bad_area: + up_read(&mm->mmap_sem); + + bad_area_nosemaphore: ++ ++#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) ++ if (mm && (error_code & PF_USER)) { ++ unsigned long ip = regs->ip; ++ ++ if (v8086_mode(regs)) ++ ip = ((regs->cs & 0xffff) << 4) + (regs->ip & 0xffff); ++ ++ /* ++ * It's possible to have interrupts off here. 
++ */ ++ local_irq_enable(); ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ if ((mm->pax_flags & MF_PAX_PAGEEXEC) && ++ ((nx_enabled && (error_code & PF_INSTR)) || (!(error_code & (PF_PROT | PF_WRITE)) && regs->ip == address))) { ++ ++#ifdef CONFIG_PAX_EMUTRAMP ++ switch (pax_handle_fetch_fault(regs)) { ++ case 2: ++ return; ++ } ++#endif ++ ++ pax_report_fault(regs, (void *)regs->ip, (void *)regs->sp); ++ do_group_exit(SIGKILL); ++ } ++#endif ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if ((mm->pax_flags & MF_PAX_SEGMEXEC) && !(error_code & (PF_PROT | PF_WRITE)) && (regs->ip + SEGMEXEC_TASK_SIZE == address)) { ++ ++#ifdef CONFIG_PAX_EMUTRAMP ++ switch (pax_handle_fetch_fault(regs)) { ++ case 2: ++ return; ++ } ++#endif ++ ++ pax_report_fault(regs, (void *)regs->ip, (void *)regs->sp); ++ do_group_exit(SIGKILL); ++ } ++#endif ++ ++ } ++#endif ++ ++bad_area_nopax: + /* User mode accesses just cause a SIGSEGV */ + if (error_code & PF_USER) { + /* +@@ -852,7 +1049,7 @@ no_context: + #ifdef CONFIG_X86_32 + die("Oops", regs, error_code); + bust_spinlocks(0); +- do_exit(SIGKILL); ++ do_group_exit(SIGKILL); + #else + sig = SIGKILL; + if (__die("Oops", regs, error_code)) +@@ -935,3 +1132,174 @@ void vmalloc_sync_all(void) + } + #endif + } ++ ++#ifdef CONFIG_PAX_EMUTRAMP ++static int pax_handle_fetch_fault_32(struct pt_regs *regs) ++{ ++ int err; ++ ++ do { /* PaX: gcc trampoline emulation #1 */ ++ unsigned char mov1, mov2; ++ unsigned short jmp; ++ unsigned int addr1, addr2; ++ ++#ifdef CONFIG_X86_64 ++ if ((regs->ip + 11) >> 32) ++ break; ++#endif ++ ++ err = get_user(mov1, (unsigned char __user *)regs->ip); ++ err |= get_user(addr1, (unsigned int __user *)(regs->ip + 1)); ++ err |= get_user(mov2, (unsigned char __user *)(regs->ip + 5)); ++ err |= get_user(addr2, (unsigned int __user *)(regs->ip + 6)); ++ err |= get_user(jmp, (unsigned short __user *)(regs->ip + 10)); ++ ++ if (err) ++ break; ++ ++ if (mov1 == 0xB9 && mov2 == 0xB8 && jmp == 0xE0FF) { ++ regs->cx = addr1; ++ regs->ax = addr2; 
++ regs->ip = addr2; ++ return 2; ++ } ++ } while (0); ++ ++ do { /* PaX: gcc trampoline emulation #2 */ ++ unsigned char mov, jmp; ++ unsigned int addr1, addr2; ++ ++#ifdef CONFIG_X86_64 ++ if ((regs->ip + 9) >> 32) ++ break; ++#endif ++ ++ err = get_user(mov, (unsigned char __user *)regs->ip); ++ err |= get_user(addr1, (unsigned int __user *)(regs->ip + 1)); ++ err |= get_user(jmp, (unsigned char __user *)(regs->ip + 5)); ++ err |= get_user(addr2, (unsigned int __user *)(regs->ip + 6)); ++ ++ if (err) ++ break; ++ ++ if (mov == 0xB9 && jmp == 0xE9) { ++ regs->cx = addr1; ++ regs->ip = (unsigned int)(regs->ip + addr2 + 10); ++ return 2; ++ } ++ } while (0); ++ ++ return 1; /* PaX in action */ ++} ++ ++#ifdef CONFIG_X86_64 ++static int pax_handle_fetch_fault_64(struct pt_regs *regs) ++{ ++ int err; ++ ++ do { /* PaX: gcc trampoline emulation #1 */ ++ unsigned short mov1, mov2, jmp1; ++ unsigned char jmp2; ++ unsigned int addr1; ++ unsigned long addr2; ++ ++ err = get_user(mov1, (unsigned short __user *)regs->ip); ++ err |= get_user(addr1, (unsigned int __user *)(regs->ip + 2)); ++ err |= get_user(mov2, (unsigned short __user *)(regs->ip + 6)); ++ err |= get_user(addr2, (unsigned long __user *)(regs->ip + 8)); ++ err |= get_user(jmp1, (unsigned short __user *)(regs->ip + 16)); ++ err |= get_user(jmp2, (unsigned char __user *)(regs->ip + 18)); ++ ++ if (err) ++ break; ++ ++ if (mov1 == 0xBB41 && mov2 == 0xBA49 && jmp1 == 0xFF49 && jmp2 == 0xE3) { ++ regs->r11 = addr1; ++ regs->r10 = addr2; ++ regs->ip = addr1; ++ return 2; ++ } ++ } while (0); ++ ++ do { /* PaX: gcc trampoline emulation #2 */ ++ unsigned short mov1, mov2, jmp1; ++ unsigned char jmp2; ++ unsigned long addr1, addr2; ++ ++ err = get_user(mov1, (unsigned short __user *)regs->ip); ++ err |= get_user(addr1, (unsigned long __user *)(regs->ip + 2)); ++ err |= get_user(mov2, (unsigned short __user *)(regs->ip + 10)); ++ err |= get_user(addr2, (unsigned long __user *)(regs->ip + 12)); ++ err |= get_user(jmp1, 
(unsigned short __user *)(regs->ip + 20)); ++ err |= get_user(jmp2, (unsigned char __user *)(regs->ip + 22)); ++ ++ if (err) ++ break; ++ ++ if (mov1 == 0xBB49 && mov2 == 0xBA49 && jmp1 == 0xFF49 && jmp2 == 0xE3) { ++ regs->r11 = addr1; ++ regs->r10 = addr2; ++ regs->ip = addr1; ++ return 2; ++ } ++ } while (0); ++ ++ return 1; /* PaX in action */ ++} ++#endif ++ ++/* ++ * PaX: decide what to do with offenders (regs->ip = fault address) ++ * ++ * returns 1 when task should be killed ++ * 2 when gcc trampoline was detected ++ */ ++static int pax_handle_fetch_fault(struct pt_regs *regs) ++{ ++ if (v8086_mode(regs)) ++ return 1; ++ ++ if (!(current->mm->pax_flags & MF_PAX_EMUTRAMP)) ++ return 1; ++ ++#ifdef CONFIG_X86_32 ++ return pax_handle_fetch_fault_32(regs); ++#else ++ if (regs->cs == __USER32_CS || (regs->cs & SEGMENT_LDT)) ++ return pax_handle_fetch_fault_32(regs); ++ else ++ return pax_handle_fetch_fault_64(regs); ++#endif ++} ++#endif ++ ++#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) ++void pax_report_insns(void *pc, void *sp) ++{ ++ long i; ++ ++ printk(KERN_ERR "PAX: bytes at PC: "); ++ for (i = 0; i < 20; i++) { ++ unsigned char c; ++ if (get_user(c, (unsigned char __user *)pc+i)) ++ printk(KERN_CONT "?? "); ++ else ++ printk(KERN_CONT "%02x ", c); ++ } ++ printk("\n"); ++ ++ printk(KERN_ERR "PAX: bytes at SP-%lu: ", (unsigned long)sizeof(long)); ++ for (i = -1; i < 80 / sizeof(long); i++) { ++ unsigned long c; ++ if (get_user(c, (unsigned long __user *)sp+i)) ++#ifdef CONFIG_X86_32 ++ printk(KERN_CONT "???????? "); ++#else ++ printk(KERN_CONT "???????????????? 
"); ++#endif ++ else ++ printk(KERN_CONT "%0*lx ", 2 * (int)sizeof(long), c); ++ } ++ printk("\n"); ++} ++#endif +diff -urNp linux-2.6.29/arch/x86/mm/highmem_32.c linux-2.6.29/arch/x86/mm/highmem_32.c +--- linux-2.6.29/arch/x86/mm/highmem_32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/mm/highmem_32.c 2009-03-28 14:26:19.000000000 -0400 +@@ -74,6 +74,10 @@ void *kmap_atomic_prot(struct page *page + enum fixed_addresses idx; + unsigned long vaddr; + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + pagefault_disable(); + +@@ -85,7 +89,17 @@ void *kmap_atomic_prot(struct page *page + idx = type + KM_TYPE_NR*smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + BUG_ON(!pte_none(*(kmap_pte-idx))); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + set_pte(kmap_pte-idx, mk_pte(page, prot)); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + arch_flush_lazy_mmu_mode(); + + return (void *)vaddr; +@@ -101,15 +115,29 @@ void kunmap_atomic(void *kvaddr, enum km + unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; + enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + /* + * Force other mappings to Oops if they'll try to access this pte + * without first remap it. Keeping stale mappings around is a bad idea + * also, in case the page changes cacheability attributes or becomes + * a protected page in a hypervisor. 
+ */ +- if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) ++ if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) { ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + kpte_clear_flush(kmap_pte-idx, vaddr); +- else { ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ ++ } else { + #ifdef CONFIG_DEBUG_HIGHMEM + BUG_ON(vaddr < PAGE_OFFSET); + BUG_ON(vaddr >= (unsigned long)high_memory); +@@ -128,11 +156,25 @@ void *kmap_atomic_pfn(unsigned long pfn, + enum fixed_addresses idx; + unsigned long vaddr; + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + pagefault_disable(); + + idx = type + KM_TYPE_NR*smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot)); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + arch_flush_lazy_mmu_mode(); + + return (void*) vaddr; +diff -urNp linux-2.6.29/arch/x86/mm/hugetlbpage.c linux-2.6.29/arch/x86/mm/hugetlbpage.c +--- linux-2.6.29/arch/x86/mm/hugetlbpage.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/mm/hugetlbpage.c 2009-03-28 14:26:19.000000000 -0400 +@@ -263,13 +263,18 @@ static unsigned long hugetlb_get_unmappe + struct hstate *h = hstate_file(file); + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; +- unsigned long start_addr; ++ unsigned long start_addr, pax_task_size = TASK_SIZE; ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (mm->pax_flags & MF_PAX_SEGMEXEC) ++ pax_task_size = SEGMEXEC_TASK_SIZE; ++#endif + + if (len > mm->cached_hole_size) { +- start_addr = mm->free_area_cache; ++ start_addr = mm->free_area_cache; + } else { +- start_addr = TASK_UNMAPPED_BASE; +- mm->cached_hole_size = 0; ++ start_addr = mm->mmap_base; ++ mm->cached_hole_size = 0; + } + + full_search: +@@ -277,13 +282,13 @@ full_search: + + for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { + /* At this point: (!vma || addr < 
vma->vm_end). */ +- if (TASK_SIZE - len < addr) { ++ if (pax_task_size - len < addr) { + /* + * Start a new search - just in case we missed + * some holes. + */ +- if (start_addr != TASK_UNMAPPED_BASE) { +- start_addr = TASK_UNMAPPED_BASE; ++ if (start_addr != mm->mmap_base) { ++ start_addr = mm->mmap_base; + mm->cached_hole_size = 0; + goto full_search; + } +@@ -306,9 +311,8 @@ static unsigned long hugetlb_get_unmappe + struct hstate *h = hstate_file(file); + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma, *prev_vma; +- unsigned long base = mm->mmap_base, addr = addr0; ++ unsigned long base = mm->mmap_base, addr; + unsigned long largest_hole = mm->cached_hole_size; +- int first_time = 1; + + /* don't allow allocations above current base */ + if (mm->free_area_cache > base) +@@ -318,7 +322,7 @@ static unsigned long hugetlb_get_unmappe + largest_hole = 0; + mm->free_area_cache = base; + } +-try_again: ++ + /* make sure it can fit in the remaining address space */ + if (mm->free_area_cache < len) + goto fail; +@@ -360,22 +364,26 @@ try_again: + + fail: + /* +- * if hint left us with no space for the requested +- * mapping then try again: +- */ +- if (first_time) { +- mm->free_area_cache = base; +- largest_hole = 0; +- first_time = 0; +- goto try_again; +- } +- /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. 
+ */ +- mm->free_area_cache = TASK_UNMAPPED_BASE; ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (mm->pax_flags & MF_PAX_SEGMEXEC) ++ mm->mmap_base = SEGMEXEC_TASK_UNMAPPED_BASE; ++ else ++#endif ++ ++ mm->mmap_base = TASK_UNMAPPED_BASE; ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ if (mm->pax_flags & MF_PAX_RANDMMAP) ++ mm->mmap_base += mm->delta_mmap; ++#endif ++ ++ mm->free_area_cache = mm->mmap_base; + mm->cached_hole_size = ~0UL; + addr = hugetlb_get_unmapped_area_bottomup(file, addr0, + len, pgoff, flags); +@@ -383,6 +391,7 @@ fail: + /* + * Restore the topdown base: + */ ++ mm->mmap_base = base; + mm->free_area_cache = base; + mm->cached_hole_size = ~0UL; + +@@ -396,10 +405,17 @@ hugetlb_get_unmapped_area(struct file *f + struct hstate *h = hstate_file(file); + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; ++ unsigned long pax_task_size = TASK_SIZE; + + if (len & ~huge_page_mask(h)) + return -EINVAL; +- if (len > TASK_SIZE) ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (mm->pax_flags & MF_PAX_SEGMEXEC) ++ pax_task_size = SEGMEXEC_TASK_SIZE; ++#endif ++ ++ if (len > pax_task_size) + return -ENOMEM; + + if (flags & MAP_FIXED) { +@@ -411,7 +427,7 @@ hugetlb_get_unmapped_area(struct file *f + if (addr) { + addr = ALIGN(addr, huge_page_size(h)); + vma = find_vma(mm, addr); +- if (TASK_SIZE - len >= addr && ++ if (pax_task_size - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } +diff -urNp linux-2.6.29/arch/x86/mm/init_32.c linux-2.6.29/arch/x86/mm/init_32.c +--- linux-2.6.29/arch/x86/mm/init_32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/mm/init_32.c 2009-03-28 14:26:19.000000000 -0400 +@@ -50,6 +50,7 @@ + #include + #include + #include ++#include + + unsigned int __VMALLOC_RESERVE = 128 << 20; + +@@ -82,36 +83,6 @@ static __init void *alloc_low_page(void) + } + + /* +- * Creates a middle page table and puts a pointer to it in the +- * given global directory entry. 
This only returns the gd entry +- * in non-PAE compilation mode, since the middle layer is folded. +- */ +-static pmd_t * __init one_md_table_init(pgd_t *pgd) +-{ +- pud_t *pud; +- pmd_t *pmd_table; +- +-#ifdef CONFIG_X86_PAE +- if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { +- if (after_init_bootmem) +- pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); +- else +- pmd_table = (pmd_t *)alloc_low_page(); +- paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); +- set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); +- pud = pud_offset(pgd, 0); +- BUG_ON(pmd_table != pmd_offset(pud, 0)); +- +- return pmd_table; +- } +-#endif +- pud = pud_offset(pgd, 0); +- pmd_table = pmd_offset(pud, 0); +- +- return pmd_table; +-} +- +-/* + * Create a page table and place a pointer to it in a middle page + * directory entry: + */ +@@ -131,7 +102,11 @@ static pte_t * __init one_page_table_ini + page_table = (pte_t *)alloc_low_page(); + + paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); ++#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) ++ set_pmd(pmd, __pmd(__pa(page_table) | _KERNPG_TABLE)); ++#else + set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); ++#endif + BUG_ON(page_table != pte_offset_kernel(pmd, 0)); + } + +@@ -194,6 +169,7 @@ page_table_range_init(unsigned long star + int pgd_idx, pmd_idx; + unsigned long vaddr; + pgd_t *pgd; ++ pud_t *pud; + pmd_t *pmd; + pte_t *pte = NULL; + +@@ -203,8 +179,13 @@ page_table_range_init(unsigned long star + pgd = pgd_base + pgd_idx; + + for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { +- pmd = one_md_table_init(pgd); +- pmd = pmd + pmd_index(vaddr); ++ pud = pud_offset(pgd, vaddr); ++ pmd = pmd_offset(pud, vaddr); ++ ++#ifdef CONFIG_X86_PAE ++ paravirt_alloc_pmd(&init_mm, __pa(pmd) >> PAGE_SHIFT); ++#endif ++ + for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); + pmd++, pmd_idx++) { + pte = page_table_kmap_check(one_page_table_init(pmd), +@@ -216,11 +197,23 @@ 
page_table_range_init(unsigned long star + } + } + +-static inline int is_kernel_text(unsigned long addr) ++static inline int is_kernel_text(unsigned long start, unsigned long end) + { +- if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end) +- return 1; +- return 0; ++ unsigned long etext; ++ ++#if defined(CONFIG_MODULES) && defined(CONFIG_PAX_KERNEXEC) ++ etext = ktva_ktla((unsigned long)&MODULES_END); ++#else ++ etext = (unsigned long)&_etext; ++#endif ++ ++ if ((start > ktla_ktva(etext) || ++ end <= ktla_ktva((unsigned long)_stext)) && ++ (start > ktla_ktva((unsigned long)_einittext) || ++ end <= ktla_ktva((unsigned long)_sinittext)) && ++ (start > (unsigned long)__va(0xfffff) || end <= (unsigned long)__va(0xc0000))) ++ return 0; ++ return 1; + } + + /* +@@ -233,9 +226,10 @@ static void __init kernel_physical_mappi + unsigned long end_pfn, + int use_pse) + { +- int pgd_idx, pmd_idx, pte_ofs; ++ unsigned int pgd_idx, pmd_idx, pte_ofs; + unsigned long pfn; + pgd_t *pgd; ++ pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned pages_2m, pages_4k; +@@ -265,8 +259,13 @@ repeat: + pfn = start_pfn; + pgd_idx = pgd_index((pfn<> PAGE_SHIFT); ++#endif + + if (pfn >= end_pfn) + continue; +@@ -278,14 +277,13 @@ repeat: + #endif + for (; pmd_idx < PTRS_PER_PMD && pfn < end_pfn; + pmd++, pmd_idx++) { +- unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET; ++ unsigned long address = pfn * PAGE_SIZE + PAGE_OFFSET; + + /* + * Map with big pages if possible, otherwise + * create normal page tables: + */ + if (use_pse) { +- unsigned int addr2; + pgprot_t prot = PAGE_KERNEL_LARGE; + /* + * first pass will use the same initial +@@ -295,11 +293,7 @@ repeat: + __pgprot(PTE_IDENT_ATTR | + _PAGE_PSE); + +- addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE + +- PAGE_OFFSET + PAGE_SIZE-1; +- +- if (is_kernel_text(addr) || +- is_kernel_text(addr2)) ++ if (is_kernel_text(address, address + PMD_SIZE)) + prot = PAGE_KERNEL_LARGE_EXEC; + + pages_2m++; +@@ -316,7 +310,7 @@ repeat: + pte_ofs = 
pte_index((pfn<> PAGE_SHIFT) <= pagenr && pagenr < (ISA_END_ADDRESS >> PAGE_SHIFT)) + return 1; + if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) + return 0; +@@ -508,7 +504,7 @@ void __init native_pagetable_setup_start + + pud = pud_offset(pgd, va); + pmd = pmd_offset(pud, va); +- if (!pmd_present(*pmd)) ++ if (!pmd_present(*pmd) || pmd_huge(*pmd)) + break; + + pte = pte_offset_kernel(pmd, va); +@@ -559,9 +555,7 @@ static void __init early_ioremap_page_ta + + static void __init pagetable_init(void) + { +- pgd_t *pgd_base = swapper_pg_dir; +- +- permanent_kmaps_init(pgd_base); ++ permanent_kmaps_init(swapper_pg_dir); + } + + #ifdef CONFIG_ACPI_SLEEP +@@ -569,12 +563,12 @@ static void __init pagetable_init(void) + * ACPI suspend needs this for resume, because things like the intel-agp + * driver might have split up a kernel 4MB mapping. + */ +-char swsusp_pg_dir[PAGE_SIZE] ++pgd_t swsusp_pg_dir[PTRS_PER_PGD] + __attribute__ ((aligned(PAGE_SIZE))); + + static inline void save_pg_dir(void) + { +- memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE); ++ clone_pgd_range(swsusp_pg_dir, swapper_pg_dir, PTRS_PER_PGD); + } + #else /* !CONFIG_ACPI_SLEEP */ + static inline void save_pg_dir(void) +@@ -604,13 +598,11 @@ void zap_low_mappings(void) + + int nx_enabled; + +-pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); ++pteval_t __supported_pte_mask __read_only = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); + EXPORT_SYMBOL_GPL(__supported_pte_mask); + + #ifdef CONFIG_X86_PAE + +-static int disable_nx __initdata; +- + /* + * noexec = on|off + * +@@ -619,40 +611,33 @@ static int disable_nx __initdata; + * on Enable + * off Disable + */ ++#if !defined(CONFIG_PAX_PAGEEXEC) + static int __init noexec_setup(char *str) + { + if (!str || !strcmp(str, "on")) { +- if (cpu_has_nx) { +- __supported_pte_mask |= _PAGE_NX; +- disable_nx = 0; +- } ++ if (cpu_has_nx) ++ nx_enabled = 1; + } else { +- if (!strcmp(str, "off")) { +- disable_nx = 1; +- 
__supported_pte_mask &= ~_PAGE_NX; +- } else { ++ if (!strcmp(str, "off")) ++ nx_enabled = 0; ++ else + return -EINVAL; +- } + } + + return 0; + } + early_param("noexec", noexec_setup); ++#endif + + static void __init set_nx(void) + { +- unsigned int v[4], l, h; +- +- if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { +- cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); ++ if (!nx_enabled && cpu_has_nx) { ++ unsigned l, h; + +- if ((v[3] & (1 << 20)) && !disable_nx) { +- rdmsr(MSR_EFER, l, h); +- l |= EFER_NX; +- wrmsr(MSR_EFER, l, h); +- nx_enabled = 1; +- __supported_pte_mask |= _PAGE_NX; +- } ++ __supported_pte_mask &= ~_PAGE_NX; ++ rdmsr(MSR_EFER, l, h); ++ l &= ~EFER_NX; ++ wrmsr(MSR_EFER, l, h); + } + } + #endif +@@ -1035,7 +1020,7 @@ void __init mem_init(void) + set_highmem_pages_init(); + + codesize = (unsigned long) &_etext - (unsigned long) &_text; +- datasize = (unsigned long) &_edata - (unsigned long) &_etext; ++ datasize = (unsigned long) &_edata - (unsigned long) &_data; + initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; + + kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT); +@@ -1081,10 +1066,10 @@ void __init mem_init(void) + ((unsigned long)&__init_end - + (unsigned long)&__init_begin) >> 10, + +- (unsigned long)&_etext, (unsigned long)&_edata, +- ((unsigned long)&_edata - (unsigned long)&_etext) >> 10, ++ (unsigned long)&_data, (unsigned long)&_edata, ++ ((unsigned long)&_edata - (unsigned long)&_data) >> 10, + +- (unsigned long)&_text, (unsigned long)&_etext, ++ ktla_ktva((unsigned long)&_text), ktla_ktva((unsigned long)&_etext), + ((unsigned long)&_etext - (unsigned long)&_text) >> 10); + + /* +@@ -1227,6 +1212,46 @@ void free_init_pages(char *what, unsigne + + void free_initmem(void) + { ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ /* PaX: limit KERNEL_CS to actual size */ ++ unsigned long addr, limit; ++ struct desc_struct d; ++ int cpu; ++ pgd_t *pgd; ++ pud_t *pud; ++ pmd_t *pmd; ++ ++#ifdef CONFIG_MODULES ++ limit 
= ktva_ktla((unsigned long)&MODULES_END); ++#else ++ limit = (unsigned long)&_etext; ++#endif ++ limit = (limit - 1UL) >> PAGE_SHIFT; ++ ++ for (cpu = 0; cpu < NR_CPUS; cpu++) { ++ pack_descriptor(&d, get_desc_base(&get_cpu_gdt_table(cpu)[GDT_ENTRY_KERNEL_CS]), limit, 0x9B, 0xC); ++ write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_KERNEL_CS, &d, DESCTYPE_S); ++ } ++ ++ /* PaX: make KERNEL_CS read-only */ ++ for (addr = ktla_ktva((unsigned long)&_text); addr < (unsigned long)&_data; addr += PMD_SIZE) { ++ pgd = pgd_offset_k(addr); ++ pud = pud_offset(pgd, addr); ++ pmd = pmd_offset(pud, addr); ++ set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_RW)); ++ } ++#ifdef CONFIG_X86_PAE ++ for (addr = (unsigned long)&__init_begin; addr < (unsigned long)&__init_end; addr += PMD_SIZE) { ++ pgd = pgd_offset_k(addr); ++ pud = pud_offset(pgd, addr); ++ pmd = pmd_offset(pud, addr); ++ set_pmd(pmd, __pmd(pmd_val(*pmd) | (_PAGE_NX & __supported_pte_mask))); ++ } ++#endif ++ flush_tlb_all(); ++#endif ++ + free_init_pages("unused kernel memory", + (unsigned long)(&__init_begin), + (unsigned long)(&__init_end)); +diff -urNp linux-2.6.29/arch/x86/mm/init_64.c linux-2.6.29/arch/x86/mm/init_64.c +--- linux-2.6.29/arch/x86/mm/init_64.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/mm/init_64.c 2009-03-28 14:26:19.000000000 -0400 +@@ -175,6 +175,10 @@ set_pte_vaddr_pud(pud_t *pud_page, unsig + pmd_t *pmd; + pte_t *pte; + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + pud = pud_page + pud_index(vaddr); + if (pud_none(*pud)) { + pmd = (pmd_t *) spp_getpage(); +@@ -196,8 +200,17 @@ set_pte_vaddr_pud(pud_t *pud_page, unsig + } + + pte = pte_offset_kernel(pmd, vaddr); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + set_pte(pte, new_pte); + ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + /* + * It's enough to flush this one mapping. 
+ * (PGE mappings get flushed as well) +@@ -238,14 +251,12 @@ static void __init __init_extra_mapping( + pgd = pgd_offset_k((unsigned long)__va(phys)); + if (pgd_none(*pgd)) { + pud = (pud_t *) spp_getpage(); +- set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE | +- _PAGE_USER)); ++ set_pgd(pgd, __pgd(__pa(pud) | _PAGE_TABLE)); + } + pud = pud_offset(pgd, (unsigned long)__va(phys)); + if (pud_none(*pud)) { + pmd = (pmd_t *) spp_getpage(); +- set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | +- _PAGE_USER)); ++ set_pud(pud, __pud(__pa(pmd) | _PAGE_TABLE)); + } + pmd = pmd_offset(pud, phys); + BUG_ON(!pmd_none(*pmd)); +@@ -888,7 +899,9 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to + */ + int devmem_is_allowed(unsigned long pagenr) + { +- if (pagenr <= 256) ++ if (!pagenr) ++ return 1; ++ if ((ISA_START_ADDRESS >> PAGE_SHIFT) <= pagenr && pagenr < (ISA_END_ADDRESS >> PAGE_SHIFT)) + return 1; + if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) + return 0; +@@ -979,6 +992,39 @@ void free_init_pages(char *what, unsigne + + void free_initmem(void) + { ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long addr, end; ++ pgd_t *pgd; ++ pud_t *pud; ++ pmd_t *pmd; ++ ++ /* PaX: make kernel code/rodata read-only, rest non-executable */ ++ for (addr = __START_KERNEL_map; addr < __START_KERNEL_map + KERNEL_IMAGE_SIZE; addr += PMD_SIZE) { ++ pgd = pgd_offset_k(addr); ++ pud = pud_offset(pgd, addr); ++ pmd = pmd_offset(pud, addr); ++ if ((unsigned long)_text <= addr && addr < (unsigned long)_data) ++ set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_RW)); ++ else ++ set_pmd(pmd, __pmd(pmd_val(*pmd) | (_PAGE_NX & __supported_pte_mask))); ++ } ++ ++ addr = (unsigned long)__va(__pa(__START_KERNEL_map)); ++ end = addr + KERNEL_IMAGE_SIZE; ++ for (; addr < end; addr += PMD_SIZE) { ++ pgd = pgd_offset_k(addr); ++ pud = pud_offset(pgd, addr); ++ pmd = pmd_offset(pud, addr); ++ if ((unsigned long)__va(__pa(_text)) <= addr && addr < (unsigned long)__va(__pa(_data))) ++ set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_RW)); 
++ else ++ set_pmd(pmd, __pmd(pmd_val(*pmd) | (_PAGE_NX & __supported_pte_mask))); ++ } ++ ++ flush_tlb_all(); ++#endif ++ + free_init_pages("unused kernel memory", + (unsigned long)(&__init_begin), + (unsigned long)(&__init_end)); +@@ -1151,7 +1197,7 @@ int in_gate_area_no_task(unsigned long a + + const char *arch_vma_name(struct vm_area_struct *vma) + { +- if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) ++ if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso) + return "[vdso]"; + if (vma == &gate_vma) + return "[vsyscall]"; +diff -urNp linux-2.6.29/arch/x86/mm/ioremap.c linux-2.6.29/arch/x86/mm/ioremap.c +--- linux-2.6.29/arch/x86/mm/ioremap.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/mm/ioremap.c 2009-03-28 14:26:19.000000000 -0400 +@@ -114,8 +114,8 @@ int page_is_ram(unsigned long pagenr) + * Second special case: Some BIOSen report the PC BIOS + * area (640->1Mb) as ram even though it is not. + */ +- if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) && +- pagenr < (BIOS_END >> PAGE_SHIFT)) ++ if (pagenr >= (ISA_START_ADDRESS >> PAGE_SHIFT) && ++ pagenr < (ISA_END_ADDRESS >> PAGE_SHIFT)) + return 0; + + for (i = 0; i < e820.nr_map; i++) { +@@ -275,6 +275,8 @@ static void __iomem *__ioremap_caller(re + break; + } + ++ prot = canon_pgprot(prot); ++ + /* + * Ok, go for it.. 
+ */ +@@ -490,7 +492,7 @@ static int __init early_ioremap_debug_se + early_param("early_ioremap_debug", early_ioremap_debug_setup); + + static __initdata int after_paging_init; +-static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss; ++static __read_only pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __aligned(PAGE_SIZE); + + static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) + { +@@ -505,7 +507,11 @@ static inline pmd_t * __init early_iorem + + static inline pte_t * __init early_ioremap_pte(unsigned long addr) + { ++#ifdef CONFIG_X86_32 + return &bm_pte[pte_index(addr)]; ++#else ++ return &level1_fixmap_pgt[pte_index(addr)]; ++#endif + } + + void __init early_ioremap_init(void) +@@ -516,8 +522,10 @@ void __init early_ioremap_init(void) + printk(KERN_INFO "early_ioremap_init()\n"); + + pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); ++#ifdef CONFIG_X86_32 + memset(bm_pte, 0, sizeof(bm_pte)); + pmd_populate_kernel(&init_mm, pmd, bm_pte); ++#endif + + /* + * The boot-ioremap range spans multiple pmds, for which +diff -urNp linux-2.6.29/arch/x86/mm/mmap.c linux-2.6.29/arch/x86/mm/mmap.c +--- linux-2.6.29/arch/x86/mm/mmap.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/mm/mmap.c 2009-03-28 14:26:19.000000000 -0400 +@@ -36,7 +36,7 @@ + * Leave an at least ~128 MB hole. 
+ */ + #define MIN_GAP (128*1024*1024) +-#define MAX_GAP (TASK_SIZE/6*5) ++#define MAX_GAP (pax_task_size/6*5) + + /* + * True on X86_32 or when emulating IA32 on X86_64 +@@ -81,27 +81,40 @@ static unsigned long mmap_rnd(void) + return rnd << PAGE_SHIFT; + } + +-static unsigned long mmap_base(void) ++static unsigned long mmap_base(struct mm_struct *mm) + { + unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; ++ unsigned long pax_task_size = TASK_SIZE; ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (mm->pax_flags & MF_PAX_SEGMEXEC) ++ pax_task_size = SEGMEXEC_TASK_SIZE; ++#endif + + if (gap < MIN_GAP) + gap = MIN_GAP; + else if (gap > MAX_GAP) + gap = MAX_GAP; + +- return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd()); ++ return PAGE_ALIGN(pax_task_size - gap - mmap_rnd()); + } + + /* + * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64 + * does, but not when emulating X86_32 + */ +-static unsigned long mmap_legacy_base(void) ++static unsigned long mmap_legacy_base(struct mm_struct *mm) + { +- if (mmap_is_ia32()) ++ if (mmap_is_ia32()) { ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (mm->pax_flags & MF_PAX_SEGMEXEC) ++ return SEGMEXEC_TASK_UNMAPPED_BASE; ++ else ++#endif ++ + return TASK_UNMAPPED_BASE; +- else ++ } else + return TASK_UNMAPPED_BASE + mmap_rnd(); + } + +@@ -112,11 +125,23 @@ static unsigned long mmap_legacy_base(vo + void arch_pick_mmap_layout(struct mm_struct *mm) + { + if (mmap_is_legacy()) { +- mm->mmap_base = mmap_legacy_base(); ++ mm->mmap_base = mmap_legacy_base(mm); ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ if (mm->pax_flags & MF_PAX_RANDMMAP) ++ mm->mmap_base += mm->delta_mmap; ++#endif ++ + mm->get_unmapped_area = arch_get_unmapped_area; + mm->unmap_area = arch_unmap_area; + } else { +- mm->mmap_base = mmap_base(); ++ mm->mmap_base = mmap_base(mm); ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ if (mm->pax_flags & MF_PAX_RANDMMAP) ++ mm->mmap_base -= mm->delta_mmap + mm->delta_stack; ++#endif ++ + mm->get_unmapped_area = 
arch_get_unmapped_area_topdown; + mm->unmap_area = arch_unmap_area_topdown; + } +diff -urNp linux-2.6.29/arch/x86/mm/numa_32.c linux-2.6.29/arch/x86/mm/numa_32.c +--- linux-2.6.29/arch/x86/mm/numa_32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/mm/numa_32.c 2009-03-28 14:26:19.000000000 -0400 +@@ -98,7 +98,6 @@ unsigned long node_memmap_size_bytes(int + } + #endif + +-extern unsigned long find_max_low_pfn(void); + extern unsigned long highend_pfn, highstart_pfn; + + #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) +diff -urNp linux-2.6.29/arch/x86/mm/pageattr.c linux-2.6.29/arch/x86/mm/pageattr.c +--- linux-2.6.29/arch/x86/mm/pageattr.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/mm/pageattr.c 2009-03-28 14:26:19.000000000 -0400 +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + + /* + * The current flushing context - we pass it instead of 5 arguments: +@@ -259,7 +260,7 @@ static inline pgprot_t static_protection + * Does not cover __inittext since that is gone later on. 
On + * 64bit we do not enforce !NX on the low mapping + */ +- if (within(address, (unsigned long)_text, (unsigned long)_etext)) ++ if (within(address, ktla_ktva((unsigned long)_text), ktla_ktva((unsigned long)_etext))) + pgprot_val(forbidden) |= _PAGE_NX; + + /* +@@ -321,8 +322,20 @@ EXPORT_SYMBOL_GPL(lookup_address); + */ + static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) + { ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++ ++ pax_open_kernel(cr0); ++#endif ++ + /* change init_mm */ + set_pte_atomic(kpte, pte); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + #ifdef CONFIG_X86_32 + if (!SHARED_KERNEL_PMD) { + struct page *page; +diff -urNp linux-2.6.29/arch/x86/mm/pageattr-test.c linux-2.6.29/arch/x86/mm/pageattr-test.c +--- linux-2.6.29/arch/x86/mm/pageattr-test.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/mm/pageattr-test.c 2009-03-28 14:26:19.000000000 -0400 +@@ -36,7 +36,7 @@ enum { + + static int pte_testbit(pte_t pte) + { +- return pte_flags(pte) & _PAGE_UNUSED1; ++ return pte_flags(pte) & _PAGE_CPA_TEST; + } + + struct split_state { +diff -urNp linux-2.6.29/arch/x86/mm/pat.c linux-2.6.29/arch/x86/mm/pat.c +--- linux-2.6.29/arch/x86/mm/pat.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/mm/pat.c 2009-03-28 14:26:19.000000000 -0400 +@@ -503,7 +503,7 @@ pgprot_t phys_mem_access_prot(struct fil + return vma_prot; + } + +-#ifdef CONFIG_STRICT_DEVMEM ++#ifndef CONFIG_STRICT_DEVMEM + /* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM*/ + static inline int range_is_allowed(unsigned long pfn, unsigned long size) + { +diff -urNp linux-2.6.29/arch/x86/mm/pgtable_32.c linux-2.6.29/arch/x86/mm/pgtable_32.c +--- linux-2.6.29/arch/x86/mm/pgtable_32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/mm/pgtable_32.c 2009-03-28 14:26:19.000000000 -0400 +@@ -31,6 +31,10 @@ void set_pte_vaddr(unsigned long vaddr, + pmd_t *pmd; + pte_t *pte; + ++#ifdef 
CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + pgd = swapper_pg_dir + pgd_index(vaddr); + if (pgd_none(*pgd)) { + BUG(); +@@ -47,11 +51,20 @@ void set_pte_vaddr(unsigned long vaddr, + return; + } + pte = pte_offset_kernel(pmd, vaddr); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + if (pte_val(pteval)) + set_pte_present(&init_mm, vaddr, pte, pteval); + else + pte_clear(&init_mm, vaddr, pte); + ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + /* + * It's enough to flush this one mapping. + * (PGE mappings get flushed as well) +diff -urNp linux-2.6.29/arch/x86/oprofile/backtrace.c linux-2.6.29/arch/x86/oprofile/backtrace.c +--- linux-2.6.29/arch/x86/oprofile/backtrace.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/oprofile/backtrace.c 2009-03-28 14:26:19.000000000 -0400 +@@ -37,7 +37,7 @@ static void backtrace_address(void *data + unsigned int *depth = data; + + if ((*depth)--) +- oprofile_add_trace(addr); ++ oprofile_add_trace(ktla_ktva(addr)); + } + + static struct stacktrace_ops backtrace_ops = { +@@ -78,7 +78,7 @@ x86_backtrace(struct pt_regs * const reg + struct frame_head *head = (struct frame_head *)frame_pointer(regs); + unsigned long stack = kernel_trap_sp(regs); + +- if (!user_mode_vm(regs)) { ++ if (!user_mode(regs)) { + if (depth) + dump_trace(NULL, regs, (unsigned long *)stack, 0, + &backtrace_ops, &depth); +diff -urNp linux-2.6.29/arch/x86/oprofile/op_model_p4.c linux-2.6.29/arch/x86/oprofile/op_model_p4.c +--- linux-2.6.29/arch/x86/oprofile/op_model_p4.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/oprofile/op_model_p4.c 2009-03-28 14:26:19.000000000 -0400 +@@ -48,7 +48,7 @@ static inline void setup_num_counters(vo + #endif + } + +-static int inline addr_increment(void) ++static inline int addr_increment(void) + { + #ifdef CONFIG_SMP + return smp_num_siblings == 2 ? 
2 : 1; +diff -urNp linux-2.6.29/arch/x86/pci/common.c linux-2.6.29/arch/x86/pci/common.c +--- linux-2.6.29/arch/x86/pci/common.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/pci/common.c 2009-03-28 14:26:19.000000000 -0400 +@@ -367,7 +367,7 @@ static struct dmi_system_id __devinitdat + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL585 G2"), + }, + }, +- {} ++ { NULL, NULL, {DMI_MATCH(DMI_NONE, {0})}, NULL} + }; + + void __init dmi_check_pciprobe(void) +diff -urNp linux-2.6.29/arch/x86/pci/fixup.c linux-2.6.29/arch/x86/pci/fixup.c +--- linux-2.6.29/arch/x86/pci/fixup.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/pci/fixup.c 2009-03-28 14:26:19.000000000 -0400 +@@ -364,7 +364,7 @@ static struct dmi_system_id __devinitdat + DMI_MATCH(DMI_PRODUCT_NAME, "MS-6702E"), + }, + }, +- {} ++ { NULL, NULL, {DMI_MATCH(DMI_NONE, {0})}, NULL } + }; + + /* +@@ -435,7 +435,7 @@ static struct dmi_system_id __devinitdat + DMI_MATCH(DMI_PRODUCT_VERSION, "PSA40U"), + }, + }, +- { } ++ { NULL, NULL, {DMI_MATCH(DMI_NONE, {0})}, NULL } + }; + + static void __devinit pci_pre_fixup_toshiba_ohci1394(struct pci_dev *dev) +diff -urNp linux-2.6.29/arch/x86/pci/irq.c linux-2.6.29/arch/x86/pci/irq.c +--- linux-2.6.29/arch/x86/pci/irq.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/pci/irq.c 2009-03-28 14:26:19.000000000 -0400 +@@ -543,7 +543,7 @@ static __init int intel_router_probe(str + static struct pci_device_id __initdata pirq_440gx[] = { + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_0) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_2) }, +- { }, ++ { PCI_DEVICE(0, 0) } + }; + + /* 440GX has a proprietary PIRQ router -- don't use it */ +@@ -1145,7 +1145,7 @@ static struct dmi_system_id __initdata p + DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), + }, + }, +- { } ++ { NULL, NULL, {DMI_MATCH(DMI_NONE, {0})}, NULL } + }; + + int __init pcibios_irq_init(void) +diff -urNp linux-2.6.29/arch/x86/pci/pcbios.c 
linux-2.6.29/arch/x86/pci/pcbios.c +--- linux-2.6.29/arch/x86/pci/pcbios.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/pci/pcbios.c 2009-03-28 14:26:19.000000000 -0400 +@@ -56,50 +56,120 @@ union bios32 { + static struct { + unsigned long address; + unsigned short segment; +-} bios32_indirect = { 0, __KERNEL_CS }; ++} bios32_indirect __read_only = { 0, __PCIBIOS_CS }; + + /* + * Returns the entry point for the given service, NULL on error + */ + +-static unsigned long bios32_service(unsigned long service) ++static unsigned long __devinit bios32_service(unsigned long service) + { + unsigned char return_code; /* %al */ + unsigned long address; /* %ebx */ + unsigned long length; /* %ecx */ + unsigned long entry; /* %edx */ + unsigned long flags; ++ struct desc_struct d, *gdt; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif + + local_irq_save(flags); +- __asm__("lcall *(%%edi); cld" ++ ++ gdt = get_cpu_gdt_table(smp_processor_id()); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ ++ pack_descriptor(&d, 0UL, 0xFFFFFUL, 0x9B, 0xC); ++ write_gdt_entry(gdt, GDT_ENTRY_PCIBIOS_CS, &d, DESCTYPE_S); ++ pack_descriptor(&d, 0UL, 0xFFFFFUL, 0x93, 0xC); ++ write_gdt_entry(gdt, GDT_ENTRY_PCIBIOS_DS, &d, DESCTYPE_S); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ ++ __asm__("movw %w7, %%ds; lcall *(%%edi); push %%ss; pop %%ds; cld" + : "=a" (return_code), + "=b" (address), + "=c" (length), + "=d" (entry) + : "0" (service), + "1" (0), +- "D" (&bios32_indirect)); ++ "D" (&bios32_indirect), ++ "r"(__PCIBIOS_DS) ++ : "memory"); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ ++ gdt[GDT_ENTRY_PCIBIOS_CS].a = 0; ++ gdt[GDT_ENTRY_PCIBIOS_CS].b = 0; ++ gdt[GDT_ENTRY_PCIBIOS_DS].a = 0; ++ gdt[GDT_ENTRY_PCIBIOS_DS].b = 0; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + local_irq_restore(flags); + + switch (return_code) { +- case 0: +- return address + entry; +- case 0x80: 
/* Not present */ +- printk(KERN_WARNING "bios32_service(0x%lx): not present\n", service); +- return 0; +- default: /* Shouldn't happen */ +- printk(KERN_WARNING "bios32_service(0x%lx): returned 0x%x -- BIOS bug!\n", +- service, return_code); ++ case 0: { ++ int cpu; ++ unsigned char flags; ++ ++ printk(KERN_INFO "bios32_service: base:%08lx length:%08lx entry:%08lx\n", address, length, entry); ++ if (address >= 0xFFFF0 || length > 0x100000 - address || length <= entry) { ++ printk(KERN_WARNING "bios32_service: not valid\n"); + return 0; ++ } ++ address = address + PAGE_OFFSET; ++ length += 16UL; /* some BIOSs underreport this... */ ++ flags = 4; ++ if (length >= 64*1024*1024) { ++ length >>= PAGE_SHIFT; ++ flags |= 8; ++ } ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ ++ for (cpu = 0; cpu < NR_CPUS; cpu++) { ++ gdt = get_cpu_gdt_table(cpu); ++ pack_descriptor(&d, address, length, 0x9b, flags); ++ write_gdt_entry(gdt, GDT_ENTRY_PCIBIOS_CS, &d, DESCTYPE_S); ++ pack_descriptor(&d, address, length, 0x93, flags); ++ write_gdt_entry(gdt, GDT_ENTRY_PCIBIOS_DS, &d, DESCTYPE_S); ++ } ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ ++ return entry; ++ } ++ case 0x80: /* Not present */ ++ printk(KERN_WARNING "bios32_service(0x%lx): not present\n", service); ++ return 0; ++ default: /* Shouldn't happen */ ++ printk(KERN_WARNING "bios32_service(0x%lx): returned 0x%x -- BIOS bug!\n", ++ service, return_code); ++ return 0; + } + } + + static struct { + unsigned long address; + unsigned short segment; +-} pci_indirect = { 0, __KERNEL_CS }; ++} pci_indirect __read_only = { 0, __PCIBIOS_CS }; + +-static int pci_bios_present; ++static int pci_bios_present __read_only; + + static int __devinit check_pcibios(void) + { +@@ -108,11 +178,13 @@ static int __devinit check_pcibios(void) + unsigned long flags, pcibios_entry; + + if ((pcibios_entry = bios32_service(PCI_SERVICE))) { +- pci_indirect.address = pcibios_entry + PAGE_OFFSET; ++ 
pci_indirect.address = pcibios_entry; + + local_irq_save(flags); +- __asm__( +- "lcall *(%%edi); cld\n\t" ++ __asm__("movw %w6, %%ds\n\t" ++ "lcall *%%ss:(%%edi); cld\n\t" ++ "push %%ss\n\t" ++ "pop %%ds\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" +@@ -121,7 +193,8 @@ static int __devinit check_pcibios(void) + "=b" (ebx), + "=c" (ecx) + : "1" (PCIBIOS_PCI_BIOS_PRESENT), +- "D" (&pci_indirect) ++ "D" (&pci_indirect), ++ "r" (__PCIBIOS_DS) + : "memory"); + local_irq_restore(flags); + +@@ -165,7 +238,10 @@ static int pci_bios_read(unsigned int se + + switch (len) { + case 1: +- __asm__("lcall *(%%esi); cld\n\t" ++ __asm__("movw %w6, %%ds\n\t" ++ "lcall *%%ss:(%%esi); cld\n\t" ++ "push %%ss\n\t" ++ "pop %%ds\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" +@@ -174,7 +250,8 @@ static int pci_bios_read(unsigned int se + : "1" (PCIBIOS_READ_CONFIG_BYTE), + "b" (bx), + "D" ((long)reg), +- "S" (&pci_indirect)); ++ "S" (&pci_indirect), ++ "r" (__PCIBIOS_DS)); + /* + * Zero-extend the result beyond 8 bits, do not trust the + * BIOS having done it: +@@ -182,7 +259,10 @@ static int pci_bios_read(unsigned int se + *value &= 0xff; + break; + case 2: +- __asm__("lcall *(%%esi); cld\n\t" ++ __asm__("movw %w6, %%ds\n\t" ++ "lcall *%%ss:(%%esi); cld\n\t" ++ "push %%ss\n\t" ++ "pop %%ds\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" +@@ -191,7 +271,8 @@ static int pci_bios_read(unsigned int se + : "1" (PCIBIOS_READ_CONFIG_WORD), + "b" (bx), + "D" ((long)reg), +- "S" (&pci_indirect)); ++ "S" (&pci_indirect), ++ "r" (__PCIBIOS_DS)); + /* + * Zero-extend the result beyond 16 bits, do not trust the + * BIOS having done it: +@@ -199,7 +280,10 @@ static int pci_bios_read(unsigned int se + *value &= 0xffff; + break; + case 4: +- __asm__("lcall *(%%esi); cld\n\t" ++ __asm__("movw %w6, %%ds\n\t" ++ "lcall *%%ss:(%%esi); cld\n\t" ++ "push %%ss\n\t" ++ "pop %%ds\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" +@@ -208,7 +292,8 @@ static int pci_bios_read(unsigned int se + : "1" 
(PCIBIOS_READ_CONFIG_DWORD), + "b" (bx), + "D" ((long)reg), +- "S" (&pci_indirect)); ++ "S" (&pci_indirect), ++ "r" (__PCIBIOS_DS)); + break; + } + +@@ -231,7 +316,10 @@ static int pci_bios_write(unsigned int s + + switch (len) { + case 1: +- __asm__("lcall *(%%esi); cld\n\t" ++ __asm__("movw %w6, %%ds\n\t" ++ "lcall *%%ss:(%%esi); cld\n\t" ++ "push %%ss\n\t" ++ "pop %%ds\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" +@@ -240,10 +328,14 @@ static int pci_bios_write(unsigned int s + "c" (value), + "b" (bx), + "D" ((long)reg), +- "S" (&pci_indirect)); ++ "S" (&pci_indirect), ++ "r" (__PCIBIOS_DS)); + break; + case 2: +- __asm__("lcall *(%%esi); cld\n\t" ++ __asm__("movw %w6, %%ds\n\t" ++ "lcall *%%ss:(%%esi); cld\n\t" ++ "push %%ss\n\t" ++ "pop %%ds\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" +@@ -252,10 +344,14 @@ static int pci_bios_write(unsigned int s + "c" (value), + "b" (bx), + "D" ((long)reg), +- "S" (&pci_indirect)); ++ "S" (&pci_indirect), ++ "r" (__PCIBIOS_DS)); + break; + case 4: +- __asm__("lcall *(%%esi); cld\n\t" ++ __asm__("movw %w6, %%ds\n\t" ++ "lcall *%%ss:(%%esi); cld\n\t" ++ "push %%ss\n\t" ++ "pop %%ds\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" +@@ -264,7 +360,8 @@ static int pci_bios_write(unsigned int s + "c" (value), + "b" (bx), + "D" ((long)reg), +- "S" (&pci_indirect)); ++ "S" (&pci_indirect), ++ "r" (__PCIBIOS_DS)); + break; + } + +@@ -368,10 +465,13 @@ struct irq_routing_table * pcibios_get_i + + DBG("PCI: Fetching IRQ routing table... 
"); + __asm__("push %%es\n\t" ++ "movw %w8, %%ds\n\t" + "push %%ds\n\t" + "pop %%es\n\t" +- "lcall *(%%esi); cld\n\t" ++ "lcall *%%ss:(%%esi); cld\n\t" + "pop %%es\n\t" ++ "push %%ss\n\t" ++ "pop %%ds\n" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" +@@ -382,7 +482,8 @@ struct irq_routing_table * pcibios_get_i + "1" (0), + "D" ((long) &opt), + "S" (&pci_indirect), +- "m" (opt) ++ "m" (opt), ++ "r" (__PCIBIOS_DS) + : "memory"); + DBG("OK ret=%d, size=%d, map=%x\n", ret, opt.size, map); + if (ret & 0xff00) +@@ -406,7 +507,10 @@ int pcibios_set_irq_routing(struct pci_d + { + int ret; + +- __asm__("lcall *(%%esi); cld\n\t" ++ __asm__("movw %w5, %%ds\n\t" ++ "lcall *%%ss:(%%esi); cld\n\t" ++ "push %%ss\n\t" ++ "pop %%ds\n" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" +@@ -414,7 +518,8 @@ int pcibios_set_irq_routing(struct pci_d + : "0" (PCIBIOS_SET_PCI_HW_INT), + "b" ((dev->bus->number << 8) | dev->devfn), + "c" ((irq << 8) | (pin + 10)), +- "S" (&pci_indirect)); ++ "S" (&pci_indirect), ++ "r" (__PCIBIOS_DS)); + return !(ret & 0xff00); + } + EXPORT_SYMBOL(pcibios_set_irq_routing); +diff -urNp linux-2.6.29/arch/x86/power/cpu_32.c linux-2.6.29/arch/x86/power/cpu_32.c +--- linux-2.6.29/arch/x86/power/cpu_32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/power/cpu_32.c 2009-03-28 14:26:19.000000000 -0400 +@@ -67,7 +67,7 @@ static void do_fpu_end(void) + static void fix_processor_context(void) + { + int cpu = smp_processor_id(); +- struct tss_struct *t = &per_cpu(init_tss, cpu); ++ struct tss_struct *t = init_tss + cpu; + + set_tss_desc(cpu, t); /* + * This just modifies memory; should not be +diff -urNp linux-2.6.29/arch/x86/power/cpu_64.c linux-2.6.29/arch/x86/power/cpu_64.c +--- linux-2.6.29/arch/x86/power/cpu_64.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/power/cpu_64.c 2009-03-28 14:26:19.000000000 -0400 +@@ -143,7 +143,11 @@ void restore_processor_state(void) + static void fix_processor_context(void) + { + int cpu = smp_processor_id(); 
+- struct tss_struct *t = &per_cpu(init_tss, cpu); ++ struct tss_struct *t = init_tss + cpu; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif + + /* + * This just modifies memory; should not be necessary. But... This +@@ -152,8 +156,16 @@ static void fix_processor_context(void) + */ + set_tss_desc(cpu, t); + ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9; + ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + syscall_init(); /* This sets MSR_*STAR and related */ + load_TR_desc(); /* This does ltr */ + load_LDT(&current->active_mm->context); /* This does lldt */ +diff -urNp linux-2.6.29/arch/x86/vdso/vdso32-setup.c linux-2.6.29/arch/x86/vdso/vdso32-setup.c +--- linux-2.6.29/arch/x86/vdso/vdso32-setup.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/vdso/vdso32-setup.c 2009-03-28 14:26:19.000000000 -0400 +@@ -226,7 +226,7 @@ static inline void map_compat_vdso(int m + void enable_sep_cpu(void) + { + int cpu = get_cpu(); +- struct tss_struct *tss = &per_cpu(init_tss, cpu); ++ struct tss_struct *tss = init_tss + cpu; + + if (!boot_cpu_has(X86_FEATURE_SEP)) { + put_cpu(); +@@ -249,7 +249,7 @@ static int __init gate_vma_init(void) + gate_vma.vm_start = FIXADDR_USER_START; + gate_vma.vm_end = FIXADDR_USER_END; + gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; +- gate_vma.vm_page_prot = __P101; ++ gate_vma.vm_page_prot = vm_get_page_prot(gate_vma.vm_flags); + /* + * Make sure the vDSO gets into every core dump. 
+ * Dumping its contents makes post-mortem fully interpretable later +@@ -331,7 +331,7 @@ int arch_setup_additional_pages(struct l + if (compat) + addr = VDSO_HIGH_BASE; + else { +- addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); ++ addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, MAP_EXECUTABLE); + if (IS_ERR_VALUE(addr)) { + ret = addr; + goto up_fail; +@@ -358,7 +358,7 @@ int arch_setup_additional_pages(struct l + goto up_fail; + } + +- current->mm->context.vdso = (void *)addr; ++ current->mm->context.vdso = addr; + current_thread_info()->sysenter_return = + VDSO32_SYMBOL(addr, SYSENTER_RETURN); + +@@ -384,7 +384,7 @@ static ctl_table abi_table2[] = { + .mode = 0644, + .proc_handler = proc_dointvec + }, +- {} ++ { 0, NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL } + }; + + static ctl_table abi_root_table2[] = { +@@ -394,7 +394,7 @@ static ctl_table abi_root_table2[] = { + .mode = 0555, + .child = abi_table2 + }, +- {} ++ { 0, NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL } + }; + + static __init int ia32_binfmt_init(void) +@@ -409,8 +409,14 @@ __initcall(ia32_binfmt_init); + + const char *arch_vma_name(struct vm_area_struct *vma) + { +- if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) ++ if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso) + return "[vdso]"; ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (vma->vm_mm && vma->vm_mirror && vma->vm_mirror->vm_start == vma->vm_mm->context.vdso) ++ return "[vdso]"; ++#endif ++ + return NULL; + } + +@@ -419,7 +425,7 @@ struct vm_area_struct *get_gate_vma(stru + struct mm_struct *mm = tsk->mm; + + /* Check to see if this task was created in compat vdso mode */ +- if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE) ++ if (mm && mm->context.vdso == VDSO_HIGH_BASE) + return &gate_vma; + return NULL; + } +diff -urNp linux-2.6.29/arch/x86/vdso/vma.c linux-2.6.29/arch/x86/vdso/vma.c +--- linux-2.6.29/arch/x86/vdso/vma.c 2009-03-23 19:12:14.000000000 -0400 ++++ 
linux-2.6.29/arch/x86/vdso/vma.c 2009-03-28 14:26:19.000000000 -0400 +@@ -123,7 +123,7 @@ int arch_setup_additional_pages(struct l + if (ret) + goto up_fail; + +- current->mm->context.vdso = (void *)addr; ++ current->mm->context.vdso = addr; + up_fail: + up_write(&mm->mmap_sem); + return ret; +diff -urNp linux-2.6.29/arch/x86/xen/enlighten.c linux-2.6.29/arch/x86/xen/enlighten.c +--- linux-2.6.29/arch/x86/xen/enlighten.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/xen/enlighten.c 2009-03-28 14:26:19.000000000 -0400 +@@ -319,7 +319,7 @@ static void xen_set_ldt(const void *addr + static void xen_load_gdt(const struct desc_ptr *dtr) + { + unsigned long *frames; +- unsigned long va = dtr->address; ++ unsigned long va = (unsigned long)dtr->address; + unsigned int size = dtr->size + 1; + unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + int f; +@@ -334,7 +334,7 @@ static void xen_load_gdt(const struct de + mcs = xen_mc_entry(sizeof(*frames) * pages); + frames = mcs.args; + +- for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { ++ for (f = 0; va < (unsigned long)dtr->address + size; va += PAGE_SIZE, f++) { + frames[f] = virt_to_mfn(va); + make_lowmem_page_readonly((void *)va); + } +@@ -442,7 +442,7 @@ static void xen_write_idt_entry(gate_des + + preempt_disable(); + +- start = __get_cpu_var(idt_desc).address; ++ start = (unsigned long)__get_cpu_var(idt_desc).address; + end = start + __get_cpu_var(idt_desc).size + 1; + + xen_mc_flush(); +@@ -1528,6 +1528,8 @@ static __init pgd_t *xen_setup_kernel_pa + convert_pfn_mfn(init_level4_pgt); + convert_pfn_mfn(level3_ident_pgt); + convert_pfn_mfn(level3_kernel_pgt); ++ convert_pfn_mfn(level3_vmalloc_pgt); ++ convert_pfn_mfn(level3_vmemmap_pgt); + + l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); + l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); +@@ -1546,9 +1548,12 @@ static __init pgd_t *xen_setup_kernel_pa + set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_ident_pgt, 
PAGE_KERNEL_RO); + set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); ++ set_page_prot(level3_vmalloc_pgt, PAGE_KERNEL_RO); ++ set_page_prot(level3_vmemmap_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); + set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); + set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); ++ set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO); + + /* Pin down new L4 */ + pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, +diff -urNp linux-2.6.29/arch/x86/xen/smp.c linux-2.6.29/arch/x86/xen/smp.c +--- linux-2.6.29/arch/x86/xen/smp.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/x86/xen/smp.c 2009-03-28 14:26:19.000000000 -0400 +@@ -171,11 +171,6 @@ static void __init xen_smp_prepare_boot_ + { + BUG_ON(smp_processor_id() != 0); + native_smp_prepare_boot_cpu(); +- +- /* We've switched to the "real" per-cpu gdt, so make sure the +- old memory can be recycled */ +- make_lowmem_page_readwrite(&per_cpu_var(gdt_page)); +- + xen_setup_vcpu_info_placement(); + } + +@@ -234,8 +229,8 @@ cpu_initialize_context(unsigned int cpu, + gdt = get_cpu_gdt_table(cpu); + + ctxt->flags = VGCF_IN_KERNEL; +- ctxt->user_regs.ds = __USER_DS; +- ctxt->user_regs.es = __USER_DS; ++ ctxt->user_regs.ds = __KERNEL_DS; ++ ctxt->user_regs.es = __KERNEL_DS; + ctxt->user_regs.ss = __KERNEL_DS; + #ifdef CONFIG_X86_32 + ctxt->user_regs.fs = __KERNEL_PERCPU; +diff -urNp linux-2.6.29/arch/xtensa/include/asm/kmap_types.h linux-2.6.29/arch/xtensa/include/asm/kmap_types.h +--- linux-2.6.29/arch/xtensa/include/asm/kmap_types.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/arch/xtensa/include/asm/kmap_types.h 2009-03-28 14:26:19.000000000 -0400 +@@ -25,6 +25,7 @@ enum km_type { + KM_IRQ1, + KM_SOFTIRQ0, + KM_SOFTIRQ1, ++ KM_CLEARPAGE, + KM_TYPE_NR + }; + +diff -urNp linux-2.6.29/crypto/lrw.c linux-2.6.29/crypto/lrw.c +--- linux-2.6.29/crypto/lrw.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/crypto/lrw.c 2009-03-28 14:26:19.000000000 -0400 +@@ -60,7 
+60,7 @@ static int setkey(struct crypto_tfm *par + struct priv *ctx = crypto_tfm_ctx(parent); + struct crypto_cipher *child = ctx->child; + int err, i; +- be128 tmp = { 0 }; ++ be128 tmp = { 0, 0 }; + int bsize = crypto_cipher_blocksize(child); + + crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); +diff -urNp linux-2.6.29/Documentation/dontdiff linux-2.6.29/Documentation/dontdiff +--- linux-2.6.29/Documentation/dontdiff 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/Documentation/dontdiff 2009-03-28 14:26:19.000000000 -0400 +@@ -3,6 +3,7 @@ + *.bin + *.cpio + *.csp ++*.dbg + *.dsp + *.dvi + *.elf +@@ -49,6 +50,10 @@ + 53c700_d.h + CVS + ChangeSet ++GPATH ++GRTAGS ++GSYMS ++GTAGS + Image + Kerntypes + Module.markers +@@ -62,7 +67,6 @@ aic7*reg_print.c* + aic7*seq.h* + aicasm + aicdb.h* +-asm + asm-offsets.h + asm_offsets.h + autoconf.h* +@@ -77,6 +81,7 @@ btfixupprep + build + bvmlinux + bzImage* ++capflags.c + classlist.h* + comp*.log + compile.h* +@@ -188,12 +193,15 @@ version.h* + vmlinux + vmlinux-* + vmlinux.aout ++vmlinux.bin.all + vmlinux.lds ++vmlinux.relocs + vsyscall.lds + vsyscall_32.lds + wanxlfw.inc + uImage + unifdef ++utsrelease.h + wakeup.bin + wakeup.elf + wakeup.lds +diff -urNp linux-2.6.29/drivers/acpi/blacklist.c linux-2.6.29/drivers/acpi/blacklist.c +--- linux-2.6.29/drivers/acpi/blacklist.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/acpi/blacklist.c 2009-03-28 14:26:19.000000000 -0400 +@@ -71,7 +71,7 @@ static struct acpi_blacklist_item acpi_b + {"IBM ", "TP600E ", 0x00000105, ACPI_SIG_DSDT, less_than_or_equal, + "Incorrect _ADR", 1}, + +- {""} ++ {"", "", 0, 0, 0, all_versions, 0} + }; + + #if CONFIG_ACPI_BLACKLIST_YEAR +diff -urNp linux-2.6.29/drivers/acpi/osl.c linux-2.6.29/drivers/acpi/osl.c +--- linux-2.6.29/drivers/acpi/osl.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/acpi/osl.c 2009-03-28 14:26:19.000000000 -0400 +@@ -483,6 +483,8 @@ acpi_os_read_memory(acpi_physical_addres + void 
__iomem *virt_addr; + + virt_addr = ioremap(phys_addr, width); ++ if (!virt_addr) ++ return AE_NO_MEMORY; + if (!value) + value = &dummy; + +@@ -511,6 +513,8 @@ acpi_os_write_memory(acpi_physical_addre + void __iomem *virt_addr; + + virt_addr = ioremap(phys_addr, width); ++ if (!virt_addr) ++ return AE_NO_MEMORY; + + switch (width) { + case 8: +diff -urNp linux-2.6.29/drivers/acpi/processor_core.c linux-2.6.29/drivers/acpi/processor_core.c +--- linux-2.6.29/drivers/acpi/processor_core.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/acpi/processor_core.c 2009-03-28 14:26:19.000000000 -0400 +@@ -678,7 +678,7 @@ static int __cpuinit acpi_processor_star + return 0; + } + +- BUG_ON((pr->id >= nr_cpu_ids) || (pr->id < 0)); ++ BUG_ON(pr->id >= nr_cpu_ids); + + /* + * Buggy BIOS check +diff -urNp linux-2.6.29/drivers/acpi/processor_idle.c linux-2.6.29/drivers/acpi/processor_idle.c +--- linux-2.6.29/drivers/acpi/processor_idle.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/acpi/processor_idle.c 2009-03-28 14:26:19.000000000 -0400 +@@ -156,7 +156,7 @@ static struct dmi_system_id __cpuinitdat + DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"), + DMI_MATCH(DMI_BIOS_VERSION,"SHE845M0.86C.0013.D.0302131307")}, + (void *)2}, +- {}, ++ { NULL, NULL, {DMI_MATCH(DMI_NONE, {0})}, NULL}, + }; + + static inline u32 ticks_elapsed(u32 t1, u32 t2) +diff -urNp linux-2.6.29/drivers/ata/ahci.c linux-2.6.29/drivers/ata/ahci.c +--- linux-2.6.29/drivers/ata/ahci.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/ata/ahci.c 2009-03-28 14:26:19.000000000 -0400 +@@ -611,7 +611,7 @@ static const struct pci_device_id ahci_p + { PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, + PCI_CLASS_STORAGE_SATA_AHCI, 0xffffff, board_ahci }, + +- { } /* terminate list */ ++ { 0, 0, 0, 0, 0, 0, 0 } /* terminate list */ + }; + + +diff -urNp linux-2.6.29/drivers/ata/ata_piix.c linux-2.6.29/drivers/ata/ata_piix.c +--- linux-2.6.29/drivers/ata/ata_piix.c 2009-03-23 
19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/ata/ata_piix.c 2009-03-28 14:26:19.000000000 -0400 +@@ -291,7 +291,7 @@ static const struct pci_device_id piix_p + { 0x8086, 0x3b2d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata }, + /* SATA Controller IDE (PCH) */ + { 0x8086, 0x3b2e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata }, +- { } /* terminate list */ ++ { 0, 0, 0, 0, 0, 0, 0 } /* terminate list */ + }; + + static struct pci_driver piix_pci_driver = { +@@ -595,7 +595,7 @@ static const struct ich_laptop ich_lapto + { 0x266F, 0x1025, 0x0066 }, /* ICH6 on ACER Aspire 1694WLMi */ + { 0x2653, 0x1043, 0x82D8 }, /* ICH6M on Asus Eee 701 */ + /* end marker */ +- { 0, } ++ { 0, 0, 0 } + }; + + /** +@@ -1054,7 +1054,7 @@ static int piix_broken_suspend(void) + }, + }, + +- { } /* terminate list */ ++ { NULL, NULL, {DMI_MATCH(DMI_NONE, {0})}, NULL } /* terminate list */ + }; + static const char *oemstrs[] = { + "Tecra M3,", +diff -urNp linux-2.6.29/drivers/ata/libata-core.c linux-2.6.29/drivers/ata/libata-core.c +--- linux-2.6.29/drivers/ata/libata-core.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/ata/libata-core.c 2009-03-28 14:26:19.000000000 -0400 +@@ -889,7 +889,7 @@ static const struct ata_xfer_ent { + { ATA_SHIFT_PIO, ATA_NR_PIO_MODES, XFER_PIO_0 }, + { ATA_SHIFT_MWDMA, ATA_NR_MWDMA_MODES, XFER_MW_DMA_0 }, + { ATA_SHIFT_UDMA, ATA_NR_UDMA_MODES, XFER_UDMA_0 }, +- { -1, }, ++ { -1, 0, 0 } + }; + + /** +@@ -3105,7 +3105,7 @@ static const struct ata_timing ata_timin + { XFER_UDMA_5, 0, 0, 0, 0, 0, 0, 0, 0, 20 }, + { XFER_UDMA_6, 0, 0, 0, 0, 0, 0, 0, 0, 15 }, + +- { 0xFF } ++ { 0xFF, 0, 0, 0, 0, 0, 0, 0, 0 } + }; + + #define ENOUGH(v, unit) (((v)-1)/(unit)+1) +@@ -4267,7 +4267,7 @@ static const struct ata_blacklist_entry + { "WD My Book", NULL, ATA_HORKAGE_1_5_GBPS, }, + + /* End Marker */ +- { } ++ { NULL, NULL, 0 } + }; + + static int strn_pattern_cmp(const char *patt, const char *name, int wildchar) +diff -urNp 
linux-2.6.29/drivers/char/agp/frontend.c linux-2.6.29/drivers/char/agp/frontend.c +--- linux-2.6.29/drivers/char/agp/frontend.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/char/agp/frontend.c 2009-03-28 14:26:19.000000000 -0400 +@@ -824,7 +824,7 @@ static int agpioc_reserve_wrap(struct ag + if (copy_from_user(&reserve, arg, sizeof(struct agp_region))) + return -EFAULT; + +- if ((unsigned) reserve.seg_count >= ~0U/sizeof(struct agp_segment)) ++ if ((unsigned) reserve.seg_count >= ~0U/sizeof(struct agp_segment_priv)) + return -EFAULT; + + client = agp_find_client_by_pid(reserve.pid); +diff -urNp linux-2.6.29/drivers/char/agp/intel-agp.c linux-2.6.29/drivers/char/agp/intel-agp.c +--- linux-2.6.29/drivers/char/agp/intel-agp.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/char/agp/intel-agp.c 2009-03-28 14:26:19.000000000 -0400 +@@ -2369,7 +2369,7 @@ static struct pci_device_id agp_intel_pc + ID(PCI_DEVICE_ID_INTEL_Q45_HB), + ID(PCI_DEVICE_ID_INTEL_G45_HB), + ID(PCI_DEVICE_ID_INTEL_G41_HB), +- { } ++ { 0, 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE(pci, agp_intel_pci_table); +diff -urNp linux-2.6.29/drivers/char/hpet.c linux-2.6.29/drivers/char/hpet.c +--- linux-2.6.29/drivers/char/hpet.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/char/hpet.c 2009-03-28 14:26:19.000000000 -0400 +@@ -975,7 +975,7 @@ static struct acpi_driver hpet_acpi_driv + }, + }; + +-static struct miscdevice hpet_misc = { HPET_MINOR, "hpet", &hpet_fops }; ++static struct miscdevice hpet_misc = { HPET_MINOR, "hpet", &hpet_fops, {NULL, NULL}, NULL, NULL }; + + static int __init hpet_init(void) + { +diff -urNp linux-2.6.29/drivers/char/keyboard.c linux-2.6.29/drivers/char/keyboard.c +--- linux-2.6.29/drivers/char/keyboard.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/char/keyboard.c 2009-03-28 14:26:19.000000000 -0400 +@@ -635,6 +635,16 @@ static void k_spec(struct vc_data *vc, u + kbd->kbdmode == VC_MEDIUMRAW) && + value != 
KVAL(K_SAK)) + return; /* SAK is allowed even in raw mode */ ++ ++#if defined(CONFIG_GRKERNSEC_PROC) || defined(CONFIG_GRKERNSEC_PROC_MEMMAP) ++ { ++ void *func = fn_handler[value]; ++ if (func == fn_show_state || func == fn_show_ptregs || ++ func == fn_show_mem) ++ return; ++ } ++#endif ++ + fn_handler[value](vc); + } + +@@ -1388,7 +1398,7 @@ static const struct input_device_id kbd_ + .evbit = { BIT_MASK(EV_SND) }, + }, + +- { }, /* Terminating entry */ ++ { 0 }, /* Terminating entry */ + }; + + MODULE_DEVICE_TABLE(input, kbd_ids); +diff -urNp linux-2.6.29/drivers/char/mem.c linux-2.6.29/drivers/char/mem.c +--- linux-2.6.29/drivers/char/mem.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/char/mem.c 2009-03-28 14:26:19.000000000 -0400 +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -35,6 +36,10 @@ + # include + #endif + ++#ifdef CONFIG_GRKERNSEC ++extern struct file_operations grsec_fops; ++#endif ++ + /* + * Architectures vary in how they handle caching for addresses + * outside of main memory. 
+@@ -192,6 +197,11 @@ static ssize_t write_mem(struct file * f + if (!valid_phys_addr_range(p, count)) + return -EFAULT; + ++#ifdef CONFIG_GRKERNSEC_KMEM ++ gr_handle_mem_write(); ++ return -EPERM; ++#endif ++ + written = 0; + + #ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED +@@ -350,6 +360,11 @@ static int mmap_mem(struct file * file, + &vma->vm_page_prot)) + return -EINVAL; + ++#ifdef CONFIG_GRKERNSEC_KMEM ++ if (gr_handle_mem_mmap(vma->vm_pgoff << PAGE_SHIFT, vma)) ++ return -EPERM; ++#endif ++ + vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff, + size, + vma->vm_page_prot); +@@ -585,6 +600,11 @@ static ssize_t write_kmem(struct file * + ssize_t written; + char * kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */ + ++#ifdef CONFIG_GRKERNSEC_KMEM ++ gr_handle_kmem_write(); ++ return -EPERM; ++#endif ++ + if (p < (unsigned long) high_memory) { + + wrote = count; +@@ -788,6 +808,16 @@ static loff_t memory_lseek(struct file * + + static int open_port(struct inode * inode, struct file * filp) + { ++#ifdef CONFIG_GRKERNSEC_KMEM ++ gr_handle_open_port(); ++ return -EPERM; ++#endif ++ ++ return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; ++} ++ ++static int open_mem(struct inode * inode, struct file * filp) ++{ + return capable(CAP_SYS_RAWIO) ? 
0 : -EPERM; + } + +@@ -795,7 +825,6 @@ static int open_port(struct inode * inod + #define full_lseek null_lseek + #define write_zero write_null + #define read_full read_zero +-#define open_mem open_port + #define open_kmem open_mem + #define open_oldmem open_mem + +@@ -935,6 +964,11 @@ static int memory_open(struct inode * in + filp->f_op = &oldmem_fops; + break; + #endif ++#ifdef CONFIG_GRKERNSEC ++ case 13: ++ filp->f_op = &grsec_fops; ++ break; ++#endif + default: + unlock_kernel(); + return -ENXIO; +@@ -971,6 +1005,9 @@ static const struct { + #ifdef CONFIG_CRASH_DUMP + {12,"oldmem", S_IRUSR | S_IWUSR | S_IRGRP, &oldmem_fops}, + #endif ++#ifdef CONFIG_GRKERNSEC ++ {13,"grsec", S_IRUSR | S_IWUGO, &grsec_fops}, ++#endif + }; + + static struct class *mem_class; +diff -urNp linux-2.6.29/drivers/char/nvram.c linux-2.6.29/drivers/char/nvram.c +--- linux-2.6.29/drivers/char/nvram.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/char/nvram.c 2009-03-28 14:26:19.000000000 -0400 +@@ -429,7 +429,10 @@ static const struct file_operations nvra + static struct miscdevice nvram_dev = { + NVRAM_MINOR, + "nvram", +- &nvram_fops ++ &nvram_fops, ++ {NULL, NULL}, ++ NULL, ++ NULL + }; + + static int __init nvram_init(void) +diff -urNp linux-2.6.29/drivers/char/random.c linux-2.6.29/drivers/char/random.c +--- linux-2.6.29/drivers/char/random.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/char/random.c 2009-03-28 14:26:19.000000000 -0400 +@@ -249,8 +249,13 @@ + /* + * Configuration information + */ ++#ifdef CONFIG_GRKERNSEC_RANDNET ++#define INPUT_POOL_WORDS 512 ++#define OUTPUT_POOL_WORDS 128 ++#else + #define INPUT_POOL_WORDS 128 + #define OUTPUT_POOL_WORDS 32 ++#endif + #define SEC_XFER_SIZE 512 + + /* +@@ -287,10 +292,17 @@ static struct poolinfo { + int poolwords; + int tap1, tap2, tap3, tap4, tap5; + } poolinfo_table[] = { ++#ifdef CONFIG_GRKERNSEC_RANDNET ++ /* x^512 + x^411 + x^308 + x^208 +x^104 + x + 1 -- 225 */ ++ { 512, 411, 308, 208, 104, 
1 }, ++ /* x^128 + x^103 + x^76 + x^51 + x^25 + x + 1 -- 105 */ ++ { 128, 103, 76, 51, 25, 1 }, ++#else + /* x^128 + x^103 + x^76 + x^51 +x^25 + x + 1 -- 105 */ + { 128, 103, 76, 51, 25, 1 }, + /* x^32 + x^26 + x^20 + x^14 + x^7 + x + 1 -- 15 */ + { 32, 26, 20, 14, 7, 1 }, ++#endif + #if 0 + /* x^2048 + x^1638 + x^1231 + x^819 + x^411 + x + 1 -- 115 */ + { 2048, 1638, 1231, 819, 411, 1 }, +@@ -1200,7 +1212,7 @@ EXPORT_SYMBOL(generate_random_uuid); + #include + + static int min_read_thresh = 8, min_write_thresh; +-static int max_read_thresh = INPUT_POOL_WORDS * 32; ++static int max_read_thresh = OUTPUT_POOL_WORDS * 32; + static int max_write_thresh = INPUT_POOL_WORDS * 32; + static char sysctl_bootid[16]; + +diff -urNp linux-2.6.29/drivers/char/tty_ldisc.c linux-2.6.29/drivers/char/tty_ldisc.c +--- linux-2.6.29/drivers/char/tty_ldisc.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/char/tty_ldisc.c 2009-03-28 14:26:19.000000000 -0400 +@@ -74,7 +74,7 @@ int tty_register_ldisc(int disc, struct + spin_lock_irqsave(&tty_ldisc_lock, flags); + tty_ldiscs[disc] = new_ldisc; + new_ldisc->num = disc; +- new_ldisc->refcount = 0; ++ atomic_set(&new_ldisc->refcount, 0); + spin_unlock_irqrestore(&tty_ldisc_lock, flags); + + return ret; +@@ -102,7 +102,7 @@ int tty_unregister_ldisc(int disc) + return -EINVAL; + + spin_lock_irqsave(&tty_ldisc_lock, flags); +- if (tty_ldiscs[disc]->refcount) ++ if (atomic_read(&tty_ldiscs[disc]->refcount)) + ret = -EBUSY; + else + tty_ldiscs[disc] = NULL; +@@ -139,7 +139,7 @@ static int tty_ldisc_try_get(int disc, s + err = -EAGAIN; + else { + /* lock it */ +- ldops->refcount++; ++ atomic_inc(&ldops->refcount); + ld->ops = ldops; + err = 0; + } +@@ -196,8 +196,8 @@ static void tty_ldisc_put(struct tty_ldi + + spin_lock_irqsave(&tty_ldisc_lock, flags); + ld = tty_ldiscs[disc]; +- BUG_ON(ld->refcount == 0); +- ld->refcount--; ++ BUG_ON(atomic_read(&ld->refcount) == 0); ++ atomic_dec(&ld->refcount); + module_put(ld->owner); + 
spin_unlock_irqrestore(&tty_ldisc_lock, flags); + } +@@ -264,7 +264,7 @@ const struct file_operations tty_ldiscs_ + + static void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld) + { +- ld->refcount = 0; ++ atomic_set(&ld->refcount, 0); + tty->ldisc = *ld; + } + +@@ -289,7 +289,7 @@ static int tty_ldisc_try(struct tty_stru + spin_lock_irqsave(&tty_ldisc_lock, flags); + ld = &tty->ldisc; + if (test_bit(TTY_LDISC, &tty->flags)) { +- ld->refcount++; ++ atomic_inc(&ld->refcount); + ret = 1; + } + spin_unlock_irqrestore(&tty_ldisc_lock, flags); +@@ -316,7 +316,7 @@ struct tty_ldisc *tty_ldisc_ref_wait(str + { + /* wait_event is a macro */ + wait_event(tty_ldisc_wait, tty_ldisc_try(tty)); +- WARN_ON(tty->ldisc.refcount == 0); ++ WARN_ON(atomic_read(&tty->ldisc.refcount) == 0); + return &tty->ldisc; + } + +@@ -359,11 +359,9 @@ void tty_ldisc_deref(struct tty_ldisc *l + BUG_ON(ld == NULL); + + spin_lock_irqsave(&tty_ldisc_lock, flags); +- if (ld->refcount == 0) ++ if (!atomic_add_unless(&ld->refcount, -1, 0)) + printk(KERN_ERR "tty_ldisc_deref: no references.\n"); +- else +- ld->refcount--; +- if (ld->refcount == 0) ++ if (atomic_read(&ld->refcount) == 0) + wake_up(&tty_ldisc_wait); + spin_unlock_irqrestore(&tty_ldisc_lock, flags); + } +@@ -507,8 +505,8 @@ restart: + clear_bit(TTY_LDISC, &o_tty->flags); + + spin_lock_irqsave(&tty_ldisc_lock, flags); +- if (tty->ldisc.refcount || (o_tty && o_tty->ldisc.refcount)) { +- if (tty->ldisc.refcount) { ++ if (atomic_read(&tty->ldisc.refcount) || (o_tty && atomic_read(&o_tty->ldisc.refcount))) { ++ if (atomic_read(&tty->ldisc.refcount)) { + /* Free the new ldisc we grabbed. Must drop the lock + first. */ + spin_unlock_irqrestore(&tty_ldisc_lock, flags); +@@ -520,14 +518,14 @@ restart: + * and retries if we made tty_ldisc_wait() smarter. + * That is up for discussion. 
+ */ +- if (wait_event_interruptible(tty_ldisc_wait, tty->ldisc.refcount == 0) < 0) ++ if (wait_event_interruptible(tty_ldisc_wait, atomic_read(&tty->ldisc.refcount) == 0) < 0) + return -ERESTARTSYS; + goto restart; + } +- if (o_tty && o_tty->ldisc.refcount) { ++ if (o_tty && atomic_read(&o_tty->ldisc.refcount)) { + spin_unlock_irqrestore(&tty_ldisc_lock, flags); + tty_ldisc_put(o_tty->ldisc.ops); +- if (wait_event_interruptible(tty_ldisc_wait, o_tty->ldisc.refcount == 0) < 0) ++ if (wait_event_interruptible(tty_ldisc_wait, atomic_read(&o_tty->ldisc.refcount) == 0) < 0) + return -ERESTARTSYS; + goto restart; + } +@@ -670,9 +668,9 @@ void tty_ldisc_release(struct tty_struct + * side is zero. + */ + spin_lock_irqsave(&tty_ldisc_lock, flags); +- while (tty->ldisc.refcount) { ++ while (atomic_read(&tty->ldisc.refcount)) { + spin_unlock_irqrestore(&tty_ldisc_lock, flags); +- wait_event(tty_ldisc_wait, tty->ldisc.refcount == 0); ++ wait_event(tty_ldisc_wait, atomic_read(&tty->ldisc.refcount) == 0); + spin_lock_irqsave(&tty_ldisc_lock, flags); + } + spin_unlock_irqrestore(&tty_ldisc_lock, flags); +diff -urNp linux-2.6.29/drivers/char/vt_ioctl.c linux-2.6.29/drivers/char/vt_ioctl.c +--- linux-2.6.29/drivers/char/vt_ioctl.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/char/vt_ioctl.c 2009-03-28 14:26:19.000000000 -0400 +@@ -96,6 +96,12 @@ do_kdsk_ioctl(int cmd, struct kbentry __ + case KDSKBENT: + if (!perm) + return -EPERM; ++ ++#ifdef CONFIG_GRKERNSEC ++ if (!capable(CAP_SYS_TTY_CONFIG)) ++ return -EPERM; ++#endif ++ + if (!i && v == K_NOSUCHMAP) { + /* deallocate map */ + key_map = key_maps[s]; +@@ -236,6 +242,13 @@ do_kdgkb_ioctl(int cmd, struct kbsentry + goto reterr; + } + ++#ifdef CONFIG_GRKERNSEC ++ if (!capable(CAP_SYS_TTY_CONFIG)) { ++ ret = -EPERM; ++ goto reterr; ++ } ++#endif ++ + q = func_table[i]; + first_free = funcbufptr + (funcbufsize - funcbufleft); + for (j = i+1; j < MAX_NR_FUNC && !func_table[j]; j++) +diff -urNp 
linux-2.6.29/drivers/edac/edac_core.h linux-2.6.29/drivers/edac/edac_core.h +--- linux-2.6.29/drivers/edac/edac_core.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/edac/edac_core.h 2009-03-28 14:26:19.000000000 -0400 +@@ -85,11 +85,11 @@ extern int edac_debug_level; + + #else /* !CONFIG_EDAC_DEBUG */ + +-#define debugf0( ... ) +-#define debugf1( ... ) +-#define debugf2( ... ) +-#define debugf3( ... ) +-#define debugf4( ... ) ++#define debugf0( ... ) do {} while (0) ++#define debugf1( ... ) do {} while (0) ++#define debugf2( ... ) do {} while (0) ++#define debugf3( ... ) do {} while (0) ++#define debugf4( ... ) do {} while (0) + + #endif /* !CONFIG_EDAC_DEBUG */ + +diff -urNp linux-2.6.29/drivers/firmware/dmi_scan.c linux-2.6.29/drivers/firmware/dmi_scan.c +--- linux-2.6.29/drivers/firmware/dmi_scan.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/firmware/dmi_scan.c 2009-03-28 14:26:19.000000000 -0400 +@@ -389,11 +389,6 @@ void __init dmi_scan_machine(void) + } + } + else { +- /* +- * no iounmap() for that ioremap(); it would be a no-op, but +- * it's so early in setup that sucker gets confused into doing +- * what it shouldn't if we actually call it. 
+- */ + p = dmi_ioremap(0xF0000, 0x10000); + if (p == NULL) + goto error; +diff -urNp linux-2.6.29/drivers/hwmon/fscpos.c linux-2.6.29/drivers/hwmon/fscpos.c +--- linux-2.6.29/drivers/hwmon/fscpos.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/hwmon/fscpos.c 2009-03-28 14:26:19.000000000 -0400 +@@ -240,7 +240,6 @@ static ssize_t set_pwm(struct i2c_client + unsigned long v = simple_strtoul(buf, NULL, 10); + + /* Range: 0..255 */ +- if (v < 0) v = 0; + if (v > 255) v = 255; + + mutex_lock(&data->update_lock); +diff -urNp linux-2.6.29/drivers/hwmon/k8temp.c linux-2.6.29/drivers/hwmon/k8temp.c +--- linux-2.6.29/drivers/hwmon/k8temp.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/hwmon/k8temp.c 2009-03-28 14:26:19.000000000 -0400 +@@ -138,7 +138,7 @@ static DEVICE_ATTR(name, S_IRUGO, show_n + + static struct pci_device_id k8temp_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, +- { 0 }, ++ { 0, 0, 0, 0, 0, 0, 0 }, + }; + + MODULE_DEVICE_TABLE(pci, k8temp_ids); +diff -urNp linux-2.6.29/drivers/hwmon/sis5595.c linux-2.6.29/drivers/hwmon/sis5595.c +--- linux-2.6.29/drivers/hwmon/sis5595.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/hwmon/sis5595.c 2009-03-28 14:26:19.000000000 -0400 +@@ -699,7 +699,7 @@ static struct sis5595_data *sis5595_upda + + static struct pci_device_id sis5595_pci_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_503) }, +- { 0, } ++ { 0, 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE(pci, sis5595_pci_ids); +diff -urNp linux-2.6.29/drivers/hwmon/via686a.c linux-2.6.29/drivers/hwmon/via686a.c +--- linux-2.6.29/drivers/hwmon/via686a.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/hwmon/via686a.c 2009-03-28 14:26:19.000000000 -0400 +@@ -769,7 +769,7 @@ static struct via686a_data *via686a_upda + + static struct pci_device_id via686a_pci_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_4) }, +- { 0, } ++ { 0, 0, 0, 0, 0, 0, 0 } + 
}; + + MODULE_DEVICE_TABLE(pci, via686a_pci_ids); +diff -urNp linux-2.6.29/drivers/hwmon/vt8231.c linux-2.6.29/drivers/hwmon/vt8231.c +--- linux-2.6.29/drivers/hwmon/vt8231.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/hwmon/vt8231.c 2009-03-28 14:26:19.000000000 -0400 +@@ -699,7 +699,7 @@ static struct platform_driver vt8231_dri + + static struct pci_device_id vt8231_pci_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8231_4) }, +- { 0, } ++ { 0, 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE(pci, vt8231_pci_ids); +diff -urNp linux-2.6.29/drivers/hwmon/w83791d.c linux-2.6.29/drivers/hwmon/w83791d.c +--- linux-2.6.29/drivers/hwmon/w83791d.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/hwmon/w83791d.c 2009-03-28 14:26:19.000000000 -0400 +@@ -330,8 +330,8 @@ static int w83791d_detect(struct i2c_cli + struct i2c_board_info *info); + static int w83791d_remove(struct i2c_client *client); + +-static int w83791d_read(struct i2c_client *client, u8 register); +-static int w83791d_write(struct i2c_client *client, u8 register, u8 value); ++static int w83791d_read(struct i2c_client *client, u8 reg); ++static int w83791d_write(struct i2c_client *client, u8 reg, u8 value); + static struct w83791d_data *w83791d_update_device(struct device *dev); + + #ifdef DEBUG +diff -urNp linux-2.6.29/drivers/i2c/busses/i2c-i801.c linux-2.6.29/drivers/i2c/busses/i2c-i801.c +--- linux-2.6.29/drivers/i2c/busses/i2c-i801.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/i2c/busses/i2c-i801.c 2009-03-28 14:26:19.000000000 -0400 +@@ -577,7 +577,7 @@ static struct pci_device_id i801_ids[] = + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH10_4) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH10_5) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PCH_SMBUS) }, +- { 0, } ++ { 0, 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE (pci, i801_ids); +diff -urNp linux-2.6.29/drivers/i2c/busses/i2c-piix4.c 
linux-2.6.29/drivers/i2c/busses/i2c-piix4.c +--- linux-2.6.29/drivers/i2c/busses/i2c-piix4.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/i2c/busses/i2c-piix4.c 2009-03-28 14:26:19.000000000 -0400 +@@ -123,7 +123,7 @@ static struct dmi_system_id __devinitdat + .ident = "IBM", + .matches = { DMI_MATCH(DMI_SYS_VENDOR, "IBM"), }, + }, +- { }, ++ { NULL, NULL, {DMI_MATCH(DMI_NONE, NULL)}, NULL }, + }; + + static int __devinit piix4_setup(struct pci_dev *PIIX4_dev, +@@ -423,7 +423,7 @@ static struct pci_device_id piix4_ids[] + PCI_DEVICE_ID_SERVERWORKS_CSB6) }, + { PCI_DEVICE(PCI_VENDOR_ID_SERVERWORKS, + PCI_DEVICE_ID_SERVERWORKS_HT1000SB) }, +- { 0, } ++ { 0, 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE (pci, piix4_ids); +diff -urNp linux-2.6.29/drivers/i2c/busses/i2c-sis630.c linux-2.6.29/drivers/i2c/busses/i2c-sis630.c +--- linux-2.6.29/drivers/i2c/busses/i2c-sis630.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/i2c/busses/i2c-sis630.c 2009-03-28 14:26:19.000000000 -0400 +@@ -471,7 +471,7 @@ static struct i2c_adapter sis630_adapter + static struct pci_device_id sis630_ids[] __devinitdata = { + { PCI_DEVICE(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_503) }, + { PCI_DEVICE(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_LPC) }, +- { 0, } ++ { 0, 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE (pci, sis630_ids); +diff -urNp linux-2.6.29/drivers/i2c/busses/i2c-sis96x.c linux-2.6.29/drivers/i2c/busses/i2c-sis96x.c +--- linux-2.6.29/drivers/i2c/busses/i2c-sis96x.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/i2c/busses/i2c-sis96x.c 2009-03-28 14:26:19.000000000 -0400 +@@ -247,7 +247,7 @@ static struct i2c_adapter sis96x_adapter + + static struct pci_device_id sis96x_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_SMBUS) }, +- { 0, } ++ { 0, 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE (pci, sis96x_ids); +diff -urNp linux-2.6.29/drivers/ieee1394/dv1394.c linux-2.6.29/drivers/ieee1394/dv1394.c +--- 
linux-2.6.29/drivers/ieee1394/dv1394.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/ieee1394/dv1394.c 2009-03-28 14:26:19.000000000 -0400 +@@ -739,7 +739,7 @@ static void frame_prepare(struct video_c + based upon DIF section and sequence + */ + +-static void inline ++static inline void + frame_put_packet (struct frame *f, struct packet *p) + { + int section_type = p->data[0] >> 5; /* section type is in bits 5 - 7 */ +@@ -2181,7 +2181,7 @@ static struct ieee1394_device_id dv1394_ + .specifier_id = AVC_UNIT_SPEC_ID_ENTRY & 0xffffff, + .version = AVC_SW_VERSION_ENTRY & 0xffffff + }, +- { } ++ { 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE(ieee1394, dv1394_id_table); +diff -urNp linux-2.6.29/drivers/ieee1394/eth1394.c linux-2.6.29/drivers/ieee1394/eth1394.c +--- linux-2.6.29/drivers/ieee1394/eth1394.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/ieee1394/eth1394.c 2009-03-28 14:26:19.000000000 -0400 +@@ -445,7 +445,7 @@ static struct ieee1394_device_id eth1394 + .specifier_id = ETHER1394_GASP_SPECIFIER_ID, + .version = ETHER1394_GASP_VERSION, + }, +- {} ++ { 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE(ieee1394, eth1394_id_table); +diff -urNp linux-2.6.29/drivers/ieee1394/hosts.c linux-2.6.29/drivers/ieee1394/hosts.c +--- linux-2.6.29/drivers/ieee1394/hosts.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/ieee1394/hosts.c 2009-03-28 14:26:19.000000000 -0400 +@@ -78,6 +78,7 @@ static int dummy_isoctl(struct hpsb_iso + } + + static struct hpsb_host_driver dummy_driver = { ++ .name = "dummy", + .transmit_packet = dummy_transmit_packet, + .devctl = dummy_devctl, + .isoctl = dummy_isoctl +diff -urNp linux-2.6.29/drivers/ieee1394/ohci1394.c linux-2.6.29/drivers/ieee1394/ohci1394.c +--- linux-2.6.29/drivers/ieee1394/ohci1394.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/ieee1394/ohci1394.c 2009-03-28 14:26:19.000000000 -0400 +@@ -147,9 +147,9 @@ printk(level "%s: " fmt "\n" , OHCI1394_ + printk(level 
"%s: fw-host%d: " fmt "\n" , OHCI1394_DRIVER_NAME, ohci->host->id , ## args) + + /* Module Parameters */ +-static int phys_dma = 1; ++static int phys_dma; + module_param(phys_dma, int, 0444); +-MODULE_PARM_DESC(phys_dma, "Enable physical DMA (default = 1)."); ++MODULE_PARM_DESC(phys_dma, "Enable physical DMA (default = 0)."); + + static void dma_trm_tasklet(unsigned long data); + static void dma_trm_reset(struct dma_trm_ctx *d); +@@ -3449,7 +3449,7 @@ static struct pci_device_id ohci1394_pci + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + }, +- { 0, }, ++ { 0, 0, 0, 0, 0, 0, 0 }, + }; + + MODULE_DEVICE_TABLE(pci, ohci1394_pci_tbl); +diff -urNp linux-2.6.29/drivers/ieee1394/raw1394.c linux-2.6.29/drivers/ieee1394/raw1394.c +--- linux-2.6.29/drivers/ieee1394/raw1394.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/ieee1394/raw1394.c 2009-03-28 14:26:19.000000000 -0400 +@@ -2995,7 +2995,7 @@ static struct ieee1394_device_id raw1394 + .match_flags = IEEE1394_MATCH_SPECIFIER_ID | IEEE1394_MATCH_VERSION, + .specifier_id = CAMERA_UNIT_SPEC_ID_ENTRY & 0xffffff, + .version = (CAMERA_SW_VERSION_ENTRY + 2) & 0xffffff}, +- {} ++ { 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE(ieee1394, raw1394_id_table); +diff -urNp linux-2.6.29/drivers/ieee1394/sbp2.c linux-2.6.29/drivers/ieee1394/sbp2.c +--- linux-2.6.29/drivers/ieee1394/sbp2.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/ieee1394/sbp2.c 2009-03-28 14:26:19.000000000 -0400 +@@ -290,7 +290,7 @@ static struct ieee1394_device_id sbp2_id + .match_flags = IEEE1394_MATCH_SPECIFIER_ID | IEEE1394_MATCH_VERSION, + .specifier_id = SBP2_UNIT_SPEC_ID_ENTRY & 0xffffff, + .version = SBP2_SW_VERSION_ENTRY & 0xffffff}, +- {} ++ { 0, 0, 0, 0, 0, 0 } + }; + MODULE_DEVICE_TABLE(ieee1394, sbp2_id_table); + +@@ -2112,7 +2112,7 @@ MODULE_DESCRIPTION("IEEE-1394 SBP-2 prot + MODULE_SUPPORTED_DEVICE(SBP2_DEVICE_NAME); + MODULE_LICENSE("GPL"); + +-static int sbp2_module_init(void) ++static int __init 
sbp2_module_init(void) + { + int ret; + +diff -urNp linux-2.6.29/drivers/ieee1394/video1394.c linux-2.6.29/drivers/ieee1394/video1394.c +--- linux-2.6.29/drivers/ieee1394/video1394.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/ieee1394/video1394.c 2009-03-28 14:26:19.000000000 -0400 +@@ -1310,7 +1310,7 @@ static struct ieee1394_device_id video13 + .specifier_id = CAMERA_UNIT_SPEC_ID_ENTRY & 0xffffff, + .version = (CAMERA_SW_VERSION_ENTRY + 2) & 0xffffff + }, +- { } ++ { 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE(ieee1394, video1394_id_table); +diff -urNp linux-2.6.29/drivers/input/keyboard/atkbd.c linux-2.6.29/drivers/input/keyboard/atkbd.c +--- linux-2.6.29/drivers/input/keyboard/atkbd.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/input/keyboard/atkbd.c 2009-03-28 14:26:19.000000000 -0400 +@@ -1181,7 +1181,7 @@ static struct serio_device_id atkbd_seri + .id = SERIO_ANY, + .extra = SERIO_ANY, + }, +- { 0 } ++ { 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE(serio, atkbd_serio_ids); +diff -urNp linux-2.6.29/drivers/input/mouse/lifebook.c linux-2.6.29/drivers/input/mouse/lifebook.c +--- linux-2.6.29/drivers/input/mouse/lifebook.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/input/mouse/lifebook.c 2009-03-28 14:26:19.000000000 -0400 +@@ -110,7 +110,7 @@ static const struct dmi_system_id lifebo + DMI_MATCH(DMI_PRODUCT_NAME, "LifeBook B142"), + }, + }, +- { } ++ { NULL, NULL, {DMI_MATCH(DMI_NONE, {0})}, NULL} + }; + + static psmouse_ret_t lifebook_process_byte(struct psmouse *psmouse) +diff -urNp linux-2.6.29/drivers/input/mouse/psmouse-base.c linux-2.6.29/drivers/input/mouse/psmouse-base.c +--- linux-2.6.29/drivers/input/mouse/psmouse-base.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/input/mouse/psmouse-base.c 2009-03-28 14:26:19.000000000 -0400 +@@ -1378,7 +1378,7 @@ static struct serio_device_id psmouse_se + .id = SERIO_ANY, + .extra = SERIO_ANY, + }, +- { 0 } ++ { 0, 0, 0, 0 } + }; + + 
MODULE_DEVICE_TABLE(serio, psmouse_serio_ids); +diff -urNp linux-2.6.29/drivers/input/mouse/synaptics.c linux-2.6.29/drivers/input/mouse/synaptics.c +--- linux-2.6.29/drivers/input/mouse/synaptics.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/input/mouse/synaptics.c 2009-03-28 14:26:19.000000000 -0400 +@@ -412,7 +412,7 @@ static void synaptics_process_packet(str + break; + case 2: + if (SYN_MODEL_PEN(priv->model_id)) +- ; /* Nothing, treat a pen as a single finger */ ++ break; /* Nothing, treat a pen as a single finger */ + break; + case 4 ... 15: + if (SYN_CAP_PALMDETECT(priv->capabilities)) +@@ -625,7 +625,7 @@ static const struct dmi_system_id toshib + DMI_MATCH(DMI_PRODUCT_NAME, "PORTEGE M300"), + }, + }, +- { } ++ { NULL, NULL, {DMI_MATCH(DMI_NONE, {0})}, NULL } + }; + #endif + +diff -urNp linux-2.6.29/drivers/input/mousedev.c linux-2.6.29/drivers/input/mousedev.c +--- linux-2.6.29/drivers/input/mousedev.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/input/mousedev.c 2009-03-28 14:26:19.000000000 -0400 +@@ -1062,7 +1062,7 @@ static struct input_handler mousedev_han + + #ifdef CONFIG_INPUT_MOUSEDEV_PSAUX + static struct miscdevice psaux_mouse = { +- PSMOUSE_MINOR, "psaux", &mousedev_fops ++ PSMOUSE_MINOR, "psaux", &mousedev_fops, {NULL, NULL}, NULL, NULL + }; + static int psaux_registered; + #endif +diff -urNp linux-2.6.29/drivers/input/serio/i8042-x86ia64io.h linux-2.6.29/drivers/input/serio/i8042-x86ia64io.h +--- linux-2.6.29/drivers/input/serio/i8042-x86ia64io.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/input/serio/i8042-x86ia64io.h 2009-03-28 14:26:19.000000000 -0400 +@@ -151,7 +151,7 @@ static struct dmi_system_id __initdata i + DMI_MATCH(DMI_PRODUCT_VERSION, "01"), + }, + }, +- { } ++ { NULL, NULL, {DMI_MATCH(DMI_NONE, {0})}, NULL } + }; + + /* +@@ -366,7 +366,7 @@ static struct dmi_system_id __initdata i + DMI_MATCH(DMI_PRODUCT_NAME, "Vostro1510"), + }, + }, +- { } ++ { NULL, NULL, 
{DMI_MATCH(DMI_NONE, {0})}, NULL } + }; + + #ifdef CONFIG_PNP +@@ -378,7 +378,7 @@ static struct dmi_system_id __initdata i + DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"), + }, + }, +- { } ++ { NULL, NULL, {DMI_MATCH(DMI_NONE, {0})}, NULL } + }; + #endif + +@@ -445,7 +445,7 @@ static struct dmi_system_id __initdata i + DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 4280"), + }, + }, +- { } ++ { NULL, NULL, {DMI_MATCH(DMI_NONE, {0})}, NULL } + }; + + #endif /* CONFIG_X86 */ +diff -urNp linux-2.6.29/drivers/input/serio/serio_raw.c linux-2.6.29/drivers/input/serio/serio_raw.c +--- linux-2.6.29/drivers/input/serio/serio_raw.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/input/serio/serio_raw.c 2009-03-28 14:26:19.000000000 -0400 +@@ -378,7 +378,7 @@ static struct serio_device_id serio_raw_ + .id = SERIO_ANY, + .extra = SERIO_ANY, + }, +- { 0 } ++ { 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE(serio, serio_raw_serio_ids); +diff -urNp linux-2.6.29/drivers/lguest/core.c linux-2.6.29/drivers/lguest/core.c +--- linux-2.6.29/drivers/lguest/core.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/lguest/core.c 2009-03-28 14:26:19.000000000 -0400 +@@ -80,9 +80,17 @@ static __init int map_switcher(void) + * (SWITCHER_ADDR). We might not get it in theory, but in practice + * it's worked so far. The end address needs +1 because __get_vm_area + * allocates an extra guard page, so we need space for that. 
*/ ++ ++#if defined(CONFIG_MODULES) && defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) ++ switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE, ++ VM_ALLOC | VM_KERNEXEC, SWITCHER_ADDR, SWITCHER_ADDR ++ + (TOTAL_SWITCHER_PAGES+1) * PAGE_SIZE); ++#else + switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE, + VM_ALLOC, SWITCHER_ADDR, SWITCHER_ADDR + + (TOTAL_SWITCHER_PAGES+1) * PAGE_SIZE); ++#endif ++ + if (!switcher_vma) { + err = -ENOMEM; + printk("lguest: could not map switcher pages high\n"); +diff -urNp linux-2.6.29/drivers/md/bitmap.c linux-2.6.29/drivers/md/bitmap.c +--- linux-2.6.29/drivers/md/bitmap.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/md/bitmap.c 2009-03-28 14:26:19.000000000 -0400 +@@ -57,7 +57,7 @@ + # if DEBUG > 0 + # define PRINTK(x...) printk(KERN_DEBUG x) + # else +-# define PRINTK(x...) ++# define PRINTK(x...) do {} while (0) + # endif + #endif + +diff -urNp linux-2.6.29/drivers/md/md.c linux-2.6.29/drivers/md/md.c +--- linux-2.6.29/drivers/md/md.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/md/md.c 2009-03-28 14:26:19.000000000 -0400 +@@ -5640,7 +5640,7 @@ static int md_seq_show(struct seq_file * + chunk_kb ? 
"KB" : "B"); + if (bitmap->file) { + seq_printf(seq, ", file: "); +- seq_path(seq, &bitmap->file->f_path, " \t\n"); ++ seq_path(seq, &bitmap->file->f_path, " \t\n\\"); + } + + seq_printf(seq, "\n"); +diff -urNp linux-2.6.29/drivers/mtd/devices/doc2000.c linux-2.6.29/drivers/mtd/devices/doc2000.c +--- linux-2.6.29/drivers/mtd/devices/doc2000.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/mtd/devices/doc2000.c 2009-03-28 14:26:19.000000000 -0400 +@@ -777,7 +777,7 @@ static int doc_write(struct mtd_info *mt + + /* The ECC will not be calculated correctly if less than 512 is written */ + /* DBB- +- if (len != 0x200 && eccbuf) ++ if (len != 0x200) + printk(KERN_WARNING + "ECC needs a full sector write (adr: %lx size %lx)\n", + (long) to, (long) len); +diff -urNp linux-2.6.29/drivers/mtd/devices/doc2001.c linux-2.6.29/drivers/mtd/devices/doc2001.c +--- linux-2.6.29/drivers/mtd/devices/doc2001.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/mtd/devices/doc2001.c 2009-03-28 14:26:19.000000000 -0400 +@@ -396,6 +396,8 @@ static int doc_read (struct mtd_info *mt + /* Don't allow read past end of device */ + if (from >= this->totlen) + return -EINVAL; ++ if (!len) ++ return -EINVAL; + + /* Don't allow a single read to cross a 512-byte block boundary */ + if (from + len > ((from | 0x1ff) + 1)) +diff -urNp linux-2.6.29/drivers/mtd/ubi/build.c linux-2.6.29/drivers/mtd/ubi/build.c +--- linux-2.6.29/drivers/mtd/ubi/build.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/mtd/ubi/build.c 2009-03-28 14:26:19.000000000 -0400 +@@ -1112,7 +1112,7 @@ static int __init bytes_str_to_int(const + unsigned long result; + + result = simple_strtoul(str, &endp, 0); +- if (str == endp || result < 0) { ++ if (str == endp) { + printk(KERN_ERR "UBI error: incorrect bytes count: \"%s\"\n", + str); + return -EINVAL; +diff -urNp linux-2.6.29/drivers/net/irda/vlsi_ir.c linux-2.6.29/drivers/net/irda/vlsi_ir.c +--- linux-2.6.29/drivers/net/irda/vlsi_ir.c 
2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/net/irda/vlsi_ir.c 2009-03-28 14:26:19.000000000 -0400 +@@ -906,13 +906,12 @@ static int vlsi_hard_start_xmit(struct s + /* no race - tx-ring already empty */ + vlsi_set_baud(idev, iobase); + netif_wake_queue(ndev); +- } +- else +- ; ++ } else { + /* keep the speed change pending like it would + * for any len>0 packet. tx completion interrupt + * will apply it when the tx ring becomes empty. + */ ++ } + spin_unlock_irqrestore(&idev->lock, flags); + dev_kfree_skb_any(skb); + return 0; +diff -urNp linux-2.6.29/drivers/net/pcnet32.c linux-2.6.29/drivers/net/pcnet32.c +--- linux-2.6.29/drivers/net/pcnet32.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/net/pcnet32.c 2009-03-28 14:26:19.000000000 -0400 +@@ -78,7 +78,7 @@ static int cards_found; + /* + * VLB I/O addresses + */ +-static unsigned int pcnet32_portlist[] __initdata = ++static unsigned int pcnet32_portlist[] __devinitdata = + { 0x300, 0x320, 0x340, 0x360, 0 }; + + static int pcnet32_debug = 0; +diff -urNp linux-2.6.29/drivers/net/tg3.h linux-2.6.29/drivers/net/tg3.h +--- linux-2.6.29/drivers/net/tg3.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/net/tg3.h 2009-03-28 14:26:20.000000000 -0400 +@@ -89,6 +89,7 @@ + #define CHIPREV_ID_5750_A0 0x4000 + #define CHIPREV_ID_5750_A1 0x4001 + #define CHIPREV_ID_5750_A3 0x4003 ++#define CHIPREV_ID_5750_C1 0x4201 + #define CHIPREV_ID_5750_C2 0x4202 + #define CHIPREV_ID_5752_A0_HW 0x5000 + #define CHIPREV_ID_5752_A0 0x6000 +diff -urNp linux-2.6.29/drivers/pci/hotplug/cpqphp_nvram.c linux-2.6.29/drivers/pci/hotplug/cpqphp_nvram.c +--- linux-2.6.29/drivers/pci/hotplug/cpqphp_nvram.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/pci/hotplug/cpqphp_nvram.c 2009-03-28 14:26:20.000000000 -0400 +@@ -425,9 +425,13 @@ static u32 store_HRT (void __iomem *rom_ + + void compaq_nvram_init (void __iomem *rom_start) + { ++ ++#ifndef CONFIG_PAX_KERNEXEC + if (rom_start) { + 
compaq_int15_entry_point = (rom_start + ROM_INT15_PHY_ADDR - ROM_PHY_ADDR); + } ++#endif ++ + dbg("int15 entry = %p\n", compaq_int15_entry_point); + + /* initialize our int15 lock */ +diff -urNp linux-2.6.29/drivers/pci/pcie/aer/aerdrv.c linux-2.6.29/drivers/pci/pcie/aer/aerdrv.c +--- linux-2.6.29/drivers/pci/pcie/aer/aerdrv.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/pci/pcie/aer/aerdrv.c 2009-03-28 14:26:20.000000000 -0400 +@@ -59,7 +59,7 @@ static struct pcie_port_service_id aer_i + .port_type = PCIE_RC_PORT, + .service_type = PCIE_PORT_SERVICE_AER, + }, +- { /* end: all zeroes */ } ++ { 0, 0, 0, 0, 0, 0, 0, 0, 0 } + }; + + static struct pci_error_handlers aer_error_handlers = { +diff -urNp linux-2.6.29/drivers/pci/pcie/aer/aerdrv_core.c linux-2.6.29/drivers/pci/pcie/aer/aerdrv_core.c +--- linux-2.6.29/drivers/pci/pcie/aer/aerdrv_core.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/pci/pcie/aer/aerdrv_core.c 2009-03-28 14:26:20.000000000 -0400 +@@ -670,7 +670,7 @@ static void aer_isr_one_error(struct pci + struct aer_err_source *e_src) + { + struct device *s_device; +- struct aer_err_info e_info = {0, 0, 0,}; ++ struct aer_err_info e_info = {0, 0, 0, {0, 0, 0, 0}}; + int i; + u16 id; + +diff -urNp linux-2.6.29/drivers/pci/pcie/portdrv_pci.c linux-2.6.29/drivers/pci/pcie/portdrv_pci.c +--- linux-2.6.29/drivers/pci/pcie/portdrv_pci.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/pci/pcie/portdrv_pci.c 2009-03-28 14:26:20.000000000 -0400 +@@ -260,7 +260,7 @@ static void pcie_portdrv_err_resume(stru + static const struct pci_device_id port_pci_ids[] = { { + /* handle any PCI-Express port */ + PCI_DEVICE_CLASS(((PCI_CLASS_BRIDGE_PCI << 8) | 0x00), ~0), +- }, { /* end: all zeroes */ } ++ }, { 0, 0, 0, 0, 0, 0, 0 } + }; + MODULE_DEVICE_TABLE(pci, port_pci_ids); + +diff -urNp linux-2.6.29/drivers/pci/proc.c linux-2.6.29/drivers/pci/proc.c +--- linux-2.6.29/drivers/pci/proc.c 2009-03-23 19:12:14.000000000 -0400 ++++ 
linux-2.6.29/drivers/pci/proc.c 2009-03-28 14:26:20.000000000 -0400 +@@ -480,7 +480,16 @@ static const struct file_operations proc + static int __init pci_proc_init(void) + { + struct pci_dev *dev = NULL; ++ ++#ifdef CONFIG_GRKERNSEC_PROC_ADD ++#ifdef CONFIG_GRKERNSEC_PROC_USER ++ proc_bus_pci_dir = proc_mkdir_mode("bus/pci", S_IRUSR | S_IXUSR, NULL); ++#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ proc_bus_pci_dir = proc_mkdir_mode("bus/pci", S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP, NULL); ++#endif ++#else + proc_bus_pci_dir = proc_mkdir("bus/pci", NULL); ++#endif + proc_create("devices", 0, proc_bus_pci_dir, + &proc_bus_pci_dev_operations); + proc_initialized = 1; +diff -urNp linux-2.6.29/drivers/pcmcia/ti113x.h linux-2.6.29/drivers/pcmcia/ti113x.h +--- linux-2.6.29/drivers/pcmcia/ti113x.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/pcmcia/ti113x.h 2009-03-28 14:26:20.000000000 -0400 +@@ -903,7 +903,7 @@ static struct pci_device_id ene_tune_tbl + DEVID(PCI_VENDOR_ID_MOTOROLA, 0x3410, 0xECC0, PCI_ANY_ID, + ENE_TEST_C9_TLTENABLE | ENE_TEST_C9_PFENABLE, ENE_TEST_C9_TLTENABLE), + +- {} ++ { 0, 0, 0, 0, 0, 0, 0 } + }; + + static void ene_tune_bridge(struct pcmcia_socket *sock, struct pci_bus *bus) +diff -urNp linux-2.6.29/drivers/pcmcia/yenta_socket.c linux-2.6.29/drivers/pcmcia/yenta_socket.c +--- linux-2.6.29/drivers/pcmcia/yenta_socket.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/pcmcia/yenta_socket.c 2009-03-28 14:26:20.000000000 -0400 +@@ -1366,7 +1366,7 @@ static struct pci_device_id yenta_table + + /* match any cardbus bridge */ + CB_ID(PCI_ANY_ID, PCI_ANY_ID, DEFAULT), +- { /* all zeroes */ } ++ { 0, 0, 0, 0, 0, 0, 0 } + }; + MODULE_DEVICE_TABLE(pci, yenta_table); + +diff -urNp linux-2.6.29/drivers/pnp/pnpbios/bioscalls.c linux-2.6.29/drivers/pnp/pnpbios/bioscalls.c +--- linux-2.6.29/drivers/pnp/pnpbios/bioscalls.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/pnp/pnpbios/bioscalls.c 2009-03-28 
14:26:20.000000000 -0400 +@@ -60,7 +60,7 @@ set_base(gdt[(selname) >> 3], (u32)(addr + set_limit(gdt[(selname) >> 3], size); \ + } while(0) + +-static struct desc_struct bad_bios_desc; ++static struct desc_struct bad_bios_desc __read_only; + + /* + * At some point we want to use this stack frame pointer to unwind +@@ -87,6 +87,10 @@ static inline u16 call_pnp_bios(u16 func + struct desc_struct save_desc_40; + int cpu; + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + /* + * PnP BIOSes are generally not terribly re-entrant. + * Also, don't rely on them to save everything correctly. +@@ -96,8 +100,17 @@ static inline u16 call_pnp_bios(u16 func + + cpu = get_cpu(); + save_desc_40 = get_cpu_gdt_table(cpu)[0x40 / 8]; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + get_cpu_gdt_table(cpu)[0x40 / 8] = bad_bios_desc; + ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + /* On some boxes IRQ's during PnP BIOS calls are deadly. */ + spin_lock_irqsave(&pnp_bios_lock, flags); + +@@ -134,7 +147,16 @@ static inline u16 call_pnp_bios(u16 func + :"memory"); + spin_unlock_irqrestore(&pnp_bios_lock, flags); + ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + get_cpu_gdt_table(cpu)[0x40 / 8] = save_desc_40; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + put_cpu(); + + /* If we get here and this is set then the PnP BIOS faulted on us. 
*/ +@@ -468,16 +490,24 @@ int pnp_bios_read_escd(char *data, u32 n + return status; + } + +-void pnpbios_calls_init(union pnp_bios_install_struct *header) ++void __init pnpbios_calls_init(union pnp_bios_install_struct *header) + { + int i; + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + spin_lock_init(&pnp_bios_lock); + pnp_bios_callpoint.offset = header->fields.pm16offset; + pnp_bios_callpoint.segment = PNP_CS16; + ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + bad_bios_desc.a = 0; +- bad_bios_desc.b = 0x00409200; ++ bad_bios_desc.b = 0x00409300; + + set_base(bad_bios_desc, __va((unsigned long)0x40 << 4)); + _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4)); +@@ -491,4 +521,9 @@ void pnpbios_calls_init(union pnp_bios_i + set_base(gdt[GDT_ENTRY_PNPBIOS_DS], + __va(header->fields.pm16dseg)); + } ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + } +diff -urNp linux-2.6.29/drivers/pnp/quirks.c linux-2.6.29/drivers/pnp/quirks.c +--- linux-2.6.29/drivers/pnp/quirks.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/pnp/quirks.c 2009-03-28 14:26:20.000000000 -0400 +@@ -327,7 +327,7 @@ static struct pnp_fixup pnp_fixups[] = { + /* PnP resources that might overlap PCI BARs */ + {"PNP0c01", quirk_system_pci_resources}, + {"PNP0c02", quirk_system_pci_resources}, +- {""} ++ {"", NULL} + }; + + void pnp_fixup_device(struct pnp_dev *dev) +diff -urNp linux-2.6.29/drivers/pnp/resource.c linux-2.6.29/drivers/pnp/resource.c +--- linux-2.6.29/drivers/pnp/resource.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/pnp/resource.c 2009-03-28 14:26:20.000000000 -0400 +@@ -355,7 +355,7 @@ int pnp_check_irq(struct pnp_dev *dev, s + return 1; + + /* check if the resource is valid */ +- if (*irq < 0 || *irq > 15) ++ if (*irq > 15) + return 0; + + /* check if the resource is reserved */ +@@ -419,7 +419,7 @@ int pnp_check_dma(struct pnp_dev *dev, s + return 1; + + /* check if the resource is valid */ 
+- if (*dma < 0 || *dma == 4 || *dma > 7) ++ if (*dma == 4 || *dma > 7) + return 0; + + /* check if the resource is reserved */ +diff -urNp linux-2.6.29/drivers/scsi/scsi_logging.h linux-2.6.29/drivers/scsi/scsi_logging.h +--- linux-2.6.29/drivers/scsi/scsi_logging.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/scsi/scsi_logging.h 2009-03-28 14:26:20.000000000 -0400 +@@ -51,7 +51,7 @@ do { \ + } while (0); \ + } while (0) + #else +-#define SCSI_CHECK_LOGGING(SHIFT, BITS, LEVEL, CMD) ++#define SCSI_CHECK_LOGGING(SHIFT, BITS, LEVEL, CMD) do {} while (0) + #endif /* CONFIG_SCSI_LOGGING */ + + /* +diff -urNp linux-2.6.29/drivers/serial/8250_pci.c linux-2.6.29/drivers/serial/8250_pci.c +--- linux-2.6.29/drivers/serial/8250_pci.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/serial/8250_pci.c 2009-03-28 14:26:20.000000000 -0400 +@@ -3162,7 +3162,7 @@ static struct pci_device_id serial_pci_t + PCI_ANY_ID, PCI_ANY_ID, + PCI_CLASS_COMMUNICATION_MULTISERIAL << 8, + 0xffff00, pbn_default }, +- { 0, } ++ { 0, 0, 0, 0, 0, 0, 0 } + }; + + static struct pci_driver serial_pci_driver = { +diff -urNp linux-2.6.29/drivers/usb/class/cdc-acm.c linux-2.6.29/drivers/usb/class/cdc-acm.c +--- linux-2.6.29/drivers/usb/class/cdc-acm.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/usb/class/cdc-acm.c 2009-03-28 14:26:20.000000000 -0400 +@@ -1401,7 +1401,7 @@ static struct usb_device_id acm_ids[] = + USB_CDC_ACM_PROTO_AT_CDMA) }, + + /* NOTE: COMM/ACM/0xff is likely MSFT RNDIS ... NOT a modem!! 
*/ +- { } ++ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE (usb, acm_ids); +diff -urNp linux-2.6.29/drivers/usb/class/usblp.c linux-2.6.29/drivers/usb/class/usblp.c +--- linux-2.6.29/drivers/usb/class/usblp.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/usb/class/usblp.c 2009-03-28 14:26:20.000000000 -0400 +@@ -228,7 +228,7 @@ static const struct quirk_printer_struct + { 0x0482, 0x0010, USBLP_QUIRK_BIDIR }, /* Kyocera Mita FS 820, by zut */ + { 0x04f9, 0x000d, USBLP_QUIRK_BIDIR }, /* Brother Industries, Ltd HL-1440 Laser Printer */ + { 0x04b8, 0x0202, USBLP_QUIRK_BAD_CLASS }, /* Seiko Epson Receipt Printer M129C */ +- { 0, 0 } ++ { 0, 0, 0 } + }; + + static int usblp_wwait(struct usblp *usblp, int nonblock); +@@ -1403,7 +1403,7 @@ static struct usb_device_id usblp_ids [] + { USB_INTERFACE_INFO(7, 1, 2) }, + { USB_INTERFACE_INFO(7, 1, 3) }, + { USB_DEVICE(0x04b8, 0x0202) }, /* Seiko Epson Receipt Printer M129C */ +- { } /* Terminating entry */ ++ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } /* Terminating entry */ + }; + + MODULE_DEVICE_TABLE (usb, usblp_ids); +diff -urNp linux-2.6.29/drivers/usb/core/hub.c linux-2.6.29/drivers/usb/core/hub.c +--- linux-2.6.29/drivers/usb/core/hub.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/usb/core/hub.c 2009-03-28 14:26:20.000000000 -0400 +@@ -3193,7 +3193,7 @@ static struct usb_device_id hub_id_table + .bDeviceClass = USB_CLASS_HUB}, + { .match_flags = USB_DEVICE_ID_MATCH_INT_CLASS, + .bInterfaceClass = USB_CLASS_HUB}, +- { } /* Terminating entry */ ++ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } /* Terminating entry */ + }; + + MODULE_DEVICE_TABLE (usb, hub_id_table); +diff -urNp linux-2.6.29/drivers/usb/host/ehci-pci.c linux-2.6.29/drivers/usb/host/ehci-pci.c +--- linux-2.6.29/drivers/usb/host/ehci-pci.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/usb/host/ehci-pci.c 2009-03-28 14:26:20.000000000 -0400 +@@ -418,7 +418,7 @@ static const struct pci_device_id pci_id 
+ PCI_DEVICE_CLASS(PCI_CLASS_SERIAL_USB_EHCI, ~0), + .driver_data = (unsigned long) &ehci_pci_hc_driver, + }, +- { /* end: all zeroes */ } ++ { 0, 0, 0, 0, 0, 0, 0 } + }; + MODULE_DEVICE_TABLE(pci, pci_ids); + +diff -urNp linux-2.6.29/drivers/usb/host/uhci-hcd.c linux-2.6.29/drivers/usb/host/uhci-hcd.c +--- linux-2.6.29/drivers/usb/host/uhci-hcd.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/usb/host/uhci-hcd.c 2009-03-28 14:26:20.000000000 -0400 +@@ -927,7 +927,7 @@ static const struct pci_device_id uhci_p + /* handle any USB UHCI controller */ + PCI_DEVICE_CLASS(PCI_CLASS_SERIAL_USB_UHCI, ~0), + .driver_data = (unsigned long) &uhci_driver, +- }, { /* end: all zeroes */ } ++ }, { 0, 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE(pci, uhci_pci_ids); +diff -urNp linux-2.6.29/drivers/usb/storage/debug.h linux-2.6.29/drivers/usb/storage/debug.h +--- linux-2.6.29/drivers/usb/storage/debug.h 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/usb/storage/debug.h 2009-03-28 14:26:20.000000000 -0400 +@@ -54,9 +54,9 @@ void usb_stor_show_sense( unsigned char + #define US_DEBUGPX(x...) printk( x ) + #define US_DEBUG(x) x + #else +-#define US_DEBUGP(x...) +-#define US_DEBUGPX(x...) +-#define US_DEBUG(x) ++#define US_DEBUGP(x...) do {} while (0) ++#define US_DEBUGPX(x...) 
do {} while (0) ++#define US_DEBUG(x) do {} while (0) + #endif + + #endif +diff -urNp linux-2.6.29/drivers/usb/storage/usb.c linux-2.6.29/drivers/usb/storage/usb.c +--- linux-2.6.29/drivers/usb/storage/usb.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/usb/storage/usb.c 2009-03-28 14:26:20.000000000 -0400 +@@ -141,7 +141,7 @@ static struct usb_device_id storage_usb_ + #undef COMPLIANT_DEV + #undef USUAL_DEV + /* Terminating entry */ +- { } ++ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE (usb, storage_usb_ids); +@@ -184,7 +184,7 @@ static struct us_unusual_dev us_unusual_ + # undef USUAL_DEV + + /* Terminating entry */ +- { NULL } ++ { NULL, NULL, 0, 0, NULL } + }; + + +diff -urNp linux-2.6.29/drivers/uwb/wlp/messages.c linux-2.6.29/drivers/uwb/wlp/messages.c +--- linux-2.6.29/drivers/uwb/wlp/messages.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/uwb/wlp/messages.c 2009-03-28 14:26:20.000000000 -0400 +@@ -903,7 +903,7 @@ int wlp_parse_f0(struct wlp *wlp, struct + size_t len = skb->len; + size_t used; + ssize_t result; +- struct wlp_nonce enonce, rnonce; ++ struct wlp_nonce enonce = {{0}}, rnonce = {{0}}; + enum wlp_assc_error assc_err; + char enonce_buf[WLP_WSS_NONCE_STRSIZE]; + char rnonce_buf[WLP_WSS_NONCE_STRSIZE]; +diff -urNp linux-2.6.29/drivers/video/fbmem.c linux-2.6.29/drivers/video/fbmem.c +--- linux-2.6.29/drivers/video/fbmem.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/video/fbmem.c 2009-03-28 14:26:20.000000000 -0400 +@@ -393,7 +393,7 @@ static void fb_do_show_logo(struct fb_in + image->dx += image->width + 8; + } + } else if (rotate == FB_ROTATE_UD) { +- for (x = 0; x < num && image->dx >= 0; x++) { ++ for (x = 0; x < num && (__s32)image->dx >= 0; x++) { + info->fbops->fb_imageblit(info, image); + image->dx -= image->width + 8; + } +@@ -405,7 +405,7 @@ static void fb_do_show_logo(struct fb_in + image->dy += image->height + 8; + } + } else if (rotate == FB_ROTATE_CCW) { +- for 
(x = 0; x < num && image->dy >= 0; x++) { ++ for (x = 0; x < num && (__s32)image->dy >= 0; x++) { + info->fbops->fb_imageblit(info, image); + image->dy -= image->height + 8; + } +@@ -1100,7 +1100,7 @@ static long do_fb_ioctl(struct fb_info * + return -EFAULT; + if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES) + return -EINVAL; +- if (con2fb.framebuffer < 0 || con2fb.framebuffer >= FB_MAX) ++ if (con2fb.framebuffer >= FB_MAX) + return -EINVAL; + if (!registered_fb[con2fb.framebuffer]) + request_module("fb%d", con2fb.framebuffer); +diff -urNp linux-2.6.29/drivers/video/fbmon.c linux-2.6.29/drivers/video/fbmon.c +--- linux-2.6.29/drivers/video/fbmon.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/video/fbmon.c 2009-03-28 14:26:20.000000000 -0400 +@@ -45,7 +45,7 @@ + #ifdef DEBUG + #define DPRINTK(fmt, args...) printk(fmt,## args) + #else +-#define DPRINTK(fmt, args...) ++#define DPRINTK(fmt, args...) do {} while (0) + #endif + + #define FBMON_FIX_HEADER 1 +diff -urNp linux-2.6.29/drivers/video/i810/i810_accel.c linux-2.6.29/drivers/video/i810/i810_accel.c +--- linux-2.6.29/drivers/video/i810/i810_accel.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/video/i810/i810_accel.c 2009-03-28 14:26:20.000000000 -0400 +@@ -73,6 +73,7 @@ static inline int wait_for_space(struct + } + } + printk("ringbuffer lockup!!!\n"); ++ printk("head:%u tail:%u iring.size:%u space:%u\n", head, tail, par->iring.size, space); + i810_report_error(mmio); + par->dev_flags |= LOCKUP; + info->pixmap.scan_align = 1; +diff -urNp linux-2.6.29/drivers/video/i810/i810_main.c linux-2.6.29/drivers/video/i810/i810_main.c +--- linux-2.6.29/drivers/video/i810/i810_main.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/video/i810/i810_main.c 2009-03-28 14:26:20.000000000 -0400 +@@ -120,7 +120,7 @@ static struct pci_device_id i810fb_pci_t + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 4 }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82815_CGC, + PCI_ANY_ID, PCI_ANY_ID, 
0, 0, 5 }, +- { 0 }, ++ { 0, 0, 0, 0, 0, 0, 0 }, + }; + + static struct pci_driver i810fb_driver = { +diff -urNp linux-2.6.29/drivers/video/modedb.c linux-2.6.29/drivers/video/modedb.c +--- linux-2.6.29/drivers/video/modedb.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/video/modedb.c 2009-03-28 14:26:20.000000000 -0400 +@@ -38,232 +38,232 @@ static const struct fb_videomode modedb[ + { + /* 640x400 @ 70 Hz, 31.5 kHz hsync */ + NULL, 70, 640, 400, 39721, 40, 24, 39, 9, 96, 2, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 640x480 @ 60 Hz, 31.5 kHz hsync */ + NULL, 60, 640, 480, 39721, 40, 24, 32, 11, 96, 2, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 800x600 @ 56 Hz, 35.15 kHz hsync */ + NULL, 56, 800, 600, 27777, 128, 24, 22, 1, 72, 2, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1024x768 @ 87 Hz interlaced, 35.5 kHz hsync */ + NULL, 87, 1024, 768, 22271, 56, 24, 33, 8, 160, 8, +- 0, FB_VMODE_INTERLACED ++ 0, FB_VMODE_INTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 640x400 @ 85 Hz, 37.86 kHz hsync */ + NULL, 85, 640, 400, 31746, 96, 32, 41, 1, 64, 3, +- FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED ++ FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 640x480 @ 72 Hz, 36.5 kHz hsync */ + NULL, 72, 640, 480, 31746, 144, 40, 30, 8, 40, 3, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 640x480 @ 75 Hz, 37.50 kHz hsync */ + NULL, 75, 640, 480, 31746, 120, 16, 16, 1, 64, 3, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 800x600 @ 60 Hz, 37.8 kHz hsync */ + NULL, 60, 800, 600, 25000, 88, 40, 23, 1, 128, 4, +- FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED ++ FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 640x480 @ 85 Hz, 43.27 kHz hsync */ + 
NULL, 85, 640, 480, 27777, 80, 56, 25, 1, 56, 3, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1152x864 @ 89 Hz interlaced, 44 kHz hsync */ + NULL, 89, 1152, 864, 15384, 96, 16, 110, 1, 216, 10, +- 0, FB_VMODE_INTERLACED ++ 0, FB_VMODE_INTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 800x600 @ 72 Hz, 48.0 kHz hsync */ + NULL, 72, 800, 600, 20000, 64, 56, 23, 37, 120, 6, +- FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED ++ FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1024x768 @ 60 Hz, 48.4 kHz hsync */ + NULL, 60, 1024, 768, 15384, 168, 8, 29, 3, 144, 6, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 640x480 @ 100 Hz, 53.01 kHz hsync */ + NULL, 100, 640, 480, 21834, 96, 32, 36, 8, 96, 6, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1152x864 @ 60 Hz, 53.5 kHz hsync */ + NULL, 60, 1152, 864, 11123, 208, 64, 16, 4, 256, 8, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 800x600 @ 85 Hz, 55.84 kHz hsync */ + NULL, 85, 800, 600, 16460, 160, 64, 36, 16, 64, 5, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1024x768 @ 70 Hz, 56.5 kHz hsync */ + NULL, 70, 1024, 768, 13333, 144, 24, 29, 3, 136, 6, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1280x1024 @ 87 Hz interlaced, 51 kHz hsync */ + NULL, 87, 1280, 1024, 12500, 56, 16, 128, 1, 216, 12, +- 0, FB_VMODE_INTERLACED ++ 0, FB_VMODE_INTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 800x600 @ 100 Hz, 64.02 kHz hsync */ + NULL, 100, 800, 600, 14357, 160, 64, 30, 4, 64, 6, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1024x768 @ 76 Hz, 62.5 kHz hsync */ + NULL, 76, 1024, 768, 11764, 208, 8, 36, 16, 120, 3, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, 
FB_MODE_IS_UNKNOWN + }, { + /* 1152x864 @ 70 Hz, 62.4 kHz hsync */ + NULL, 70, 1152, 864, 10869, 106, 56, 20, 1, 160, 10, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1280x1024 @ 61 Hz, 64.2 kHz hsync */ + NULL, 61, 1280, 1024, 9090, 200, 48, 26, 1, 184, 3, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1400x1050 @ 60Hz, 63.9 kHz hsync */ + NULL, 60, 1400, 1050, 9259, 136, 40, 13, 1, 112, 3, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1400x1050 @ 75,107 Hz, 82,392 kHz +hsync +vsync*/ + NULL, 75, 1400, 1050, 7190, 120, 56, 23, 10, 112, 13, +- FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED ++ FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1400x1050 @ 60 Hz, ? kHz +hsync +vsync*/ + NULL, 60, 1400, 1050, 9259, 128, 40, 12, 0, 112, 3, +- FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED ++ FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1024x768 @ 85 Hz, 70.24 kHz hsync */ + NULL, 85, 1024, 768, 10111, 192, 32, 34, 14, 160, 6, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1152x864 @ 78 Hz, 70.8 kHz hsync */ + NULL, 78, 1152, 864, 9090, 228, 88, 32, 0, 84, 12, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1280x1024 @ 70 Hz, 74.59 kHz hsync */ + NULL, 70, 1280, 1024, 7905, 224, 32, 28, 8, 160, 8, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1600x1200 @ 60Hz, 75.00 kHz hsync */ + NULL, 60, 1600, 1200, 6172, 304, 64, 46, 1, 192, 3, +- FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED ++ FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1152x864 @ 84 Hz, 76.0 kHz hsync */ + NULL, 84, 1152, 864, 7407, 184, 312, 
32, 0, 128, 12, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1280x1024 @ 74 Hz, 78.85 kHz hsync */ + NULL, 74, 1280, 1024, 7407, 256, 32, 34, 3, 144, 3, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1024x768 @ 100Hz, 80.21 kHz hsync */ + NULL, 100, 1024, 768, 8658, 192, 32, 21, 3, 192, 10, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1280x1024 @ 76 Hz, 81.13 kHz hsync */ + NULL, 76, 1280, 1024, 7407, 248, 32, 34, 3, 104, 3, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1600x1200 @ 70 Hz, 87.50 kHz hsync */ + NULL, 70, 1600, 1200, 5291, 304, 64, 46, 1, 192, 3, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1152x864 @ 100 Hz, 89.62 kHz hsync */ + NULL, 100, 1152, 864, 7264, 224, 32, 17, 2, 128, 19, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1280x1024 @ 85 Hz, 91.15 kHz hsync */ + NULL, 85, 1280, 1024, 6349, 224, 64, 44, 1, 160, 3, +- FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED ++ FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1600x1200 @ 75 Hz, 93.75 kHz hsync */ + NULL, 75, 1600, 1200, 4938, 304, 64, 46, 1, 192, 3, +- FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED ++ FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1680x1050 @ 60 Hz, 65.191 kHz hsync */ + NULL, 60, 1680, 1050, 6848, 280, 104, 30, 3, 176, 6, +- FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED ++ FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1600x1200 @ 85 Hz, 105.77 kHz hsync */ + NULL, 85, 1600, 1200, 4545, 272, 16, 37, 4, 192, 3, +- FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED ++ 
FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1280x1024 @ 100 Hz, 107.16 kHz hsync */ + NULL, 100, 1280, 1024, 5502, 256, 32, 26, 7, 128, 15, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1800x1440 @ 64Hz, 96.15 kHz hsync */ + NULL, 64, 1800, 1440, 4347, 304, 96, 46, 1, 192, 3, +- FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED ++ FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1800x1440 @ 70Hz, 104.52 kHz hsync */ + NULL, 70, 1800, 1440, 4000, 304, 96, 46, 1, 192, 3, +- FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED ++ FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 512x384 @ 78 Hz, 31.50 kHz hsync */ + NULL, 78, 512, 384, 49603, 48, 16, 16, 1, 64, 3, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 512x384 @ 85 Hz, 34.38 kHz hsync */ + NULL, 85, 512, 384, 45454, 48, 16, 16, 1, 64, 3, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 320x200 @ 70 Hz, 31.5 kHz hsync, 8:5 aspect ratio */ + NULL, 70, 320, 200, 79440, 16, 16, 20, 4, 48, 1, +- 0, FB_VMODE_DOUBLE ++ 0, FB_VMODE_DOUBLE, FB_MODE_IS_UNKNOWN + }, { + /* 320x240 @ 60 Hz, 31.5 kHz hsync, 4:3 aspect ratio */ + NULL, 60, 320, 240, 79440, 16, 16, 16, 5, 48, 1, +- 0, FB_VMODE_DOUBLE ++ 0, FB_VMODE_DOUBLE, FB_MODE_IS_UNKNOWN + }, { + /* 320x240 @ 72 Hz, 36.5 kHz hsync */ + NULL, 72, 320, 240, 63492, 16, 16, 16, 4, 48, 2, +- 0, FB_VMODE_DOUBLE ++ 0, FB_VMODE_DOUBLE, FB_MODE_IS_UNKNOWN + }, { + /* 400x300 @ 56 Hz, 35.2 kHz hsync, 4:3 aspect ratio */ + NULL, 56, 400, 300, 55555, 64, 16, 10, 1, 32, 1, +- 0, FB_VMODE_DOUBLE ++ 0, FB_VMODE_DOUBLE, FB_MODE_IS_UNKNOWN + }, { + /* 400x300 @ 60 Hz, 37.8 kHz hsync */ + NULL, 60, 400, 300, 50000, 48, 16, 11, 1, 64, 2, +- 0, FB_VMODE_DOUBLE ++ 0, FB_VMODE_DOUBLE, 
FB_MODE_IS_UNKNOWN + }, { + /* 400x300 @ 72 Hz, 48.0 kHz hsync */ + NULL, 72, 400, 300, 40000, 32, 24, 11, 19, 64, 3, +- 0, FB_VMODE_DOUBLE ++ 0, FB_VMODE_DOUBLE, FB_MODE_IS_UNKNOWN + }, { + /* 480x300 @ 56 Hz, 35.2 kHz hsync, 8:5 aspect ratio */ + NULL, 56, 480, 300, 46176, 80, 16, 10, 1, 40, 1, +- 0, FB_VMODE_DOUBLE ++ 0, FB_VMODE_DOUBLE, FB_MODE_IS_UNKNOWN + }, { + /* 480x300 @ 60 Hz, 37.8 kHz hsync */ + NULL, 60, 480, 300, 41858, 56, 16, 11, 1, 80, 2, +- 0, FB_VMODE_DOUBLE ++ 0, FB_VMODE_DOUBLE, FB_MODE_IS_UNKNOWN + }, { + /* 480x300 @ 63 Hz, 39.6 kHz hsync */ + NULL, 63, 480, 300, 40000, 56, 16, 11, 1, 80, 2, +- 0, FB_VMODE_DOUBLE ++ 0, FB_VMODE_DOUBLE, FB_MODE_IS_UNKNOWN + }, { + /* 480x300 @ 72 Hz, 48.0 kHz hsync */ + NULL, 72, 480, 300, 33386, 40, 24, 11, 19, 80, 3, +- 0, FB_VMODE_DOUBLE ++ 0, FB_VMODE_DOUBLE, FB_MODE_IS_UNKNOWN + }, { + /* 1920x1200 @ 60 Hz, 74.5 Khz hsync */ + NULL, 60, 1920, 1200, 5177, 128, 336, 1, 38, 208, 3, + FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, +- FB_VMODE_NONINTERLACED ++ FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1152x768, 60 Hz, PowerBook G4 Titanium I and II */ + NULL, 60, 1152, 768, 14047, 158, 26, 29, 3, 136, 6, +- FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED ++ FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1366x768, 60 Hz, 47.403 kHz hsync, WXGA 16:9 aspect ratio */ + NULL, 60, 1366, 768, 13806, 120, 10, 14, 3, 32, 5, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1280x800, 60 Hz, 47.403 kHz hsync, WXGA 16:10 aspect ratio */ + NULL, 60, 1280, 800, 12048, 200, 64, 24, 1, 136, 3, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, + }; + +diff -urNp linux-2.6.29/drivers/video/uvesafb.c linux-2.6.29/drivers/video/uvesafb.c +--- linux-2.6.29/drivers/video/uvesafb.c 2009-03-23 19:12:14.000000000 -0400 ++++ linux-2.6.29/drivers/video/uvesafb.c 2009-03-28 
14:26:20.000000000 -0400 +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include + #include