--- linux-2.6.33/scripts/mod/modpost.c~	2010-02-24 19:52:17.000000000 +0100
+++ linux-2.6.33/scripts/mod/modpost.c	2010-03-07 14:26:47.242168558 +0100
@@ -15,7 +15,8 @@
 #include <stdio.h>
 #include <ctype.h>
 #include "modpost.h"
-#include "../../include/generated/autoconf.h"
+// PLD architectures don't use CONFIG_SYMBOL_PREFIX
+//#include "../../include/generated/autoconf.h"
 #include "../../include/linux/license.h"
 
 /* Some toolchains use a `_' prefix for all user symbols. */

--- linux-3.0/scripts/kconfig/lxdialog/check-lxdialog.sh~	2011-07-22 04:17:23.000000000 +0200
+++ linux-3.0/scripts/kconfig/lxdialog/check-lxdialog.sh	2011-08-25 21:26:04.799150642 +0200
@@ -9,6 +9,12 @@
 			$cc -print-file-name=lib${lib}.${ext} | grep -q /
 			if [ $? -eq 0 ]; then
 				echo "-l${lib}"
+				for libt in tinfow tinfo ; do
+					$cc -print-file-name=lib${libt}.${ext} | grep -q /
+					if [ $? -eq 0 ]; then
+						echo "-l${libt}"
+					fi
+				done
 				exit
 			fi
 		done

--- a/Makefile	2016-11-10 20:41:43.646224629 +0100
+++ b/Makefile	2016-11-10 20:40:35.640323501 +0100
@@ -784,6 +774,9 @@
 # Prohibit date/time macros, which would make the build non-deterministic
 KBUILD_CFLAGS   += $(call cc-option,-Werror=date-time)
 
+# enforce correct pointer usage
+KBUILD_CFLAGS   += $(call cc-option,-Werror=incompatible-pointer-types)
+
 # use the deterministic mode of AR if available
 KBUILD_ARFLAGS := $(call ar-option,D)

From 7a29ac474a47eb8cf212b45917683ae89d6fa13b Mon Sep 17 00:00:00 2001
From: Chris Mason
Date: Tue, 10 Nov 2015 10:10:34 +1100
Subject: xfs: give all workqueues rescuer threads

We're consistently hitting deadlocks here with XFS on recent kernels.
After some digging through the crash files, it looks like everyone in
the system is waiting for XFS to reclaim memory. Something like this:

PID: 2733434  TASK: ffff8808cd242800  CPU: 19  COMMAND: "java"
 #0 [ffff880019c53588] __schedule at ffffffff818c4df2
 #1 [ffff880019c535d8] schedule at ffffffff818c5517
 #2 [ffff880019c535f8] _xfs_log_force_lsn at ffffffff81316348
 #3 [ffff880019c53688] xfs_log_force_lsn at ffffffff813164fb
 #4 [ffff880019c536b8] xfs_iunpin_wait at ffffffff8130835e
 #5 [ffff880019c53728] xfs_reclaim_inode at ffffffff812fd453
 #6 [ffff880019c53778] xfs_reclaim_inodes_ag at ffffffff812fd8c7
 #7 [ffff880019c53928] xfs_reclaim_inodes_nr at ffffffff812fe433
 #8 [ffff880019c53958] xfs_fs_free_cached_objects at ffffffff8130d3b9
 #9 [ffff880019c53968] super_cache_scan at ffffffff811a6f73
#10 [ffff880019c539c8] shrink_slab at ffffffff811460e6
#11 [ffff880019c53aa8] shrink_zone at ffffffff8114a53f
#12 [ffff880019c53b48] do_try_to_free_pages at ffffffff8114a8ba
#13 [ffff880019c53be8] try_to_free_pages at ffffffff8114ad5a
#14 [ffff880019c53c78] __alloc_pages_nodemask at ffffffff8113e1b8
#15 [ffff880019c53d88] alloc_kmem_pages_node at ffffffff8113e671
#16 [ffff880019c53dd8] copy_process at ffffffff8104f781
#17 [ffff880019c53ec8] do_fork at ffffffff8105129c
#18 [ffff880019c53f38] sys_clone at ffffffff810515b6
#19 [ffff880019c53f48] stub_clone at ffffffff818c8e4d

xfs_log_force_lsn is waiting for logs to get cleaned, which is waiting
for IO, which is waiting for workers to complete the IO, which is
waiting for worker threads that don't exist yet:

PID: 2752451  TASK: ffff880bd6bdda00  CPU: 37  COMMAND: "kworker/37:1"
 #0 [ffff8808d20abbb0] __schedule at ffffffff818c4df2
 #1 [ffff8808d20abc00] schedule at ffffffff818c5517
 #2 [ffff8808d20abc20] schedule_timeout at ffffffff818c7c6c
 #3 [ffff8808d20abcc0] wait_for_completion_killable at ffffffff818c6495
 #4 [ffff8808d20abd30] kthread_create_on_node at ffffffff8106ec82
 #5 [ffff8808d20abdf0] create_worker at ffffffff8106752f
 #6 [ffff8808d20abe40] worker_thread at ffffffff810699be
 #7 [ffff8808d20abec0] kthread at ffffffff8106ef59
 #8 [ffff8808d20abf50] ret_from_fork at ffffffff818c8ac8

I think we should be using WQ_MEM_RECLAIM to make sure this thread
pool makes progress when we're not able to allocate new workers.

[dchinner: make all workqueues WQ_MEM_RECLAIM]

Signed-off-by: Chris Mason
Reviewed-by: Dave Chinner
Signed-off-by: Dave Chinner
---
 fs/xfs/xfs_super.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 29531ec..65fbfb7 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -838,17 +838,18 @@ xfs_init_mount_workqueues(
 		goto out_destroy_unwritten;
 
 	mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
-			WQ_FREEZABLE, 0, mp->m_fsname);
+			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
 	if (!mp->m_reclaim_workqueue)
 		goto out_destroy_cil;
 
 	mp->m_log_workqueue = alloc_workqueue("xfs-log/%s",
-			WQ_FREEZABLE|WQ_HIGHPRI, 0, mp->m_fsname);
+			WQ_MEM_RECLAIM|WQ_FREEZABLE|WQ_HIGHPRI, 0,
+			mp->m_fsname);
 	if (!mp->m_log_workqueue)
 		goto out_destroy_reclaim;
 
 	mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s",
-			WQ_FREEZABLE, 0, mp->m_fsname);
+			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
 	if (!mp->m_eofblocks_workqueue)
 		goto out_destroy_log;

patches.fixes/x86-mm-32-Enable-full-randomization-on-i386-and-X86_.patch
From 8b8addf891de8a00e4d39fc32f93f7c5eb8feceb Mon Sep 17 00:00:00 2001
From: Hector Marco-Gisbert
Date: Thu, 10 Mar 2016 20:51:00 +0100
Subject: [PATCH] x86/mm/32: Enable full randomization on i386 and X86_32
Git-commit: 8b8addf891de8a00e4d39fc32f93f7c5eb8feceb
Patch-mainline: 4.6-rc1
References: bnc#974308, CVE-2016-3672

Currently on i386 and on X86_64 when emulating X86_32 in legacy mode,
only the stack and the executable are randomized but not other mmapped
files (libraries, vDSO, etc.). This patch enables randomization for the
libraries, vDSO and mmap requests on i386 and in X86_32 in legacy mode.

By default on i386 there are 8 bits for the randomization of the
libraries, vDSO and mmaps which only uses 1MB of VA.

This patch preserves the original randomness, using 1MB of VA out of
3GB or 4GB. We think that 1MB out of 3GB is not a big cost for having
the ASLR.

The first obvious security benefit is that all objects are randomized
(not only the stack and the executable) in legacy mode, which highly
increases the ASLR effectiveness; otherwise the attackers may use these
non-randomized areas. But also sensitive setuid/setgid applications are
more secure because currently, attackers can disable the randomization
of these applications by setting the ulimit stack to "unlimited". This
is a very old and widely known trick to disable the ASLR in i386 which
has been allowed for too long.

Another trick used to disable the ASLR was to set the ADDR_NO_RANDOMIZE
personality flag, but fortunately this doesn't work on setuid/setgid
applications because there are security checks that clear
security-relevant flags.

This patch always randomizes the mmap_legacy_base address, removing the
possibility to disable the ASLR by setting the stack to "unlimited".
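The randomization added here can be sanity-checked from userspace: with
mmap_legacy_base always randomized, anonymous mmap() addresses move
between runs even in the legacy layout. A minimal check, as an
illustrative sketch only (not part of the patch):

  /* Run several times and compare the output: with full randomization
   * both the stack address and the mmap()ed address should move. */
  #include <stdio.h>
  #include <sys/mman.h>

  int main(void)
  {
          int stack_var;
          void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

          printf("stack: %p  mmap: %p\n", (void *)&stack_var, p);
          munmap(p, 4096);
          return 0;
  }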
Signed-off-by: Hector Marco-Gisbert
Acked-by: Ismael Ripoll Ripoll
Acked-by: Kees Cook
Acked-by: Arjan van de Ven
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: akpm@linux-foundation.org
Cc: kees Cook
Link: http://lkml.kernel.org/r/1457639460-5242-1-git-send-email-hecmargi@upv.es
Signed-off-by: Ingo Molnar
Reviewed-by: Michal Hocko
---
 arch/x86/mm/mmap.c | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 96bd1e2bffaf..389939f74dd5 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -94,18 +94,6 @@ static unsigned long mmap_base(unsigned long rnd)
 }
 
 /*
- * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64
- * does, but not when emulating X86_32
- */
-static unsigned long mmap_legacy_base(unsigned long rnd)
-{
-	if (mmap_is_ia32())
-		return TASK_UNMAPPED_BASE;
-	else
-		return TASK_UNMAPPED_BASE + rnd;
-}
-
-/*
  * This function, called very early during the creation of a new
  * process VM image, sets up which VM layout function to use:
  */
@@ -116,7 +104,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	if (current->flags & PF_RANDOMIZE)
 		random_factor = arch_mmap_rnd();
 
-	mm->mmap_legacy_base = mmap_legacy_base(random_factor);
+	mm->mmap_legacy_base = TASK_UNMAPPED_BASE + random_factor;
 
 	if (mmap_is_legacy()) {
 		mm->mmap_base = mm->mmap_legacy_base;
-- 
1.8.5.6

patches.fixes/rds-fix-an-infoleak-in-rds_inc_info_copy.patch
From: Kangjie Lu
Date: Thu, 2 Jun 2016 04:11:20 -0400
Subject: rds: fix an infoleak in rds_inc_info_copy
Patch-mainline: v4.7-rc3
Git-commit: 4116def2337991b39919f3b448326e21c40e0dbb
References: bsc#983213 CVE-2016-5244

The last field "flags" of object "minfo" is not initialized.
Copying this object out may leak kernel stack data.
Assign 0 to it to avoid leak.

Signed-off-by: Kangjie Lu
Acked-by: Santosh Shilimkar
Signed-off-by: David S. Miller
Acked-by: Borislav Petkov
---
 net/rds/recv.c | 2 ++
 1 file changed, 2 insertions(+)

--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -545,5 +545,7 @@ void rds_inc_info_copy(struct rds_incomi
 		minfo.fport = inc->i_hdr.h_dport;
 	}
 
+	minfo.flags = 0;
+
 	rds_info_copy(iter, &minfo, sizeof(minfo));
 }

patches.fixes/gro-Defer-clearing-of-flush-bit-in-tunnel-paths.patch
From: Alexander Duyck
Date: Wed, 9 Mar 2016 09:24:23 -0800
Subject: gro: Defer clearing of flush bit in tunnel paths
Patch-mainline: v4.6-rc1
Git-commit: c194cf93c164ed1c71142485ee0f70f9f2d1fe35
References: CVE-2016-7039 bsc#1001486

This patch updates the GRO handlers for GRE, VXLAN, GENEVE, and FOU so
that we do not clear the flush bit until after we have called the next
level GRO handler. Previously this was being cleared before parsing
through the list of frames; however, this resulted in several paths
where either the bit needed to be reset but wasn't, as in the case of
FOU, or cases where it was being set, as in GENEVE. By just deferring
the clearing of the bit until after the next level protocol has been
parsed we can avoid any unnecessary bit twiddling and avoid bugs.

Signed-off-by: Alexander Duyck
Signed-off-by: David S. Miller
Acked-by: Michal Kubecek

openSUSE-42.1: omit geneve part (geneve driver was added in v4.2-rc1)
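The fix applies the same reshaping to each affected handler. As an
illustrative sketch only (hypothetical names, not the literal driver
code), the resulting shape of a tunnel gro_receive handler is:

  static struct sk_buff **example_gro_receive(struct sk_buff **head,
                                              struct sk_buff *skb)
  {
          struct sk_buff **pp = NULL;
          int flush = 1;  /* assume "flush" until the inner handler
                           * has actually accepted the packet */

          if (!example_parse_header(skb))  /* hypothetical validation */
                  goto out;

          pp = call_gro_receive(inner_gro_receive, head, skb);
          flush = 0;      /* deferred: cleared only after the next
                           * level handler has been called */
  out:
          NAPI_GRO_CB(skb)->flush |= flush;
          return pp;
  }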
---
 drivers/net/vxlan.c    | 2 +-
 net/ipv4/fou.c         | 3 +--
 net/ipv4/gre_offload.c | 3 +--
 3 files changed, 3 insertions(+), 5 deletions(-)

--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -621,7 +621,6 @@ static struct sk_buff **vxlan_gro_receiv
 		goto out;
 	}
 
-	flush = 0;
 	for (p = *head; p; p = p->next) {
 		if (!NAPI_GRO_CB(p)->same_flow)
@@ -636,6 +635,7 @@ static struct sk_buff **vxlan_gro_receiv
 	}
 
 	pp = call_gro_receive(eth_gro_receive, head, skb);
+	flush = 0;
 
 out:
 	skb_gro_remcsum_cleanup(skb, &grc);

--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -323,8 +323,6 @@ static struct sk_buff **gue_gro_receive(
 
 	skb_gro_pull(skb, hdrlen);
 
-	flush = 0;
-
 	for (p = *head; p; p = p->next) {
 		const struct guehdr *guehdr2;
@@ -356,6 +354,7 @@ static struct sk_buff **gue_gro_receive(
 		goto out_unlock;
 
 	pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+	flush = 0;
 
 out_unlock:
 	rcu_read_unlock();

--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -182,8 +182,6 @@ static struct sk_buff **gre_gro_receive(
 					     null_compute_pseudo);
 	}
 
-	flush = 0;
-
 	for (p = *head; p; p = p->next) {
 		const struct gre_base_hdr *greh2;
@@ -220,6 +218,7 @@ static struct sk_buff **gre_gro_receive(
 	skb_gro_postpull_rcsum(skb, greh, grehlen);
 
 	pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
+	flush = 0;
 
 out_unlock:
 	rcu_read_unlock();

patches.fixes/sctp-validate-chunk-len-before-actually-using-it.patch
From: Marcelo Ricardo Leitner
Date: Tue, 25 Oct 2016 14:27:39 -0200
Subject: sctp: validate chunk len before actually using it
Patch-mainline: v4.9-rc4
Git-commit: bf911e985d6bbaa328c20c3e05f4eb03de11fdd6
References: CVE-2016-9555 bsc#1011685

Andrey Konovalov reported that KASAN detected that SCTP was using a slab
beyond the boundaries. It was caused because when handling out of the
blue packets in function sctp_sf_ootb() it was checking the chunk len
only after already processing the first chunk, validating only for the
2nd and subsequent ones. The fix is to just move the check upwards so
it's also validated for the 1st chunk.

Reported-by: Andrey Konovalov
Tested-by: Andrey Konovalov
Signed-off-by: Marcelo Ricardo Leitner
Reviewed-by: Xin Long
Acked-by: Neil Horman
Signed-off-by: David S. Miller
Acked-by: Michal Kubecek
---
 net/sctp/sm_statefuns.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index e6bb98e583fb..690a973b72b5 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3426,6 +3426,12 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
 			return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
+		/* Report violation if chunk len overflows */
+		ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
+		if (ch_end > skb_tail_pointer(skb))
+			return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
+						  commands);
+
 		/* Now that we know we at least have a chunk header,
 		 * do things that are type appropriate.
 		 */
@@ -3457,12 +3463,6 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
 			}
 		}
 
-		/* Report violation if chunk len overflows */
-		ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
-		if (ch_end > skb_tail_pointer(skb))
-			return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
-						  commands);
-
 		ch = (sctp_chunkhdr_t *) ch_end;
 	} while (ch_end < skb_tail_pointer(skb));
-- 
2.10.2

patches.fixes/net-avoid-signed-overflows-for-SO_-SND-RCV-BUFFORCE.patch
From: Eric Dumazet
Date: Fri, 2 Dec 2016 09:44:53 -0800
Subject: net: avoid signed overflows for SO_{SND|RCV}BUFFORCE
Patch-mainline: v4.9-rc8
Git-commit: b98b0bc8c431e3ceb4b26b0dfc8db509518fb290
References: CVE-2016-9793 bsc#1013531

CAP_NET_ADMIN users should not be allowed to set negative sk_sndbuf or
sk_rcvbuf values, as it can lead to various memory corruptions,
crashes, OOM...

Note that before commit 82981930125a ("net: cleanups in
sock_setsockopt()"), the bug was even more serious, since SO_SNDBUF
and SO_RCVBUF were vulnerable.

This needs to be backported to all known linux kernels.

Again, many thanks to syzkaller team for discovering this gem.

Signed-off-by: Eric Dumazet
Reported-by: Andrey Konovalov
Signed-off-by: David S. Miller
Acked-by: Michal Kubecek
---
 net/core/sock.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index 47fc8bb3b946..510003ac0567 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -732,7 +732,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 		val = min_t(u32, val, sysctl_wmem_max);
 set_sndbuf:
 		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
-		sk->sk_sndbuf = max_t(u32, val * 2, SOCK_MIN_SNDBUF);
+		sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
 		/* Wake up sending tasks if we upped the value. */
 		sk->sk_write_space(sk);
 		break;
@@ -768,7 +768,7 @@ set_rcvbuf:
 		 * returning the value we actually used in getsockopt
 		 * is the most desirable behavior.
 		 */
-		sk->sk_rcvbuf = max_t(u32, val * 2, SOCK_MIN_RCVBUF);
+		sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
 		break;
 
 	case SO_RCVBUFFORCE:
-- 
2.11.0

patches.fixes/tcp-take-care-of-truncations-done-by-sk_filter.patch
From: Eric Dumazet
Date: Thu, 10 Nov 2016 13:12:35 -0800
Subject: tcp: take care of truncations done by sk_filter()
Patch-mainline: v4.9-rc6
Git-commit: ac6e780070e30e4c35bd395acfe9191e6268bdd3
References: CVE-2016-8645 bsc#1009969

With syzkaller help, Marco Grassi found a bug in TCP stack, crashing in
tcp_collapse()

Root cause is that sk_filter() can truncate the incoming skb, but TCP
stack was not really expecting this to happen. It probably was
expecting a simple DROP or ACCEPT behavior.

We first need to make sure no part of TCP header could be removed.
Then we need to adjust TCP_SKB_CB(skb)->end_seq

Many thanks to syzkaller team and Marco for giving us a reproducer.

Signed-off-by: Eric Dumazet
Reported-by: Marco Grassi
Reported-by: Vladis Dronov
Signed-off-by: David S. Miller
Acked-by: Michal Kubecek
---
 include/net/tcp.h   |  1 +
 net/ipv4/tcp_ipv4.c | 19 ++++++++++++++++++-
 net/ipv6/tcp_ipv6.c |  6 ++++--
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3d3a365233f0..22c9eb3eea84 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1094,6 +1094,7 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp)
 }
 
 bool tcp_prequeue(struct sock *sk, struct sk_buff *skb);
+int tcp_filter(struct sock *sk, struct sk_buff *skb);
 
 #undef STATE_TRACE
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 13b92d595138..bbaaf7c62645 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1540,6 +1540,21 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(tcp_prequeue);
 
+int tcp_filter(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcphdr *th = (struct tcphdr *)skb->data;
+	unsigned int eaten = skb->len;
+	int err;
+
+	err = sk_filter_trim_cap(sk, skb, th->doff * 4);
+	if (!err) {
+		eaten -= skb->len;
+		TCP_SKB_CB(skb)->end_seq -= eaten;
+	}
+	return err;
+}
+EXPORT_SYMBOL(tcp_filter);
+
 /*
  *	From tcp_input.c
  */
@@ -1623,8 +1638,10 @@ process:
 
 	nf_reset(skb);
 
-	if (sk_filter(sk, skb))
+	if (tcp_filter(sk, skb))
 		goto discard_and_relse;
+	th = (const struct tcphdr *)skb->data;
+	iph = ip_hdr(skb);
 
 	sk_incoming_cpu_update(sk);
 	skb->dev = NULL;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index ac6c40d08ac5..3c552910831a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1215,7 +1215,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 	if (skb->protocol == htons(ETH_P_IP))
 		return tcp_v4_do_rcv(sk, skb);
 
-	if (sk_filter(sk, skb))
+	if (tcp_filter(sk, skb))
 		goto discard;
 
 	/*
@@ -1421,8 +1421,10 @@ process:
 		goto discard_and_relse;
 #endif
 
-	if (sk_filter(sk, skb))
+	if (tcp_filter(sk, skb))
 		goto discard_and_relse;
+	th = (const struct tcphdr *)skb->data;
+	hdr = ipv6_hdr(skb);
 
 	sk_incoming_cpu_update(sk);
 	skb->dev = NULL;
-- 
2.11.0

patches.fixes/net-introduce-__sock_queue_rcv_skb-function.patch
From: samanthakumar
Date: Tue, 5 Apr 2016 12:41:15 -0400
Subject: net: introduce __sock_queue_rcv_skb() function
Patch-mainline: v4.7-rc1
Git-commit: e6afc8ace6dd5cef5e812f26c72579da8806f5ac (partial)
References: bsc#1009969
No-fix: 30c7be26fd3587abcb69587f781098e3ca2d565b
No-fix: 10df8e6152c6c400a563a673e9956320bfce1871
No-fix: e83c6744e81abc93a20d0eb3b7f504a176a6126a
No-fix: a612769774a30e4fc143c4cb6395c12573415660
No-fix: ce25d66ad5f8d921bac5fe2d32d62fa30c0f9a70
No-fix: 595d0b29463343c3be995d3948930b8231e5b8cd
No-fix: 31c2e4926fe912f88388bcaa8450fcaa8f2ece47
No-fix: 9f9a45beaa96188085d52d273c2ecb052c7d8d27
No-fix: 4d0fc73ebe94ac984a187f21fbf4f3a1ac846f5a
No-fix: 1da8c681d5c122afe9fbadc02e92a0f9e3f7af44
No-fix: 39b2dd765e0711e1efd1d1df089473a8dd93ad48

This allows to separate the call to sk_filter() out of the
sock_queue_rcv_skb() function. In mainline, this is a part of a larger
commit which does something else and has an impressive list of "Fixes:"
follow-ups; pick only the part we need.

Signed-off-by: Sam Kumar
Signed-off-by: Willem de Bruijn
Signed-off-by: David S. Miller
Acked-by: Michal Kubecek
---
 include/net/sock.h |  1 +
 net/core/sock.c    | 19 +++++++++++++------
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index e19fecd9d63e..3f5cdfecfa6c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1981,6 +1981,7 @@ void sk_reset_timer(struct sock *sk, struct timer_list *timer,
 
 void sk_stop_timer(struct sock *sk, struct timer_list *timer);
 
+int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
 int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
 
 int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb);
diff --git a/net/core/sock.c b/net/core/sock.c
index 47fc8bb3b946..9a022b944784 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -431,9 +431,8 @@ static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
 }
 
-int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
-	int err;
 	unsigned long flags;
 	struct sk_buff_head *list = &sk->sk_receive_queue;
 
@@ -443,10 +442,6 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 		return -ENOMEM;
 	}
 
-	err = sk_filter(sk, skb);
-	if (err)
-		return err;
-
 	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
 		atomic_inc(&sk->sk_drops);
 		return -ENOBUFS;
@@ -469,6 +464,18 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	sk->sk_data_ready(sk);
 	return 0;
 }
+EXPORT_SYMBOL(__sock_queue_rcv_skb);
+
+int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+	int err;
+
+	err = sk_filter(sk, skb);
+	if (err)
+		return err;
+
+	return __sock_queue_rcv_skb(sk, skb);
+}
 EXPORT_SYMBOL(sock_queue_rcv_skb);
 
 int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
-- 
2.11.0

patches.fixes/rose-limit-sk_filter-trim-to-payload.patch
From: Willem de Bruijn
Date: Tue, 12 Jul 2016 18:18:56 -0400
Subject: rose: limit sk_filter trim to payload
Patch-mainline: v4.7
Git-commit: f4979fcea7fd36d8e2f556abef86f80e0d5af1ba
References: bsc#1009969

Sockets can have a filter program attached that drops or trims
incoming packets based on the filter program return value.

Rose requires data packets to have at least ROSE_MIN_LEN bytes. It
verifies this on arrival in rose_route_frame and unconditionally pulls
the bytes in rose_recvmsg. The filter can trim packets to below this
value in-between, causing pull to fail, leaving the partial header at
the time of skb_copy_datagram_msg.

Place a lower bound on the size to which sk_filter may trim packets
by introducing sk_filter_trim_cap and call this for rose packets.

Signed-off-by: Willem de Bruijn
Acked-by: Daniel Borkmann
Signed-off-by: David S. Miller
Acked-by: Michal Kubecek

openSUSE-13.2: we only need the sk_filter_trim_cap() function, but the
change in the rose driver makes good sense, so let's take the whole patch.
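The new semantics are easy to model in plain C: the filter may shrink a
packet, but the result is clamped to the caller-supplied floor
(ROSE_MIN_LEN for rose). An illustrative userspace model, not the
kernel code itself:

  #include <stdio.h>

  /* filter_ret == 0 means drop; otherwise trim to max(cap, filter_ret),
   * mirroring pskb_trim(skb, max(cap, pkt_len)) in the patch below */
  static unsigned int trim_with_floor(unsigned int filter_ret,
                                      unsigned int cap)
  {
          if (filter_ret == 0)
                  return 0;
          return filter_ret > cap ? filter_ret : cap;
  }

  int main(void)
  {
          /* a filter asking to trim below the protocol minimum is
           * rounded back up to the floor */
          printf("%u\n", trim_with_floor(2, 5));   /* prints 5  */
          printf("%u\n", trim_with_floor(40, 5));  /* prints 40 */
          return 0;
  }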
---
 include/linux/filter.h | 6 +++++-
 net/core/filter.c      | 9 +++++----
 net/rose/rose_in.c     | 3 ++-
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1ce6e1049a3b..ad808ab81ffa 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -358,7 +358,11 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
 }
 #endif /* CONFIG_DEBUG_SET_MODULE_RONX */
 
-int sk_filter(struct sock *sk, struct sk_buff *skb);
+int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
+static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
+{
+	return sk_filter_trim_cap(sk, skb, 1);
+}
 
 void bpf_prog_select_runtime(struct bpf_prog *fp);
 void bpf_prog_free(struct bpf_prog *fp);
diff --git a/net/core/filter.c b/net/core/filter.c
index 238bb3f9c51d..b3888d0fef82 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -47,9 +47,10 @@
 #include
 
 /**
- *	sk_filter - run a packet through a socket filter
+ *	sk_filter_trim_cap - run a packet through a socket filter
 *	@sk: sock associated with &sk_buff
 *	@skb: buffer to filter
+ *	@cap: limit on how short the eBPF program may trim the packet
 *
 * Run the filter code and then cut skb->data to correct size returned by
 * SK_RUN_FILTER. If pkt_len is 0 we toss packet. If skb->len is smaller
@@ -58,7 +59,7 @@
 * be accepted or -EPERM if the packet should be tossed.
 *
 */
-int sk_filter(struct sock *sk, struct sk_buff *skb)
+int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
 {
 	int err;
 	struct sk_filter *filter;
@@ -80,13 +81,13 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
 	if (filter) {
 		unsigned int pkt_len = SK_RUN_FILTER(filter, skb);
 
-		err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
+		err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
 	}
 	rcu_read_unlock();
 
 	return err;
 }
-EXPORT_SYMBOL(sk_filter);
+EXPORT_SYMBOL(sk_filter_trim_cap);
 
 static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
 {
diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c
index 79c4abcfa6b4..0a6394754e81 100644
--- a/net/rose/rose_in.c
+++ b/net/rose/rose_in.c
@@ -164,7 +164,8 @@ static int rose_state3_machine(struct sock *sk, struct sk_buff *skb, int framety
 		rose_frames_acked(sk, nr);
 		if (ns == rose->vr) {
 			rose_start_idletimer(sk);
-			if (sock_queue_rcv_skb(sk, skb) == 0) {
+			if (sk_filter_trim_cap(sk, skb, ROSE_MIN_LEN) == 0 &&
+			    __sock_queue_rcv_skb(sk, skb) == 0) {
 				rose->vr = (rose->vr + 1) % ROSE_MODULUS;
 				queued = 1;
 			} else {
-- 
2.11.0

patches.kabi/kABI-reintroduce-sk_filter.patch
From: Jiri Slaby
Date: Wed, 14 Dec 2016 09:09:18 +0100
Subject: kABI: reintroduce sk_filter
Patch-mainline: Never, kabi workaround
References: bsc#1009969

Backport of mainline commit ac6e780070e3 ("tcp: take care of
truncations done by sk_filter()") made sk_filter() an inline wrapper.
Restore the old function, because it is a part of kABI. The function is
now only a wrapper around sk_filter_trim_cap().
Signed-off-by: Jiri Slaby
Signed-off-by: Michal Kubecek
---
 include/linux/filter.h | 5 +----
 net/core/filter.c      | 6 ++++++
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index ad808ab81ffa..2799f6e90b90 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -359,10 +359,7 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
 #endif /* CONFIG_DEBUG_SET_MODULE_RONX */
 
 int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
-static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
-{
-	return sk_filter_trim_cap(sk, skb, 1);
-}
+int sk_filter(struct sock *sk, struct sk_buff *skb);
 
 void bpf_prog_select_runtime(struct bpf_prog *fp);
 void bpf_prog_free(struct bpf_prog *fp);
diff --git a/net/core/filter.c b/net/core/filter.c
index b3888d0fef82..1e5c0c3d900e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -46,6 +46,12 @@
 #include
 #include
 
+int sk_filter(struct sock *sk, struct sk_buff *skb)
+{
+	return sk_filter_trim_cap(sk, skb, 1);
+}
+EXPORT_SYMBOL(sk_filter);
+
 /**
  *	sk_filter_trim_cap - run a packet through a socket filter
  *	@sk: sock associated with &sk_buff
-- 
2.11.0

patches.fixes/ip6_gre-fix-ip6gre_err-invalid-reads.patch
From: Eric Dumazet
Date: Sat, 4 Feb 2017 23:18:55 -0800
Subject: ip6_gre: fix ip6gre_err() invalid reads
Patch-mainline: v4.10
Git-commit: 7892032cfe67f4bde6fc2ee967e45a8fbaf33756
References: CVE-2017-5897 bsc#1023762

Andrey Konovalov reported out of bound accesses in ip6gre_err().

If GRE flags contains GRE_KEY, the following expression

	*(((__be32 *)p) + (grehlen / 4) - 1)

accesses data ~40 bytes after the expected point, since grehlen
includes the size of IPv6 headers.

Let's use a "struct gre_base_hdr *greh" pointer to make this code more
readable.

p[1] becomes greh->protocol.

grhlen is the GRE header length.

Fixes: c12b395a4664 ("gre: Support GRE over IPv6")
Signed-off-by: Eric Dumazet
Reported-by: Andrey Konovalov
Signed-off-by: David S. Miller
Acked-by: Michal Kubecek
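The underlying rule the fix follows: validate with pskb_may_pull()
before reading at any offset derived from untrusted lengths, and
re-derive pointers from skb->data after each pull, since the data may
be reallocated. A condensed sketch of that shape (illustrative only,
not the literal ip6gre_err() code):

  static int parse_gre_key(struct sk_buff *skb, int offset, __be32 *key)
  {
          const struct gre_base_hdr *greh;
          int grehlen = sizeof(*greh);

          if (!pskb_may_pull(skb, offset + grehlen))
                  return -EINVAL;
          greh = (const struct gre_base_hdr *)(skb->data + offset);

          if (greh->flags & GRE_CSUM)
                  grehlen += 4;
          if (!(greh->flags & GRE_KEY))
                  return -ENOENT;
          if (!pskb_may_pull(skb, offset + grehlen + 4))
                  return -EINVAL;

          /* re-read via skb->data: the second pull may have moved it */
          *key = *(__be32 *)(skb->data + offset + grehlen);
          return 0;
  }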
---
 net/ipv6/ip6_gre.c | 41 ++++++++++++++++++++++-------------------
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index b1311da5d7b8..9e2cdeebf30d 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -55,6 +55,7 @@
 #include
 #include
 #include
+#include <net/gre.h>
 
 static bool log_ecn_error = true;
@@ -367,35 +368,37 @@ static void ip6gre_tunnel_uninit(struct net_device *dev)
 
 static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-		u8 type, u8 code, int offset, __be32 info)
+		       u8 type, u8 code, int offset, __be32 info)
 {
-	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data;
-	__be16 *p = (__be16 *)(skb->data + offset);
-	int grehlen = offset + 4;
+	const struct gre_base_hdr *greh;
+	const struct ipv6hdr *ipv6h;
+	int grehlen = sizeof(*greh);
 	struct ip6_tnl *t;
+	int key_off = 0;
 	__be16 flags;
+	__be32 key;
 
-	flags = p[0];
-	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
-		if (flags&(GRE_VERSION|GRE_ROUTING))
-			return;
-		if (flags&GRE_KEY) {
-			grehlen += 4;
-			if (flags&GRE_CSUM)
-				grehlen += 4;
-		}
+	if (!pskb_may_pull(skb, offset + grehlen))
+		return;
+	greh = (const struct gre_base_hdr *)(skb->data + offset);
+	flags = greh->flags;
+	if (flags & (GRE_VERSION | GRE_ROUTING))
+		return;
+	if (flags & GRE_CSUM)
+		grehlen += 4;
+	if (flags & GRE_KEY) {
+		key_off = grehlen + offset;
+		grehlen += 4;
 	}
 
-	/* If only 8 bytes returned, keyed message will be dropped here */
-	if (!pskb_may_pull(skb, grehlen))
+	if (!pskb_may_pull(skb, offset + grehlen))
 		return;
 	ipv6h = (const struct ipv6hdr *)skb->data;
-	p = (__be16 *)(skb->data + offset);
+	greh = (const struct gre_base_hdr *)(skb->data + offset);
+	key = key_off ? *(__be32 *)(skb->data + key_off) : 0;
 
 	t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr,
-				flags & GRE_KEY ?
-				*(((__be32 *)p) + (grehlen / 4) - 1) : 0,
-				p[1]);
+				 key, greh->protocol);
 	if (!t)
 		return;
-- 
2.11.1

patches.fixes/ipv4-keep-skb-dst-around-in-presence-of-IP-options.patch
From: Eric Dumazet
Date: Sat, 4 Feb 2017 11:16:52 -0800
Subject: ipv4: keep skb->dst around in presence of IP options
Patch-mainline: v4.10-rc8
Git-commit: 34b2cef20f19c87999fff3da4071e66937db9644
References: CVE-2017-5970 bsc#1024938

Andrey Konovalov got crashes in __ip_options_echo() when a NULL
skb->dst is accessed.

ipv4_pktinfo_prepare() should not drop the dst if (evil) IP options
are present.

We could refine the test to the presence of ts_needtime or srr, but IP
options are not often used, so let's be conservative.

Thanks to syzkaller team for finding this bug.

Fixes: d826eb14ecef ("ipv4: PKTINFO doesnt need dst reference")
Signed-off-by: Eric Dumazet
Reported-by: Andrey Konovalov
Signed-off-by: David S. Miller
Acked-by: Michal Kubecek
---
 net/ipv4/ip_sockglue.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index b6c7bdea4853..6a06a3d0f28c 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1186,7 +1186,14 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
 		pktinfo->ipi_ifindex = 0;
 		pktinfo->ipi_spec_dst.s_addr = 0;
 	}
-	skb_dst_drop(skb);
+	/* We need to keep the dst for __ip_options_echo().
+	 * We could restrict the test to opt.ts_needtime || opt.srr,
+	 * but the following is good enough as IP options are not often used.
+	 */
+	if (unlikely(IPCB(skb)->opt.optlen))
+		skb_dst_force(skb);
+	else
+		skb_dst_drop(skb);
 }
 
 int ip_setsockopt(struct sock *sk, int level,
-- 
2.11.1

patches.fixes/sctp-avoid-BUG_ON-on-sctp_wait_for_sndbuf.patch
From: Marcelo Ricardo Leitner
Date: Mon, 6 Feb 2017 18:10:31 -0200
Subject: sctp: avoid BUG_ON on sctp_wait_for_sndbuf
Patch-mainline: v4.10-rc8
Git-commit: 2dcab598484185dea7ec22219c76dcdd59e3cb90
References: CVE-2017-5986 bsc#1025235

Alexander Popov reported that an application may trigger a BUG_ON in
sctp_wait_for_sndbuf if the socket tx buffer is full, a thread is
waiting on it to queue more data and meanwhile another thread peels off
the association being used by the first thread.

This patch replaces the BUG_ON call with proper error handling. It
will return -EPIPE to the original sendmsg call, similarly to what
would have been done if the association wasn't found in the first
place.

Acked-by: Alexander Popov
Signed-off-by: Marcelo Ricardo Leitner
Reviewed-by: Xin Long
Signed-off-by: David S. Miller
Acked-by: Michal Kubecek
---
 net/sctp/socket.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 3c5833058b03..b4b2bb4eccf8 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -6963,7 +6963,8 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
 		 */
 		release_sock(sk);
 		current_timeo = schedule_timeout(current_timeo);
-		BUG_ON(sk != asoc->base.sk);
+		if (sk != asoc->base.sk)
+			goto do_error;
 		lock_sock(sk);
 
 		*timeo_p = current_timeo;
-- 
2.11.1

patches.fixes/sctp-deny-peeloff-operation-on-asocs-with-threads-sl.patch
From: Marcelo Ricardo Leitner
Date: Thu, 23 Feb 2017 09:31:18 -0300
Subject: sctp: deny peeloff operation on asocs with threads sleeping on it
Patch-mainline: v4.11-rc1
Git-commit: dfcb9f4f99f1e9a49e43398a7bfbf56927544af1
References: CVE-2017-6353 bsc#1027066

commit 2dcab5984841 ("sctp: avoid BUG_ON on sctp_wait_for_sndbuf")
attempted to avoid a BUG_ON call when the association being used for a
sendmsg() is blocked waiting for more sndbuf and another thread did a
peeloff operation on such asoc, moving it to another socket.

As Ben Hutchings noticed, in such a case it would then return without
locking back the socket and would cause two unlocks in a row.

Further analysis also revealed that it could allow a double free if the
application managed to peeloff the asoc that is created during the
sendmsg call, because then sctp_sendmsg() would try to free the asoc
that was created only for that call.

This patch takes another approach. It will deny the peeloff operation
if there is a thread sleeping on the asoc, so this situation doesn't
exist anymore. This avoids the issues described above and also honors
the syscalls that are already being handled (it can be multiple sendmsg
calls).

Joint work with Xin Long.

Fixes: 2dcab5984841 ("sctp: avoid BUG_ON on sctp_wait_for_sndbuf")
Cc: Alexander Popov
Cc: Ben Hutchings
Signed-off-by: Marcelo Ricardo Leitner
Signed-off-by: Xin Long
Signed-off-by: David S. Miller
Acked-by: Michal Kubecek
---
 net/sctp/socket.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index b4b2bb4eccf8..fcf10252017e 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4432,6 +4432,12 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
 	if (!asoc)
 		return -EINVAL;
 
+	/* If there is a thread waiting on more sndbuf space for
+	 * sending on this asoc, it cannot be peeled.
+	 */
+	if (waitqueue_active(&asoc->wait))
+		return -EBUSY;
+
 	/* An association cannot be branched off from an already peeled-off
 	 * socket, nor is this supported for tcp style sockets.
 	 */
@@ -6963,8 +6969,6 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
 		 */
 		release_sock(sk);
 		current_timeo = schedule_timeout(current_timeo);
-		if (sk != asoc->base.sk)
-			goto do_error;
 		lock_sock(sk);
 
 		*timeo_p = current_timeo;
-- 
2.11.1

patches.fixes/tcp-avoid-infinite-loop-in-tcp_splice_read.patch
From: Eric Dumazet
Date: Fri, 3 Feb 2017 14:59:38 -0800
Subject: tcp: avoid infinite loop in tcp_splice_read()
Patch-mainline: v4.10-rc8
Git-commit: ccf7abb93af09ad0868ae9033d1ca8108bdaec82
References: CVE-2017-6214 bsc#1026722

Splicing from a TCP socket is vulnerable when a packet with the URG
flag is received and stored into the receive queue.

__tcp_splice_read() returns 0, and sk_wait_data() immediately returns
since there is the problematic skb in queue.

This is a nice way to burn cpu (aka infinite loop) and trigger soft
lockups.

Again, this gem was found by syzkaller tool.

Fixes: 9c55e01c0cc8 ("[TCP]: Splice receive support.")
Signed-off-by: Eric Dumazet
Reported-by: Dmitry Vyukov
Cc: Willy Tarreau
Signed-off-by: David S. Miller
Acked-by: Michal Kubecek
---
 net/ipv4/tcp.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 19d385a0f02d..cf6186f5a035 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -780,6 +780,12 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
 				ret = -EAGAIN;
 				break;
 			}
+			/* if __tcp_splice_read() got nothing while we have
+			 * an skb in receive queue, we do not want to loop.
+			 * This might happen with URG data.
+			 */
+			if (!skb_queue_empty(&sk->sk_receive_queue))
+				break;
 			sk_wait_data(sk, &timeo);
 			if (signal_pending(current)) {
 				ret = sock_intr_errno(timeo);
-- 
2.11.1

patches.fixes/udp-fix-IP_CHECKSUM-handling.patch
From: Eric Dumazet
Date: Sun, 23 Oct 2016 18:03:06 -0700
Subject: udp: fix IP_CHECKSUM handling
Patch-mainline: v4.9-rc4
Git-commit: 10df8e6152c6c400a563a673e9956320bfce1871
References: CVE-2017-6347 bsc#1027179

First bug was added in commit ad6f939ab193 ("ip: Add offset parameter
to ip_cmsg_recv"): Tom missed that ipv4 udp messages could be received
on an AF_INET6 socket. ip_cmsg_recv(msg, skb) should have been replaced
by ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr));

Then commit e6afc8ace6dd ("udp: remove headers from UDP packets before
queueing") forgot to adjust the offsets now that UDP headers are pulled
before skbs are put in the receive queue.

Fixes: ad6f939ab193 ("ip: Add offset parameter to ip_cmsg_recv")
Fixes: e6afc8ace6dd ("udp: remove headers from UDP packets before queueing")
Signed-off-by: Eric Dumazet
Cc: Sam Kumar
Cc: Willem de Bruijn
Tested-by: Willem de Bruijn
Signed-off-by: David S. Miller
Acked-by: Michal Kubecek

openSUSE-42.1: pick only the part fixing the udpv6_recvmsg() issue;
commit e6afc8ace6dd ("udp: remove headers from UDP packets before
queueing") is not present.
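Both IP_CHECKSUM fixes rest on the linearity of the Internet checksum:
the checksum of the bytes after `offset` equals the checksum of the
whole buffer with the checksum of the skipped prefix subtracted, which
is what csum_sub(csum, skb_checksum(skb, 0, offset, 0)) computes. A
small userspace model of that identity (illustrative, simplified
arithmetic, not the kernel implementation):

  #include <stdio.h>
  #include <stdint.h>
  #include <stddef.h>

  /* 32-bit one's-complement accumulator, folding the carry back in */
  static uint32_t csum_add(uint32_t a, uint32_t b)
  {
          uint64_t s = (uint64_t)a + b;
          return (uint32_t)(s + (s >> 32));
  }

  static uint32_t csum_bytes(const uint8_t *p, size_t len)
  {
          uint32_t sum = 0;
          for (size_t i = 0; i + 1 < len; i += 2)
                  sum = csum_add(sum, (uint32_t)p[i] << 8 | p[i + 1]);
          if (len & 1)
                  sum = csum_add(sum, (uint32_t)p[len - 1] << 8);
          return sum;
  }

  int main(void)
  {
          uint8_t pkt[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
          uint32_t whole = csum_bytes(pkt, 8);
          uint32_t skip  = csum_bytes(pkt, 4);      /* e.g. a header */
          uint32_t tail  = csum_bytes(pkt + 4, 4);  /* the payload   */

          /* whole "minus" skip equals tail: prints c0e c0e */
          printf("%x %x\n", csum_add(whole, ~skip), tail);
          return 0;
  }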
---
 net/ipv6/udp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 1173557ea551..49766bf20cd5 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -493,7 +493,7 @@ try_again:
 
 	if (is_udp4) {
 		if (inet->cmsg_flags)
-			ip_cmsg_recv(msg, skb);
+			ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr));
 	} else {
 		if (np->rxopt.all)
 			ip6_datagram_recv_specific_ctl(sk, msg, skb);
-- 
2.12.0

patches.fixes/ip-fix-IP_CHECKSUM-handling.patch
From: Paolo Abeni
Date: Tue, 21 Feb 2017 09:33:18 +0100
Subject: ip: fix IP_CHECKSUM handling
Patch-mainline: v4.11-rc1
Git-commit: ca4ef4574f1ee5252e2cd365f8f5d5bafd048f32
References: CVE-2017-6347 bsc#1027179

The skbs processed by ip_cmsg_recv() are not guaranteed to be linear,
e.g. when sending UDP packets over loopback with MSGMORE. Using
csum_partial() on [potentially] the whole skb len is dangerous;
instead, be on the safe side and use skb_checksum().

Thanks to syzkaller team to detect the issue and provide the
reproducer.

v1 -> v2:
 - move the variable declaration in a tighter scope

Fixes: ad6f939ab193 ("ip: Add offset parameter to ip_cmsg_recv")
Reported-by: Andrey Konovalov
Signed-off-by: Paolo Abeni
Acked-by: Eric Dumazet
Signed-off-by: David S. Miller
Signed-off-by: Michal Kubecek
---
 net/ipv4/ip_sockglue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 6a06a3d0f28c..6b3d4eeda568 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -106,7 +106,7 @@ static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb,
 		return;
 
 	if (offset != 0)
-		csum = csum_sub(csum, csum_partial(skb->data, offset, 0));
+		csum = csum_sub(csum, skb_checksum(skb, 0, offset, 0));
 
 	put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum);
 }
-- 
2.12.0

patches.fixes/packet-fix-races-in-fanout_add.patch
From: Eric Dumazet
Date: Tue, 14 Feb 2017 09:03:51 -0800
Subject: packet: fix races in fanout_add()
Patch-mainline: v4.10
Git-commit: d199fab63c11998a602205f7ee7ff7c05c97164b
References: CVE-2017-6346 bsc#1027189

Multiple threads can call fanout_add() at the same time.

We need to grab fanout_mutex earlier to avoid races that could lead to
one thread freeing po->rollover that was set by another thread.

Do the same in fanout_release(), for peace of mind, and to help us
find lockdep issues earlier.

Fixes: dc99f600698d ("packet: Add fanout support.")
Fixes: 0648ab70afe6 ("packet: rollover prepare: per-socket state")
Signed-off-by: Eric Dumazet
Cc: Willem de Bruijn
Signed-off-by: David S. Miller
Signed-off-by: Michal Kubecek

openSUSE-42.1: no rollover stuff from commit 0648ab70afe6 ("packet:
rollover prepare: per-socket state"); only extend the area covered by
fanout_mutex.
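The race class is plain check-then-act: two threads can both pass the
po->fanout checks before either takes the mutex. A userspace model of
the corrected locking, with hypothetical stand-in names:

  #include <pthread.h>
  #include <stdlib.h>

  static pthread_mutex_t fanout_mutex = PTHREAD_MUTEX_INITIALIZER;
  static void *fanout;    /* stands in for po->fanout */

  static int fanout_add_fixed(void)
  {
          int err = 0;

          pthread_mutex_lock(&fanout_mutex);
          if (fanout)                  /* the check ...          */
                  err = -1;            /* ... EALREADY in kernel */
          else
                  fanout = malloc(64); /* ... and the update share
                                        * one critical section   */
          pthread_mutex_unlock(&fanout_mutex);
          return err;
  }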
---
 net/packet/af_packet.c | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index b9d1baaa8bdc..3d6b2e111a55 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1447,13 +1447,16 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 		return -EINVAL;
 	}
 
+	mutex_lock(&fanout_mutex);
+
+	err = -EINVAL;
 	if (!po->running)
-		return -EINVAL;
+		goto out;
 
+	err = -EALREADY;
 	if (po->fanout)
-		return -EALREADY;
+		goto out;
 
-	mutex_lock(&fanout_mutex);
 	match = NULL;
 	list_for_each_entry(f, &fanout_list, list) {
 		if (f->id == id &&
@@ -1509,18 +1512,18 @@ static void fanout_release(struct sock *sk)
 	struct packet_sock *po = pkt_sk(sk);
 	struct packet_fanout *f;
 
-	f = po->fanout;
-	if (!f)
-		return;
-
 	mutex_lock(&fanout_mutex);
-	po->fanout = NULL;
+	f = po->fanout;
+	if (f) {
+		po->fanout = NULL;
 
-	if (atomic_dec_and_test(&f->sk_ref)) {
-		list_del(&f->list);
-		dev_remove_pack(&f->prot_hook);
-		kfree(f);
+		if (atomic_dec_and_test(&f->sk_ref)) {
+			list_del(&f->list);
+			dev_remove_pack(&f->prot_hook);
+			kfree(f);
+		}
 	}
 	mutex_unlock(&fanout_mutex);
 }
-- 
2.12.0

patches.fixes/net-llc-avoid-BUG_ON-in-skb_orphan.patch
From: Eric Dumazet
Date: Sun, 12 Feb 2017 14:03:52 -0800
Subject: net/llc: avoid BUG_ON() in skb_orphan()
Patch-mainline: v4.10
Git-commit: 8b74d439e1697110c5e5c600643e823eb1dd0762
References: CVE-2017-6345 bsc#1027190

It seems nobody used LLC since linux-3.12.

Fortunately fuzzers like syzkaller still know how to run this code,
otherwise it would be no fun.

Setting skb->sk without skb->destructor leads to all kinds of bugs;
we now prefer to be very strict about it.

Ideally here we would use skb_set_owner() but this helper does not
exist yet, only CAN seems to have a private helper for that.

Fixes: 376c7311bdb6 ("net: add a temporary sanity check in skb_orphan()")
Signed-off-by: Eric Dumazet
Reported-by: Andrey Konovalov
Signed-off-by: David S. Miller
Acked-by: Michal Kubecek
---
 net/llc/llc_conn.c | 3 +++
 net/llc/llc_sap.c  | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 81a61fce3afb..841026e02ce8 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -821,7 +821,10 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb)
 		 * another trick required to cope with how the PROCOM state
 		 * machine works. -acme
 		 */
+		skb_orphan(skb);
+		sock_hold(sk);
 		skb->sk = sk;
+		skb->destructor = sock_efree;
 	}
 	if (!sock_owned_by_user(sk))
 		llc_conn_rcv(sk, skb);
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index d0e1e804ebd7..5404d0d195cc 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -290,7 +290,10 @@ static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb,
 	ev->type   = LLC_SAP_EV_TYPE_PDU;
 	ev->reason = 0;
+	skb_orphan(skb);
+	sock_hold(sk);
 	skb->sk = sk;
+	skb->destructor = sock_efree;
 	llc_sap_state_process(sap, skb);
 }
-- 
2.12.0

patches.fixes/l2tp-fix-racy-SOCK_ZAPPED-flag-check-in-l2tp_ip-6-_b.patch
From: Guillaume Nault
Date: Fri, 18 Nov 2016 22:13:00 +0100
Subject: l2tp: fix racy SOCK_ZAPPED flag check in l2tp_ip{,6}_bind()
Patch-mainline: v4.9-rc7
Git-commit: 32c231164b762dddefa13af5a0101032c70b50ef
References: CVE-2016-10200 bsc#1028415

Lock socket before checking the SOCK_ZAPPED flag in l2tp_ip6_bind().
Without lock, a concurrent call could modify the socket flags between
the sock_flag(sk, SOCK_ZAPPED) test and the lock_sock() call. This way,
a socket could be inserted twice in l2tp_ip6_bind_table.
Releasing it would then leave a stale pointer there, generating
use-after-free errors when walking through the list or modifying
adjacent entries.

BUG: KASAN: use-after-free in l2tp_ip6_close+0x22e/0x290 at addr ffff8800081b0ed8
Write of size 8 by task syz-executor/10987
CPU: 0 PID: 10987 Comm: syz-executor Not tainted 4.8.0+ #39
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.2-0-g33fbe13 by qemu-project.org 04/01/2014
 ffff880031d97838 ffffffff829f835b ffff88001b5a1640 ffff8800081b0ec0
 ffff8800081b15a0 ffff8800081b6d20 ffff880031d97860 ffffffff8174d3cc
 ffff880031d978f0 ffff8800081b0e80 ffff88001b5a1640 ffff880031d978e0
Call Trace:
 [] dump_stack+0xb3/0x118 lib/dump_stack.c:15
 [] kasan_object_err+0x1c/0x70 mm/kasan/report.c:156
 [< inline >] print_address_description mm/kasan/report.c:194
 [] kasan_report_error+0x1f6/0x4d0 mm/kasan/report.c:283
 [< inline >] kasan_report mm/kasan/report.c:303
 [] __asan_report_store8_noabort+0x3e/0x40 mm/kasan/report.c:329
 [< inline >] __write_once_size ./include/linux/compiler.h:249
 [< inline >] __hlist_del ./include/linux/list.h:622
 [< inline >] hlist_del_init ./include/linux/list.h:637
 [] l2tp_ip6_close+0x22e/0x290 net/l2tp/l2tp_ip6.c:239
 [] inet_release+0xed/0x1c0 net/ipv4/af_inet.c:415
 [] inet6_release+0x50/0x70 net/ipv6/af_inet6.c:422
 [] sock_release+0x8d/0x1d0 net/socket.c:570
 [] sock_close+0x16/0x20 net/socket.c:1017
 [] __fput+0x28c/0x780 fs/file_table.c:208
 [] ____fput+0x15/0x20 fs/file_table.c:244
 [] task_work_run+0xf9/0x170
 [] do_exit+0x85e/0x2a00
 [] do_group_exit+0x108/0x330
 [] get_signal+0x617/0x17a0 kernel/signal.c:2307
 [] do_signal+0x7f/0x18f0
 [] exit_to_usermode_loop+0xbf/0x150 arch/x86/entry/common.c:156
 [< inline >] prepare_exit_to_usermode arch/x86/entry/common.c:190
 [] syscall_return_slowpath+0x1a0/0x1e0 arch/x86/entry/common.c:259
 [] entry_SYSCALL_64_fastpath+0xc4/0xc6
Object at ffff8800081b0ec0, in cache L2TP/IPv6 size: 1448
Allocated:
PID = 10987
[ 1116.897025] [] save_stack_trace+0x16/0x20
[ 1116.897025] [] save_stack+0x46/0xd0
[ 1116.897025] [] kasan_kmalloc+0xad/0xe0
[ 1116.897025] [] kasan_slab_alloc+0x12/0x20
[ 1116.897025] [< inline >] slab_post_alloc_hook mm/slab.h:417
[ 1116.897025] [< inline >] slab_alloc_node mm/slub.c:2708
[ 1116.897025] [< inline >] slab_alloc mm/slub.c:2716
[ 1116.897025] [] kmem_cache_alloc+0xc8/0x2b0 mm/slub.c:2721
[ 1116.897025] [] sk_prot_alloc+0x69/0x2b0 net/core/sock.c:1326
[ 1116.897025] [] sk_alloc+0x38/0xae0 net/core/sock.c:1388
[ 1116.897025] [] inet6_create+0x2d7/0x1000 net/ipv6/af_inet6.c:182
[ 1116.897025] [] __sock_create+0x37b/0x640 net/socket.c:1153
[ 1116.897025] [< inline >] sock_create net/socket.c:1193
[ 1116.897025] [< inline >] SYSC_socket net/socket.c:1223
[ 1116.897025] [] SyS_socket+0xef/0x1b0 net/socket.c:1203
[ 1116.897025] [] entry_SYSCALL_64_fastpath+0x23/0xc6
Freed:
PID = 10987
[ 1116.897025] [] save_stack_trace+0x16/0x20
[ 1116.897025] [] save_stack+0x46/0xd0
[ 1116.897025] [] kasan_slab_free+0x71/0xb0
[ 1116.897025] [< inline >] slab_free_hook mm/slub.c:1352
[ 1116.897025] [< inline >] slab_free_freelist_hook mm/slub.c:1374
[ 1116.897025] [< inline >] slab_free mm/slub.c:2951
[ 1116.897025] [] kmem_cache_free+0xc8/0x330 mm/slub.c:2973
[ 1116.897025] [< inline >] sk_prot_free net/core/sock.c:1369
[ 1116.897025] [] __sk_destruct+0x32b/0x4f0 net/core/sock.c:1444
[ 1116.897025] [] sk_destruct+0x44/0x80 net/core/sock.c:1452
[ 1116.897025] [] __sk_free+0x53/0x220 net/core/sock.c:1460
[ 1116.897025] [] sk_free+0x23/0x30 net/core/sock.c:1471
[ 1116.897025] [] sk_common_release+0x28c/0x3e0 ./include/net/sock.h:1589
[ 1116.897025] [] l2tp_ip6_close+0x1fe/0x290 net/l2tp/l2tp_ip6.c:243
[ 1116.897025] [] inet_release+0xed/0x1c0 net/ipv4/af_inet.c:415
[ 1116.897025] [] inet6_release+0x50/0x70 net/ipv6/af_inet6.c:422
[ 1116.897025] [] sock_release+0x8d/0x1d0 net/socket.c:570
[ 1116.897025] [] sock_close+0x16/0x20 net/socket.c:1017
[ 1116.897025] [] __fput+0x28c/0x780 fs/file_table.c:208
[ 1116.897025] [] ____fput+0x15/0x20 fs/file_table.c:244
[ 1116.897025] [] task_work_run+0xf9/0x170
[ 1116.897025] [] do_exit+0x85e/0x2a00
[ 1116.897025] [] do_group_exit+0x108/0x330
[ 1116.897025] [] get_signal+0x617/0x17a0 kernel/signal.c:2307
[ 1116.897025] [] do_signal+0x7f/0x18f0
[ 1116.897025] [] exit_to_usermode_loop+0xbf/0x150 arch/x86/entry/common.c:156
[ 1116.897025] [< inline >] prepare_exit_to_usermode arch/x86/entry/common.c:190
[ 1116.897025] [] syscall_return_slowpath+0x1a0/0x1e0 arch/x86/entry/common.c:259
[ 1116.897025] [] entry_SYSCALL_64_fastpath+0xc4/0xc6
Memory state around the buggy address:
 ffff8800081b0d80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff8800081b0e00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
>ffff8800081b0e80: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
                                                    ^
 ffff8800081b0f00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
 ffff8800081b0f80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
==================================================================

The same issue exists with l2tp_ip_bind() and l2tp_ip_bind_table.

Fixes: c51ce49735c1 ("l2tp: fix oops in L2TP IP sockets for connect() AF_UNSPEC case")
Reported-by: Baozeng Ding
Reported-by: Andrey Konovalov
Tested-by: Baozeng Ding
Signed-off-by: Guillaume Nault
Signed-off-by: David S. Miller
Acked-by: Michal Kubecek
---
 net/l2tp/l2tp_ip.c  | 5 +++--
 net/l2tp/l2tp_ip6.c | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 44ee0683b14b..1a8281de81d5 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -251,8 +251,6 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	int ret;
 	int chk_addr_ret;
 
-	if (!sock_flag(sk, SOCK_ZAPPED))
-		return -EINVAL;
 	if (addr_len < sizeof(struct sockaddr_l2tpip))
 		return -EINVAL;
 	if (addr->l2tp_family != AF_INET)
@@ -267,6 +265,9 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	read_unlock_bh(&l2tp_ip_lock);
 
 	lock_sock(sk);
+	if (!sock_flag(sk, SOCK_ZAPPED))
+		goto out;
+
 	if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_l2tpip))
 		goto out;
 
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 36f8fa223a78..d3c9eb8241b1 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -266,8 +266,6 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	int addr_type;
 	int err;
 
-	if (!sock_flag(sk, SOCK_ZAPPED))
-		return -EINVAL;
 	if (addr->l2tp_family != AF_INET6)
 		return -EINVAL;
 	if (addr_len < sizeof(*addr))
@@ -293,6 +291,9 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	lock_sock(sk);
 
 	err = -EINVAL;
+	if (!sock_flag(sk, SOCK_ZAPPED))
+		goto out_unlock;
+
 	if (sk->sk_state != TCP_CLOSE)
 		goto out_unlock;
 
-- 
2.12.0

patches.fixes/xfrm_user-validate-XFRM_MSG_NEWAE-XFRMA_REPLAY_ESN_V.patch
From: Andy Whitcroft
Date: Wed, 22 Mar 2017 07:29:31 +0000
Subject: xfrm_user: validate XFRM_MSG_NEWAE XFRMA_REPLAY_ESN_VAL replay_window
Patch-mainline: v4.11-rc5
Git-commit: 677e806da4d916052585301785d847c3b3e6186a
References: CVE-2017-7184 bsc#1030573
When a new xfrm state is created during an XFRM_MSG_NEWSA call we
validate the user supplied replay_esn to ensure that the size is valid
and to ensure that the replay_window size is within the allocated
buffer. However, later it is possible to update this replay_esn via an
XFRM_MSG_NEWAE call. There we again validate that the size of the
supplied buffer matches the existing state and if so inject the
contents. We do not at this point check that the replay_window is
within the allocated memory. This leads to out-of-bounds reads and
writes triggered by netlink packets. This leads to memory corruption
and the potential for privilege escalation.

We already attempt to validate the incoming replay information in
xfrm_new_ae() via xfrm_replay_verify_len(). This confirms that the user
is not trying to change the size of the replay state buffer which
includes the replay_esn. It however does not check that the
replay_window remains within that buffer. Add validation of the
contained replay_window.

CVE-2017-7184
Signed-off-by: Andy Whitcroft
Acked-by: Steffen Klassert
Signed-off-by: Linus Torvalds
Acked-by: Michal Kubecek
---
 net/xfrm/xfrm_user.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 2091664295ba..1c1b9ed94719 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -389,6 +389,9 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es
 	if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen)
 		return -EINVAL;
 
+	if (up->replay_window > up->bmp_len * sizeof(__u32) * 8)
+		return -EINVAL;
+
 	return 0;
 }
-- 
2.12.2

patches.fixes/xfrm_user-validate-XFRM_MSG_NEWAE-incoming-ESN-size-.patch
From: Andy Whitcroft
Date: Thu, 23 Mar 2017 07:45:44 +0000
Subject: xfrm_user: validate XFRM_MSG_NEWAE incoming ESN size harder
Patch-mainline: v4.11-rc5
Git-commit: f843ee6dd019bcece3e74e76ad9df0155655d0df
References: CVE-2017-7184 bsc#1030573

Kees Cook has pointed out that xfrm_replay_state_esn_len() is subject
to wrapping issues. To ensure we are correctly ensuring that the two
ESN structures are the same size, compare both the overall size as
reported by xfrm_replay_state_esn_len() and the internal length.

CVE-2017-7184
Signed-off-by: Andy Whitcroft
Acked-by: Steffen Klassert
Signed-off-by: Linus Torvalds
Acked-by: Michal Kubecek
---
 net/xfrm/xfrm_user.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 1c1b9ed94719..2dc882637cdf 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -386,7 +386,11 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es
 	up = nla_data(rp);
 	ulen = xfrm_replay_state_esn_len(up);
 
-	if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen)
+	/* Check the overall length and the internal bitmap length to avoid
+	 * potential overflow.
+	 */
+	if (nla_len(rp) < ulen ||
+	    xfrm_replay_state_esn_len(replay_esn) != ulen ||
+	    replay_esn->bmp_len != up->bmp_len)
 		return -EINVAL;
 
 	if (up->replay_window > up->bmp_len * sizeof(__u32) * 8)
-- 
2.12.2

patches.fixes/net-socket-fix-recvmmsg-not-returning-error-from-soc.patch
From: Maxime Jayat
Date: Tue, 21 Feb 2017 18:35:51 +0100
Subject: net: socket: fix recvmmsg not returning error from sock_error
Patch-mainline: v4.11-rc1
Git-commit: e623a9e9dec29ae811d11f83d0074ba254aba374
References: CVE-2016-7117 bsc#1003077

Commit 34b88a68f26a ("net: Fix use after free in the recvmmsg exit
path") changed the exit path of recvmmsg to always return the
datagrams variable and modified the error paths to set the variable to
the error code returned by recvmsg if necessary.

However, in the case sock_error returned an error, the error code was
then ignored, and recvmmsg returned 0.

Change the error path of recvmmsg to correctly return the error code
of sock_error.

The bug was triggered by using recvmmsg on a CAN interface which was
not up. Linux 4.6 and later return 0 in this case while earlier
releases returned -ENETDOWN.

Fixes: 34b88a68f26a ("net: Fix use after free in the recvmmsg exit path")
Signed-off-by: Maxime Jayat
Signed-off-by: David S. Miller
Acked-by: Michal Kubecek
---
 net/socket.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/socket.c b/net/socket.c
index e66e4f357506..8327df0448f4 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2192,8 +2192,10 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
 		return err;
 
 	err = sock_error(sock->sk);
-	if (err)
+	if (err) {
+		datagrams = err;
 		goto out_put;
+	}
 
 	entry = mmsg;
 	compat_entry = (struct compat_mmsghdr __user *)mmsg;
-- 
2.12.2

patches.fixes/netfilter-nfnetlink-correctly-validate-length-of-bat.patch
From: Michal Kubecek
Date: Tue, 3 Jan 2017 11:25:59 +0100
Subject: netfilter: nfnetlink: correctly validate length of batch messages
Patch-mainline: v4.5-rc6
Git-commit: c58d6c93680f28ac58984af61d0a7ebf4319c241
References: CVE-2016-7917 bsc#1010444

If nlh->nlmsg_len is zero then an infinite loop is triggered because
'skb_pull(skb, msglen);' pulls zero bytes.

The calculation in nlmsg_len() underflows if 'nlh->nlmsg_len <
NLMSG_HDRLEN' which bypasses the length validation and will later
trigger an out-of-bound read.

If the length validation does fail then the malformed batch message is
copied back to userspace. However, we cannot do this because the
nlh->nlmsg_len can be invalid. This leads to an out-of-bounds read in
netlink_ack:

[   41.455421] ==================================================================
[   41.456431] BUG: KASAN: slab-out-of-bounds in memcpy+0x1d/0x40 at addr ffff880119e79340
[   41.456431] Read of size 4294967280 by task a.out/987
[   41.456431] =============================================================================
[   41.456431] BUG kmalloc-512 (Not tainted): kasan: bad access detected
[   41.456431] -----------------------------------------------------------------------------
...
[   41.456431] Bytes b4 ffff880119e79310: 00 00 00 00 d5 03 00 00 b0 fb fe ff 00 00 00 00  ................
[   41.456431] Object ffff880119e79320: 20 00 00 00 10 00 05 00 00 00 00 00 00 00 00 00   ...............
[   41.456431] Object ffff880119e79330: 14 00 0a 00 01 03 fc 40 45 56 11 22 33 10 00 05  .......@EV."3...
[   41.456431] Object ffff880119e79340: f0 ff ff ff 88 99 aa bb 00 14 00 0a 00 06 fe fb  ................
                                        ^^ start of batch nlmsg with
                                           nlmsg_len=4294967280
...
[   41.456431] Memory state around the buggy address:
[   41.456431]  ffff880119e79400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[   41.456431]  ffff880119e79480: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[   41.456431] >ffff880119e79500: 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc fc
[   41.456431]                    ^
[   41.456431]  ffff880119e79580: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[   41.456431]  ffff880119e79600: fc fc fc fc fc fc fc fc fc fc fb fb fb fb fb fb
[   41.456431] ==================================================================

Fix this with better validation of nlh->nlmsg_len and by setting
NFNL_BATCH_FAILURE if any batch message fails length validation.

CAP_NET_ADMIN is required to trigger the bugs.

Fixes: 9ea2aa8b7dba ("netfilter: nfnetlink: validate nfnetlink header from batch")
Signed-off-by: Phil Turnbull
Signed-off-by: Pablo Neira Ayuso
Acked-by: Michal Kubecek

openSUSE-42.1: NFNL_BATCH_FAILURE hasn't been introduced yet, so just
set success to false, which has the same effect (immediate bailout,
which is what we also do for other malformed messages).
---
 net/netfilter/nfnetlink.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 69e3ceffa14d..d8276282bccb 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -320,10 +320,12 @@ replay:
 		nlh = nlmsg_hdr(skb);
 		err = 0;
 
-		if (nlmsg_len(nlh) < sizeof(struct nfgenmsg) ||
-		    skb->len < nlh->nlmsg_len) {
-			err = -EINVAL;
-			goto ack;
+		if (nlh->nlmsg_len < NLMSG_HDRLEN ||
+		    skb->len < nlh->nlmsg_len ||
+		    nlmsg_len(nlh) < sizeof(struct nfgenmsg)) {
+			nfnl_err_reset(&err_list);
+			success = false;
+			goto done;
 		}
 
 		/* Only requests are handled by the kernel */
-- 
2.11.0

patches.fixes/netfilter-arp_tables-fix-invoking-32bit-iptable-P-IN.patch
From: Hongxu Jia
Date: Tue, 29 Nov 2016 21:56:26 -0500
Subject: netfilter: arp_tables: fix invoking 32bit "iptable -P INPUT ACCEPT" failed in 64bit kernel
Patch-mainline: v4.9-rc8
Git-commit: 17a49cd549d9dc8707dc9262210166455c612dde
References: CVE-2016-4997 CVE-2016-4998 bsc#986362 bsc#986365

Since 09d9686047db ("netfilter: x_tables: do compat validation via
translate_table"), it used the compatr structure to assign the newinfo
structure. In translate_compat_table of ip_tables.c and ip6_tables.c,
it used compatr->hook_entry to replace info->hook_entry and
compatr->underflow to replace info->underflow, but did not do the same
replacement in arp_tables.c.

This caused the 32-bit "arptables -P INPUT ACCEPT" invocation to fail
on a 64-bit kernel:
-------------------------------------- root@qemux86-64:~# arptables -P INPUT ACCEPT root@qemux86-64:~# arptables -P INPUT ACCEPT ERROR: Policy for `INPUT' offset 448 != underflow 0 arptables: Incompatible with this kernel -------------------------------------- Fixes: 09d9686047db ("netfilter: x_tables: do compat validation via translate_table") Signed-off-by: Hongxu Jia Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Acked-by: Michal Kubecek --- net/ipv4/netfilter/arp_tables.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index ebf5821caefb..7510a851d316 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1330,8 +1330,8 @@ static int translate_compat_table(struct xt_table_info **pinfo, newinfo->number = compatr->num_entries; for (i = 0; i < NF_ARP_NUMHOOKS; i++) { - newinfo->hook_entry[i] = info->hook_entry[i]; - newinfo->underflow[i] = info->underflow[i]; + newinfo->hook_entry[i] = compatr->hook_entry[i]; + newinfo->underflow[i] = compatr->underflow[i]; } entry1 = newinfo->entries[raw_smp_processor_id()]; pos = entry1; -- 2.12.2 patches.fixes/ext4-validate-s_first_meta_bg-at-mount-time.patch From 3a4b77cd47bb837b8557595ec7425f281f2ca1fe Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Thu, 1 Dec 2016 15:08:37 -0500 Subject: [PATCH] ext4: validate s_first_meta_bg at mount time Git-commit: 3a4b77cd47bb837b8557595ec7425f281f2ca1fe Patch-mainline: v4.10-rc1 References: bsc#1023377 CVE-2016-10208 Ralf Spenneberg reported that he hit a kernel crash when mounting a modified ext4 image. And it turns out that kernel crashed when calculating fs overhead (ext4_calculate_overhead()), this is because the image has very large s_first_meta_bg (debug code shows it's 842150400), and ext4 overruns the memory in count_overhead() when setting bitmap buffer, which is PAGE_SIZE. Ext4_calculate_overhead(): buf = get_zeroed_page(GFP_NOFS); <=== PAGE_SIZE buffer blks = count_overhead(sb, i, buf); Count_overhead(): for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) { <=== j = 842150400 ext4_set_bit(EXT4_B2C(sbi, s++), buf); <=== buffer overrun count++; } This can be reproduced easily for me by this script: #!/bin/bash rm -f fs.img mkdir -p /mnt/ext4 fallocate -l 16M fs.img mke2fs -t ext4 -O bigalloc,meta_bg,^resize_inode -F fs.img debugfs -w -R "ssv first_meta_bg 842150400" fs.img mount -o loop fs.img /mnt/ext4 Fix it by validating s_first_meta_bg first at mount time, and refusing to mount if its value exceeds the largest possible meta_bg number. 
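The overrun is easy to demonstrate outside the kernel. Below is a minimal userspace sketch of the bug class and of the validate-before-use check the patch adds at mount time; the function and macro names and the 4096-byte "page" are illustrative assumptions, not the actual ext4 code:

/* toy_meta_bg.c - userspace sketch of the bug class fixed above: an
 * untrusted on-disk field drives a bit-setting loop over a fixed-size
 * buffer, so it must be range-checked before use. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define TOY_PAGE_SIZE 4096
#define TOY_BITS_PER_PAGE (TOY_PAGE_SIZE * 8)

static void set_bit_le(unsigned long nr, unsigned char *addr)
{
	addr[nr / 8] |= 1u << (nr % 8);
}

/* Mirrors the shape of count_overhead(): each iteration marks one block
 * in a one-page bitmap. With an unchecked count, a value read from disk
 * can run far past the 4096-byte buffer. */
static int mark_group_blocks(unsigned char *bitmap, uint32_t nblocks)
{
	uint32_t i;

	/* The fix: reject counts that cannot fit in the bitmap, the same
	 * way the patch rejects s_first_meta_bg >= db_count at mount. */
	if (nblocks > TOY_BITS_PER_PAGE) {
		fprintf(stderr, "invalid block count %u (max %u)\n",
			nblocks, TOY_BITS_PER_PAGE);
		return -1;
	}
	for (i = 0; i < nblocks; i++)
		set_bit_le(i, bitmap);
	return 0;
}

int main(void)
{
	unsigned char *bitmap = calloc(1, TOY_PAGE_SIZE);

	if (!bitmap)
		return 1;
	/* 842150400 is the hostile value from the report; without the
	 * check above, this loop would scribble ~100 MB past the page. */
	if (mark_group_blocks(bitmap, 842150400u))
		puts("hostile image rejected at \"mount\" time");
	free(bitmap);
	return 0;
}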
Reported-by: Ralf Spenneberg Signed-off-by: Eryu Guan Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger Signed-off-by: Jan Kara
--- fs/ext4/super.c | 9 +++++++++ 1 file changed, 9 insertions(+) --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3997,6 +3997,15 @@ static int ext4_fill_super(struct super_ (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb); + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG)) { + if (le32_to_cpu(es->s_first_meta_bg) >= db_count) { + ext4_msg(sb, KERN_WARNING, + "first meta block group too large: %u " + "(group descriptor block count %u)", + le32_to_cpu(es->s_first_meta_bg), db_count); + goto failed_mount; + } + } sbi->s_group_desc = ext4_kvmalloc(db_count * sizeof(struct buffer_head *), GFP_KERNEL);
patches.fixes/cuse-fix-memory-leak.patch From: Miklos Szeredi Date: Tue, 10 Nov 2015 10:32:36 +0100 Subject: cuse: fix memory leak Git-commit: 2c5816b4beccc8ba709144539f6fdd764f8fa49c Patch-mainline: v4.4-rc5 References: bsc#969356, CVE-2015-1339
The problem is that fuse_dev_alloc() acquires an extra reference to cc.fc, and the original ref count is never dropped.
Reported-by: Colin Ian King Signed-off-by: Miklos Szeredi Fixes: cc080e9e9be1 ("fuse: introduce per-instance fuse_dev structure") Cc: # v4.2+ Acked-by: Borislav Petkov
--- fs/fuse/cuse.c | 2 ++ 1 file changed, 2 insertions(+) --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -542,6 +542,8 @@ static int cuse_channel_release(struct i unregister_chrdev_region(cc->cdev->dev, 1); cdev_del(cc->cdev); } + /* Base reference is now owned by "fud" */ + fuse_conn_put(&cc->fc); rc = fuse_dev_release(inode, file); /* puts the base reference */
patches.fixes/tmpfs-clear-S_ISGID-when-setting-posix-ACLs.patch From 497de07d89c1410d76a15bec2bb41f24a2a89f31 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Mon, 9 Jan 2017 09:34:48 +0800 Subject: [PATCH] tmpfs: clear S_ISGID when setting posix ACLs Git-commit: 497de07d89c1410d76a15bec2bb41f24a2a89f31 Patch-mainline: v4.10-rc4 References: bsc#1021258 CVE-2017-5551
The tmpfs modification was missed in the CVE-2016-7097 fix, commit 073931017b49 ("posix_acl: Clear SGID bit when setting file permissions"). It can be tested with xfstest generic/375, which failed to clear the setgid bit in the following test case on tmpfs:
touch $testfile chown 100:100 $testfile chmod 2755 $testfile _runas -u 100 -g 101 -- setfacl -m u::rwx,g::rwx,o::rwx $testfile
Signed-off-by: Gu Zheng Signed-off-by: Al Viro Signed-off-by: Jan Kara
--- fs/posix_acl.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -903,11 +903,10 @@ int simple_set_acl(struct inode *inode, int error; if (type == ACL_TYPE_ACCESS) { - error = posix_acl_equiv_mode(acl, &inode->i_mode); - if (error < 0) - return 0; - if (error == 0) - acl = NULL; + error = posix_acl_update_mode(inode, + &inode->i_mode, &acl); + if (error) + return error; } inode->i_ctime = CURRENT_TIME;
patches.fixes/sg-fix-double-free-when-drives-detach-during-sg_io.patch From: Calvin Owens Date: Fri, 30 Oct 2015 16:57:00 -0700 Subject: sg: Fix double-free when drives detach during SG_IO Git-commit: f3951a3709ff50990bf3e188c27d346792103432 Patch-mainline: v4.4-rc1 References: CVE-2015-8962 bsc#1010501
In sg_common_write(), we free the block request and return -ENODEV if the device is detached in the middle of the SG_IO ioctl().
Unfortunately, sg_finish_rem_req() also tries to free srp->rq, so we end up freeing rq->cmd in the already-freed rq object, and then free the object itself out from under the current user. This ends up corrupting random memory via the list_head on the rq object. The most common crash trace I saw is this:
------------[ cut here ]------------ kernel BUG at block/blk-core.c:1420! Call Trace: [] blk_put_request+0x5b/0x80 [] sg_finish_rem_req+0x6b/0x120 [sg] [] sg_common_write.isra.14+0x459/0x5a0 [sg] [] ? selinux_file_alloc_security+0x48/0x70 [] sg_new_write.isra.17+0x195/0x2d0 [sg] [] sg_ioctl+0x644/0xdb0 [sg] [] do_vfs_ioctl+0x90/0x520 [] ? file_has_perm+0x97/0xb0 [] SyS_ioctl+0x91/0xb0 [] tracesys+0xdd/0xe2 RIP [] __blk_put_request+0x154/0x1a0
The solution is straightforward: just set srp->rq to NULL in the failure branch so that sg_finish_rem_req() doesn't attempt to re-free it. Additionally, since sg_rq_end_io() will never be called on the object when this happens, we need to free the memory backing ->cmd if it isn't embedded in the object itself. KASAN was extremely helpful in finding the root cause of this bug.
Signed-off-by: Calvin Owens Acked-by: Douglas Gilbert Signed-off-by: Martin K. Petersen Acked-by: Johannes Thumshirn
--- drivers/scsi/sg.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 9d7b7db..503ab8b 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -787,8 +787,14 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp, return k; /* probably out of space --> ENOMEM */ } if (atomic_read(&sdp->detaching)) { - if (srp->bio) + if (srp->bio) { + if (srp->rq->cmd != srp->rq->__cmd) + kfree(srp->rq->cmd); + blk_end_request_all(srp->rq, -EIO); + srp->rq = NULL; + } + sg_finish_rem_req(srp); return -ENODEV; }
patches.fixes/don-t-feed-anything-but-regular-iovec-s-to-blk_rq_map_user_iov.patch From: Linus Torvalds Date: Tue, 6 Dec 2016 16:18:14 -0800 Subject: Don't feed anything but regular iovec's to blk_rq_map_user_iov Git-commit: a0ac402cfcdc904f9772e1762b3fda112dcc56a0 Patch-mainline: v4.10 or v4.9-rc9 (next release) References: CVE-2016-9576 bsc#1013604
In theory we could map other things, but there's a reason that function is called "user_iov". Using anything else (like splice can do) just confuses it.
Reported-and-tested-by: Johannes Thumshirn Cc: Al Viro Signed-off-by: Linus Torvalds Acked-by: Johannes Thumshirn
--- block/blk-map.c | 3 +++ 1 file changed, 3 insertions(+) --- a/block/blk-map.c +++ b/block/blk-map.c @@ -90,6 +90,9 @@ int blk_rq_map_user_iov(struct request_q if (!iter || !iter->count) return -EINVAL; + if (!iter_is_iovec((struct iov_iter *) iter)) + return -EINVAL; + iov_for_each(iov, i, *iter) { unsigned long uaddr = (unsigned long) iov.iov_base;
patches.fixes/irda-fix-lockdep-annotations-in-hashbin_delete.patch From: "David S. Miller" Date: Fri, 17 Feb 2017 16:19:39 -0500 Subject: irda: Fix lockdep annotations in hashbin_delete(). Git-commit: 4c03b862b12f980456f9de92db6d508a4999b788 Patch-mainline: v4.10 References: bsc#1027178, CVE-2017-6348
A nested lock depth was added to the hashbin_delete() code but it doesn't actually work so well and results in tons of lockdep splats. Fix the code instead to properly drop the lock around the operation and just keep peeking the head of the hashbin queue.
Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: David S. Miller
Acked-by: Borislav Petkov
--- net/irda/irqueue.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c index acbe61c7e683..160dc89335e2 100644 --- a/net/irda/irqueue.c +++ b/net/irda/irqueue.c @@ -383,9 +383,6 @@ hashbin_t *hashbin_new(int type) * for deallocating this structure if it's complex. If not the user can * just supply kfree, which should take care of the job. */ -#ifdef CONFIG_LOCKDEP -static int hashbin_lock_depth = 0; -#endif int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func) { irda_queue_t* queue; @@ -396,22 +393,27 @@ int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func) IRDA_ASSERT(hashbin->magic == HB_MAGIC, return -1;); /* Synchronize */ - if ( hashbin->hb_type & HB_LOCK ) { - spin_lock_irqsave_nested(&hashbin->hb_spinlock, flags, - hashbin_lock_depth++); - } + if (hashbin->hb_type & HB_LOCK) + spin_lock_irqsave(&hashbin->hb_spinlock, flags); /* * Free the entries in the hashbin, TODO: use hashbin_clear when * it has been shown to work */ for (i = 0; i < HASHBIN_SIZE; i ++ ) { - queue = dequeue_first((irda_queue_t**) &hashbin->hb_queue[i]); - while (queue ) { - if (free_func) - (*free_func)(queue); - queue = dequeue_first( - (irda_queue_t**) &hashbin->hb_queue[i]); + while (1) { + queue = dequeue_first((irda_queue_t**) &hashbin->hb_queue[i]); + + if (!queue) + break; + + if (free_func) { + if (hashbin->hb_type & HB_LOCK) + spin_unlock_irqrestore(&hashbin->hb_spinlock, flags); + free_func(queue); + if (hashbin->hb_type & HB_LOCK) + spin_lock_irqsave(&hashbin->hb_spinlock, flags); + } } } @@ -420,12 +422,8 @@ int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func) hashbin->magic = ~HB_MAGIC; /* Release lock */ - if ( hashbin->hb_type & HB_LOCK) { + if (hashbin->hb_type & HB_LOCK) spin_unlock_irqrestore(&hashbin->hb_spinlock, flags); -#ifdef CONFIG_LOCKDEP - hashbin_lock_depth--; -#endif - } /* * Free the hashbin structure
patches.fixes/brcmfmac-avoid-potential-stack-overflow-in-brcmf_cfg.patch From: Arend Van Spriel Date: Mon, 5 Sep 2016 10:45:47 +0100 Subject: brcmfmac: avoid potential stack overflow in brcmf_cfg80211_start_ap() Patch-mainline: v4.8-rc7 Git-commit: ded89912156b1a47d940a0c954c43afbabd0c42c References: bsc#1004462 CVE-2016-8658
User-space can choose to omit NL80211_ATTR_SSID and only provide raw IE TLV data. When doing so it can provide an SSID IE whose length exceeds the allowed size. The driver further processes this IE, copying it into a local variable without checking the length. Hence the stack can be corrupted and used in an exploit.
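The pattern of the fix, checking an attacker-controlled TLV length against the destination buffer before copying, can be sketched in a few lines of userspace C. All names below are invented for illustration and are not the brcmfmac driver's:

/* toy_ssid.c - sketch of the validation added above: a user-supplied
 * IE length must be checked against the destination buffer before
 * memcpy(). */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TOY_MAX_SSID_LEN 32	/* same limit as IEEE80211_MAX_SSID_LEN */

struct toy_ie {
	uint8_t id;
	uint8_t len;
	const uint8_t *data;
};

/* Returns 0 on success, -1 if the IE is missing or oversized. Without
 * the len check this is the stack smash described above, since ie->len
 * is a raw byte taken from user-supplied TLV data. */
static int copy_ssid(const struct toy_ie *ie, char out[TOY_MAX_SSID_LEN + 1])
{
	if (!ie || ie->len > TOY_MAX_SSID_LEN)
		return -1;
	memcpy(out, ie->data, ie->len);
	out[ie->len] = '\0';
	return 0;
}

int main(void)
{
	uint8_t raw[] = { 'h', 'o', 'm', 'e' };
	struct toy_ie good = { .id = 0, .len = 4, .data = raw };
	struct toy_ie evil = { .id = 0, .len = 200, .data = raw };
	char ssid[TOY_MAX_SSID_LEN + 1];

	printf("good: %d\n", copy_ssid(&good, ssid));	/* 0  */
	printf("evil: %d\n", copy_ssid(&evil, ssid));	/* -1 */
	return 0;
}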
Cc: stable@vger.kernel.org # v4.7 Reported-by: Daxing Guo Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo Acked-by: Benjamin Poirier
--- drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c @@ -4014,7 +4014,7 @@ brcmf_cfg80211_start_ap(struct wiphy *wi (u8 *)&settings->beacon.head[ie_offset], settings->beacon.head_len - ie_offset, WLAN_EID_SSID); - if (!ssid_ie) + if (!ssid_ie || ssid_ie->len > IEEE80211_MAX_SSID_LEN) return -EINVAL; memcpy(ssid_le.SSID, ssid_ie->data, ssid_ie->len);
patches.fixes/tty-Prevent-ldisc-drivers-from-re-using-stale-tty-fi.patch From: Peter Hurley Date: Fri, 27 Nov 2015 14:30:21 -0500 Subject: tty: Prevent ldisc drivers from re-using stale tty fields Git-commit: dd42bf1197144ede075a9d4793123f7689e164bc Patch-mainline: v4.5-rc1 References: bnc#1010507 CVE-2015-8964
Line discipline drivers may mistakenly misuse ldisc-related fields when initializing. For example, a failure to initialize tty->receive_room in the N_GIGASET_M101 line discipline was recently found and fixed [1]. Now, the N_X25 line discipline has been discovered accessing the previous line discipline's already-freed private data [2]. Harden the ldisc interface against misuse by initializing relevant tty fields before instancing the new line discipline.
[1] commit fd98e9419d8d622a4de91f76b306af6aa627aa9c Author: Tilman Schmidt Date: Tue Jul 14 00:37:13 2015 +0200 isdn/gigaset: reset tty->receive_room when attaching ser_gigaset
[2] Report from Sasha Levin [ 634.336761] ================================================================== [ 634.338226] BUG: KASAN: use-after-free in x25_asy_open_tty+0x13d/0x490 at addr ffff8800a743efd0 [ 634.339558] Read of size 4 by task syzkaller_execu/8981 [ 634.340359] ============================================================================= [ 634.341598] BUG kmalloc-512 (Not tainted): kasan: bad access detected ... [ 634.405018] Call Trace: [ 634.405277] dump_stack (lib/dump_stack.c:52) [ 634.405775] print_trailer (mm/slub.c:655) [ 634.406361] object_err (mm/slub.c:662) [ 634.406824] kasan_report_error (mm/kasan/report.c:138 mm/kasan/report.c:236) [ 634.409581] __asan_report_load4_noabort (mm/kasan/report.c:279) [ 634.411355] x25_asy_open_tty (drivers/net/wan/x25_asy.c:559 (discriminator 1)) [ 634.413997] tty_ldisc_open.isra.2 (drivers/tty/tty_ldisc.c:447) [ 634.414549] tty_set_ldisc (drivers/tty/tty_ldisc.c:567) [ 634.415057] tty_ioctl (drivers/tty/tty_io.c:2646 drivers/tty/tty_io.c:2879) [ 634.423524] do_vfs_ioctl (fs/ioctl.c:43 fs/ioctl.c:607) [ 634.427491] SyS_ioctl (fs/ioctl.c:622 fs/ioctl.c:613) [ 634.427945] entry_SYSCALL_64_fastpath (arch/x86/entry/entry_64.S:188)
Cc: Tilman Schmidt Cc: Sasha Levin Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jiri Slaby
--- drivers/tty/tty_ldisc.c | 7 +++++++ 1 file changed, 7 insertions(+) --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -420,6 +420,10 @@ EXPORT_SYMBOL_GPL(tty_ldisc_flush); * they are not on hot paths so a little discipline won't do * any harm. * + * The line discipline-related tty_struct fields are reset to + * prevent the ldisc driver from re-using stale information for + * the new ldisc instance.
+ * * Locking: takes termios_rwsem */ @@ -428,6 +432,9 @@ static void tty_set_termios_ldisc(struct down_write(&tty->termios_rwsem); tty->termios.c_line = num; up_write(&tty->termios_rwsem); + + tty->disc_data = NULL; + tty->receive_room = 0; } /** patches.fixes/tty-n_hdlc-get-rid-of-racy-n_hdlc.tbuf.patch From: Alexander Popov Date: Tue, 28 Feb 2017 19:54:40 +0300 Subject: tty: n_hdlc: get rid of racy n_hdlc.tbuf Patch-mainline: v4.11-rc2 Git-commit: 82f2341c94d270421f383641b7cd670e474db56b Git-tree: https://git.kernel.org/cgit/linux/kernel/git/gregkh/tty.git References: bnc#1027565 CVE-2017-2636 Currently N_HDLC line discipline uses a self-made singly linked list for data buffers and has n_hdlc.tbuf pointer for buffer retransmitting after an error. The commit be10eb7589337e5defbe214dae038a53dd21add8 ("tty: n_hdlc add buffer flushing") introduced racy access to n_hdlc.tbuf. After tx error concurrent flush_tx_queue() and n_hdlc_send_frames() can put one data buffer to tx_free_buf_list twice. That causes double free in n_hdlc_release(). Let's use standard kernel linked list and get rid of n_hdlc.tbuf: in case of tx error put current data buffer after the head of tx_buf_list. Signed-off-by: Alexander Popov Cc: stable Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jiri Slaby --- drivers/tty/n_hdlc.c | 132 ++++++++++++++++++++++++++------------------------- 1 file changed, 69 insertions(+), 63 deletions(-) --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -114,7 +114,7 @@ #define DEFAULT_TX_BUF_COUNT 3 struct n_hdlc_buf { - struct n_hdlc_buf *link; + struct list_head list_item; int count; char buf[1]; }; @@ -122,8 +122,7 @@ struct n_hdlc_buf { #define N_HDLC_BUF_SIZE (sizeof(struct n_hdlc_buf) + maxframe) struct n_hdlc_buf_list { - struct n_hdlc_buf *head; - struct n_hdlc_buf *tail; + struct list_head list; int count; spinlock_t spinlock; }; @@ -136,7 +135,6 @@ struct n_hdlc_buf_list { * @backup_tty - TTY to use if tty gets closed * @tbusy - reentrancy flag for tx wakeup code * @woke_up - FIXME: describe this field - * @tbuf - currently transmitting tx buffer * @tx_buf_list - list of pending transmit frame buffers * @rx_buf_list - list of received frame buffers * @tx_free_buf_list - list unused transmit frame buffers @@ -149,7 +147,6 @@ struct n_hdlc { struct tty_struct *backup_tty; int tbusy; int woke_up; - struct n_hdlc_buf *tbuf; struct n_hdlc_buf_list tx_buf_list; struct n_hdlc_buf_list rx_buf_list; struct n_hdlc_buf_list tx_free_buf_list; @@ -159,6 +156,8 @@ struct n_hdlc { /* * HDLC buffer list manipulation functions */ +static void n_hdlc_buf_return(struct n_hdlc_buf_list *buf_list, + struct n_hdlc_buf *buf); static void n_hdlc_buf_list_init(struct n_hdlc_buf_list *list); static void n_hdlc_buf_put(struct n_hdlc_buf_list *list, struct n_hdlc_buf *buf); @@ -209,16 +208,9 @@ static void flush_tx_queue(struct tty_st { struct n_hdlc *n_hdlc = tty2n_hdlc(tty); struct n_hdlc_buf *buf; - unsigned long flags; while ((buf = n_hdlc_buf_get(&n_hdlc->tx_buf_list))) n_hdlc_buf_put(&n_hdlc->tx_free_buf_list, buf); - spin_lock_irqsave(&n_hdlc->tx_buf_list.spinlock, flags); - if (n_hdlc->tbuf) { - n_hdlc_buf_put(&n_hdlc->tx_free_buf_list, n_hdlc->tbuf); - n_hdlc->tbuf = NULL; - } - spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock, flags); } static struct tty_ldisc_ops n_hdlc_ldisc = { @@ -284,7 +276,6 @@ static void n_hdlc_release(struct n_hdlc } else break; } - kfree(n_hdlc->tbuf); kfree(n_hdlc); } /* end of n_hdlc_release() */ @@ -403,13 +394,7 @@ static void n_hdlc_send_frames(struct n_ 
n_hdlc->woke_up = 0; spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock, flags); - /* get current transmit buffer or get new transmit */ - /* buffer from list of pending transmit buffers */ - - tbuf = n_hdlc->tbuf; - if (!tbuf) - tbuf = n_hdlc_buf_get(&n_hdlc->tx_buf_list); - + tbuf = n_hdlc_buf_get(&n_hdlc->tx_buf_list); while (tbuf) { if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d)sending frame %p, count=%d\n", @@ -421,7 +406,7 @@ static void n_hdlc_send_frames(struct n_ /* rollback was possible and has been done */ if (actual == -ERESTARTSYS) { - n_hdlc->tbuf = tbuf; + n_hdlc_buf_return(&n_hdlc->tx_buf_list, tbuf); break; } /* if transmit error, throw frame away by */ @@ -436,10 +421,7 @@ static void n_hdlc_send_frames(struct n_ /* free current transmit buffer */ n_hdlc_buf_put(&n_hdlc->tx_free_buf_list, tbuf); - - /* this tx buffer is done */ - n_hdlc->tbuf = NULL; - + /* wait up sleeping writers */ wake_up_interruptible(&tty->write_wait); @@ -449,10 +431,12 @@ static void n_hdlc_send_frames(struct n_ if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d)frame %p pending\n", __FILE__,__LINE__,tbuf); - - /* buffer not accepted by driver */ - /* set this buffer as pending buffer */ - n_hdlc->tbuf = tbuf; + + /* + * the buffer was not accepted by driver, + * return it back into tx queue + */ + n_hdlc_buf_return(&n_hdlc->tx_buf_list, tbuf); break; } } @@ -750,7 +734,8 @@ static int n_hdlc_tty_ioctl(struct tty_s int error = 0; int count; unsigned long flags; - + struct n_hdlc_buf *buf = NULL; + if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d)n_hdlc_tty_ioctl() called %d\n", __FILE__,__LINE__,cmd); @@ -764,8 +749,10 @@ static int n_hdlc_tty_ioctl(struct tty_s /* report count of read data available */ /* in next available frame (if any) */ spin_lock_irqsave(&n_hdlc->rx_buf_list.spinlock,flags); - if (n_hdlc->rx_buf_list.head) - count = n_hdlc->rx_buf_list.head->count; + buf = list_first_entry_or_null(&n_hdlc->rx_buf_list.list, + struct n_hdlc_buf, list_item); + if (buf) + count = buf->count; else count = 0; spin_unlock_irqrestore(&n_hdlc->rx_buf_list.spinlock,flags); @@ -777,8 +764,10 @@ static int n_hdlc_tty_ioctl(struct tty_s count = tty_chars_in_buffer(tty); /* add size of next output frame in queue */ spin_lock_irqsave(&n_hdlc->tx_buf_list.spinlock,flags); - if (n_hdlc->tx_buf_list.head) - count += n_hdlc->tx_buf_list.head->count; + buf = list_first_entry_or_null(&n_hdlc->tx_buf_list.list, + struct n_hdlc_buf, list_item); + if (buf) + count += buf->count; spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock,flags); error = put_user(count, (int __user *)arg); break; @@ -826,14 +815,14 @@ static unsigned int n_hdlc_tty_poll(stru poll_wait(filp, &tty->write_wait, wait); /* set bits for operations that won't block */ - if (n_hdlc->rx_buf_list.head) + if (!list_empty(&n_hdlc->rx_buf_list.list)) mask |= POLLIN | POLLRDNORM; /* readable */ if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) mask |= POLLHUP; if (tty_hung_up_p(filp)) mask |= POLLHUP; if (!tty_is_writelocked(tty) && - n_hdlc->tx_free_buf_list.head) + !list_empty(&n_hdlc->tx_free_buf_list.list)) mask |= POLLOUT | POLLWRNORM; /* writable */ } return mask; @@ -857,7 +846,12 @@ static struct n_hdlc *n_hdlc_alloc(void) n_hdlc_buf_list_init(&n_hdlc->tx_free_buf_list); n_hdlc_buf_list_init(&n_hdlc->rx_buf_list); n_hdlc_buf_list_init(&n_hdlc->tx_buf_list); - + + INIT_LIST_HEAD(&n_hdlc->rx_free_buf_list.list); + INIT_LIST_HEAD(&n_hdlc->tx_free_buf_list.list); + INIT_LIST_HEAD(&n_hdlc->rx_buf_list.list); + 
INIT_LIST_HEAD(&n_hdlc->tx_buf_list.list); + /* allocate free rx buffer list */ for(i=0;ispinlock, flags); + + list_add(&buf->list_item, &buf_list->list); + buf_list->count++; + + spin_unlock_irqrestore(&buf_list->spinlock, flags); +} + +/** * n_hdlc_buf_put - add specified HDLC buffer to tail of specified list - * @list - pointer to buffer list + * @buf_list - pointer to buffer list * @buf - pointer to buffer */ -static void n_hdlc_buf_put(struct n_hdlc_buf_list *list, +static void n_hdlc_buf_put(struct n_hdlc_buf_list *buf_list, struct n_hdlc_buf *buf) { unsigned long flags; - spin_lock_irqsave(&list->spinlock,flags); - - buf->link=NULL; - if (list->tail) - list->tail->link = buf; - else - list->head = buf; - list->tail = buf; - (list->count)++; - - spin_unlock_irqrestore(&list->spinlock,flags); - + + spin_lock_irqsave(&buf_list->spinlock, flags); + + list_add_tail(&buf->list_item, &buf_list->list); + buf_list->count++; + + spin_unlock_irqrestore(&buf_list->spinlock, flags); } /* end of n_hdlc_buf_put() */ /** * n_hdlc_buf_get - remove and return an HDLC buffer from list - * @list - pointer to HDLC buffer list + * @buf_list - pointer to HDLC buffer list * * Remove and return an HDLC buffer from the head of the specified HDLC buffer * list. * Returns a pointer to HDLC buffer if available, otherwise %NULL. */ -static struct n_hdlc_buf* n_hdlc_buf_get(struct n_hdlc_buf_list *list) +static struct n_hdlc_buf *n_hdlc_buf_get(struct n_hdlc_buf_list *buf_list) { unsigned long flags; struct n_hdlc_buf *buf; - spin_lock_irqsave(&list->spinlock,flags); - - buf = list->head; + + spin_lock_irqsave(&buf_list->spinlock, flags); + + buf = list_first_entry_or_null(&buf_list->list, + struct n_hdlc_buf, list_item); if (buf) { - list->head = buf->link; - (list->count)--; + list_del(&buf->list_item); + buf_list->count--; } - if (!list->head) - list->tail = NULL; - - spin_unlock_irqrestore(&list->spinlock,flags); + + spin_unlock_irqrestore(&buf_list->spinlock, flags); return buf; - } /* end of n_hdlc_buf_get() */ static char hdlc_banner[] __initdata = patches.fixes/keys-don-t-permit-request_key-to-construct-a-new-keyring From: David Howells Date: Mon, 19 Oct 2015 11:20:28 +0100 Subject: KEYS: Don't permit request_key() to construct a new keyring Git-commit: 911b79cde95c7da0ec02f48105358a36636b7a71 Patch-mainline: v4.4 or v4.3-rc7 (next release) References: CVE-2015-7872 bsc#951440 If request_key() is used to find a keyring, only do the search part - don't do the construction part if the keyring was not found by the search. We don't really want keyrings in the negative instantiated state since the rejected/negative instantiation error value in the payload is unioned with keyring metadata. 
Now the kernel gives an error: request_key("keyring", "#selinux,bdekeyring", "keyring", KEY_SPEC_USER_SESSION_KEYRING) = -1 EPERM (Operation not permitted)
Signed-off-by: David Howells Acked-by: Jeff Mahoney
--- security/keys/request_key.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 486ef6f..0d62531 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -440,6 +440,9 @@ static struct key *construct_key_and_link(struct keyring_search_context *ctx, kenter(""); + if (ctx->index_key.type == &key_type_keyring) + return ERR_PTR(-EPERM); + user = key_user_lookup(current_fsuid()); if (!user) return ERR_PTR(-ENOMEM);
patches.fixes/0001-ASN.1-Fix-non-match-detection-failure-on-data-overru.patch From 0d62e9dd6da45bbf0f33a8617afc5fe774c8f45f Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Aug 2015 12:54:46 +0100 Subject: [PATCH] ASN.1: Fix non-match detection failure on data overrun Git-commit: 0d62e9dd6da45bbf0f33a8617afc5fe774c8f45f Patch-mainline: v4.3-rc1 References: bsc#963762, CVE-2016-2053
If the ASN.1 decoder is asked to parse a sequence of objects, non-optional matches get skipped if there's no more data to be had rather than a data-overrun error being reported. This is because the code segment that decides whether to skip optional matches (i.e. matches that can be ignored because an element is marked OPTIONAL in the grammar) for lack of data also skips non-optional elements if the data pointer has reached the end of the buffer.
This can be tested with the data decoder for the new RSA akcipher algorithm that takes three non-optional integers. Currently, it skips the last integer if there is insufficient data. Without the fix, #defining DEBUG in asn1_decoder.c will show something like:
next_op: pc=0/13 dp=0/270 C=0 J=0 - match? 30 30 00 - TAG: 30 266 CONS next_op: pc=2/13 dp=4/270 C=1 J=0 - match? 02 02 00 - TAG: 02 257 - LEAF: 257 next_op: pc=5/13 dp=265/270 C=1 J=0 - match? 02 02 00 - TAG: 02 3 - LEAF: 3 next_op: pc=8/13 dp=270/270 C=1 J=0 next_op: pc=11/13 dp=270/270 C=1 J=0 - end cons t=4 dp=270 l=270/270
The next_op line for pc=8/13 should be followed by a match line.
This is not exploitable for X.509 certificates by means of shortening the message and fixing up the ASN.1 CONS tags because: (1) The relevant records being built up are cleared before use. (2) If the message is shortened sufficiently to remove the public key, the ASN.1 parse of the RSA key will fail quickly due to a lack of data. (3) Extracted signature data is either turned into MPIs (which cope with a 0 length) or is simple integers specifying algorithms and suchlike (which can validly be 0); and (4) The AKID and SKID extensions are optional and their removal is handled without risking passing a NULL to asymmetric_key_generate_id(). (5) If the certificate is truncated sufficiently to remove the subject, issuer or serialNumber then the ASN.1 decoder will fail with a 'Cons stack underflow' return.
This is not exploitable for PKCS#7 messages by means of removal of elements from such a message from the tail end of a sequence: (1) Any shortened X.509 certs embedded in the PKCS#7 message are survivable as detailed above. (2) The message digest content isn't used if it shows a NULL pointer, similarly, the authattrs aren't used if that shows a NULL pointer. (3) A missing signature results in a NULL MPI - which the MPI routines deal with.
(4) If data is NULL, it is expected that the message has detached content and that is handled appropriately. (5) If the serialNumber is excised, the unconditional action associated with it will pick up the containing SEQUENCE instead, so no NULL pointer will be seen here. If both the issuer and the serialNumber are excised, the ASN.1 decode will fail with an 'Unexpected tag' return. In either case, there's no way to get to asymmetric_key_generate_id() with a NULL pointer. (6) Other fields are decoded to simple integers. Shortening the message to omit an algorithm ID field will cause checks on this to fail early in the verification process. This can also be tested by snipping objects off of the end of the ASN.1 stream such that mandatory tags are removed - or even from the end of internal SEQUENCEs. If any mandatory tag is missing, the error EBADMSG *should* be produced. Without this patch ERANGE or ENOPKG might be produced or the parse may apparently succeed, perhaps with ENOKEY or EKEYREJECTED being produced later, depending on what gets snipped. Just snipping off the final BIT_STRING or OCTET_STRING from either sample should be a start since both are mandatory and neither will cause an EBADMSG without the patches Reported-by: Marcel Holtmann Signed-off-by: David Howells Tested-by: Marcel Holtmann Reviewed-by: David Woodhouse Acked-by: Lee, Chun-Yi --- lib/asn1_decoder.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) --- a/lib/asn1_decoder.c +++ b/lib/asn1_decoder.c @@ -208,9 +208,8 @@ next_op: unsigned char tmp; /* Skip conditional matches if possible */ - if ((op & ASN1_OP_MATCH__COND && - flags & FLAG_MATCHED) || - dp == datalen) { + if ((op & ASN1_OP_MATCH__COND && flags & FLAG_MATCHED) || + (op & ASN1_OP_MATCH__SKIP && dp == datalen)) { pc += asn1_op_lengths[op]; goto next_op; } patches.fixes/0001-KEYS-Fix-ASN.1-indefinite-length-object-parsing.patch From 23c8a812dc3c621009e4f0e5342aa4e2ede1ceaa Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 23 Feb 2016 11:03:12 +0000 Subject: [PATCH] KEYS: Fix ASN.1 indefinite length object parsing Git-commit: 23c8a812dc3c621009e4f0e5342aa4e2ede1ceaa Patch-mainline: v4.6 References: bsc#979867, CVE-2016-0758 This fixes CVE-2016-0758. In the ASN.1 decoder, when the length field of an ASN.1 value is extracted, it isn't validated against the remaining amount of data before being added to the cursor. With a sufficiently large size indicated, the check: datalen - dp < 2 may then fail due to integer overflow. Fix this by checking the length indicated against the amount of remaining data in both places a definite length is determined. Whilst we're at it, make the following changes: (1) Check the maximum size of extended length does not exceed the capacity of the variable it's being stored in (len) rather than the type that variable is assumed to be (size_t). (2) Compare the EOC tag to the symbolic constant ASN1_EOC rather than the integer 0. (3) To reduce confusion, move the initialisation of len outside of: for (len = 0; n > 0; n--) { since it doesn't have anything to do with the loop counter n. Signed-off-by: David Howells Reviewed-by: Mimi Zohar Acked-by: David Woodhouse Acked-by: Peter Jones Acked-by: Lee, Chun-Yi --- lib/asn1_decoder.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) --- a/lib/asn1_decoder.c +++ b/lib/asn1_decoder.c @@ -69,7 +69,7 @@ next_tag: /* Extract a tag from the data */ tag = data[dp++]; - if (tag == 0) { + if (tag == ASN1_EOC) { /* It appears to be an EOC. 
*/ if (data[dp++] != 0) goto invalid_eoc; @@ -91,10 +91,8 @@ next_tag: /* Extract the length */ len = data[dp++]; - if (len <= 0x7f) { - dp += len; - goto next_tag; - } + if (len <= 0x7f) + goto check_length; if (unlikely(len == ASN1_INDEFINITE_LENGTH)) { /* Indefinite length */ @@ -105,14 +103,18 @@ next_tag: } n = len - 0x80; - if (unlikely(n > sizeof(size_t) - 1)) + if (unlikely(n > sizeof(len) - 1)) goto length_too_long; if (unlikely(n > datalen - dp)) goto data_overrun_error; - for (len = 0; n > 0; n--) { + len = 0; + for (; n > 0; n--) { len <<= 8; len |= data[dp++]; } +check_length: + if (len > datalen - dp) + goto data_overrun_error; dp += len; goto next_tag; patches.fixes/crypto-algif_hash-Only-export-and-import-on-sockets- From 4afa5f9617927453ac04b24b584f6c718dfb4f45 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 1 Nov 2015 17:11:19 +0800 Subject: [PATCH] crypto: algif_hash - Only export and import on sockets with data Git-commit: 4afa5f9617927453ac04b24b584f6c718dfb4f45 Patch-mainline: 4.4-rc1 References: CVE-2016-8646,bsc#1010150 The hash_accept call fails to work on sockets that have not received any data. For some algorithm implementations it may cause crashes. This patch fixes this by ensuring that we only export and import on sockets that have received data. Cc: stable@vger.kernel.org Reported-by: Harsh Jain Signed-off-by: Herbert Xu Tested-by: Stephan Mueller Acked-by: Takashi Iwai --- crypto/algif_hash.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 1396ad0787fc..b4c24fe3dcfb 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -181,9 +181,14 @@ static int hash_accept(struct socket *sock, struct socket *newsock, int flags) struct sock *sk2; struct alg_sock *ask2; struct hash_ctx *ctx2; + bool more; int err; - err = crypto_ahash_export(req, state); + lock_sock(sk); + more = ctx->more; + err = more ? crypto_ahash_export(req, state) : 0; + release_sock(sk); + if (err) return err; @@ -194,7 +199,10 @@ static int hash_accept(struct socket *sock, struct socket *newsock, int flags) sk2 = newsock->sk; ask2 = alg_sk(sk2); ctx2 = ask2->private; - ctx2->more = 1; + ctx2->more = more; + + if (!more) + return err; err = crypto_ahash_import(&ctx2->req, state); if (err) { -- 2.10.2 patches.fixes/crypto-mcryptd-Check-mcryptd-algorithm-compatibility From 48a992727d82cb7db076fa15d372178743b1f4cd Mon Sep 17 00:00:00 2001 From: tim Date: Mon, 5 Dec 2016 11:46:31 -0800 Subject: [PATCH] crypto: mcryptd - Check mcryptd algorithm compatibility Git-commit: 48a992727d82cb7db076fa15d372178743b1f4cd Patch-mainline: 4.9 References: CVE-2016-10147 bsc#1020381 Algorithms not compatible with mcryptd could be spawned by mcryptd with a direct crypto_alloc_tfm invocation using a "mcryptd(alg)" name construct. This causes mcryptd to crash the kernel if an arbitrary "alg" is incompatible and not intended to be used with mcryptd. It is an issue if AF_ALG tries to spawn mcryptd(alg) to expose it externally. But such algorithms must be used internally and not be exposed. We added a check to enforce that only internal algorithms are allowed with mcryptd at the time mcryptd is spawning an algorithm. 
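The shape of that check can be shown in isolation. The following userspace sketch mirrors the reworked mcryptd_check_internal() logic under invented names; the flag value is a stand-in for CRYPTO_ALG_INTERNAL, not the crypto API's definition:

/* toy_internal_check.c - sketch of the gating logic the patch adds: an
 * algorithm may be wrapped only when the INTERNAL bit ends up set in
 * both its type and mask. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TOY_ALG_INTERNAL 0x2000u	/* stands in for CRYPTO_ALG_INTERNAL */

/* Mirrors the reworked mcryptd_check_internal(): fold the caller's
 * INTERNAL bits into *type/*mask and report whether the result marks
 * the algorithm as internal-only. */
static bool check_internal(uint32_t attr_type, uint32_t attr_mask,
			   uint32_t *type, uint32_t *mask)
{
	*type |= attr_type & TOY_ALG_INTERNAL;
	*mask |= attr_mask & TOY_ALG_INTERNAL;
	return (*type & *mask & TOY_ALG_INTERNAL) != 0;
}

int main(void)
{
	uint32_t type = 0, mask = 0;

	/* An external caller that never set the INTERNAL bits is now
	 * refused (-EINVAL in the real code) instead of instantiated. */
	if (!check_internal(0, 0, &type, &mask))
		puts("external algorithm rejected");

	type = mask = 0;
	if (check_internal(TOY_ALG_INTERNAL, TOY_ALG_INTERNAL, &type, &mask))
		puts("internal algorithm accepted");
	return 0;
}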
Link: http://marc.info/?l=linux-crypto-vger&m=148063683310477&w=2 Cc: stable@vger.kernel.org Reported-by: Mikulas Patocka Signed-off-by: Tim Chen Signed-off-by: Herbert Xu Acked-by: Takashi Iwai
--- crypto/mcryptd.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) --- a/crypto/mcryptd.c +++ b/crypto/mcryptd.c @@ -258,18 +258,22 @@ out_free_inst: goto out; } -static inline void mcryptd_check_internal(struct rtattr **tb, u32 *type, +static inline bool mcryptd_check_internal(struct rtattr **tb, u32 *type, u32 *mask) { struct crypto_attr_type *algt; algt = crypto_get_attr_type(tb); if (IS_ERR(algt)) - return; - if ((algt->type & CRYPTO_ALG_INTERNAL)) - *type |= CRYPTO_ALG_INTERNAL; - if ((algt->mask & CRYPTO_ALG_INTERNAL)) - *mask |= CRYPTO_ALG_INTERNAL; + return false; + + *type |= algt->type & CRYPTO_ALG_INTERNAL; + *mask |= algt->mask & CRYPTO_ALG_INTERNAL; + + if (*type & *mask & CRYPTO_ALG_INTERNAL) + return true; + else + return false; } static int mcryptd_hash_init_tfm(struct crypto_tfm *tfm) @@ -498,7 +502,8 @@ static int mcryptd_create_hash(struct cr u32 mask = 0; int err; - mcryptd_check_internal(tb, &type, &mask); + if (!mcryptd_check_internal(tb, &type, &mask)) + return -EINVAL; salg = shash_attr_alg(tb[1], type, mask); if (IS_ERR(salg))
patches.fixes/0001-mpi-Fix-NULL-ptr-dereference-in-mpi_powm-ver-3.patch From f5527fffff3f002b0a6b376163613b82f69de073 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 24 Nov 2016 13:23:10 +0000 Subject: [PATCH] mpi: Fix NULL ptr dereference in mpi_powm() [ver #3] Git-commit: f5527fffff3f002b0a6b376163613b82f69de073 Patch-mainline: v4.9-rc7 References: bsc#1011820
This fixes CVE-2016-8650. If mpi_powm() is given a zero exponent, it wants to immediately return either 1 or 0, depending on the modulus. However, if the result was initialised with zero limb space, no limb space is allocated and a NULL-pointer exception ensues. Fix this by allocating a minimal amount of limb space for the result in the 0-exponent case when the result is 1, and not touching the limb space when the result is 0. This affects the use of RSA keys and X.509 certificates that carry them.
BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] mpi_powm+0x32/0x7e6 PGD 0 Oops: 0002 [#1] SMP Modules linked in: CPU: 3 PID: 3014 Comm: keyctl Not tainted 4.9.0-rc6-fscache+ #278 Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014 task: ffff8804011944c0 task.stack: ffff880401294000 RIP: 0010:[] [] mpi_powm+0x32/0x7e6 RSP: 0018:ffff880401297ad8 EFLAGS: 00010212 RAX: 0000000000000000 RBX: ffff88040868bec0 RCX: ffff88040868bba0 RDX: ffff88040868b260 RSI: ffff88040868bec0 RDI: ffff88040868bee0 RBP: ffff880401297ba8 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000047 R11: ffffffff8183b210 R12: 0000000000000000 R13: ffff8804087c7600 R14: 000000000000001f R15: ffff880401297c50 FS: 00007f7a7918c700(0000) GS:ffff88041fb80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000401250000 CR4: 00000000001406e0 Stack: ffff88040868bec0 0000000000000020 ffff880401297b00 ffffffff81376cd4 0000000000000100 ffff880401297b10 ffffffff81376d12 ffff880401297b30 ffffffff81376f37 0000000000000100 0000000000000000 ffff880401297ba8 Call Trace: [] ? __sg_page_iter_next+0x43/0x66 [] ? sg_miter_get_next_page+0x1b/0x5d [] ? sg_miter_next+0x17/0xbd [] ? mpi_read_raw_from_sgl+0xf2/0x146 [] rsa_verify+0x9d/0xee [] ?
pkcs1pad_sg_set_buf+0x2e/0xbb [] pkcs1pad_verify+0xc0/0xe1 [] public_key_verify_signature+0x1b0/0x228 [] x509_check_for_self_signed+0xa1/0xc4 [] x509_cert_parse+0x167/0x1a1 [] x509_key_preparse+0x21/0x1a1 [] asymmetric_key_preparse+0x34/0x61 [] key_create_or_update+0x145/0x399 [] SyS_add_key+0x154/0x19e [] do_syscall_64+0x80/0x191 [] entry_SYSCALL64_slow_path+0x25/0x25 Code: 56 41 55 41 54 53 48 81 ec a8 00 00 00 44 8b 71 04 8b 42 04 4c 8b 67 18 45 85 f6 89 45 80 0f 84 b4 06 00 00 85 c0 75 2f 41 ff ce <49> c7 04 24 01 00 00 00 b0 01 75 0b 48 8b 41 18 48 83 38 01 0f RIP [] mpi_powm+0x32/0x7e6 RSP CR2: 0000000000000000 ---[ end trace d82015255d4a5d8d ]--- Basically, this is a backport of a libgcrypt patch: http://git.gnupg.org/cgi-bin/gitweb.cgi?p=libgcrypt.git;a=patch;h=6e1adb05d290aeeb1c230c763970695f4a538526 Fixes: cdec9cb5167a ("crypto: GnuPG based MPI lib - source files (part 1)") Signed-off-by: Andrey Ryabinin Signed-off-by: David Howells cc: Dmitry Kasatkin cc: linux-ima-devel@lists.sourceforge.net cc: stable@vger.kernel.org Signed-off-by: James Morris Acked-by: Lee, Chun-Yi --- lib/mpi/mpi-pow.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) --- a/lib/mpi/mpi-pow.c +++ b/lib/mpi/mpi-pow.c @@ -64,8 +64,13 @@ int mpi_powm(MPI res, MPI base, MPI exp, if (!esize) { /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 * depending on if MOD equals 1. */ - rp[0] = 1; res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1; + if (res->nlimbs) { + if (mpi_resize(res, 1) < 0) + goto enomem; + rp = res->d; + rp[0] = 1; + } res->sign = 0; goto leave; } patches.fixes/perf-fix-race-in-swevent-hash.patch From: Peter Zijlstra Date: Tue Dec 15 13:49:05 2015 +0100 Subject: perf: Fix race in swevent hash Git-commit: 12ca6ad2e3a896256f086497a7c7406a547ee373 Patch-mainline: v4.4 References: bsc#1010502, CVE-2015-8963 Signed-off-by: Tony Jones perf: Fix race in swevent hash There's a race on CPU unplug where we free the swevent hash array while it can still have events on. This will result in a use-after-free which is BAD. Simply do not free the hash array on unplug. This leaves the thing around and no use-after-free takes place. When the last swevent dies, we do a for_each_possible_cpu() iteration anyway to clean these up, at which time we'll free it, so no leakage will occur. Reported-by: Sasha Levin Tested-by: Sasha Levin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar diff --git a/kernel/events/core.c b/kernel/events/core.c index fd7de04..0a791a2 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6488,9 +6488,6 @@ struct swevent_htable { /* Recursion avoidance in each contexts */ int recursion[PERF_NR_CONTEXTS]; - - /* Keeps track of cpu being initialized/exited */ - bool online; }; static DEFINE_PER_CPU(struct swevent_htable, swevent_htable); @@ -6748,14 +6745,8 @@ static int perf_swevent_add(struct perf_event *event, int flags) hwc->state = !(flags & PERF_EF_START); head = find_swevent_head(swhash, event); - if (!head) { - /* - * We can race with cpu hotplug code. Do not - * WARN if the cpu just got unplugged. 
- */ - WARN_ON_ONCE(swhash->online); + if (WARN_ON_ONCE(!head)) return -EINVAL; - } hlist_add_head_rcu(&event->hlist_entry, head); perf_event_update_userpage(event); @@ -6823,7 +6814,6 @@ static int swevent_hlist_get_cpu(struct perf_event *event, int cpu) int err = 0; mutex_lock(&swhash->hlist_mutex); - if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) { struct swevent_hlist *hlist; @@ -9286,7 +9276,6 @@ static void perf_event_init_cpu(int cpu) struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); mutex_lock(&swhash->hlist_mutex); - swhash->online = true; if (swhash->hlist_refcount > 0) { struct swevent_hlist *hlist; @@ -9328,14 +9317,7 @@ static void perf_event_exit_cpu_context(int cpu) static void perf_event_exit_cpu(int cpu) { - struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); - perf_event_exit_cpu_context(cpu); - - mutex_lock(&swhash->hlist_mutex); - swhash->online = false; - swevent_hlist_release(swhash); - mutex_unlock(&swhash->hlist_mutex); } #else static inline void perf_event_exit_cpu(int cpu) { }
patches.fixes/kvm-x86-Check-memopp-before-dereference-CVE-2016-863 From d9092f52d7e61dd1557f2db2400ddb430e85937e Mon Sep 17 00:00:00 2001 From: Owen Hofmann Date: Thu, 27 Oct 2016 11:25:52 -0700 Subject: [PATCH] kvm: x86: Check memopp before dereference (CVE-2016-8630) Git-commit: d9092f52d7e61dd1557f2db2400ddb430e85937e Patch-mainline: 4.9-rc4 References: CVE-2016-8630,bsc#1009222
Commit 41061cdb98 ("KVM: emulate: do not initialize memopp") removes a check for non-NULL under incorrect assumptions. An undefined instruction with a ModR/M byte with Mod=0 and R/M=5 (e.g. 0xc7 0x15) will attempt to dereference a null pointer here.
Fixes: 41061cdb98a0bec464278b4db8e894a3121671f5 Message-id: <1477592752-126650-2-git-send-email-osh@google.com> Signed-off-by: Owen Hofmann Signed-off-by: Paolo Bonzini Acked-by: Takashi Iwai
--- arch/x86/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4732,7 +4732,7 @@ done_prefixes: /* Decode and fetch the destination operand: register or memory. */ rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask); - if (ctxt->rip_relative) + if (ctxt->rip_relative && likely(ctxt->memopp)) ctxt->memopp->addr.mem.ea = address_mask(ctxt, ctxt->memopp->addr.mem.ea + ctxt->_eip);
patches.fixes/kvm-x86-drop-error-recovery-in-em_jmp_far-and-em_ret_far.patch From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 23 Nov 2016 21:15:00 +0100 Subject: KVM: x86: drop error recovery in em_jmp_far and em_ret_far MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Git-commit: 2117d5398c81554fbf803f5fd1dc55eb78216c0c Patch-mainline: v4.9-rc7 References: bsc#1013038 CVE-2016-9756
em_jmp_far and em_ret_far assumed that setting IP can only fail in 64-bit mode, but syzkaller proved otherwise (and the SDM agrees). The code segment was restored upon failure, but it was left uninitialized outside of long mode, which could lead to a leak of host kernel stack. We could have fixed that by always saving and restoring the CS, but we take a simpler approach and just break any guest that manages to fail, as the error recovery is error-prone and modern CPUs don't need the emulator for this.
Found by syzkaller: WARNING: CPU: 2 PID: 3668 at arch/x86/kvm/emulate.c:2217 em_ret_far+0x428/0x480 Kernel panic - not syncing: panic_on_warn set ...
CPU: 2 PID: 3668 Comm: syz-executor Not tainted 4.9.0-rc4+ #49 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 [...] Call Trace: [...] __dump_stack lib/dump_stack.c:15 [...] dump_stack+0xb3/0x118 lib/dump_stack.c:51 [...] panic+0x1b7/0x3a3 kernel/panic.c:179 [...] __warn+0x1c4/0x1e0 kernel/panic.c:542 [...] warn_slowpath_null+0x2c/0x40 kernel/panic.c:585 [...] em_ret_far+0x428/0x480 arch/x86/kvm/emulate.c:2217 [...] em_ret_far_imm+0x17/0x70 arch/x86/kvm/emulate.c:2227 [...] x86_emulate_insn+0x87a/0x3730 arch/x86/kvm/emulate.c:5294 [...] x86_emulate_instruction+0x520/0x1ba0 arch/x86/kvm/x86.c:5545 [...] emulate_instruction arch/x86/include/asm/kvm_host.h:1116 [...] complete_emulated_io arch/x86/kvm/x86.c:6870 [...] complete_emulated_mmio+0x4e9/0x710 arch/x86/kvm/x86.c:6934 [...] kvm_arch_vcpu_ioctl_run+0x3b7a/0x5a90 arch/x86/kvm/x86.c:6978 [...] kvm_vcpu_ioctl+0x61e/0xdd0 arch/x86/kvm/../../../virt/kvm/kvm_main.c:2557 [...] vfs_ioctl fs/ioctl.c:43 [...] do_vfs_ioctl+0x18c/0x1040 fs/ioctl.c:679 [...] SYSC_ioctl fs/ioctl.c:694 [...] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:685 [...] entry_SYSCALL_64_fastpath+0x1f/0xc2 Reported-by: Dmitry Vyukov Cc: stable@vger.kernel.org Fixes: d1442d85cc30 ("KVM: x86: Handle errors when RIP is set during far jumps") Signed-off-by: Radim Krčmář Acked-by: Borislav Petkov --- arch/x86/kvm/emulate.c | 36 +++++++++++------------------------- 1 file changed, 11 insertions(+), 25 deletions(-) --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2096,16 +2096,10 @@ static int em_iret(struct x86_emulate_ct static int em_jmp_far(struct x86_emulate_ctxt *ctxt) { int rc; - unsigned short sel, old_sel; - struct desc_struct old_desc, new_desc; - const struct x86_emulate_ops *ops = ctxt->ops; + unsigned short sel; + struct desc_struct new_desc; u8 cpl = ctxt->ops->cpl(ctxt); - /* Assignment of RIP may only fail in 64-bit mode */ - if (ctxt->mode == X86EMUL_MODE_PROT64) - ops->get_segment(ctxt, &old_sel, &old_desc, NULL, - VCPU_SREG_CS); - memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2); rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, @@ -2115,12 +2109,10 @@ static int em_jmp_far(struct x86_emulate return rc; rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc); - if (rc != X86EMUL_CONTINUE) { - WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64); - /* assigning eip failed; restore the old cs */ - ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS); - return rc; - } + /* Error handling is not implemented. */ + if (rc != X86EMUL_CONTINUE) + return X86EMUL_UNHANDLEABLE; + return rc; } @@ -2180,14 +2172,8 @@ static int em_ret_far(struct x86_emulate { int rc; unsigned long eip, cs; - u16 old_cs; int cpl = ctxt->ops->cpl(ctxt); - struct desc_struct old_desc, new_desc; - const struct x86_emulate_ops *ops = ctxt->ops; - - if (ctxt->mode == X86EMUL_MODE_PROT64) - ops->get_segment(ctxt, &old_cs, &old_desc, NULL, - VCPU_SREG_CS); + struct desc_struct new_desc; rc = emulate_pop(ctxt, &eip, ctxt->op_bytes); if (rc != X86EMUL_CONTINUE) @@ -2204,10 +2190,10 @@ static int em_ret_far(struct x86_emulate if (rc != X86EMUL_CONTINUE) return rc; rc = assign_eip_far(ctxt, eip, &new_desc); - if (rc != X86EMUL_CONTINUE) { - WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64); - ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS); - } + /* Error handling is not implemented. 
*/ + if (rc != X86EMUL_CONTINUE) + return X86EMUL_UNHANDLEABLE; + return rc; }
patches.fixes/kvm-x86-introduce-segmented_write_std From: Steve Rutherford Date: Wed, 11 Jan 2017 18:28:29 -0800 Subject: KVM: x86: Introduce segmented_write_std Git-commit: 129a72a0d3c8e139a04512325384fe5ac119e74d Patch-mainline: v4.10-rc4 References: CVE-2017-2584 bsc#1019851
Introduces segmented_write_std. Switches from emulated reads/writes to standard read/writes in fxsave, fxrstor, sgdt, and sidt. This fixes CVE-2017-2584, a longstanding kernel memory leak. Since commit 283c95d0e389 ("KVM: x86: emulate FXSAVE and FXRSTOR", 2016-11-09), which is luckily not yet in any final release, this would also be an exploitable kernel memory *write*!
Reported-by: Dmitry Vyukov Cc: stable@vger.kernel.org Fixes: 96051572c819194c37a8367624b285be10297eca Fixes: 283c95d0e3891b64087706b344a4b545d04a6e62 Suggested-by: Paolo Bonzini Signed-off-by: Steve Rutherford Signed-off-by: Paolo Bonzini Acked-by: Joerg Roedel
--- arch/x86/kvm/emulate.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -782,6 +782,20 @@ static int segmented_read_std(struct x86 return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception); } +static int segmented_write_std(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + void *data, + unsigned int size) +{ + int rc; + ulong linear; + + rc = linearize(ctxt, addr, size, true, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception); +} + /* * Prefetch the remaining bytes of the instruction without crossing page * boundary if they are not in fetch_cache yet. @@ -3345,8 +3359,8 @@ static int emulate_store_desc_ptr(struct } /* Disable writeback. */ ctxt->dst.type = OP_NONE; - return segmented_write(ctxt, ctxt->dst.addr.mem, - &desc_ptr, 2 + ctxt->op_bytes); + return segmented_write_std(ctxt, ctxt->dst.addr.mem, + &desc_ptr, 2 + ctxt->op_bytes); } static int em_sgdt(struct x86_emulate_ctxt *ctxt)
patches.fixes/kvm-x86-fix-emulation-of-mov-ss-null-selector From: Paolo Bonzini Date: Thu, 12 Jan 2017 15:02:32 +0100 Subject: KVM: x86: fix emulation of "MOV SS, null selector" Git-commit: 33ab91103b3415e12457e3104f0e4517ce12d0f3 Patch-mainline: v4.10-rc4 References: CVE-2017-2583 bsc#1020602
This is CVE-2017-2583. On Intel this causes a failed vmentry because SS's type is neither 3 nor 7 (even though the manual says this check is only done for usable SS, and the dmesg splat says that SS is unusable!). On AMD it's worse: svm.c is confused and sets CPL to 0 in the vmcb.
The fix fabricates a data segment descriptor when SS is set to a null selector, so that CPL and SS.DPL are set correctly in the VMCS/vmcb. Furthermore, only allow setting SS to a NULL selector if SS.RPL < 3; this in turn ensures CPL < 3 because RPL must be equal to CPL.
Thanks to Andy Lutomirski and Willy Tarreau for help in analyzing the bug and deciphering the manuals.
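The two rules the patch adds, refusing a null SS at CPL 3 and fabricating a plausible data-segment descriptor when a null SS is legal, can be sketched as follows. This is a simplified illustration under assumed names, not KVM's desc_struct or its segment loader:

/* toy_null_ss.c - sketch of the null-SS policy described above. */
#include <stdbool.h>
#include <stdio.h>

struct toy_seg_desc {
	unsigned type : 4;	/* 3 = read/write, accessed data segment */
	unsigned s : 1;		/* code/data (not system) */
	unsigned dpl : 2;	/* descriptor privilege level */
	unsigned p : 1;		/* present */
	unsigned d : 1;		/* 32-bit */
	unsigned g : 1;		/* 4K granularity */
};

/* Returns false (i.e. #GP in the real emulator) unless the null SS is
 * loaded in long mode with RPL == CPL < 3; otherwise builds the fake
 * expand-up data segment so SS.DPL carries the CPL. */
static bool load_null_ss(bool long_mode, unsigned rpl, unsigned cpl,
			 struct toy_seg_desc *out)
{
	if (!long_mode || rpl != cpl || rpl == 3)
		return false;
	*out = (struct toy_seg_desc){
		.type = 3, .s = 1, .dpl = cpl, .p = 1, .d = 1, .g = 1,
	};
	return true;
}

int main(void)
{
	struct toy_seg_desc ss;

	printf("CPL0 null SS: %s\n",
	       load_null_ss(true, 0, 0, &ss) ? "loaded" : "#GP");
	printf("CPL3 null SS: %s\n",
	       load_null_ss(true, 3, 3, &ss) ? "loaded" : "#GP");
	return 0;
}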
Reported-by: Xiaohan Zhang Fixes: 79d5b4c3cd809c770d4bf9812635647016c56011 Cc: stable@nongnu.org Signed-off-by: Paolo Bonzini Acked-by: Joerg Roedel --- arch/x86/kvm/emulate.c | 48 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 10 deletions(-) --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1549,7 +1549,6 @@ static int write_segment_descriptor(stru &ctxt->exception); } -/* Does not support long mode */ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, u16 selector, int seg, u8 cpl, enum x86_transfer_type transfer, @@ -1586,20 +1585,34 @@ static int __load_segment_descriptor(str rpl = selector & 3; - /* NULL selector is not valid for TR, CS and SS (except for long mode) */ - if ((seg == VCPU_SREG_CS - || (seg == VCPU_SREG_SS - && (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)) - || seg == VCPU_SREG_TR) - && null_selector) - goto exception; - /* TR should be in GDT only */ if (seg == VCPU_SREG_TR && (selector & (1 << 2))) goto exception; - if (null_selector) /* for NULL selector skip all following checks */ + /* NULL selector is not valid for TR, CS and (except for long mode) SS */ + if (null_selector) { + if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR) + goto exception; + + if (seg == VCPU_SREG_SS) { + if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl) + goto exception; + + /* + * ctxt->ops->set_segment expects the CPL to be in + * SS.DPL, so fake an expand-up 32-bit data segment. + */ + seg_desc.type = 3; + seg_desc.p = 1; + seg_desc.s = 1; + seg_desc.dpl = cpl; + seg_desc.d = 1; + seg_desc.g = 1; + } + + /* Skip all following checks */ goto load; + } ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr); if (ret != X86EMUL_CONTINUE) @@ -1715,6 +1728,21 @@ static int load_segment_descriptor(struc u16 selector, int seg) { u8 cpl = ctxt->ops->cpl(ctxt); + + /* + * None of MOV, POP and LSS can load a NULL selector in CPL=3, but + * they can load it at CPL<3 (Intel's manual says only LSS can, + * but it's wrong). + * + * However, the Intel manual says that putting IST=1/DPL=3 in + * an interrupt gate will result in SS=3 (the AMD manual instead + * says it doesn't), so allow SS=3 in __load_segment_descriptor + * and only forbid it here. + */ + if (seg == VCPU_SREG_SS && selector == 3 && + ctxt->mode == X86EMUL_MODE_PROT64) + return emulate_exception(ctxt, GP_VECTOR, 0, true); + return __load_segment_descriptor(ctxt, selector, seg, cpl, X86_TRANSFER_NONE, NULL); }
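Returning to the segmented_write_std change above: its essence is to translate and bounds-check a segmented address first, and only then perform the raw write. A minimal userspace sketch of that linearize-then-write pattern, with an invented flat 64 KiB "guest memory" standing in for the real address space, might look like this:

/* toy_segwrite.c - sketch of the pattern behind segmented_write_std:
 * validate the address range before touching any data. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TOY_MEM_SIZE 65536
static uint8_t guest_mem[TOY_MEM_SIZE];

/* Stands in for linearize(): reject writes that would wrap or run past
 * the segment limit before any data is written. */
static int toy_linearize(uint32_t addr, uint32_t size, uint32_t *linear)
{
	if (size == 0 || addr >= TOY_MEM_SIZE || TOY_MEM_SIZE - addr < size)
		return -1;	/* #GP in the real emulator */
	*linear = addr;
	return 0;
}

/* Shape of segmented_write_std(): linearize, then the raw write. */
static int toy_write_std(uint32_t addr, const void *data, uint32_t size)
{
	uint32_t linear;

	if (toy_linearize(addr, size, &linear))
		return -1;
	memcpy(&guest_mem[linear], data, size);
	return 0;
}

int main(void)
{
	uint8_t fxsave_area[512] = { 0 };

	printf("in bounds:  %d\n", toy_write_std(1024, fxsave_area, 512));
	printf("past limit: %d\n",
	       toy_write_std(TOY_MEM_SIZE - 16, fxsave_area, 512));
	return 0;
}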