exit
fi
done
+From e820d55cb99dd93ac2dc949cf486bb187e5cd70d Mon Sep 17 00:00:00 2001
+From: Guoqing Jiang <gqjiang@suse.com>
+Date: Wed, 19 Dec 2018 14:19:25 +0800
+Subject: md: fix raid10 hang issue caused by barrier
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
-From 84ac7260236a49c79eede91617700174c2c19b0c Mon Sep 17 00:00:00 2001
-From: Philip Pettersson <philip.pettersson@gmail.com>
-Date: Wed, 30 Nov 2016 14:55:36 -0800
-Subject: packet: fix race condition in packet_set_ring
+When regular IO and resync IO happen at the same time, and
+the regular IO also needs to be split, we can see tasks
+hang due to the barrier.
-When packet_set_ring creates a ring buffer it will initialize a
-struct timer_list if the packet version is TPACKET_V3. This value
-can then be raced by a different thread calling setsockopt to
-set the version to TPACKET_V1 before packet_set_ring has finished.
+1. resync thread
+[ 1463.757205] INFO: task md1_resync:5215 blocked for more than 480 seconds.
+[ 1463.757207] Not tainted 4.19.5-1-default #1
+[ 1463.757209] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+[ 1463.757212] md1_resync D 0 5215 2 0x80000000
+[ 1463.757216] Call Trace:
+[ 1463.757223] ? __schedule+0x29a/0x880
+[ 1463.757231] ? raise_barrier+0x8d/0x140 [raid10]
+[ 1463.757236] schedule+0x78/0x110
+[ 1463.757243] raise_barrier+0x8d/0x140 [raid10]
+[ 1463.757248] ? wait_woken+0x80/0x80
+[ 1463.757257] raid10_sync_request+0x1f6/0x1e30 [raid10]
+[ 1463.757265] ? _raw_spin_unlock_irq+0x22/0x40
+[ 1463.757284] ? is_mddev_idle+0x125/0x137 [md_mod]
+[ 1463.757302] md_do_sync.cold.78+0x404/0x969 [md_mod]
+[ 1463.757311] ? wait_woken+0x80/0x80
+[ 1463.757336] ? md_rdev_init+0xb0/0xb0 [md_mod]
+[ 1463.757351] md_thread+0xe9/0x140 [md_mod]
+[ 1463.757358] ? _raw_spin_unlock_irqrestore+0x2e/0x60
+[ 1463.757364] ? __kthread_parkme+0x4c/0x70
+[ 1463.757369] kthread+0x112/0x130
+[ 1463.757374] ? kthread_create_worker_on_cpu+0x40/0x40
+[ 1463.757380] ret_from_fork+0x3a/0x50
-This leads to a use-after-free on a function pointer in the
-struct timer_list when the socket is closed as the previously
-initialized timer will not be deleted.
+2. regular IO
+[ 1463.760679] INFO: task kworker/0:8:5367 blocked for more than 480 seconds.
+[ 1463.760683] Not tainted 4.19.5-1-default #1
+[ 1463.760684] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+[ 1463.760687] kworker/0:8 D 0 5367 2 0x80000000
+[ 1463.760718] Workqueue: md submit_flushes [md_mod]
+[ 1463.760721] Call Trace:
+[ 1463.760731] ? __schedule+0x29a/0x880
+[ 1463.760741] ? wait_barrier+0xdd/0x170 [raid10]
+[ 1463.760746] schedule+0x78/0x110
+[ 1463.760753] wait_barrier+0xdd/0x170 [raid10]
+[ 1463.760761] ? wait_woken+0x80/0x80
+[ 1463.760768] raid10_write_request+0xf2/0x900 [raid10]
+[ 1463.760774] ? wait_woken+0x80/0x80
+[ 1463.760778] ? mempool_alloc+0x55/0x160
+[ 1463.760795] ? md_write_start+0xa9/0x270 [md_mod]
+[ 1463.760801] ? try_to_wake_up+0x44/0x470
+[ 1463.760810] raid10_make_request+0xc1/0x120 [raid10]
+[ 1463.760816] ? wait_woken+0x80/0x80
+[ 1463.760831] md_handle_request+0x121/0x190 [md_mod]
+[ 1463.760851] md_make_request+0x78/0x190 [md_mod]
+[ 1463.760860] generic_make_request+0x1c6/0x470
+[ 1463.760870] raid10_write_request+0x77a/0x900 [raid10]
+[ 1463.760875] ? wait_woken+0x80/0x80
+[ 1463.760879] ? mempool_alloc+0x55/0x160
+[ 1463.760895] ? md_write_start+0xa9/0x270 [md_mod]
+[ 1463.760904] raid10_make_request+0xc1/0x120 [raid10]
+[ 1463.760910] ? wait_woken+0x80/0x80
+[ 1463.760926] md_handle_request+0x121/0x190 [md_mod]
+[ 1463.760931] ? _raw_spin_unlock_irq+0x22/0x40
+[ 1463.760936] ? finish_task_switch+0x74/0x260
+[ 1463.760954] submit_flushes+0x21/0x40 [md_mod]
-The bug is fixed by taking lock_sock(sk) in packet_setsockopt when
-changing the packet version while also taking the lock at the start
-of packet_set_ring.
+So resync io is waiting for regular write io to complete to
+decrease nr_pending (conf->barrier++ is called before waiting).
+The regular write io splits off another bio after calling
+wait_barrier, which does nr_pending++; the split bio then continues
+with raid10_write_request -> wait_barrier, so the split bio
+has to wait for barrier to be zero, and a deadlock happens as
+follows.
-Fixes: f6fb8f100b80 ("af-packet: TPACKET_V3 flexible buffer implementation.")
-Signed-off-by: Philip Pettersson <philip.pettersson@gmail.com>
-Signed-off-by: Eric Dumazet <edumazet@google.com>
-Signed-off-by: David S. Miller <davem@davemloft.net>
+        resync io               regular io
+
+        raise_barrier
+                                wait_barrier
+                                generic_make_request
+                                wait_barrier
+
+To resolve the issue, we need to call allow_barrier to decrease
+nr_pending before generic_make_request since regular IO is not
+issued to underlying devices, and wait_barrier is called again
+to ensure no internal IO is happening.
+
+Fixes: fc9977dd069e ("md/raid10: simplify the splitting of requests.")
+Reported-and-tested-by: Siniša Bandin <sinisa@4net.rs>
+Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
+Signed-off-by: Shaohua Li <shli@fb.com>
---
- net/packet/af_packet.c | 18 ++++++++++++------
- 1 file changed, 12 insertions(+), 6 deletions(-)
+ drivers/md/raid10.c | 4 ++++
+ 1 file changed, 4 insertions(+)
-diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
-index d2238b2..dd23323 100644
---- a/net/packet/af_packet.c
-+++ b/net/packet/af_packet.c
-@@ -3648,19 +3648,25 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
-
- if (optlen != sizeof(val))
- return -EINVAL;
-- if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
-- return -EBUSY;
- if (copy_from_user(&val, optval, sizeof(val)))
- return -EFAULT;
- switch (val) {
- case TPACKET_V1:
- case TPACKET_V2:
- case TPACKET_V3:
-- po->tp_version = val;
-- return 0;
-+ break;
- default:
- return -EINVAL;
- }
-+ lock_sock(sk);
-+ if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
-+ ret = -EBUSY;
-+ } else {
-+ po->tp_version = val;
-+ ret = 0;
-+ }
-+ release_sock(sk);
-+ return ret;
- }
- case PACKET_RESERVE:
- {
-@@ -4164,6 +4170,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
- /* Added to avoid minimal code churn */
- struct tpacket_req *req = &req_u->req;
-
-+ lock_sock(sk);
- /* Opening a Tx-ring is NOT supported in TPACKET_V3 */
- if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) {
- net_warn_ratelimited("Tx-ring is not supported.\n");
-@@ -4245,7 +4252,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
- goto out;
+diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
+index 76c92e31afc0..abb5d382f64d 100644
+--- a/drivers/md/raid10.c
++++ b/drivers/md/raid10.c
+@@ -1209,7 +1209,9 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
+ struct bio *split = bio_split(bio, max_sectors,
+ gfp, &conf->bio_split);
+ bio_chain(split, bio);
++ allow_barrier(conf);
+ generic_make_request(bio);
++ wait_barrier(conf);
+ bio = split;
+ r10_bio->master_bio = bio;
+ r10_bio->sectors = max_sectors;
+@@ -1492,7 +1494,9 @@ retry_write:
+ struct bio *split = bio_split(bio, r10_bio->sectors,
+ GFP_NOIO, &conf->bio_split);
+ bio_chain(split, bio);
++ allow_barrier(conf);
+ generic_make_request(bio);
++ wait_barrier(conf);
+ bio = split;
+ r10_bio->master_bio = bio;
}
-
-- lock_sock(sk);
-
- /* Detach socket from network */
- spin_lock(&po->bind_lock);
-@@ -4294,11 +4300,11 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
- if (!tx_ring)
- prb_shutdown_retire_blk_timer(po, rb_queue);
- }
-- release_sock(sk);
-
- if (pg_vec)
- free_pg_vec(pg_vec, order, req->tp_block_nr);
- out:
-+ release_sock(sk);
- return err;
- }
-
--
-cgit v0.12
+cgit 1.2-0.3.lf.el7
+--- linux-4.14/security/selinux/include/classmap.h 2017-11-12 19:46:13.000000000 +0100
++++ linux-4.20/security/selinux/include/classmap.h 2018-12-24 00:55:59.000000000 +0100
+@@ -238,9 +238,11 @@
+ { "access", NULL } },
+ { "infiniband_endport",
+ { "manage_subnet", NULL } },
++ { "xdp_socket",
++ { COMMON_SOCK_PERMS, NULL } },
+ { NULL }
+ };
+
+-#if PF_MAX > 44
++#if PF_MAX > 45
+ #error New address family defined, please update secclass_map.
+ #endif