From: Jan Rękorajski Date: Wed, 13 Feb 2019 07:40:36 +0000 (+0100) Subject: - 4.20.8 X-Git-Tag: auto/th/kernel-4.20.8-1 X-Git-Url: http://git.pld-linux.org/?p=packages%2Fkernel.git;a=commitdiff_plain;h=025d656026163bb2c81c59dd06542a20f2586b83 - 4.20.8 --- diff --git a/kernel-small_fixes.patch b/kernel-small_fixes.patch index 1398e42d..b97c7aaf 100644 --- a/kernel-small_fixes.patch +++ b/kernel-small_fixes.patch @@ -38,128 +38,3 @@ index 584cf2d..aea3bc2 100644 /* version 5 superblocks support inode version counters. */ if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5) -From e820d55cb99dd93ac2dc949cf486bb187e5cd70d Mon Sep 17 00:00:00 2001 -From: Guoqing Jiang -Date: Wed, 19 Dec 2018 14:19:25 +0800 -Subject: md: fix raid10 hang issue caused by barrier -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -When both regular IO and resync IO happen at the same time, -and if we also need to split regular. Then we can see tasks -hang due to barrier. - -1. resync thread -[ 1463.757205] INFO: task md1_resync:5215 blocked for more than 480 seconds. -[ 1463.757207] Not tainted 4.19.5-1-default #1 -[ 1463.757209] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. -[ 1463.757212] md1_resync D 0 5215 2 0x80000000 -[ 1463.757216] Call Trace: -[ 1463.757223] ? __schedule+0x29a/0x880 -[ 1463.757231] ? raise_barrier+0x8d/0x140 [raid10] -[ 1463.757236] schedule+0x78/0x110 -[ 1463.757243] raise_barrier+0x8d/0x140 [raid10] -[ 1463.757248] ? wait_woken+0x80/0x80 -[ 1463.757257] raid10_sync_request+0x1f6/0x1e30 [raid10] -[ 1463.757265] ? _raw_spin_unlock_irq+0x22/0x40 -[ 1463.757284] ? is_mddev_idle+0x125/0x137 [md_mod] -[ 1463.757302] md_do_sync.cold.78+0x404/0x969 [md_mod] -[ 1463.757311] ? wait_woken+0x80/0x80 -[ 1463.757336] ? md_rdev_init+0xb0/0xb0 [md_mod] -[ 1463.757351] md_thread+0xe9/0x140 [md_mod] -[ 1463.757358] ? _raw_spin_unlock_irqrestore+0x2e/0x60 -[ 1463.757364] ? __kthread_parkme+0x4c/0x70 -[ 1463.757369] kthread+0x112/0x130 -[ 1463.757374] ? kthread_create_worker_on_cpu+0x40/0x40 -[ 1463.757380] ret_from_fork+0x3a/0x50 - -2. regular IO -[ 1463.760679] INFO: task kworker/0:8:5367 blocked for more than 480 seconds. -[ 1463.760683] Not tainted 4.19.5-1-default #1 -[ 1463.760684] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. -[ 1463.760687] kworker/0:8 D 0 5367 2 0x80000000 -[ 1463.760718] Workqueue: md submit_flushes [md_mod] -[ 1463.760721] Call Trace: -[ 1463.760731] ? __schedule+0x29a/0x880 -[ 1463.760741] ? wait_barrier+0xdd/0x170 [raid10] -[ 1463.760746] schedule+0x78/0x110 -[ 1463.760753] wait_barrier+0xdd/0x170 [raid10] -[ 1463.760761] ? wait_woken+0x80/0x80 -[ 1463.760768] raid10_write_request+0xf2/0x900 [raid10] -[ 1463.760774] ? wait_woken+0x80/0x80 -[ 1463.760778] ? mempool_alloc+0x55/0x160 -[ 1463.760795] ? md_write_start+0xa9/0x270 [md_mod] -[ 1463.760801] ? try_to_wake_up+0x44/0x470 -[ 1463.760810] raid10_make_request+0xc1/0x120 [raid10] -[ 1463.760816] ? wait_woken+0x80/0x80 -[ 1463.760831] md_handle_request+0x121/0x190 [md_mod] -[ 1463.760851] md_make_request+0x78/0x190 [md_mod] -[ 1463.760860] generic_make_request+0x1c6/0x470 -[ 1463.760870] raid10_write_request+0x77a/0x900 [raid10] -[ 1463.760875] ? wait_woken+0x80/0x80 -[ 1463.760879] ? mempool_alloc+0x55/0x160 -[ 1463.760895] ? md_write_start+0xa9/0x270 [md_mod] -[ 1463.760904] raid10_make_request+0xc1/0x120 [raid10] -[ 1463.760910] ? wait_woken+0x80/0x80 -[ 1463.760926] md_handle_request+0x121/0x190 [md_mod] -[ 1463.760931] ? _raw_spin_unlock_irq+0x22/0x40 -[ 1463.760936] ? finish_task_switch+0x74/0x260 -[ 1463.760954] submit_flushes+0x21/0x40 [md_mod] - -So resync io is waiting for regular write io to complete to -decrease nr_pending (conf->barrier++ is called before waiting). -The regular write io splits another bio after call wait_barrier -which call nr_pending++, then the splitted bio would continue -with raid10_write_request -> wait_barrier, so the splitted bio -has to wait for barrier to be zero, then deadlock happens as -follows. - - resync io regular io - - raise_barrier - wait_barrier - generic_make_request - wait_barrier - -To resolve the issue, we need to call allow_barrier to decrease -nr_pending before generic_make_request since regular IO is not -issued to underlying devices, and wait_barrier is called again -to ensure no internal IO happening. - -Fixes: fc9977dd069e ("md/raid10: simplify the splitting of requests.") -Reported-and-tested-by: SiniÅ¡a Bandin -Signed-off-by: Guoqing Jiang -Signed-off-by: Shaohua Li ---- - drivers/md/raid10.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c -index 76c92e31afc0..abb5d382f64d 100644 ---- a/drivers/md/raid10.c -+++ b/drivers/md/raid10.c -@@ -1209,7 +1209,9 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, - struct bio *split = bio_split(bio, max_sectors, - gfp, &conf->bio_split); - bio_chain(split, bio); -+ allow_barrier(conf); - generic_make_request(bio); -+ wait_barrier(conf); - bio = split; - r10_bio->master_bio = bio; - r10_bio->sectors = max_sectors; -@@ -1492,7 +1494,9 @@ retry_write: - struct bio *split = bio_split(bio, r10_bio->sectors, - GFP_NOIO, &conf->bio_split); - bio_chain(split, bio); -+ allow_barrier(conf); - generic_make_request(bio); -+ wait_barrier(conf); - bio = split; - r10_bio->master_bio = bio; - } --- -cgit 1.2-0.3.lf.el7 - - diff --git a/kernel.spec b/kernel.spec index 4f4bfe22..01daa610 100644 --- a/kernel.spec +++ b/kernel.spec @@ -69,7 +69,7 @@ %define rel 1 %define basever 4.20 -%define postver .7 +%define postver .8 # define this to '-%{basever}' for longterm branch %define versuffix %{nil} @@ -121,7 +121,7 @@ Source0: https://www.kernel.org/pub/linux/kernel/v4.x/linux-%{basever}.tar.xz # Source0-md5: d39dd4ba2d5861c54b90d49be19eaf31 %if "%{postver}" != ".0" Patch0: https://www.kernel.org/pub/linux/kernel/v4.x/patch-%{version}.xz -# Patch0-md5: ea5939ea08cf6f11964465ab884741d0 +# Patch0-md5: 737555b69c43ef91956733d89d7457a5 %endif Source1: kernel.sysconfig