kernel-small_fixes.patch

   1 --- linux-2.6.33/scripts/mod/modpost.c~ 2010-02-24 19:52:17.000000000 +0100
   2 +++ linux-2.6.33/scripts/mod/modpost.c  2010-03-07 14:26:47.242168558 +0100
   3 @@ -15,7 +15,8 @@
   4  #include <stdio.h>
   5  #include <ctype.h>
   6  #include "modpost.h"
   7 -#include "../../include/generated/autoconf.h"
   8 +// PLD architectures don't use CONFIG_SYMBOL_PREFIX
   9 +//#include "../../include/generated/autoconf.h"
  10  #include "../../include/linux/license.h"
  11
  12  /* Some toolchains use a `_' prefix for all user symbols. */
  13
  14 --- linux-3.0/scripts/kconfig/lxdialog/check-lxdialog.sh~       2011-07-22 04:17:23.000000000 +0200
  15 +++ linux-3.0/scripts/kconfig/lxdialog/check-lxdialog.sh        2011-08-25 21:26:04.799150642 +0200
  16 @@ -9,6 +9,12 @@
  17                         $cc -print-file-name=lib${lib}.${ext} | grep -q /
  18                         if [ $? -eq 0 ]; then
  19                                 echo "-l${lib}"
  20 +                               for libt in tinfow tinfo ; do
  21 +                                       $cc -print-file-name=lib${libt}.${ext} | grep -q /
  22 +                                       if [ $? -eq 0 ]; then
  23 +                                               echo "-l${libt}"
  24 +                                       fi
  25 +                               done
  26                                 exit
  27                         fi
  28                 done
  29 From e820d55cb99dd93ac2dc949cf486bb187e5cd70d Mon Sep 17 00:00:00 2001
  30 From: Guoqing Jiang <gqjiang@suse.com>
  31 Date: Wed, 19 Dec 2018 14:19:25 +0800
  32 Subject: md: fix raid10 hang issue caused by barrier
  33 MIME-Version: 1.0
  34 Content-Type: text/plain; charset=UTF-8
  35 Content-Transfer-Encoding: 8bit
  36
  37 When regular IO and resync IO happen at the same time, and the
  38 regular IO also needs to be split, tasks can hang due to the
  39 barrier.
  40
  41 1. resync thread
  42 [ 1463.757205] INFO: task md1_resync:5215 blocked for more than 480 seconds.
  43 [ 1463.757207]       Not tainted 4.19.5-1-default #1
  44 [ 1463.757209] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
  45 [ 1463.757212] md1_resync      D    0  5215      2 0x80000000
  46 [ 1463.757216] Call Trace:
  47 [ 1463.757223]  ? __schedule+0x29a/0x880
  48 [ 1463.757231]  ? raise_barrier+0x8d/0x140 [raid10]
  49 [ 1463.757236]  schedule+0x78/0x110
  50 [ 1463.757243]  raise_barrier+0x8d/0x140 [raid10]
  51 [ 1463.757248]  ? wait_woken+0x80/0x80
  52 [ 1463.757257]  raid10_sync_request+0x1f6/0x1e30 [raid10]
  53 [ 1463.757265]  ? _raw_spin_unlock_irq+0x22/0x40
  54 [ 1463.757284]  ? is_mddev_idle+0x125/0x137 [md_mod]
  55 [ 1463.757302]  md_do_sync.cold.78+0x404/0x969 [md_mod]
  56 [ 1463.757311]  ? wait_woken+0x80/0x80
  57 [ 1463.757336]  ? md_rdev_init+0xb0/0xb0 [md_mod]
  58 [ 1463.757351]  md_thread+0xe9/0x140 [md_mod]
  59 [ 1463.757358]  ? _raw_spin_unlock_irqrestore+0x2e/0x60
  60 [ 1463.757364]  ? __kthread_parkme+0x4c/0x70
  61 [ 1463.757369]  kthread+0x112/0x130
  62 [ 1463.757374]  ? kthread_create_worker_on_cpu+0x40/0x40
  63 [ 1463.757380]  ret_from_fork+0x3a/0x50
  64
  65 2. regular IO
  66 [ 1463.760679] INFO: task kworker/0:8:5367 blocked for more than 480 seconds.
  67 [ 1463.760683]       Not tainted 4.19.5-1-default #1
  68 [ 1463.760684] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
  69 [ 1463.760687] kworker/0:8     D    0  5367      2 0x80000000
  70 [ 1463.760718] Workqueue: md submit_flushes [md_mod]
  71 [ 1463.760721] Call Trace:
  72 [ 1463.760731]  ? __schedule+0x29a/0x880
  73 [ 1463.760741]  ? wait_barrier+0xdd/0x170 [raid10]
  74 [ 1463.760746]  schedule+0x78/0x110
  75 [ 1463.760753]  wait_barrier+0xdd/0x170 [raid10]
  76 [ 1463.760761]  ? wait_woken+0x80/0x80
  77 [ 1463.760768]  raid10_write_request+0xf2/0x900 [raid10]
  78 [ 1463.760774]  ? wait_woken+0x80/0x80
  79 [ 1463.760778]  ? mempool_alloc+0x55/0x160
  80 [ 1463.760795]  ? md_write_start+0xa9/0x270 [md_mod]
  81 [ 1463.760801]  ? try_to_wake_up+0x44/0x470
  82 [ 1463.760810]  raid10_make_request+0xc1/0x120 [raid10]
  83 [ 1463.760816]  ? wait_woken+0x80/0x80
  84 [ 1463.760831]  md_handle_request+0x121/0x190 [md_mod]
  85 [ 1463.760851]  md_make_request+0x78/0x190 [md_mod]
  86 [ 1463.760860]  generic_make_request+0x1c6/0x470
  87 [ 1463.760870]  raid10_write_request+0x77a/0x900 [raid10]
  88 [ 1463.760875]  ? wait_woken+0x80/0x80
  89 [ 1463.760879]  ? mempool_alloc+0x55/0x160
  90 [ 1463.760895]  ? md_write_start+0xa9/0x270 [md_mod]
  91 [ 1463.760904]  raid10_make_request+0xc1/0x120 [raid10]
  92 [ 1463.760910]  ? wait_woken+0x80/0x80
  93 [ 1463.760926]  md_handle_request+0x121/0x190 [md_mod]
  94 [ 1463.760931]  ? _raw_spin_unlock_irq+0x22/0x40
  95 [ 1463.760936]  ? finish_task_switch+0x74/0x260
  96 [ 1463.760954]  submit_flushes+0x21/0x40 [md_mod]
  97
  98 So resync IO is waiting for regular write IO to complete and
  99 decrease nr_pending (conf->barrier++ is called before waiting).
 100 The regular write IO splits off another bio after calling
 101 wait_barrier, which increments nr_pending; the split bio then
 102 continues with raid10_write_request -> wait_barrier, so it has
 103 to wait for barrier to drop to zero, and a deadlock happens as
 104 follows.
 105
 106         resync io               regular io
 107
 108         raise_barrier
 109                                 wait_barrier
 110                                 generic_make_request
 111                                 wait_barrier
 112
 113 To resolve the issue, call allow_barrier to decrease nr_pending
 114 before generic_make_request, since the regular IO has not been
 115 issued to the underlying devices, and call wait_barrier again
 116 afterwards to ensure no internal IO is happening.
 117
 118 Fixes: fc9977dd069e ("md/raid10: simplify the splitting of requests.")
 119 Reported-and-tested-by: Siniša Bandin <sinisa@4net.rs>
 120 Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
 121 Signed-off-by: Shaohua Li <shli@fb.com>
 122 ---
 123  drivers/md/raid10.c | 4 ++++
 124  1 file changed, 4 insertions(+)
 125
 126 diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
 127 index 76c92e31afc0..abb5d382f64d 100644
 128 --- a/drivers/md/raid10.c
 129 +++ b/drivers/md/raid10.c
 130 @@ -1209,7 +1209,9 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
 131                 struct bio *split = bio_split(bio, max_sectors,
 132                                               gfp, &conf->bio_split);
 133                 bio_chain(split, bio);
 134 +               allow_barrier(conf);
 135                 generic_make_request(bio);
 136 +               wait_barrier(conf);
 137                 bio = split;
 138                 r10_bio->master_bio = bio;
 139                 r10_bio->sectors = max_sectors;
 140 @@ -1492,7 +1494,9 @@ retry_write:
 141                 struct bio *split = bio_split(bio, r10_bio->sectors,
 142                                               GFP_NOIO, &conf->bio_split);
 143                 bio_chain(split, bio);
 144 +               allow_barrier(conf);
 145                 generic_make_request(bio);
 146 +               wait_barrier(conf);
 147                 bio = split;
 148                 r10_bio->master_bio = bio;
 149         }
 150 --
 151 cgit 1.2-0.3.lf.el7
 152