1 From: Shaohua Li <shli@fb.com>
3 Basically this is a copy of commit 001e4a8775f6 (ext4: implement cgroup
4 writeback support). Tested with a fio test, verified writeback is
5 throttled against cgroup io.max write bandwidth, also verified moving
6 the fio test to another cgroup and the writeback is throttled against
9 Cc: Tejun Heo <tj@kernel.org>
10 Signed-off-by: Shaohua Li <shli@fb.com>
12 fs/xfs/xfs_aops.c | 2 ++
13 fs/xfs/xfs_super.c | 1 +
14 2 files changed, 3 insertions(+)
16 --- linux-4.19/fs/xfs/xfs_aops.c.org 2018-11-21 10:31:12.348955352 +0100
17 +++ linux-4.19/fs/xfs/xfs_aops.c 2018-11-21 10:34:35.241764742 +0100
18 @@ -613,8 +613,10 @@ xfs_add_to_ioend(
19 list_add(&wpc->ioend->io_list, iolist);
20 wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset,
22 + wbc_init_bio(wbc, wpc->ioend->io_bio);
25 + wbc_account_io(wbc, page, len);
26 if (!__bio_try_merge_page(wpc->ioend->io_bio, page, len, poff)) {
28 atomic_inc(&iop->write_count);
29 diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
30 index 584cf2d..aea3bc2 100644
31 --- a/fs/xfs/xfs_super.c
32 +++ b/fs/xfs/xfs_super.c
33 @@ -1634,6 +1634,7 @@ xfs_fs_fill_super(
34 sb->s_max_links = XFS_MAXLINK;
36 set_posix_acl_flag(sb);
37 + sb->s_iflags |= SB_I_CGROUPWB;
39 /* version 5 superblocks support inode version counters. */
40 if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
41 From e820d55cb99dd93ac2dc949cf486bb187e5cd70d Mon Sep 17 00:00:00 2001
42 From: Guoqing Jiang <gqjiang@suse.com>
43 Date: Wed, 19 Dec 2018 14:19:25 +0800
44 Subject: md: fix raid10 hang issue caused by barrier
46 Content-Type: text/plain; charset=UTF-8
47 Content-Transfer-Encoding: 8bit
49 When both regular IO and resync IO happen at the same time,
50 and we also need to split the regular IO, then we can see tasks
54 [ 1463.757205] INFO: task md1_resync:5215 blocked for more than 480 seconds.
55 [ 1463.757207] Not tainted 4.19.5-1-default #1
56 [ 1463.757209] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
57 [ 1463.757212] md1_resync D 0 5215 2 0x80000000
58 [ 1463.757216] Call Trace:
59 [ 1463.757223] ? __schedule+0x29a/0x880
60 [ 1463.757231] ? raise_barrier+0x8d/0x140 [raid10]
61 [ 1463.757236] schedule+0x78/0x110
62 [ 1463.757243] raise_barrier+0x8d/0x140 [raid10]
63 [ 1463.757248] ? wait_woken+0x80/0x80
64 [ 1463.757257] raid10_sync_request+0x1f6/0x1e30 [raid10]
65 [ 1463.757265] ? _raw_spin_unlock_irq+0x22/0x40
66 [ 1463.757284] ? is_mddev_idle+0x125/0x137 [md_mod]
67 [ 1463.757302] md_do_sync.cold.78+0x404/0x969 [md_mod]
68 [ 1463.757311] ? wait_woken+0x80/0x80
69 [ 1463.757336] ? md_rdev_init+0xb0/0xb0 [md_mod]
70 [ 1463.757351] md_thread+0xe9/0x140 [md_mod]
71 [ 1463.757358] ? _raw_spin_unlock_irqrestore+0x2e/0x60
72 [ 1463.757364] ? __kthread_parkme+0x4c/0x70
73 [ 1463.757369] kthread+0x112/0x130
74 [ 1463.757374] ? kthread_create_worker_on_cpu+0x40/0x40
75 [ 1463.757380] ret_from_fork+0x3a/0x50
78 [ 1463.760679] INFO: task kworker/0:8:5367 blocked for more than 480 seconds.
79 [ 1463.760683] Not tainted 4.19.5-1-default #1
80 [ 1463.760684] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
81 [ 1463.760687] kworker/0:8 D 0 5367 2 0x80000000
82 [ 1463.760718] Workqueue: md submit_flushes [md_mod]
83 [ 1463.760721] Call Trace:
84 [ 1463.760731] ? __schedule+0x29a/0x880
85 [ 1463.760741] ? wait_barrier+0xdd/0x170 [raid10]
86 [ 1463.760746] schedule+0x78/0x110
87 [ 1463.760753] wait_barrier+0xdd/0x170 [raid10]
88 [ 1463.760761] ? wait_woken+0x80/0x80
89 [ 1463.760768] raid10_write_request+0xf2/0x900 [raid10]
90 [ 1463.760774] ? wait_woken+0x80/0x80
91 [ 1463.760778] ? mempool_alloc+0x55/0x160
92 [ 1463.760795] ? md_write_start+0xa9/0x270 [md_mod]
93 [ 1463.760801] ? try_to_wake_up+0x44/0x470
94 [ 1463.760810] raid10_make_request+0xc1/0x120 [raid10]
95 [ 1463.760816] ? wait_woken+0x80/0x80
96 [ 1463.760831] md_handle_request+0x121/0x190 [md_mod]
97 [ 1463.760851] md_make_request+0x78/0x190 [md_mod]
98 [ 1463.760860] generic_make_request+0x1c6/0x470
99 [ 1463.760870] raid10_write_request+0x77a/0x900 [raid10]
100 [ 1463.760875] ? wait_woken+0x80/0x80
101 [ 1463.760879] ? mempool_alloc+0x55/0x160
102 [ 1463.760895] ? md_write_start+0xa9/0x270 [md_mod]
103 [ 1463.760904] raid10_make_request+0xc1/0x120 [raid10]
104 [ 1463.760910] ? wait_woken+0x80/0x80
105 [ 1463.760926] md_handle_request+0x121/0x190 [md_mod]
106 [ 1463.760931] ? _raw_spin_unlock_irq+0x22/0x40
107 [ 1463.760936] ? finish_task_switch+0x74/0x260
108 [ 1463.760954] submit_flushes+0x21/0x40 [md_mod]
110 So resync io is waiting for regular write io to complete to
111 decrease nr_pending (conf->barrier++ is called before waiting).
112 The regular write io splits another bio after calling wait_barrier
113 which increments nr_pending, then the split bio would continue
114 with raid10_write_request -> wait_barrier, so the split bio
115 has to wait for barrier to be zero, then deadlock happens as
125 To resolve the issue, we need to call allow_barrier to decrease
126 nr_pending before generic_make_request since regular IO is not
127 issued to underlying devices, and wait_barrier is called again
128 to ensure no internal IO is happening.
130 Fixes: fc9977dd069e ("md/raid10: simplify the splitting of requests.")
131 Reported-and-tested-by: Siniša Bandin <sinisa@4net.rs>
132 Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
133 Signed-off-by: Shaohua Li <shli@fb.com>
135 drivers/md/raid10.c | 4 ++++
136 1 file changed, 4 insertions(+)
138 diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
139 index 76c92e31afc0..abb5d382f64d 100644
140 --- a/drivers/md/raid10.c
141 +++ b/drivers/md/raid10.c
142 @@ -1209,7 +1209,9 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
143 struct bio *split = bio_split(bio, max_sectors,
144 gfp, &conf->bio_split);
145 bio_chain(split, bio);
146 + allow_barrier(conf);
147 generic_make_request(bio);
148 + wait_barrier(conf);
150 r10_bio->master_bio = bio;
151 r10_bio->sectors = max_sectors;
152 @@ -1492,7 +1494,9 @@ retry_write:
153 struct bio *split = bio_split(bio, r10_bio->sectors,
154 GFP_NOIO, &conf->bio_split);
155 bio_chain(split, bio);
156 + allow_barrier(conf);
157 generic_make_request(bio);
158 + wait_barrier(conf);
160 r10_bio->master_bio = bio;