linux-2.4.21-andrea-9980_fix-pausing-5.patch
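What follows is, per the filename, revision 5 of Andrea Arcangeli's "fix-pausing" patch for kernel 2.4.21 (from the 2.4-aa series). It closes a family of sleep/wakeup races in the block layer and page cache that could leave a process stuck in __get_request_wait() or wait_on_buffer() until unrelated I/O happened to unplug the queue: request waiters become exclusive and unplug only after marking themselves asleep, blkdev_release_request() issues an smp_mb() before testing waitqueue_active(), __submit_bh() kicks tq_disk for late waiters while holding a buffer reference, and a new wakeup_page_waiters() helper re-runs sync_page() for pages whose buffers were just submitted. The x-ref/ and x/ path prefixes mean the patch applies with patch -p1 from the kernel source root.

The core idiom repeated throughout is: register as a sleeper, then re-check the condition; on the other side, update the condition, issue a full barrier, then test for sleepers. Below is a minimal user-space sketch of that idiom, assuming POSIX threads and C11 atomics; every name in it is invented for illustration and none of it is kernel API. A spurious wakeup here is harmless; a lost one would hang the waiter, which is exactly the "pausing" the patch fixes.

#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

static atomic_int done = 0;      /* stands in for q->rq[rw].count */
static atomic_int waiters = 0;   /* stands in for the wait queue contents */
static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t c = PTHREAD_COND_INITIALIZER;

static void *waiter(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&m);
        atomic_fetch_add(&waiters, 1);       /* like add_wait_queue() */
        /* re-check only after registering as a waiter, mirroring
         * set_current_state() before the q->rq[rw].count test */
        while (!atomic_load(&done))
                pthread_cond_wait(&c, &m);    /* like schedule() */
        atomic_fetch_sub(&waiters, 1);
        pthread_mutex_unlock(&m);
        return NULL;
}

static void *waker(void *arg)
{
        (void)arg;
        atomic_store(&done, 1);               /* like ++q->rq[rw].count */
        atomic_thread_fence(memory_order_seq_cst);  /* like smp_mb() */
        if (atomic_load(&waiters)) {          /* like waitqueue_active() */
                pthread_mutex_lock(&m);
                pthread_cond_broadcast(&c);   /* like wake_up() */
                pthread_mutex_unlock(&m);
        }
        return NULL;
}

int main(void)
{
        pthread_t a, b;
        pthread_create(&a, NULL, waiter, NULL);
        pthread_create(&b, NULL, waker, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        puts("no lost wakeup");
        return 0;
}

Either the waiter's registration is visible before the waker's waiters test, or the waker's done store is visible before the waiter's re-check; both orders terminate.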
diff -urNp --exclude CVS --exclude BitKeeper x-ref/drivers/block/ll_rw_blk.c x/drivers/block/ll_rw_blk.c
--- x-ref/drivers/block/ll_rw_blk.c 2003-06-12 04:47:41.000000000 +0200
+++ x/drivers/block/ll_rw_blk.c 2003-06-12 04:47:55.000000000 +0200
@@ -596,12 +596,20 @@ static struct request *__get_request_wai
         register struct request *rq;
         DECLARE_WAITQUEUE(wait, current);
 
-        add_wait_queue(&q->wait_for_requests[rw], &wait);
+        add_wait_queue_exclusive(&q->wait_for_requests[rw], &wait);
         do {
                 set_current_state(TASK_UNINTERRUPTIBLE);
-                generic_unplug_device(q);
-                if (q->rq[rw].count == 0)
+                if (q->rq[rw].count == 0) {
+                        /*
+                         * All we care about is not to stall if any request
+                         * has been released after we set TASK_UNINTERRUPTIBLE.
+                         * This is the most efficient place to unplug the queue
+                         * in case we hit the race and we can get the request
+                         * without waiting.
+                         */
+                        generic_unplug_device(q);
                         schedule();
+                }
                 spin_lock_irq(&io_request_lock);
                 rq = get_request(q, rw);
                 spin_unlock_irq(&io_request_lock);
@@ -611,6 +619,17 @@ static struct request *__get_request_wai
         return rq;
 }
 
+static void get_request_wait_wakeup(request_queue_t *q, int rw)
+{
+        /*
+         * avoid losing an unplug if a second __get_request_wait did the
+         * generic_unplug_device while our __get_request_wait was running
+         * w/o the queue_lock held and w/ our request out of the queue.
+         */
+        if (q->rq[rw].count == 0 && waitqueue_active(&q->wait_for_requests[rw]))
+                __generic_unplug_device(q);
+}
+
 /* RO fail safe mechanism */
 
 static long ro_bits[MAX_BLKDEV][8];
@@ -835,8 +854,11 @@ void blkdev_release_request(struct reque
  */
         if (q) {
                 list_add(&req->queue, &q->rq[rw].free);
-                if (++q->rq[rw].count >= q->batch_requests)
-                        wake_up(&q->wait_for_requests[rw]);
+                if (++q->rq[rw].count >= q->batch_requests) {
+                        smp_mb();
+                        if (waitqueue_active(&q->wait_for_requests[rw]))
+                                wake_up(&q->wait_for_requests[rw]);
+                }
         }
 }
 
@@ -907,6 +929,7 @@ static inline void attempt_front_merge(r
 static int __make_request(request_queue_t * q, int rw,
                           struct buffer_head * bh)
 {
+        int need_unplug = 0;
         unsigned int sector, count;
         int max_segments = MAX_SEGMENTS;
         struct request * req, *freereq = NULL;
@@ -954,7 +977,6 @@ static int __make_request(request_queue_
          */
         max_sectors = get_max_sectors(bh->b_rdev);
 
-again:
         req = NULL;
         head = &q->queue_head;
         /*
@@ -963,6 +985,7 @@ again:
          */
         spin_lock_irq(&io_request_lock);
 
+again:
         insert_here = head->prev;
         if (list_empty(head)) {
                 q->plug_device_fn(q, bh->b_rdev); /* is atomic */
@@ -1048,6 +1071,9 @@ get_rq:
         if (req == NULL) {
                 spin_unlock_irq(&io_request_lock);
                 freereq = __get_request_wait(q, rw);
+                head = &q->queue_head;
+                need_unplug = 1;
+                spin_lock_irq(&io_request_lock);
                 goto again;
         }
 }
@@ -1074,6 +1100,8 @@ get_rq:
 out:
         if (freereq)
                 blkdev_release_request(freereq);
+        if (need_unplug)
+                get_request_wait_wakeup(q, rw);
         spin_unlock_irq(&io_request_lock);
         return 0;
 end_io:
@@ -1202,8 +1230,21 @@ void __submit_bh(int rw, struct buffer_h
         bh->b_rdev = bh->b_dev;
         bh->b_rsector = bh->b_blocknr * count;
 
+        /*
+         * Really we could read random memory in the waitqueue
+         * check, and at worst we would trigger a false positive
+         * queue unplug; however, getting the reference
+         * on the bh and reading allocated memory is cleaner.
+         */
+        get_bh(bh);
         generic_make_request(rw, bh);
 
+        /* fix race condition with wait_on_buffer() */
+        smp_mb(); /* spin_unlock may have inclusive semantics */
+        if (waitqueue_active(&bh->b_wait))
+                run_task_queue(&tq_disk);
+        put_bh(bh);
+
         switch (rw) {
         case WRITE:
                 kstat.pgpgout += count;
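The get_bh()/put_bh() bracket above is what lets __submit_bh() safely test bh->b_wait after generic_make_request(): I/O completion may otherwise free the buffer underneath the check. Here is a hedged user-space analog of that reference-count bracket, with every name hypothetical; the same shape also explains the get_page()/put_page() pair added to sys_swapon() at the end of the patch.

#include <stdatomic.h>
#include <stdlib.h>
#include <stdio.h>

struct buf {
        atomic_int refs;
        atomic_int waiters;   /* stands in for waitqueue_active(&bh->b_wait) */
};

static void buf_get(struct buf *b)
{
        atomic_fetch_add(&b->refs, 1);
}

static void buf_put(struct buf *b)
{
        if (atomic_fetch_sub(&b->refs, 1) == 1)
                free(b);      /* last reference frees the buffer */
}

static void fake_completion(struct buf *b)
{
        buf_put(b);           /* the "I/O layer" drops its reference */
}

/* the submit path, mirroring the patched __submit_bh() */
static void submit(struct buf *b)
{
        buf_get(b);           /* like get_bh(bh): b stays valid below */
        fake_completion(b);   /* like generic_make_request(); completion
                               * may run and drop a ref before we return */
        atomic_thread_fence(memory_order_seq_cst);  /* like smp_mb() */
        if (atomic_load(&b->waiters))  /* safe: we still hold a reference */
                puts("kick the queue, like run_task_queue(&tq_disk)");
        buf_put(b);           /* like put_bh(bh): may free b now */
}

int main(void)
{
        struct buf *b = calloc(1, sizeof(*b));

        atomic_store(&b->refs, 1);   /* the I/O layer's reference */
        submit(b);                   /* b is freed by the final buf_put() */
        return 0;
}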
diff -urNp --exclude CVS --exclude BitKeeper x-ref/fs/buffer.c x/fs/buffer.c
--- x-ref/fs/buffer.c 2003-06-12 04:47:41.000000000 +0200
+++ x/fs/buffer.c 2003-06-12 04:47:44.000000000 +0200
@@ -158,10 +158,23 @@ void __wait_on_buffer(struct buffer_head
         get_bh(bh);
         add_wait_queue(&bh->b_wait, &wait);
         do {
-                run_task_queue(&tq_disk);
                 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
                 if (!buffer_locked(bh))
                         break;
+                /*
+                 * We must read tq_disk in TQ_ACTIVE after the
+                 * add_wait_queue effect is visible to other cpus.
+                 * We could unplug some lines above; it wouldn't matter,
+                 * but we can't do that right after add_wait_queue
+                 * without an smp_mb() in between because spin_unlock
+                 * has inclusive semantics.
+                 * Doing it here is the most efficient place, so we
+                 * don't do a spurious unplug if we get a racy
+                 * wakeup that makes buffer_locked return 0, and
+                 * doing it here avoids an explicit smp_mb(): we
+                 * rely on the implicit one in set_task_state.
+                 */
+                run_task_queue(&tq_disk);
                 schedule();
         } while (buffer_locked(bh));
         tsk->state = TASK_RUNNING;
@@ -1471,6 +1484,7 @@ static int __block_write_full_page(struc
 
         if (!page->buffers)
                 create_empty_buffers(page, inode->i_dev, 1 << inode->i_blkbits);
+        BUG_ON(page_count(page) < 3);
         head = page->buffers;
 
         block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
@@ -1517,6 +1531,9 @@ static int __block_write_full_page(struc
 
         /* Done - end_buffer_io_async will unlock */
         SetPageUptodate(page);
+
+        wakeup_page_waiters(page);
+
         return 0;
 
 out:
@@ -1548,6 +1565,7 @@ out:
         } while (bh != head);
         if (need_unlock)
                 UnlockPage(page);
+        wakeup_page_waiters(page);
         return err;
 }
 
@@ -1721,6 +1739,7 @@ int block_read_full_page(struct page *pa
         blocksize = 1 << inode->i_blkbits;
         if (!page->buffers)
                 create_empty_buffers(page, inode->i_dev, blocksize);
+        BUG_ON(page_count(page) < 3);
         head = page->buffers;
 
         blocks = PAGE_CACHE_SIZE >> inode->i_blkbits;
@@ -1781,6 +1800,8 @@ int block_read_full_page(struct page *pa
                 else
                         submit_bh(READ, bh);
         }
+
+        wakeup_page_waiters(page);
 
         return 0;
 }
@@ -2400,6 +2421,7 @@ int brw_page(int rw, struct page *page,
 
         if (!page->buffers)
                 create_empty_buffers(page, dev, size);
+        BUG_ON(page_count(page) < 3);
         head = bh = page->buffers;
 
         /* Stage 1: lock all the buffers */
@@ -2417,6 +2439,7 @@ int brw_page(int rw, struct page *page,
                 submit_bh(rw, bh);
                 bh = next;
         } while (bh != head);
+        wakeup_page_waiters(page);
         return 0;
 }
 
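The reordered __wait_on_buffer() loop above kicks the queue only after registering as a sleeper and re-confirming the buffer is still locked, relying on the implicit barrier in set_task_state() as the comment explains. A small illustrative analog with threads follows; the names are invented, a seq_cst store models the task-state barrier, and usleep() is a crude stand-in for schedule().

#include <stdatomic.h>
#include <pthread.h>
#include <unistd.h>
#include <stdio.h>

static atomic_int buffer_locked = 1;   /* stands in for buffer_locked(bh) */
static atomic_int registered = 0;      /* stands in for the task state */

static void kick_queue(void)           /* like run_task_queue(&tq_disk) */
{
        puts("unplugging the queue");
}

static void *waiter_thread(void *arg)
{
        (void)arg;
        for (;;) {
                atomic_store(&registered, 1);  /* like set_task_state();
                                                * the seq_cst store is the
                                                * implicit barrier */
                if (!atomic_load(&buffer_locked))
                        break;                 /* racy early wakeup: no
                                                * spurious kick */
                kick_queue();                  /* only when really sleeping */
                usleep(1000);                  /* stand-in for schedule() */
        }
        atomic_store(&registered, 0);
        return NULL;
}

static void *io_completion(void *arg)
{
        (void)arg;
        usleep(5000);
        atomic_store(&buffer_locked, 0);       /* like unlock_buffer() */
        return NULL;
}

int main(void)
{
        pthread_t w, c;
        pthread_create(&w, NULL, waiter_thread, NULL);
        pthread_create(&c, NULL, io_completion, NULL);
        pthread_join(w, NULL);
        pthread_join(c, NULL);
        return 0;
}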
diff -urNp --exclude CVS --exclude BitKeeper x-ref/fs/reiserfs/inode.c x/fs/reiserfs/inode.c
--- x-ref/fs/reiserfs/inode.c 2003-06-12 04:47:35.000000000 +0200
+++ x/fs/reiserfs/inode.c 2003-06-12 04:47:44.000000000 +0200
@@ -2048,6 +2048,7 @@ static int reiserfs_write_full_page(stru
  */
     if (nr) {
         submit_bh_for_writepage(page, arr, nr) ;
+        wakeup_page_waiters(page);
     } else {
         UnlockPage(page) ;
     }
diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/linux/pagemap.h x/include/linux/pagemap.h
--- x-ref/include/linux/pagemap.h 2003-06-12 04:47:41.000000000 +0200
+++ x/include/linux/pagemap.h 2003-06-12 04:47:44.000000000 +0200
@@ -98,6 +98,8 @@ static inline void wait_on_page(struct p
         ___wait_on_page(page);
 }
 
+extern void FASTCALL(wakeup_page_waiters(struct page * page));
+
 /*
  * Returns locked page at given index in given cache, creating it if needed.
  */
diff -urNp --exclude CVS --exclude BitKeeper x-ref/kernel/ksyms.c x/kernel/ksyms.c
--- x-ref/kernel/ksyms.c 2003-06-12 04:47:41.000000000 +0200
+++ x/kernel/ksyms.c 2003-06-12 04:47:44.000000000 +0200
@@ -319,6 +319,7 @@ EXPORT_SYMBOL(filemap_fdatasync);
 EXPORT_SYMBOL(filemap_fdatawait);
 EXPORT_SYMBOL(lock_page);
 EXPORT_SYMBOL(unlock_page);
+EXPORT_SYMBOL(wakeup_page_waiters);
 
 /* device registration */
 EXPORT_SYMBOL(register_chrdev);
diff -urNp --exclude CVS --exclude BitKeeper x-ref/mm/filemap.c x/mm/filemap.c
--- x-ref/mm/filemap.c 2003-06-12 04:47:41.000000000 +0200
+++ x/mm/filemap.c 2003-06-12 04:47:44.000000000 +0200
@@ -779,6 +779,20 @@ inline wait_queue_head_t * page_waitqueu
         return wait_table_hashfn(page, &pgdat->wait_table);
 }
 
+/*
+ * This must be called after every submit_bh with end_io
+ * callbacks that would result in the blkdev layer waking
+ * up the page after a queue unplug.
+ */
+void wakeup_page_waiters(struct page * page)
+{
+        wait_queue_head_t * head;
+
+        head = page_waitqueue(page);
+        if (waitqueue_active(head))
+                sync_page(page);
+}
+
 /*
  * Wait for a page to get unlocked.
  *
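wakeup_page_waiters() hashes the page to its shared wait queue via page_waitqueue() and calls sync_page() only when waitqueue_active() reports sleepers. A rough user-space sketch of that hashed wait-table idea, assuming pthreads; the hash function and all names are invented for illustration only.

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define TABLE_SIZE 64

struct waitq {
        pthread_mutex_t lock;
        pthread_cond_t cond;
        int sleepers;            /* what waitqueue_active() would read */
};

static struct waitq wait_table[TABLE_SIZE];

static void wait_table_init(void)
{
        for (int i = 0; i < TABLE_SIZE; i++) {
                pthread_mutex_init(&wait_table[i].lock, NULL);
                pthread_cond_init(&wait_table[i].cond, NULL);
                wait_table[i].sleepers = 0;
        }
}

/* hash an object's address into the shared table, as page_waitqueue()
 * hashes struct page pointers into pgdat->wait_table */
static struct waitq *obj_waitqueue(const void *obj)
{
        uintptr_t h = (uintptr_t)obj;
        h ^= h >> 7;             /* toy pointer hash, illustrative only */
        return &wait_table[h % TABLE_SIZE];
}

static void wakeup_obj_waiters(const void *obj)
{
        struct waitq *q = obj_waitqueue(obj);

        pthread_mutex_lock(&q->lock);
        if (q->sleepers)         /* like the waitqueue_active(head) test */
                pthread_cond_broadcast(&q->cond);  /* like sync_page(page) */
        pthread_mutex_unlock(&q->lock);
}

int main(void)
{
        int object;

        wait_table_init();
        wakeup_obj_waiters(&object);  /* nobody sleeping: cheap no-op */
        puts("ok");
        return 0;
}

Sharing a small table among all pages keeps the memory cost constant, and the sleepers test makes the common no-waiter case a cheap lookup, which is why the patch can afford to call the helper after every buffer submission.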
diff -urNp --exclude CVS --exclude BitKeeper x-ref/mm/swapfile.c x/mm/swapfile.c
--- x-ref/mm/swapfile.c 2003-06-12 04:47:41.000000000 +0200
+++ x/mm/swapfile.c 2003-06-12 04:47:44.000000000 +0200
@@ -984,8 +984,10 @@ asmlinkage long sys_swapon(const char *
                 goto bad_swap;
         }
 
+        get_page(virt_to_page(swap_header));
         lock_page(virt_to_page(swap_header));
         rw_swap_page_nolock(READ, SWP_ENTRY(type,0), (char *) swap_header);
+        put_page(virt_to_page(swap_header));
 
         if (!memcmp("SWAP-SPACE",swap_header->magic.magic,10))
                 swap_header_version = 1;