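Fix lost wakeups and missed queue unplugs between the 2.4 block layer's
I/O submission paths and tasks sleeping in __get_request_wait(),
__wait_on_buffer() and ___wait_on_page().  Request waiters become
exclusive and unplug the queue only after setting TASK_UNINTERRUPTIBLE
and re-checking the free-request count; blkdev_release_request() samples
the waitqueue only behind an smp_mb(); __submit_bh() and the new
wakeup_page_waiters() helper unplug on behalf of any sleeper they can
observe after the I/O has been submitted.
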
diff -urNp --exclude CVS --exclude BitKeeper x-ref/drivers/block/ll_rw_blk.c x/drivers/block/ll_rw_blk.c
--- x-ref/drivers/block/ll_rw_blk.c	2003-06-12 04:47:41.000000000 +0200
+++ x/drivers/block/ll_rw_blk.c	2003-06-12 04:47:55.000000000 +0200
@@ -596,12 +596,20 @@ static struct request *__get_request_wai
 	register struct request *rq;
 	DECLARE_WAITQUEUE(wait, current);
 
-	add_wait_queue(&q->wait_for_requests[rw], &wait);
+	add_wait_queue_exclusive(&q->wait_for_requests[rw], &wait);
 	do {
 		set_current_state(TASK_UNINTERRUPTIBLE);
-		generic_unplug_device(q);
-		if (q->rq[rw].count == 0)
+		if (q->rq[rw].count == 0) {
+			/*
+			 * All we care about is not to stall if any request
+			 * has been released after we set TASK_UNINTERRUPTIBLE.
+			 * This is the most efficient place to unplug the queue:
+			 * if we hit the race, we can get the request
+			 * without waiting.
+			 */
+			generic_unplug_device(q);
 			schedule();
+		}
 		spin_lock_irq(&io_request_lock);
 		rq = get_request(q, rw);
 		spin_unlock_irq(&io_request_lock);
@@ -611,6 +619,17 @@ static struct request *__get_request_wai
 	return rq;
 }
 
+static void get_request_wait_wakeup(request_queue_t *q, int rw)
+{
+	/*
+	 * avoid losing an unplug if a second __get_request_wait did the
+	 * generic_unplug_device while our __get_request_wait was running
+	 * w/o the queue_lock held and w/ our request out of the queue.
+	 */
+	if (q->rq[rw].count == 0 && waitqueue_active(&q->wait_for_requests[rw]))
+		__generic_unplug_device(q);
+}
+
 /* RO fail safe mechanism */
 
 static long ro_bits[MAX_BLKDEV][8];
@@ -835,8 +854,11 @@ void blkdev_release_request(struct reque
 	 */
 	if (q) {
 		list_add(&req->queue, &q->rq[rw].free);
-		if (++q->rq[rw].count >= q->batch_requests)
-			wake_up(&q->wait_for_requests[rw]);
+		if (++q->rq[rw].count >= q->batch_requests) {
+			smp_mb();
+			if (waitqueue_active(&q->wait_for_requests[rw]))
+				wake_up(&q->wait_for_requests[rw]);
+		}
 	}
 }
 
@@ -907,6 +929,7 @@ static inline void attempt_front_merge(r
 static int __make_request(request_queue_t * q, int rw,
 			  struct buffer_head * bh)
 {
+	int need_unplug = 0;
 	unsigned int sector, count;
 	int max_segments = MAX_SEGMENTS;
 	struct request * req, *freereq = NULL;
@@ -954,7 +977,6 @@ static int __make_request(request_queue_
 	 */
 	max_sectors = get_max_sectors(bh->b_rdev);
 
-again:
 	req = NULL;
 	head = &q->queue_head;
 	/*
@@ -963,6 +985,7 @@ again:
 	 */
 	spin_lock_irq(&io_request_lock);
 
+again:
 	insert_here = head->prev;
 	if (list_empty(head)) {
 		q->plug_device_fn(q, bh->b_rdev); /* is atomic */
@@ -1048,6 +1071,9 @@ get_rq:
 		if (req == NULL) {
 			spin_unlock_irq(&io_request_lock);
 			freereq = __get_request_wait(q, rw);
+			head = &q->queue_head;
+			need_unplug = 1;
+			spin_lock_irq(&io_request_lock);
 			goto again;
 		}
 	}
@@ -1074,6 +1100,8 @@ get_rq:
 out:
 	if (freereq)
 		blkdev_release_request(freereq);
+	if (need_unplug)
+		get_request_wait_wakeup(q, rw);
 	spin_unlock_irq(&io_request_lock);
 	return 0;
 end_io:
@@ -1202,8 +1230,21 @@ void __submit_bh(int rw, struct buffer_h
 	bh->b_rdev = bh->b_dev;
 	bh->b_rsector = bh->b_blocknr * count;
 
+	/*
+	 * Really we could read random memory in the waitqueue
+	 * check, and at worst we would trigger a false positive
+	 * queue unplug; however, getting the reference
+	 * on the bh and reading allocated memory is cleaner.
+	 */
+	get_bh(bh);
 	generic_make_request(rw, bh);
 
+	/* fix race condition with wait_on_buffer() */
+	smp_mb(); /* spin_unlock may have inclusive semantics */
+	if (waitqueue_active(&bh->b_wait))
+		run_task_queue(&tq_disk);
+	put_bh(bh);
+
 	switch (rw) {
 		case WRITE:
 			kstat.pgpgout += count;
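
For reference, the waiter/waker pairing the ll_rw_blk.c hunks establish,
condensed to the ordering that matters (a sketch of the code above, not
compilable on its own):

	/* waker side, blkdev_release_request(): make the condition
	 * true, then force it visible before sampling the waitqueue. */
	++q->rq[rw].count;
	smp_mb();
	if (waitqueue_active(&q->wait_for_requests[rw]))
		wake_up(&q->wait_for_requests[rw]);

	/* waiter side, __get_request_wait(): publish the sleeper first
	 * (set_current_state implies the barrier), then re-test. */
	add_wait_queue_exclusive(&q->wait_for_requests[rw], &wait);
	set_current_state(TASK_UNINTERRUPTIBLE);
	if (q->rq[rw].count == 0) {
		generic_unplug_device(q);
		schedule();
	}

The two barriers pair up: whichever side runs second is guaranteed to
see the other's store, so either the waiter observes count != 0 and
skips schedule(), or the waker observes a non-empty waitqueue and
issues the wake_up().  That pairing is what makes the waitqueue_active()
shortcut safe.  And because the waiters are now exclusive,
__make_request() calls get_request_wait_wakeup() after taking its
request, re-issuing for the remaining sleepers the unplug that a woken
task may otherwise have swallowed.
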
diff -urNp --exclude CVS --exclude BitKeeper x-ref/fs/buffer.c x/fs/buffer.c
--- x-ref/fs/buffer.c	2003-06-12 04:47:41.000000000 +0200
+++ x/fs/buffer.c	2003-06-12 04:47:44.000000000 +0200
@@ -158,10 +158,23 @@ void __wait_on_buffer(struct buffer_head
 	get_bh(bh);
 	add_wait_queue(&bh->b_wait, &wait);
 	do {
-		run_task_queue(&tq_disk);
 		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
 		if (!buffer_locked(bh))
 			break;
+		/*
+		 * We must read tq_disk in TQ_ACTIVE after the
+		 * add_wait_queue effect is visible to other cpus.
+		 * We could unplug a few lines above; it wouldn't matter,
+		 * but we can't do that right after add_wait_queue
+		 * without an smp_mb() in between, because spin_unlock
+		 * has inclusive semantics.
+		 * This is the most efficient place to do it, so we
+		 * don't do a spurious unplug if we get a racy
+		 * wakeup that makes buffer_locked return 0, and
+		 * doing it here avoids an explicit smp_mb(): we
+		 * rely on the implicit one in set_task_state.
+		 */
+		run_task_queue(&tq_disk);
 		schedule();
 	} while (buffer_locked(bh));
 	tsk->state = TASK_RUNNING;
@@ -1471,6 +1484,7 @@ static int __block_write_full_page(struc
 
 	if (!page->buffers)
 		create_empty_buffers(page, inode->i_dev, 1 << inode->i_blkbits);
+	BUG_ON(page_count(page) < 3);
 	head = page->buffers;
 
 	block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
@@ -1517,6 +1531,9 @@ static int __block_write_full_page(struc
 
 	/* Done - end_buffer_io_async will unlock */
 	SetPageUptodate(page);
+
+	wakeup_page_waiters(page);
+
 	return 0;
 
 out:
@@ -1548,6 +1565,7 @@ out:
 	} while (bh != head);
 	if (need_unlock)
 		UnlockPage(page);
+	wakeup_page_waiters(page);
 	return err;
 }
 
@@ -1721,6 +1739,7 @@ int block_read_full_page(struct page *pa
 	blocksize = 1 << inode->i_blkbits;
 	if (!page->buffers)
 		create_empty_buffers(page, inode->i_dev, blocksize);
+	BUG_ON(page_count(page) < 3);
 	head = page->buffers;
 
 	blocks = PAGE_CACHE_SIZE >> inode->i_blkbits;
@@ -1781,6 +1800,8 @@ int block_read_full_page(struct page *pa
 		else
 			submit_bh(READ, bh);
 	}
+
+	wakeup_page_waiters(page);
 
 	return 0;
 }
@@ -2400,6 +2421,7 @@ int brw_page(int rw, struct page *page,
 
 	if (!page->buffers)
 		create_empty_buffers(page, dev, size);
+	BUG_ON(page_count(page) < 3);
 	head = bh = page->buffers;
 
 	/* Stage 1: lock all the buffers */
@@ -2417,6 +2439,7 @@ int brw_page(int rw, struct page *page,
 		submit_bh(rw, bh);
 		bh = next;
 	} while (bh != head);
+	wakeup_page_waiters(page);
 	return 0;
 }
 
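The buffer.c side follows the same protocol; condensed from the patched
__submit_bh() and __wait_on_buffer() above (again a sketch, not
compilable on its own):

	/* submitter, __submit_bh(): queue the I/O, then look for
	 * sleepers; the bh reference keeps b_wait safely readable. */
	get_bh(bh);
	generic_make_request(rw, bh);
	smp_mb();
	if (waitqueue_active(&bh->b_wait))
		run_task_queue(&tq_disk);	/* unplug for the sleeper */
	put_bh(bh);

	/* waiter, __wait_on_buffer(): get onto b_wait and set the task
	 * state (implicit barrier) before sampling tq_disk. */
	add_wait_queue(&bh->b_wait, &wait);
	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
	if (buffer_locked(bh)) {
		run_task_queue(&tq_disk);	/* self-unplug */
		schedule();
	}

In no interleaving can both run_task_queue() calls miss: either the
waiter reads tq_disk after the plug is queued and unplugs itself, or
the submitter reads b_wait after the waiter is on it and unplugs on
its behalf.
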
diff -urNp --exclude CVS --exclude BitKeeper x-ref/fs/reiserfs/inode.c x/fs/reiserfs/inode.c
--- x-ref/fs/reiserfs/inode.c	2003-06-12 04:47:35.000000000 +0200
+++ x/fs/reiserfs/inode.c	2003-06-12 04:47:44.000000000 +0200
@@ -2048,6 +2048,7 @@ static int reiserfs_write_full_page(stru
 	 */
 	if (nr) {
 		submit_bh_for_writepage(page, arr, nr) ;
+		wakeup_page_waiters(page);
 	} else {
 		UnlockPage(page) ;
 	}
diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/linux/pagemap.h x/include/linux/pagemap.h
--- x-ref/include/linux/pagemap.h	2003-06-12 04:47:41.000000000 +0200
+++ x/include/linux/pagemap.h	2003-06-12 04:47:44.000000000 +0200
@@ -98,6 +98,8 @@ static inline void wait_on_page(struct p
 	___wait_on_page(page);
 }
 
+extern void FASTCALL(wakeup_page_waiters(struct page * page));
+
 /*
  * Returns locked page at given index in given cache, creating it if needed.
  */
diff -urNp --exclude CVS --exclude BitKeeper x-ref/kernel/ksyms.c x/kernel/ksyms.c
--- x-ref/kernel/ksyms.c	2003-06-12 04:47:41.000000000 +0200
+++ x/kernel/ksyms.c	2003-06-12 04:47:44.000000000 +0200
@@ -319,6 +319,7 @@ EXPORT_SYMBOL(filemap_fdatasync);
 EXPORT_SYMBOL(filemap_fdatawait);
 EXPORT_SYMBOL(lock_page);
 EXPORT_SYMBOL(unlock_page);
+EXPORT_SYMBOL(wakeup_page_waiters);
 
 /* device registration */
 EXPORT_SYMBOL(register_chrdev);
diff -urNp --exclude CVS --exclude BitKeeper x-ref/mm/filemap.c x/mm/filemap.c
--- x-ref/mm/filemap.c	2003-06-12 04:47:41.000000000 +0200
+++ x/mm/filemap.c	2003-06-12 04:47:44.000000000 +0200
@@ -779,6 +779,20 @@ inline wait_queue_head_t * page_waitqueu
 	return wait_table_hashfn(page, &pgdat->wait_table);
 }
 
+/*
+ * This must be called after every submit_bh with end_io
+ * callbacks that would result in the blkdev layer waking
+ * up the page after a queue unplug.
+ */
+void wakeup_page_waiters(struct page * page)
+{
+	wait_queue_head_t * head;
+
+	head = page_waitqueue(page);
+	if (waitqueue_active(head))
+		sync_page(page);
+}
+
 /*
  * Wait for a page to get unlocked.
 *
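
A minimal caller sketch of the rule stated in the comment above,
mirroring what this patch does in brw_page() and the other converted
paths (condensed, locking and error handling elided):

	/* submit every buffer of the page... */
	do {
		struct buffer_head *next = bh->b_this_page;
		submit_bh(rw, bh);
		bh = next;
	} while (bh != head);

	/* ...then check for tasks already sleeping on the page's
	 * waitqueue and unplug on their behalf, once per batch. */
	wakeup_page_waiters(page);

sync_page() ends up running the mapping's sync_page operation (a
tq_disk run for blkdev-backed pages), so a waiter that got onto
page_waitqueue(page) before the submitter's check still has its I/O
kicked off.
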
diff -urNp --exclude CVS --exclude BitKeeper x-ref/mm/swapfile.c x/mm/swapfile.c
--- x-ref/mm/swapfile.c	2003-06-12 04:47:41.000000000 +0200
+++ x/mm/swapfile.c	2003-06-12 04:47:44.000000000 +0200
@@ -984,8 +984,10 @@ asmlinkage long sys_swapon(const char *
 		goto bad_swap;
 	}
 
+	get_page(virt_to_page(swap_header));
 	lock_page(virt_to_page(swap_header));
 	rw_swap_page_nolock(READ, SWP_ENTRY(type,0), (char *) swap_header);
+	put_page(virt_to_page(swap_header));
 
 	if (!memcmp("SWAP-SPACE",swap_header->magic.magic,10))
 		swap_header_version = 1;
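
Every hunk above instantiates the same two-sided protocol.  As a
self-contained illustration, here is a userspace analogue (my
construction, not kernel code: the file name, the thread names, the
semaphore standing in for schedule()/wake_up(), and the
relaxed-atomics-plus-fence shape are all illustrative) of why the
barriers make the waitqueue_active()-style shortcut safe:

	/* lostwake.c -- cc -pthread lostwake.c && ./lostwake */
	#include <pthread.h>
	#include <semaphore.h>
	#include <stdatomic.h>
	#include <stdio.h>

	static atomic_int condition;	/* "a request was freed" */
	static atomic_int waiting;	/* a non-empty waitqueue */
	static sem_t sleeper;		/* schedule()/wake_up() */

	static void *waiter(void *arg)
	{
		/* publish the sleeper first... */
		atomic_store_explicit(&waiting, 1, memory_order_relaxed);
		/* ...analogue of the barrier in set_current_state()... */
		atomic_thread_fence(memory_order_seq_cst);
		/* ...and only then re-test the condition */
		if (!atomic_load_explicit(&condition, memory_order_relaxed))
			sem_wait(&sleeper);		/* schedule() */
		printf("waiter: condition=%d\n",
		       atomic_load_explicit(&condition, memory_order_relaxed));
		return NULL;
	}

	static void *waker(void *arg)
	{
		/* make the condition true... */
		atomic_store_explicit(&condition, 1, memory_order_relaxed);
		/* ...analogue of the patch's explicit smp_mb()... */
		atomic_thread_fence(memory_order_seq_cst);
		/* ...then the waitqueue_active()-style test is safe:
		 * at least one fenced side must see the other's store */
		if (atomic_load_explicit(&waiting, memory_order_relaxed))
			sem_post(&sleeper);		/* wake_up() */
		return NULL;
	}

	int main(void)
	{
		pthread_t a, b;

		sem_init(&sleeper, 0, 0);
		pthread_create(&a, NULL, waiter, NULL);
		pthread_create(&b, NULL, waker, NULL);
		pthread_join(a, NULL);
		pthread_join(b, NULL);
		return 0;
	}

Remove either fence and the lost wakeup reappears: both relaxed loads
may read the stale zero, the waker skips sem_post(), and the waiter
sleeps forever -- exactly the stall the smp_mb()/set_current_state
ordering in this patch rules out.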