diff -urNp --exclude CVS --exclude BitKeeper x-ref/drivers/block/ll_rw_blk.c x/drivers/block/ll_rw_blk.c
--- x-ref/drivers/block/ll_rw_blk.c     2003-06-12 04:47:41.000000000 +0200
+++ x/drivers/block/ll_rw_blk.c 2003-06-12 04:47:55.000000000 +0200
@@ -596,12 +596,20 @@ static struct request *__get_request_wai
        register struct request *rq;
        DECLARE_WAITQUEUE(wait, current);
 
-       add_wait_queue(&q->wait_for_requests[rw], &wait);
+       add_wait_queue_exclusive(&q->wait_for_requests[rw], &wait);
        do {
                set_current_state(TASK_UNINTERRUPTIBLE);
-               generic_unplug_device(q);
-               if (q->rq[rw].count == 0)
+               if (q->rq[rw].count == 0) {
+                       /*
+                        * All we care about is not stalling if a request
+                        * has been released after we set TASK_UNINTERRUPTIBLE.
+                        * This is the most efficient place to unplug the queue
+                        * in case we hit the race and can get the request
+                        * without waiting.
+                        */
+                       generic_unplug_device(q);
                        schedule();
+               }
                spin_lock_irq(&io_request_lock);
                rq = get_request(q, rw);
                spin_unlock_irq(&io_request_lock);
@@ -611,6 +619,17 @@ static struct request *__get_request_wai
        return rq;
 }
 
+static void get_request_wait_wakeup(request_queue_t *q, int rw)
+{
+       /*
+        * avoid losing an unplug if a second __get_request_wait did the
+        * generic_unplug_device while our __get_request_wait was running
+        * w/o the queue_lock held and w/ our request out of the queue.
+        */
+       if (q->rq[rw].count == 0 && waitqueue_active(&q->wait_for_requests[rw]))
+               __generic_unplug_device(q);
+}
+
 /* RO fail safe mechanism */
 
 static long ro_bits[MAX_BLKDEV][8];
@@ -835,8 +854,11 @@ void blkdev_release_request(struct reque
         */
        if (q) {
                list_add(&req->queue, &q->rq[rw].free);
-               if (++q->rq[rw].count >= q->batch_requests)
-                       wake_up(&q->wait_for_requests[rw]);
+               if (++q->rq[rw].count >= q->batch_requests) {
+                       smp_mb();
+                       if (waitqueue_active(&q->wait_for_requests[rw]))
+                               wake_up(&q->wait_for_requests[rw]);
+               }
        }
 }
 
@@ -907,6 +929,7 @@ static inline void attempt_front_merge(r
 static int __make_request(request_queue_t * q, int rw,
                                  struct buffer_head * bh)
 {
+       int need_unplug = 0;
        unsigned int sector, count;
        int max_segments = MAX_SEGMENTS;
        struct request * req, *freereq = NULL;
@@ -954,7 +977,6 @@ static int __make_request(request_queue_
         */
        max_sectors = get_max_sectors(bh->b_rdev);
 
-again:
        req = NULL;
        head = &q->queue_head;
        /*
@@ -963,6 +985,7 @@ again:
         */
        spin_lock_irq(&io_request_lock);
 
+again:
        insert_here = head->prev;
        if (list_empty(head)) {
                q->plug_device_fn(q, bh->b_rdev); /* is atomic */
@@ -1048,6 +1071,9 @@ get_rq:
                        if (req == NULL) {
                                spin_unlock_irq(&io_request_lock);
                                freereq = __get_request_wait(q, rw);
+                               head = &q->queue_head;
+                               need_unplug = 1;
+                               spin_lock_irq(&io_request_lock);
                                goto again;
                        }
                }
@@ -1074,6 +1100,8 @@ get_rq:
 out:
        if (freereq)
                blkdev_release_request(freereq);
+       if (need_unplug)
+               get_request_wait_wakeup(q, rw);
        spin_unlock_irq(&io_request_lock);
        return 0;
 end_io:
@@ -1202,8 +1230,21 @@ void __submit_bh(int rw, struct buffer_h
        bh->b_rdev = bh->b_dev;
        bh->b_rsector = bh->b_blocknr * count;
 
+       /*
+        * Without the reference we could read random memory in the
+        * waitqueue check and at worst trigger a false positive
+        * queue unplug; taking a reference on the bh and reading
+        * allocated memory is cleaner.
+        */
+       get_bh(bh);
        generic_make_request(rw, bh);
 
+       /* fix race condition with wait_on_buffer() */
+       smp_mb(); /* spin_unlock may have inclusive semantics */
+       if (waitqueue_active(&bh->b_wait))
+               run_task_queue(&tq_disk);
+       put_bh(bh);
+
        switch (rw) {
                case WRITE:
                        kstat.pgpgout += count;
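
The ll_rw_blk.c hunks above pair an exclusive waiter, which re-checks the free-request count and unplugs the queue only after publishing its sleep state, with a release path that issues a full barrier before testing waitqueue_active(). The following userspace model is only a sketch of that shape, not kernel code; the names free_count, batch and waiters are invented, POSIX threads stand in for the scheduler, and the C11 fence stands in for smp_mb().

/*
 * Userspace model of "wake only if someone is waiting, with a barrier
 * before the check".  Not kernel code; all names are invented.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  more = PTHREAD_COND_INITIALIZER;
static int free_count;            /* q->rq[rw].count analogue    */
static const int batch = 4;       /* q->batch_requests analogue  */
static atomic_int waiters;        /* waitqueue_active() analogue */

/* __get_request_wait() analogue: sleep until a request is free. */
static void get_request_wait(void)
{
	pthread_mutex_lock(&lock);
	atomic_fetch_add(&waiters, 1);           /* add_wait_queue_exclusive() */
	while (free_count == 0) {
		/* the patch unplugs the queue here, just before sleeping */
		pthread_cond_wait(&more, &lock); /* schedule()                 */
	}
	free_count--;
	atomic_fetch_sub(&waiters, 1);           /* remove_wait_queue()        */
	pthread_mutex_unlock(&lock);
}

/* blkdev_release_request() analogue: free a request, maybe wake. */
static void release_request(void)
{
	pthread_mutex_lock(&lock);
	if (++free_count >= batch) {
		/*
		 * smp_mb() analogue: order the count update before the
		 * waiter check, so we never decide "nobody is waiting"
		 * from a stale view of the wait queue.
		 */
		atomic_thread_fence(memory_order_seq_cst);
		if (atomic_load(&waiters))          /* waitqueue_active() */
			pthread_cond_signal(&more); /* wake_up()          */
	}
	pthread_mutex_unlock(&lock);
}

static void *waiter_thread(void *arg)
{
	(void)arg;
	get_request_wait();
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, waiter_thread, NULL);
	for (int i = 0; i < batch; i++)
		release_request();
	pthread_join(&t, NULL);
	puts("waiter got its request");
	return 0;
}

In this model the mutex already serializes both sides, so the atomics and the fence are purely illustrative of the ordering the kernel needs when the waiter check runs without io_request_lock held.
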
diff -urNp --exclude CVS --exclude BitKeeper x-ref/fs/buffer.c x/fs/buffer.c
--- x-ref/fs/buffer.c   2003-06-12 04:47:41.000000000 +0200
+++ x/fs/buffer.c       2003-06-12 04:47:44.000000000 +0200
@@ -158,10 +158,23 @@ void __wait_on_buffer(struct buffer_head
        get_bh(bh);
        add_wait_queue(&bh->b_wait, &wait);
        do {
-               run_task_queue(&tq_disk);
                set_task_state(tsk, TASK_UNINTERRUPTIBLE);
                if (!buffer_locked(bh))
                        break;
+               /*
+                * We must read tq_disk in TQ_ACTIVE after the
+                * add_wait_queue effect is visible to other cpus.
+                * We could unplug a few lines above and it wouldn't
+                * matter, but we can't do that right after
+                * add_wait_queue without an smp_mb() in between,
+                * because spin_unlock has inclusive semantics.
+                * Doing it here is the most efficient place: we avoid
+                * a spurious unplug if a racy wakeup makes
+                * buffer_locked return 0, and we avoid an explicit
+                * smp_mb() by relying on the implicit one in
+                * set_task_state.
+                */
+               run_task_queue(&tq_disk);
                schedule();
        } while (buffer_locked(bh));
        tsk->state = TASK_RUNNING;
@@ -1471,6 +1484,7 @@ static int __block_write_full_page(struc
 
        if (!page->buffers)
                create_empty_buffers(page, inode->i_dev, 1 << inode->i_blkbits);
+       BUG_ON(page_count(page) < 3);
        head = page->buffers;
 
        block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
@@ -1517,6 +1531,9 @@ static int __block_write_full_page(struc
 
        /* Done - end_buffer_io_async will unlock */
        SetPageUptodate(page);
+
+       wakeup_page_waiters(page);
+
        return 0;
 
 out:
@@ -1548,6 +1565,7 @@ out:
        } while (bh != head);
        if (need_unlock)
                UnlockPage(page);
+       wakeup_page_waiters(page);
        return err;
 }
 
@@ -1721,6 +1739,7 @@ int block_read_full_page(struct page *pa
        blocksize = 1 << inode->i_blkbits;
        if (!page->buffers)
                create_empty_buffers(page, inode->i_dev, blocksize);
+       BUG_ON(page_count(page) < 3);
        head = page->buffers;
 
        blocks = PAGE_CACHE_SIZE >> inode->i_blkbits;
@@ -1781,6 +1800,8 @@ int block_read_full_page(struct page *pa
                else
                        submit_bh(READ, bh);
        }
+
+       wakeup_page_waiters(page);
        
        return 0;
 }
@@ -2400,6 +2421,7 @@ int brw_page(int rw, struct page *page, 
 
        if (!page->buffers)
                create_empty_buffers(page, dev, size);
+       BUG_ON(page_count(page) < 3);
        head = bh = page->buffers;
 
        /* Stage 1: lock all the buffers */
@@ -2417,6 +2439,7 @@ int brw_page(int rw, struct page *page, 
                submit_bh(rw, bh);
                bh = next;
        } while (bh != head);
+       wakeup_page_waiters(page);
        return 0;
 }
 
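
The __wait_on_buffer() change above and the __submit_bh() change in ll_rw_blk.c form a Dekker-style pair: each side stores its own flag first (waiter registered, I/O submitted), then issues a full barrier, then loads the other side's flag and unplugs if it is set. The sketch below is my own userspace model of that store/barrier/load symmetry, with C11 atomics standing in for add_wait_queue(), set_task_state() and smp_mb(); it is not the kernel code.

/*
 * Userspace model of the store/barrier/load symmetry; not kernel code.
 * io_submitted, waiter_present and unplug_runs are invented names.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int io_submitted;   /* "a request sits in the plugged queue" */
static atomic_int waiter_present; /* add_wait_queue()/set_task_state()     */
static atomic_int unplug_runs;    /* how many sides kicked the queue       */

static void unplug(void)          /* run_task_queue(&tq_disk) stand-in */
{
	atomic_fetch_add(&unplug_runs, 1);
}

static void *submitter(void *arg) /* __submit_bh() side */
{
	(void)arg;
	atomic_store(&io_submitted, 1);            /* generic_make_request() */
	atomic_thread_fence(memory_order_seq_cst); /* smp_mb() in the patch  */
	if (atomic_load(&waiter_present))          /* waitqueue_active()     */
		unplug();
	return NULL;
}

static void *waiter(void *arg)    /* __wait_on_buffer() side */
{
	(void)arg;
	atomic_store(&waiter_present, 1);          /* add_wait_queue()            */
	atomic_thread_fence(memory_order_seq_cst); /* implied by set_task_state() */
	if (atomic_load(&io_submitted))            /* buffer still locked?        */
		unplug();
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, submitter, NULL);
	pthread_create(&b, NULL, waiter, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	/* With the fences in place this can never print 0. */
	printf("unplug ran %d time(s)\n", atomic_load(&unplug_runs));
	return 0;
}

Because both threads fence between their store and their load, the store-buffering outcome in which each reads a stale zero is forbidden, so at least one side always kicks the queue and the buffer wait cannot stall.
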
diff -urNp --exclude CVS --exclude BitKeeper x-ref/fs/reiserfs/inode.c x/fs/reiserfs/inode.c
--- x-ref/fs/reiserfs/inode.c   2003-06-12 04:47:35.000000000 +0200
+++ x/fs/reiserfs/inode.c       2003-06-12 04:47:44.000000000 +0200
@@ -2048,6 +2048,7 @@ static int reiserfs_write_full_page(stru
     */
     if (nr) {
         submit_bh_for_writepage(page, arr, nr) ;
+       wakeup_page_waiters(page);
     } else {
         UnlockPage(page) ;
     }
diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/linux/pagemap.h x/include/linux/pagemap.h
--- x-ref/include/linux/pagemap.h       2003-06-12 04:47:41.000000000 +0200
+++ x/include/linux/pagemap.h   2003-06-12 04:47:44.000000000 +0200
@@ -98,6 +98,8 @@ static inline void wait_on_page(struct p
                ___wait_on_page(page);
 }
 
+extern void FASTCALL(wakeup_page_waiters(struct page * page));
+
 /*
  * Returns locked page at given index in given cache, creating it if needed.
  */
diff -urNp --exclude CVS --exclude BitKeeper x-ref/kernel/ksyms.c x/kernel/ksyms.c
--- x-ref/kernel/ksyms.c        2003-06-12 04:47:41.000000000 +0200
+++ x/kernel/ksyms.c    2003-06-12 04:47:44.000000000 +0200
@@ -319,6 +319,7 @@ EXPORT_SYMBOL(filemap_fdatasync);
 EXPORT_SYMBOL(filemap_fdatawait);
 EXPORT_SYMBOL(lock_page);
 EXPORT_SYMBOL(unlock_page);
+EXPORT_SYMBOL(wakeup_page_waiters);
 
 /* device registration */
 EXPORT_SYMBOL(register_chrdev);
diff -urNp --exclude CVS --exclude BitKeeper x-ref/mm/filemap.c x/mm/filemap.c
--- x-ref/mm/filemap.c  2003-06-12 04:47:41.000000000 +0200
+++ x/mm/filemap.c      2003-06-12 04:47:44.000000000 +0200
@@ -779,6 +779,20 @@ inline wait_queue_head_t * page_waitqueu
        return wait_table_hashfn(page, &pgdat->wait_table);
 }
 
+/*
+ * This must be called after every submit_bh with end_io
+ * callbacks that would result in the blkdev layer waking
+ * up the page after a queue unplug.
+ */
+void wakeup_page_waiters(struct page * page)
+{
+       wait_queue_head_t * head;
+
+       head = page_waitqueue(page);
+       if (waitqueue_active(head))
+               sync_page(page);
+}
+
 /* 
  * Wait for a page to get unlocked.
  *
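
wakeup_page_waiters() works because pages share a small hashed table of wait queue heads, so the submit paths can afford a cheap waitqueue_active() test before forcing an unplug. Below is a rough userspace sketch of that lookup-then-check idea; the table size, the hash and the stand-in bodies are my own and only approximate what page_waitqueue() does in mm/filemap.c.

/*
 * Rough userspace sketch of the hashed per-page waitqueue idea; not
 * kernel code, and the hash below is invented.
 */
#include <stdint.h>
#include <stdio.h>

#define WAIT_TABLE_SIZE 64              /* power of two */

struct waitqueue {
	int sleepers;                   /* waitqueue_active() analogue */
};

static struct waitqueue wait_table[WAIT_TABLE_SIZE];

/* page_waitqueue() analogue: hash the page address into the table. */
static struct waitqueue *lookup_waitqueue(const void *page)
{
	uintptr_t p = (uintptr_t)page;

	p ^= p >> 12;                   /* fold out page-alignment zero bits */
	return &wait_table[p & (WAIT_TABLE_SIZE - 1)];
}

static void unplug_queues(void)
{
	puts("kicking tq_disk");        /* run_task_queue(&tq_disk) stand-in */
}

/* wakeup_page_waiters() analogue: cheap check before the unplug. */
static void wakeup_page_waiters(const void *page)
{
	if (lookup_waitqueue(page)->sleepers)
		unplug_queues();
}

int main(void)
{
	static char page[4096];

	wakeup_page_waiters(page);              /* nobody sleeping: no unplug  */
	lookup_waitqueue(page)->sleepers = 1;   /* pretend ___wait_on_page ran */
	wakeup_page_waiters(page);              /* now the queue gets kicked   */
	return 0;
}

A false positive from a hash collision merely causes an extra unplug, which is harmless; a false negative is exactly what the barrier-ordered waitqueue_active() checks above are there to rule out.
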
diff -urNp --exclude CVS --exclude BitKeeper x-ref/mm/swapfile.c x/mm/swapfile.c
--- x-ref/mm/swapfile.c 2003-06-12 04:47:41.000000000 +0200
+++ x/mm/swapfile.c     2003-06-12 04:47:44.000000000 +0200
@@ -984,8 +984,10 @@ asmlinkage long sys_swapon(const char * 
                goto bad_swap;
        }
 
+       get_page(virt_to_page(swap_header));
        lock_page(virt_to_page(swap_header));
        rw_swap_page_nolock(READ, SWP_ENTRY(type,0), (char *) swap_header);
+       put_page(virt_to_page(swap_header));
 
        if (!memcmp("SWAP-SPACE",swap_header->magic.magic,10))
                swap_header_version = 1;
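
The get_page()/put_page() pair added to sys_swapon(), like get_bh()/put_bh() in __submit_bh(), pins the object across the asynchronous I/O so that any later peek at its wait queue reads memory that is still allocated. The refcount sketch below is mine (plain C, invented names), not kernel code; it only illustrates the pin-before-submit, drop-after-last-peek rule.

/*
 * Minimal refcount sketch of the pin rule; not kernel code, all names
 * invented.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct object {
	atomic_int refcount;
	int waiters;                    /* b_wait / page waitqueue stand-in */
};

static struct object *object_alloc(void)
{
	struct object *o = calloc(1, sizeof(*o));

	if (!o)
		exit(1);
	atomic_init(&o->refcount, 1);   /* the submitter's own reference */
	return o;
}

static void object_get(struct object *o)        /* get_bh()/get_page() */
{
	atomic_fetch_add(&o->refcount, 1);
}

static void object_put(struct object *o)        /* put_bh()/put_page() */
{
	if (atomic_fetch_sub(&o->refcount, 1) == 1)
		free(o);
}

static void start_async_io(struct object *o)
{
	/* After this returns, the completion side may drop its
	 * reference at any time. */
	(void)o;
}

int main(void)
{
	struct object *o = object_alloc();

	o->waiters = 1;                 /* pretend a waiter queued itself    */
	object_get(o);                  /* pin across the submission         */
	start_async_io(o);
	if (o->waiters)                 /* safe: our pin keeps *o allocated  */
		puts("unplug");
	object_put(o);                  /* drop our pin                      */
	object_put(o);                  /* stands in for the completion side */
	return 0;
}

Dropping the pin before that final check would leave it reading possibly freed memory, which is the untidiness the __submit_bh() comment above is avoiding.
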