diff -urNp linux-2450/drivers/block/ll_rw_blk.c linux-2460/drivers/block/ll_rw_blk.c
--- linux-2450/drivers/block/ll_rw_blk.c
+++ linux-2460/drivers/block/ll_rw_blk.c
@@ -595,12 +595,20 @@ static struct request *__get_request_wai
 	register struct request *rq;
 	DECLARE_WAITQUEUE(wait, current);
 
-	generic_unplug_device(q);
 	add_wait_queue_exclusive(&q->wait_for_requests[rw], &wait);
 	do {
 		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (q->rq[rw].count == 0)
+		if (q->rq[rw].count == 0) {
+			/*
+			 * All we care about is not to stall if any request
+			 * has been released after we set TASK_UNINTERRUPTIBLE.
+			 * This is the most efficient place to unplug the queue
+			 * in case we hit the race and we can get the request
+			 * without waiting.
+			 */
+			generic_unplug_device(q);
-			schedule();
+			schedule_timeout(HZ);
+		}
 		spin_lock_irq(&io_request_lock);
 		rq = get_request(q, rw);
 		spin_unlock_irq(&io_request_lock);
@@ -837,8 +845,10 @@ void blkdev_release_request(struct reque
 	if (q) {
 		list_add(&req->queue, &q->rq[rw].free);
 		if (++q->rq[rw].count >= q->batch_requests &&
-		    waitqueue_active(&q->wait_for_requests[rw]))
+		    waitqueue_active(&q->wait_for_requests[rw])) {
+			smp_mb();
 			wake_up(&q->wait_for_requests[rw]);
+		}
 	}
 }
 
@@ -1210,6 +1219,11 @@ void submit_bh(int rw, struct buffer_hea
 
 	generic_make_request(rw, bh);
 
+	/* fix race condition with wait_on_buffer() */
+	smp_mb(); /* spin_unlock may have inclusive semantics */
+	if (waitqueue_active(&bh->b_wait))
+		wake_up(&bh->b_wait);
+
 	switch (rw) {
 		case WRITE:
 			kstat.pgpgout += count;
diff -urNp linux-2450/fs/buffer.c linux-2460/fs/buffer.c
--- linux-2450/fs/buffer.c
+++ linux-2460/fs/buffer.c
@@ -153,10 +153,23 @@ void __wait_on_buffer(struct buffer_head
 	get_bh(bh);
 	add_wait_queue(&bh->b_wait, &wait);
 	do {
-		run_task_queue(&tq_disk);
 		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
 		if (!buffer_locked(bh))
 			break;
+		/*
+		 * We must read tq_disk in TQ_ACTIVE after the
+		 * add_wait_queue effect is visible to other cpus.
+		 * We could unplug some lines above and it wouldn't
+		 * matter, but we can't do that right after
+		 * add_wait_queue without an smp_mb() in between,
+		 * because spin_unlock has inclusive semantics.
+		 * Doing it here is the most efficient place: we
+		 * don't do a spurious unplug if we get a racy
+		 * wakeup that makes buffer_locked return 0, and
+		 * doing it here avoids an explicit smp_mb(); we
+		 * rely on the implicit one in set_task_state.
+		 */
+		run_task_queue(&tq_disk);
 		schedule();
 	} while (buffer_locked(bh));
 	tsk->state = TASK_RUNNING;
@@ -1539,6 +1552,9 @@ static int __block_write_full_page(struc
 
 	/* Done - end_buffer_io_async will unlock */
 	SetPageUptodate(page);
+
+	wakeup_page_waiters(page);
+
 	return 0;
 
 out:
@@ -1570,6 +1586,7 @@ out:
 	} while (bh != head);
 	if (need_unlock)
 		UnlockPage(page);
+	wakeup_page_waiters(page);
 	return err;
 }
 
@@ -1797,6 +1814,8 @@ int block_read_full_page(struct page *pa
 		else
 			submit_bh(READ, bh);
 	}
+
+	wakeup_page_waiters(page);
 	return 0;
 }
 
@@ -2410,6 +2429,7 @@ int brw_page(int rw, struct page *page, 
 		submit_bh(rw, bh);
 		bh = next;
 	} while (bh != head);
+	wakeup_page_waiters(page);
 	return 0;
 }
 
diff -urNp linux-2450/fs/reiserfs/inode.c linux-2460/fs/reiserfs/inode.c
--- linux-2450/fs/reiserfs/inode.c
+++ linux-2460/fs/reiserfs/inode.c
@@ -1993,6 +1993,7 @@ static int reiserfs_write_full_page(stru
      */
     if (nr) {
         submit_bh_for_writepage(arr, nr) ;
+        wakeup_page_waiters(page);
     } else {
         UnlockPage(page) ;
     }
diff -urNp linux-2450/include/linux/pagemap.h linux-2460/include/linux/pagemap.h
--- linux-2450/include/linux/pagemap.h
+++ linux-2460/include/linux/pagemap.h
@@ -97,6 +97,8 @@ static inline void wait_on_page(struct p
 	___wait_on_page(page);
 }
 
+extern void wakeup_page_waiters(struct page * page);
+
 /*
  * Returns locked page at given index in given cache, creating it if needed.
  */
diff -urNp linux-2450/kernel/ksyms.c linux-2460/kernel/ksyms.c
--- linux-2450/kernel/ksyms.c
+++ linux-2460/kernel/ksyms.c
@@ -320,6 +320,7 @@ EXPORT_SYMBOL(filemap_fdatasync);
 EXPORT_SYMBOL(filemap_fdatawait);
 EXPORT_SYMBOL(lock_page);
 EXPORT_SYMBOL(unlock_page);
+EXPORT_SYMBOL_GPL(wakeup_page_waiters);
 
 /* device registration */
 EXPORT_SYMBOL(register_chrdev);
diff -urNp linux-2450/mm/filemap.c linux-2460/mm/filemap.c
--- linux-2450/mm/filemap.c
+++ linux-2460/mm/filemap.c
@@ -810,6 +810,20 @@ static inline wait_queue_head_t *page_waitqueue(struct page *page)
 	return &wait[hash];
 }
 
+/*
+ * This must be called after every submit_bh with end_io
+ * callbacks that would result in the blkdev layer waking
+ * up the page after a queue unplug.
+ */
+void wakeup_page_waiters(struct page * page)
+{
+	wait_queue_head_t * head;
+
+	head = page_waitqueue(page);
+	if (waitqueue_active(head))
+		wake_up(head);
+}
+
 /*
  * Wait for a page to get unlocked.
 *
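The barrier pairings the patch installs (set_current_state()/set_task_state() on the waiter side against smp_mb() before waitqueue_active() on the waker side) follow the classic store-buffering discipline: each side stores its flag, issues a full barrier, then loads the other side's flag, so at least one of them must observe the other. Below is a minimal userspace sketch of that discipline. It is illustrative only, not kernel code: C11 atomics stand in for the kernel's waitqueue protocol, spinning stands in for schedule() (so the sketch cannot hang even without the fences), and every name in it is invented for the example.

/* race-sketch.c - illustrative only; compile with: cc -pthread race-sketch.c */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int condition = 0;	/* "request freed" / "buffer unlocked" */
static atomic_int waiting = 0;		/* what waitqueue_active() would report */

static void *waiter(void *arg)
{
	(void)arg;
	/* add_wait_queue(): publish that we are waiting... */
	atomic_store_explicit(&waiting, 1, memory_order_relaxed);
	/*
	 * ...and order that store before re-checking the condition.
	 * This is the job of the implicit full barrier in
	 * set_current_state()/set_task_state() in the hunks above.
	 */
	atomic_thread_fence(memory_order_seq_cst);
	while (!atomic_load_explicit(&condition, memory_order_relaxed))
		;	/* stands in for schedule(); we spin instead of sleeping */
	puts("waiter: condition seen, no wakeup lost");
	return NULL;
}

static void *waker(void *arg)
{
	(void)arg;
	/* blkdev_release_request(): make the resource available... */
	atomic_store_explicit(&condition, 1, memory_order_relaxed);
	/*
	 * ...and order that store before checking for waiters.  This
	 * is the smp_mb() the patch adds: without it, both threads
	 * can read the other's flag as 0 and the wakeup is lost.
	 */
	atomic_thread_fence(memory_order_seq_cst);
	if (atomic_load_explicit(&waiting, memory_order_relaxed))
		puts("waker: waiter seen, wake_up() would run");
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, waiter, NULL);
	pthread_create(&b, NULL, waker, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}

With both fences in place, the outcome where the waiter misses condition == 1 and the waker simultaneously misses waiting == 1 is impossible, which is exactly the guarantee the waitqueue code above relies on; a plain spin_unlock() cannot provide it because, as the patch comments put it, unlock has only inclusive (one-way) semantics.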