-commit 37b652ec6445be99d0193047d1eda129a1a315d3
-Author: Dave Chinner <dchinner@redhat.com>
-Date: Thu Aug 25 07:17:01 2011 +0000
-
- xfs: don't serialise direct IO reads on page cache checks
-
- There is no need to grab the i_mutex of the IO lock in exclusive
- mode if we don't need to invalidate the page cache. Taking these
- locks on every direct IO effective serialises them as taking the IO
- lock in exclusive mode has to wait for all shared holders to drop
- the lock. That only happens when IO is complete, so effective it
- prevents dispatch of concurrent direct IO reads to the same inode.
-
- Fix this by taking the IO lock shared to check the page cache state,
- and only then drop it and take the IO lock exclusively if there is
- work to be done. Hence for the normal direct IO case, no exclusive
- locking will occur.
-
- Signed-off-by: Dave Chinner <dchinner@redhat.com>
- Tested-by: Joern Engel <joern@logfs.org>
- Reviewed-by: Christoph Hellwig <hch@lst.de>
- Signed-off-by: Alex Elder <aelder@sgi.com>
-
-diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
-index 7f7b424..8fd4a07 100644
---- a/fs/xfs/linux-2.6/xfs_file.c
-+++ b/fs/xfs/linux-2.6/xfs_file.c
-@@ -317,7 +317,19 @@ xfs_file_aio_read(
- if (XFS_FORCED_SHUTDOWN(mp))
- return -EIO;
-
-- if (unlikely(ioflags & IO_ISDIRECT)) {
-+ /*
-+ * Locking is a bit tricky here. If we take an exclusive lock
-+ * for direct IO, we effectively serialise all new concurrent
-+ * read IO to this file and block it behind IO that is currently in
-+ * progress because IO in progress holds the IO lock shared. We only
-+ * need to hold the lock exclusive to blow away the page cache, so
-+ * only take lock exclusively if the page cache needs invalidation.
-+ * This allows the normal direct IO case of no page cache pages to
-+ * proceeed concurrently without serialisation.
-+ */
-+ xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-+ if ((ioflags & IO_ISDIRECT) && inode->i_mapping->nrpages) {
-+ xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
- xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
-
- if (inode->i_mapping->nrpages) {
-@@ -330,8 +342,7 @@ xfs_file_aio_read(
- }
- }
- xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
-- } else
-- xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-+ }
-
- trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
-
-
-
-
-Start the periodic sync workers only after we have finished xfs_mountfs
-and thus fully set up the filesystem structures. Without this we can
-call into xfs_qm_sync before the quotainfo strucute is set up if the
-mount takes unusually long, and probably hit other incomplete states
-as well.
-
-Also clean up the xfs_fs_fill_super error path by using consistent
-label names, and removing an impossible to reach case.
-
-Signed-off-by: Christoph Hellwig <hch@lst.de>
-Reported-by: Arkadiusz Miskiewicz <arekm@maven.pl>
-Reviewed-by: Alex Elder <aelder@sgi.com>
-
-diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
-index a1a881e..3ebb458 100644
---- a/fs/xfs/linux-2.6/xfs_super.c
-+++ b/fs/xfs/linux-2.6/xfs_super.c
-@@ -1412,37 +1412,35 @@ xfs_fs_fill_super(
- sb->s_time_gran = 1;
- set_posix_acl_flag(sb);
-
-- error = xfs_syncd_init(mp);
-- if (error)
-- goto out_filestream_unmount;
--
- xfs_inode_shrinker_register(mp);
-
- error = xfs_mountfs(mp);
- if (error)
-- goto out_syncd_stop;
-+ goto out_filestream_unmount;
-+
-+ error = xfs_syncd_init(mp);
-+ if (error)
-+ goto out_unmount;
-
- root = igrab(VFS_I(mp->m_rootip));
- if (!root) {
- error = ENOENT;
-- goto fail_unmount;
-+ goto out_syncd_stop;
- }
- if (is_bad_inode(root)) {
- error = EINVAL;
-- goto fail_vnrele;
-+ goto out_syncd_stop;
- }
- sb->s_root = d_alloc_root(root);
- if (!sb->s_root) {
- error = ENOMEM;
-- goto fail_vnrele;
-+ goto out_iput;
- }
-
- return 0;
-
-- out_syncd_stop:
-- xfs_inode_shrinker_unregister(mp);
-- xfs_syncd_stop(mp);
- out_filestream_unmount:
-+ xfs_inode_shrinker_unregister(mp);
- xfs_filestream_unmount(mp);
- out_free_sb:
- xfs_freesb(mp);
-@@ -1456,17 +1454,12 @@ xfs_fs_fill_super(
- out:
- return -error;
-
-- fail_vnrele:
-- if (sb->s_root) {
-- dput(sb->s_root);
-- sb->s_root = NULL;
-- } else {
-- iput(root);
-- }
--
-- fail_unmount:
-- xfs_inode_shrinker_unregister(mp);
-+ out_iput:
-+ iput(root);
-+ out_syncd_stop:
- xfs_syncd_stop(mp);
-+ out_unmount:
-+ xfs_inode_shrinker_unregister(mp);
-
- /*
- * Blow away any referenced inode in the filestreams cache.
-
-_______________________________________________
-xfs mailing list
-xfs@oss.sgi.com
-http://oss.sgi.com/mailman/listinfo/xfs
-
-
-From: Dave Chinner <dchinner@redhat.com>
-
-commit 1d8c95a363bf8cd4d4182dd19c01693b635311c2 upstream
-
-
-xfs: use a cursor for bulk AIL insertion
-
-Delayed logging can insert tens of thousands of log items into the
-AIL at the same LSN. When the committing of log commit records
-occur, we can get insertions occurring at an LSN that is not at the
-end of the AIL. If there are thousands of items in the AIL on the
-tail LSN, each insertion has to walk the AIL to find the correct
-place to insert the new item into the AIL. This can consume large
-amounts of CPU time and block other operations from occurring while
-the traversals are in progress.
-
-To avoid this repeated walk, use a AIL cursor to record
-where we should be inserting the new items into the AIL without
-having to repeat the walk. The cursor infrastructure already
-provides this functionality for push walks, so is a simple extension
-of existing code. While this will not avoid the initial walk, it
-will avoid repeating it tens of thousands of times during a single
-checkpoint commit.
-
-This version includes logic improvements from Christoph Hellwig.
-
-Signed-off-by: Dave Chinner <dchinner@redhat.com>
-Reviewed-by: Christoph Hellwig <hch@lst.de>
-Signed-off-by: Alex Elder <aelder@sgi.com>
-
-diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
-index c83f63b..efc147f 100644
---- a/fs/xfs/xfs_trans.c
-+++ b/fs/xfs/xfs_trans.c
-@@ -1426,6 +1426,7 @@ xfs_trans_committed(
- static inline void
- xfs_log_item_batch_insert(
- struct xfs_ail *ailp,
-+ struct xfs_ail_cursor *cur,
- struct xfs_log_item **log_items,
- int nr_items,
- xfs_lsn_t commit_lsn)
-@@ -1434,7 +1435,7 @@ xfs_log_item_batch_insert(
-
- spin_lock(&ailp->xa_lock);
- /* xfs_trans_ail_update_bulk drops ailp->xa_lock */
-- xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn);
-+ xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn);
-
- for (i = 0; i < nr_items; i++)
- IOP_UNPIN(log_items[i], 0);
-@@ -1452,6 +1453,13 @@ xfs_log_item_batch_insert(
- * as an iclog write error even though we haven't started any IO yet. Hence in
- * this case all we need to do is IOP_COMMITTED processing, followed by an
- * IOP_UNPIN(aborted) call.
-+ *
-+ * The AIL cursor is used to optimise the insert process. If commit_lsn is not
-+ * at the end of the AIL, the insert cursor avoids the need to walk
-+ * the AIL to find the insertion point on every xfs_log_item_batch_insert()
-+ * call. This saves a lot of needless list walking and is a net win, even
-+ * though it slightly increases that amount of AIL lock traffic to set it up
-+ * and tear it down.
- */
- void
- xfs_trans_committed_bulk(
-@@ -1463,8 +1471,13 @@ xfs_trans_committed_bulk(
- #define LOG_ITEM_BATCH_SIZE 32
- struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE];
- struct xfs_log_vec *lv;
-+ struct xfs_ail_cursor cur;
- int i = 0;
-
-+ spin_lock(&ailp->xa_lock);
-+ xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn);
-+ spin_unlock(&ailp->xa_lock);
-+
- /* unpin all the log items */
- for (lv = log_vector; lv; lv = lv->lv_next ) {
- struct xfs_log_item *lip = lv->lv_item;
-@@ -1493,7 +1506,9 @@ xfs_trans_committed_bulk(
- /*
- * Not a bulk update option due to unusual item_lsn.
- * Push into AIL immediately, rechecking the lsn once
-- * we have the ail lock. Then unpin the item.
-+ * we have the ail lock. Then unpin the item. This does
-+ * not affect the AIL cursor the bulk insert path is
-+ * using.
- */
- spin_lock(&ailp->xa_lock);
- if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0)
-@@ -1507,7 +1522,7 @@ xfs_trans_committed_bulk(
- /* Item is a candidate for bulk AIL insert. */
- log_items[i++] = lv->lv_item;
- if (i >= LOG_ITEM_BATCH_SIZE) {
-- xfs_log_item_batch_insert(ailp, log_items,
-+ xfs_log_item_batch_insert(ailp, &cur, log_items,
- LOG_ITEM_BATCH_SIZE, commit_lsn);
- i = 0;
- }
-@@ -1515,7 +1530,11 @@ xfs_trans_committed_bulk(
-
- /* make sure we insert the remainder! */
- if (i)
-- xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn);
-+ xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn);
-+
-+ spin_lock(&ailp->xa_lock);
-+ xfs_trans_ail_cursor_done(ailp, &cur);
-+ spin_unlock(&ailp->xa_lock);
- }
-
- /*
-diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
-index 5fc2380..9a69dc0 100644
---- a/fs/xfs/xfs_trans_ail.c
-+++ b/fs/xfs/xfs_trans_ail.c
-@@ -272,9 +272,9 @@ xfs_trans_ail_cursor_clear(
- }
-
- /*
-- * Return the item in the AIL with the current lsn.
-- * Return the current tree generation number for use
-- * in calls to xfs_trans_next_ail().
-+ * Initialise the cursor to the first item in the AIL with the given @lsn.
-+ * This searches the list from lowest LSN to highest. Pass a @lsn of zero
-+ * to initialise the cursor to the first item in the AIL.
- */
- xfs_log_item_t *
- xfs_trans_ail_cursor_first(
-@@ -300,31 +300,97 @@ out:
- }
-
- /*
-- * splice the log item list into the AIL at the given LSN.
-+ * Initialise the cursor to the last item in the AIL with the given @lsn.
-+ * This searches the list from highest LSN to lowest. If there is no item with
-+ * the value of @lsn, then it sets the cursor to the last item with an LSN lower
-+ * than @lsn.
-+ */
-+static struct xfs_log_item *
-+__xfs_trans_ail_cursor_last(
-+ struct xfs_ail *ailp,
-+ xfs_lsn_t lsn)
-+{
-+ xfs_log_item_t *lip;
-+
-+ list_for_each_entry_reverse(lip, &ailp->xa_ail, li_ail) {
-+ if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0)
-+ return lip;
-+ }
-+ return NULL;
-+}
-+
-+/*
-+ * Initialise the cursor to the last item in the AIL with the given @lsn.
-+ * This searches the list from highest LSN to lowest.
-+ */
-+struct xfs_log_item *
-+xfs_trans_ail_cursor_last(
-+ struct xfs_ail *ailp,
-+ struct xfs_ail_cursor *cur,
-+ xfs_lsn_t lsn)
-+{
-+ xfs_trans_ail_cursor_init(ailp, cur);
-+ cur->item = __xfs_trans_ail_cursor_last(ailp, lsn);
-+ return cur->item;
-+}
-+
-+/*
-+ * splice the log item list into the AIL at the given LSN. We splice to the
-+ * tail of the given LSN to maintain insert order for push traversals. The
-+ * cursor is optional, allowing repeated updates to the same LSN to avoid
-+ * repeated traversals.
- */
- static void
- xfs_ail_splice(
-- struct xfs_ail *ailp,
-- struct list_head *list,
-- xfs_lsn_t lsn)
-+ struct xfs_ail *ailp,
-+ struct xfs_ail_cursor *cur,
-+ struct list_head *list,
-+ xfs_lsn_t lsn)
- {
-- xfs_log_item_t *next_lip;
-+ struct xfs_log_item *lip = cur ? cur->item : NULL;
-+ struct xfs_log_item *next_lip;
-
-- /* If the list is empty, just insert the item. */
-- if (list_empty(&ailp->xa_ail)) {
-- list_splice(list, &ailp->xa_ail);
-- return;
-+ /*
-+ * Get a new cursor if we don't have a placeholder or the existing one
-+ * has been invalidated.
-+ */
-+ if (!lip || (__psint_t)lip & 1) {
-+ lip = __xfs_trans_ail_cursor_last(ailp, lsn);
-+
-+ if (!lip) {
-+ /* The list is empty, so just splice and return. */
-+ if (cur)
-+ cur->item = NULL;
-+ list_splice(list, &ailp->xa_ail);
-+ return;
-+ }
- }
-
-- list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) {
-- if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)
-- break;
-+ /*
-+ * Our cursor points to the item we want to insert _after_, so we have
-+ * to update the cursor to point to the end of the list we are splicing
-+ * in so that it points to the correct location for the next splice.
-+ * i.e. before the splice
-+ *
-+ * lsn -> lsn -> lsn + x -> lsn + x ...
-+ * ^
-+ * | cursor points here
-+ *
-+ * After the splice we have:
-+ *
-+ * lsn -> lsn -> lsn -> lsn -> .... -> lsn -> lsn + x -> lsn + x ...
-+ * ^ ^
-+ * | cursor points here | needs to move here
-+ *
-+ * So we set the cursor to the last item in the list to be spliced
-+ * before we execute the splice, resulting in the cursor pointing to
-+ * the correct item after the splice occurs.
-+ */
-+ if (cur) {
-+ next_lip = list_entry(list->prev, struct xfs_log_item, li_ail);
-+ cur->item = next_lip;
- }
--
-- ASSERT(&next_lip->li_ail == &ailp->xa_ail ||
-- XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0);
--
-- list_splice_init(list, &next_lip->li_ail);
-+ list_splice(list, &lip->li_ail);
- }
-
- /*
-@@ -645,6 +711,7 @@ xfs_trans_unlocked_item(
- void
- xfs_trans_ail_update_bulk(
- struct xfs_ail *ailp,
-+ struct xfs_ail_cursor *cur,
- struct xfs_log_item **log_items,
- int nr_items,
- xfs_lsn_t lsn) __releases(ailp->xa_lock)
-@@ -674,7 +741,7 @@ xfs_trans_ail_update_bulk(
- list_add(&lip->li_ail, &tmp);
- }
-
-- xfs_ail_splice(ailp, &tmp, lsn);
-+ xfs_ail_splice(ailp, cur, &tmp, lsn);
-
- if (!mlip_changed) {
- spin_unlock(&ailp->xa_lock);
-diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
-index 6b164e9..c0cb408 100644
---- a/fs/xfs/xfs_trans_priv.h
-+++ b/fs/xfs/xfs_trans_priv.h
-@@ -82,6 +82,7 @@ struct xfs_ail {
- extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */
-
- void xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
-+ struct xfs_ail_cursor *cur,
- struct xfs_log_item **log_items, int nr_items,
- xfs_lsn_t lsn) __releases(ailp->xa_lock);
- static inline void
-@@ -90,7 +91,7 @@ xfs_trans_ail_update(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn) __releases(ailp->xa_lock)
- {
-- xfs_trans_ail_update_bulk(ailp, &lip, 1, lsn);
-+ xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn);
- }
-
- void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp,
-@@ -111,10 +112,13 @@ xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp);
- void xfs_trans_unlocked_item(struct xfs_ail *,
- xfs_log_item_t *);
-
--struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
-+struct xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
- struct xfs_ail_cursor *cur,
- xfs_lsn_t lsn);
--struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
-+struct xfs_log_item * xfs_trans_ail_cursor_last(struct xfs_ail *ailp,
-+ struct xfs_ail_cursor *cur,
-+ xfs_lsn_t lsn);
-+struct xfs_log_item * xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
- struct xfs_ail_cursor *cur);
- void xfs_trans_ail_cursor_done(struct xfs_ail *ailp,
- struct xfs_ail_cursor *cur);
-
-_______________________________________________
-xfs mailing list
-xfs@oss.sgi.com
-http://oss.sgi.com/mailman/listinfo/xfs
-
-
-commit bc6e588a8971aa74c02e42db4d6e0248679f3738 upstream
-
-If an item was locked we should not update xa_last_pushed_lsn and thus skip
-it when restarting the AIL scan as we need to be able to lock and write it
-out as soon as possible. Otherwise heavy lock contention might starve AIL
-pushing too easily, especially given the larger backoff once we moved
-xa_last_pushed_lsn all the way to the target lsn.
-
-Signed-off-by: Christoph Hellwig <hch@lst.de>
-Reported-by: Stefan Priebe <s.priebe@profihost.ag>
-Tested-by: Stefan Priebe <s.priebe@profihost.ag>
-
-Index: xfs/fs/xfs/xfs_trans_ail.c
-===================================================================
---- xfs.orig/fs/xfs/xfs_trans_ail.c 2011-10-14 14:42:03.004395373 +0200
-+++ xfs/fs/xfs/xfs_trans_ail.c 2011-10-14 14:42:22.687898198 +0200
-@@ -491,7 +491,6 @@ xfs_ail_worker(
-
- case XFS_ITEM_LOCKED:
- XFS_STATS_INC(xs_push_ail_locked);
-- ailp->xa_last_pushed_lsn = lsn;
- stuck++;
- break;
-
-
-_______________________________________________
-xfs mailing list
-xfs@oss.sgi.com
-http://oss.sgi.com/mailman/listinfo/xfs
-
-
-commit 17b38471c3c07a49f0bbc2ecc2e92050c164e226 upstream
-
-We need to check for pinned buffers even in .iop_pushbuf given that inode
-items flush into the same buffers that may be pinned directly due operations
-on the unlinked inode list operating directly on buffers. To do this add a
-return value to .iop_pushbuf that tells the AIL push about this and use
-the existing log force mechanisms to unpin it.
-
-Signed-off-by: Christoph Hellwig <hch@lst.de>
-Reported-by: Stefan Priebe <s.priebe@profihost.ag>
-Tested-by: Stefan Priebe <s.priebe@profihost.ag>