--- linux-2.6.32/drivers/infiniband/Kconfig~ 2009-12-05 00:26:03.663774916 +0100 +++ linux-2.6.32/drivers/infiniband/Kconfig 2009-12-05 00:26:05.914179759 +0100 @@ -37,7 +37,6 @@ config INFINIBAND_ADDR_TRANS bool depends on INET - depends on !(INFINIBAND = y && IPV6 = m) default y source "drivers/infiniband/hw/mthca/Kconfig" --- linux-2.6.33/scripts/mod/modpost.c~ 2010-02-24 19:52:17.000000000 +0100 +++ linux-2.6.33/scripts/mod/modpost.c 2010-03-07 14:26:47.242168558 +0100 @@ -15,7 +15,8 @@ #include #include #include "modpost.h" -#include "../../include/generated/autoconf.h" +// PLD architectures don't use CONFIG_SYMBOL_PREFIX +//#include "../../include/generated/autoconf.h" #include "../../include/linux/license.h" /* Some toolchains use a `_' prefix for all user symbols. */ commit 87b09f1f25cd1e01d7c50bf423c7fe33027d7511 Author: stephen hemminger Date: Fri Feb 12 06:58:00 2010 +0000 sky2: dont enable PME legacy mode This bit is not changed by vendor driver, and should be left alone. The documentation implies this a debug bit. 0 = WAKE# only asserted when VMAIN not available 1 = WAKE# is depend on wake events and independent of VMAIN. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller diff --git b/drivers/net/sky2.c a/drivers/net/sky2.c index 2494842..edf37aa 100644 --- b/drivers/net/sky2.c +++ a/drivers/net/sky2.c @@ -733,6 +733,7 @@ static void sky2_wol_init(struct sky2_port *sky2) unsigned port = sky2->port; enum flow_control save_mode; u16 ctrl; + u32 reg1; /* Bring hardware out of reset */ sky2_write16(hw, B0_CTST, CS_RST_CLR); @@ -786,6 +787,11 @@ static void sky2_wol_init(struct sky2_port *sky2) /* Disable PiG firmware */ sky2_write16(hw, B0_CTST, Y2_HW_WOL_OFF); + /* Turn on legacy PCI-Express PME mode */ + reg1 = sky2_pci_read32(hw, PCI_DEV_REG1); + reg1 |= PCI_Y2_PME_LEGACY; + sky2_pci_write32(hw, PCI_DEV_REG1, reg1); + /* block receiver */ sky2_write8(hw, SK_REG(port, RX_GMF_CTRL_T), GMF_RST_SET); } Date: Mon, 11 Jul 2011 09:59:57 -0400 From: Christoph Hellwig To: xfs@oss.sgi.com Cc: arekm@maven.pl Subject: [PATCH] xfs: start periodic workers later Message-ID: <20110711135957.GA23737@infradead.org> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: Mutt/1.5.21 (2010-09-15) Start the periodic sync workers only after we have finished xfs_mountfs and thus fully set up the filesystem structures. Without this we can call into xfs_qm_sync before the quotainfo structure is set up if the mount takes unusually long, and probably hit other incomplete states as well. Also clean up the xfs_fs_fill_super error path by using consistent label names, and removing an impossible to reach case. 
Reported-by: Arkadiusz Miskiewicz Signed-off-by: Christoph Hellwig Index: xfs/fs/xfs/linux-2.6/xfs_super.c =================================================================== --- xfs.orig/fs/xfs/linux-2.6/xfs_super.c 2011-07-11 12:02:56.762758869 +0200 +++ xfs/fs/xfs/linux-2.6/xfs_super.c 2011-07-11 12:09:20.817344934 +0200 @@ -1411,37 +1411,35 @@ xfs_fs_fill_super( sb->s_time_gran = 1; set_posix_acl_flag(sb); - error = xfs_syncd_init(mp); - if (error) - goto out_filestream_unmount; - xfs_inode_shrinker_register(mp); error = xfs_mountfs(mp); if (error) - goto out_syncd_stop; + goto out_filestream_unmount; + + error = xfs_syncd_init(mp); + if (error) + goto out_unmount; root = igrab(VFS_I(mp->m_rootip)); if (!root) { error = ENOENT; - goto fail_unmount; + goto out_syncd_stop; } if (is_bad_inode(root)) { error = EINVAL; - goto fail_vnrele; + goto out_syncd_stop; } sb->s_root = d_alloc_root(root); if (!sb->s_root) { error = ENOMEM; - goto fail_vnrele; + goto out_iput; } return 0; - out_syncd_stop: - xfs_inode_shrinker_unregister(mp); - xfs_syncd_stop(mp); out_filestream_unmount: + xfs_inode_shrinker_unregister(mp); xfs_filestream_unmount(mp); out_free_sb: xfs_freesb(mp); @@ -1455,17 +1453,12 @@ xfs_fs_fill_super( out: return -error; - fail_vnrele: - if (sb->s_root) { - dput(sb->s_root); - sb->s_root = NULL; - } else { - iput(root); - } - - fail_unmount: - xfs_inode_shrinker_unregister(mp); + out_iput: + iput(root); + out_syncd_stop: xfs_syncd_stop(mp); + out_unmount: + xfs_inode_shrinker_unregister(mp); /* * Blow away any referenced inode in the filestreams cache. 
On Sat, 2 Jul 2011, Andi Kleen wrote: > > The problem is that blk_peek_request() calls scsi_prep_fn(), which > > does this: > > > > struct scsi_device *sdev = q->queuedata; > > int ret = BLKPREP_KILL; > > > > if (req->cmd_type == REQ_TYPE_BLOCK_PC) > > ret = scsi_setup_blk_pc_cmnd(sdev, req); > > return scsi_prep_return(q, req, ret); > > > > It doesn't check to see if sdev is NULL, nor does > > scsi_setup_blk_pc_cmnd(). That accounts for this error: > > I actually added a NULL check in scsi_setup_blk_pc_cmnd early on, > but that just caused RCU CPU stalls afterwards and then eventually > a hung system. The RCU problem is likely to be a separate issue. It might even be a result of the use-after-free problem with the elevator. At any rate, it's clear that the crash in the refcounting log you posted occurred because scsi_setup_blk_pc_cmnd() called scsi_prep_state_check(), which tried to dereference the NULL pointer. Would you like to try this patch to see if it fixes the problem? As I said before, I'm not certain it's the best thing to do, but it worked on my system. 
Alan Stern Index: usb-3.0/drivers/scsi/scsi_lib.c =================================================================== --- usb-3.0.orig/drivers/scsi/scsi_lib.c +++ usb-3.0/drivers/scsi/scsi_lib.c @@ -1247,6 +1247,8 @@ int scsi_prep_fn(struct request_queue *q struct scsi_device *sdev = q->queuedata; int ret = BLKPREP_KILL; + if (!sdev) + return ret; if (req->cmd_type == REQ_TYPE_BLOCK_PC) ret = scsi_setup_blk_pc_cmnd(sdev, req); return scsi_prep_return(q, req, ret); Index: usb-3.0/drivers/scsi/scsi_sysfs.c =================================================================== --- usb-3.0.orig/drivers/scsi/scsi_sysfs.c +++ usb-3.0/drivers/scsi/scsi_sysfs.c @@ -322,6 +322,8 @@ static void scsi_device_dev_release_user kfree(evt); } + /* Freeing the queue signals to block that we're done */ + scsi_free_queue(sdev->request_queue); blk_put_queue(sdev->request_queue); /* NULL queue means the device can't be used */ sdev->request_queue = NULL; @@ -936,8 +938,6 @@ void __scsi_remove_device(struct scsi_de /* cause the request function to reject all I/O requests */ sdev->request_queue->queuedata = NULL; - /* Freeing the queue signals to block that we're done */ - scsi_free_queue(sdev->request_queue); put_device(dev); } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ commit 3326c784c9f492e988617d93f647ae0cfd4c8d09 Author: Jiri Pirko Date: Wed Jul 20 04:54:38 2011 +0000 forcedeth: do vlan cleanup - unify vlan and nonvlan rx path - kill np->vlangrp and nv_vlan_rx_register - allow to turn on/off rx vlan accel via ethtool (set_features) Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index 537b695..e64cd9c 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -820,9 +820,6 @@ struct fe_priv { struct nv_skb_map *tx_end_flip; int tx_stop; - /* vlan fields */ - struct vlan_group *vlangrp; - /* msi/msi-x fields */ u32 msi_flags; struct msix_entry msi_x_entry[NV_MSI_X_MAX_VECTORS]; @@ -2766,17 +2763,13 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit) skb->protocol = eth_type_trans(skb, dev); prefetch(skb->data); - if (likely(!np->vlangrp)) { - napi_gro_receive(&np->napi, skb); - } else { - vlanflags = le32_to_cpu(np->get_rx.ex->buflow); - if (vlanflags & NV_RX3_VLAN_TAG_PRESENT) { - vlan_gro_receive(&np->napi, np->vlangrp, - vlanflags & NV_RX3_VLAN_TAG_MASK, skb); - } else { - napi_gro_receive(&np->napi, skb); - } + vlanflags = le32_to_cpu(np->get_rx.ex->buflow); + if (vlanflags & NV_RX3_VLAN_TAG_PRESENT) { + u16 vid = vlanflags & NV_RX3_VLAN_TAG_MASK; + + __vlan_hwaccel_put_tag(skb, vid); } + napi_gro_receive(&np->napi, skb); dev->stats.rx_packets++; dev->stats.rx_bytes += len; @@ -4484,6 +4477,27 @@ static u32 nv_fix_features(struct net_device *dev, u32 features) return features; } +static void nv_vlan_mode(struct net_device *dev, u32 features) +{ + struct fe_priv *np = get_nvpriv(dev); + + spin_lock_irq(&np->lock); + + if (features & NETIF_F_HW_VLAN_RX) + np->txrxctl_bits |= NVREG_TXRXCTL_VLANSTRIP; + else + np->txrxctl_bits &= ~NVREG_TXRXCTL_VLANSTRIP; + + if (features & NETIF_F_HW_VLAN_TX) + np->txrxctl_bits |= NVREG_TXRXCTL_VLANINS; + else + np->txrxctl_bits &= ~NVREG_TXRXCTL_VLANINS; + + writel(np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl); + + spin_unlock_irq(&np->lock); +} + static int nv_set_features(struct net_device *dev, u32 features) { struct fe_priv *np = netdev_priv(dev); @@ -4504,6 +4518,9 @@ static int nv_set_features(struct net_device *dev, u32 features) spin_unlock_irq(&np->lock); } + if (changed & 
(NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX)) + nv_vlan_mode(dev, features); + return 0; } @@ -4879,29 +4896,6 @@ static const struct ethtool_ops ops = { .self_test = nv_self_test, }; -static void nv_vlan_rx_register(struct net_device *dev, struct vlan_group *grp) -{ - struct fe_priv *np = get_nvpriv(dev); - - spin_lock_irq(&np->lock); - - /* save vlan group */ - np->vlangrp = grp; - - if (grp) { - /* enable vlan on MAC */ - np->txrxctl_bits |= NVREG_TXRXCTL_VLANSTRIP | NVREG_TXRXCTL_VLANINS; - } else { - /* disable vlan on MAC */ - np->txrxctl_bits &= ~NVREG_TXRXCTL_VLANSTRIP; - np->txrxctl_bits &= ~NVREG_TXRXCTL_VLANINS; - } - - writel(np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl); - - spin_unlock_irq(&np->lock); -} - /* The mgmt unit and driver use a semaphore to access the phy during init */ static int nv_mgmt_acquire_sema(struct net_device *dev) { @@ -5208,7 +5202,6 @@ static const struct net_device_ops nv_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = nv_set_mac_address, .ndo_set_multicast_list = nv_set_multicast, - .ndo_vlan_rx_register = nv_vlan_rx_register, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = nv_poll_controller, #endif @@ -5226,7 +5219,6 @@ static const struct net_device_ops nv_netdev_ops_optimized = { .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = nv_set_mac_address, .ndo_set_multicast_list = nv_set_multicast, - .ndo_vlan_rx_register = nv_vlan_rx_register, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = nv_poll_controller, #endif commit 0891b0e08937aaec2c4734acb94c5ff8042313bb Author: Jiri Pirko Date: Tue Jul 26 10:19:28 2011 +0000 forcedeth: fix vlans For some reason, when rxaccel is disabled, NV_RX3_VLAN_TAG_PRESENT is still set and some pseudorandom vids appear. So check for NETIF_F_HW_VLAN_RX as well. Also set correctly hw_features and set vlan mode on probe. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index e64cd9c..e55df30 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -2764,7 +2764,14 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit) prefetch(skb->data); vlanflags = le32_to_cpu(np->get_rx.ex->buflow); - if (vlanflags & NV_RX3_VLAN_TAG_PRESENT) { + + /* + * There's need to check for NETIF_F_HW_VLAN_RX here. + * Even if vlan rx accel is disabled, + * NV_RX3_VLAN_TAG_PRESENT is pseudo randomly set. + */ + if (dev->features & NETIF_F_HW_VLAN_RX && + vlanflags & NV_RX3_VLAN_TAG_PRESENT) { u16 vid = vlanflags & NV_RX3_VLAN_TAG_MASK; __vlan_hwaccel_put_tag(skb, vid); @@ -5331,15 +5338,16 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i np->txrxctl_bits |= NVREG_TXRXCTL_RXCHECK; dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO | NETIF_F_RXCSUM; - dev->features |= dev->hw_features; } np->vlanctl_bits = 0; if (id->driver_data & DEV_HAS_VLAN) { np->vlanctl_bits = NVREG_VLANCONTROL_ENABLE; - dev->features |= NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX; + dev->hw_features |= NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX; } + dev->features |= dev->hw_features; + np->pause_flags = NV_PAUSEFRAME_RX_CAPABLE | NV_PAUSEFRAME_RX_REQ | NV_PAUSEFRAME_AUTONEG; if ((id->driver_data & DEV_HAS_PAUSEFRAME_TX_V1) || (id->driver_data & DEV_HAS_PAUSEFRAME_TX_V2) || @@ -5607,6 +5615,8 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i goto out_error; } + nv_vlan_mode(dev, dev->features); + netif_carrier_off(dev); dev_info(&pci_dev->dev, "ifname %s, PHY OUI 0x%x @ %d, addr %pM\n", --- linux-3.0/scripts/kconfig/lxdialog/check-lxdialog.sh~ 2011-07-22 04:17:23.000000000 +0200 +++ linux-3.0/scripts/kconfig/lxdialog/check-lxdialog.sh 2011-08-25 21:26:04.799150642 +0200 @@ -9,6 +9,12 @@ $cc -print-file-name=lib${lib}.${ext} | grep -q / if [ $? 
-eq 0 ]; then echo "-l${lib}" + for libt in tinfow tinfo ; do + $cc -print-file-name=lib${libt}.${ext} | grep -q / + if [ $? -eq 0 ]; then + echo "-l${libt}" + fi + done exit fi done commit 1d8c95a363bf8cd4d4182dd19c01693b635311c2 Author: Dave Chinner Date: Mon Jul 18 03:40:16 2011 +0000 xfs: use a cursor for bulk AIL insertion Delayed logging can insert tens of thousands of log items into the AIL at the same LSN. When the committing of log commit records occurs, we can get insertions occurring at an LSN that is not at the end of the AIL. If there are thousands of items in the AIL on the tail LSN, each insertion has to walk the AIL to find the correct place to insert the new item into the AIL. This can consume large amounts of CPU time and block other operations from occurring while the traversals are in progress. To avoid this repeated walk, use an AIL cursor to record where we should be inserting the new items into the AIL without having to repeat the walk. The cursor infrastructure already provides this functionality for push walks, so this is a simple extension of existing code. While this will not avoid the initial walk, it will avoid repeating it tens of thousands of times during a single checkpoint commit. This version includes logic improvements from Christoph Hellwig. 
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index c83f63b..efc147f 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1426,6 +1426,7 @@ xfs_trans_committed( static inline void xfs_log_item_batch_insert( struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, struct xfs_log_item **log_items, int nr_items, xfs_lsn_t commit_lsn) @@ -1434,7 +1435,7 @@ xfs_log_item_batch_insert( spin_lock(&ailp->xa_lock); /* xfs_trans_ail_update_bulk drops ailp->xa_lock */ - xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn); + xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn); for (i = 0; i < nr_items; i++) IOP_UNPIN(log_items[i], 0); @@ -1452,6 +1453,13 @@ xfs_log_item_batch_insert( * as an iclog write error even though we haven't started any IO yet. Hence in * this case all we need to do is IOP_COMMITTED processing, followed by an * IOP_UNPIN(aborted) call. + * + * The AIL cursor is used to optimise the insert process. If commit_lsn is not + * at the end of the AIL, the insert cursor avoids the need to walk + * the AIL to find the insertion point on every xfs_log_item_batch_insert() + * call. This saves a lot of needless list walking and is a net win, even + * though it slightly increases that amount of AIL lock traffic to set it up + * and tear it down. */ void xfs_trans_committed_bulk( @@ -1463,8 +1471,13 @@ xfs_trans_committed_bulk( #define LOG_ITEM_BATCH_SIZE 32 struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE]; struct xfs_log_vec *lv; + struct xfs_ail_cursor cur; int i = 0; + spin_lock(&ailp->xa_lock); + xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn); + spin_unlock(&ailp->xa_lock); + /* unpin all the log items */ for (lv = log_vector; lv; lv = lv->lv_next ) { struct xfs_log_item *lip = lv->lv_item; @@ -1493,7 +1506,9 @@ xfs_trans_committed_bulk( /* * Not a bulk update option due to unusual item_lsn. 
* Push into AIL immediately, rechecking the lsn once - * we have the ail lock. Then unpin the item. + * we have the ail lock. Then unpin the item. This does + * not affect the AIL cursor the bulk insert path is + * using. */ spin_lock(&ailp->xa_lock); if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) @@ -1507,7 +1522,7 @@ xfs_trans_committed_bulk( /* Item is a candidate for bulk AIL insert. */ log_items[i++] = lv->lv_item; if (i >= LOG_ITEM_BATCH_SIZE) { - xfs_log_item_batch_insert(ailp, log_items, + xfs_log_item_batch_insert(ailp, &cur, log_items, LOG_ITEM_BATCH_SIZE, commit_lsn); i = 0; } @@ -1515,7 +1530,11 @@ xfs_trans_committed_bulk( /* make sure we insert the remainder! */ if (i) - xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn); + xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn); + + spin_lock(&ailp->xa_lock); + xfs_trans_ail_cursor_done(ailp, &cur); + spin_unlock(&ailp->xa_lock); } /* diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 5fc2380..9a69dc0 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -272,9 +272,9 @@ xfs_trans_ail_cursor_clear( } /* - * Return the item in the AIL with the current lsn. - * Return the current tree generation number for use - * in calls to xfs_trans_next_ail(). + * Initialise the cursor to the first item in the AIL with the given @lsn. + * This searches the list from lowest LSN to highest. Pass a @lsn of zero + * to initialise the cursor to the first item in the AIL. */ xfs_log_item_t * xfs_trans_ail_cursor_first( @@ -300,31 +300,97 @@ out: } /* - * splice the log item list into the AIL at the given LSN. + * Initialise the cursor to the last item in the AIL with the given @lsn. + * This searches the list from highest LSN to lowest. If there is no item with + * the value of @lsn, then it sets the cursor to the last item with an LSN lower + * than @lsn. 
+ */ +static struct xfs_log_item * +__xfs_trans_ail_cursor_last( + struct xfs_ail *ailp, + xfs_lsn_t lsn) +{ + xfs_log_item_t *lip; + + list_for_each_entry_reverse(lip, &ailp->xa_ail, li_ail) { + if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0) + return lip; + } + return NULL; +} + +/* + * Initialise the cursor to the last item in the AIL with the given @lsn. + * This searches the list from highest LSN to lowest. + */ +struct xfs_log_item * +xfs_trans_ail_cursor_last( + struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, + xfs_lsn_t lsn) +{ + xfs_trans_ail_cursor_init(ailp, cur); + cur->item = __xfs_trans_ail_cursor_last(ailp, lsn); + return cur->item; +} + +/* + * splice the log item list into the AIL at the given LSN. We splice to the + * tail of the given LSN to maintain insert order for push traversals. The + * cursor is optional, allowing repeated updates to the same LSN to avoid + * repeated traversals. */ static void xfs_ail_splice( - struct xfs_ail *ailp, - struct list_head *list, - xfs_lsn_t lsn) + struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, + struct list_head *list, + xfs_lsn_t lsn) { - xfs_log_item_t *next_lip; + struct xfs_log_item *lip = cur ? cur->item : NULL; + struct xfs_log_item *next_lip; - /* If the list is empty, just insert the item. */ - if (list_empty(&ailp->xa_ail)) { - list_splice(list, &ailp->xa_ail); - return; + /* + * Get a new cursor if we don't have a placeholder or the existing one + * has been invalidated. + */ + if (!lip || (__psint_t)lip & 1) { + lip = __xfs_trans_ail_cursor_last(ailp, lsn); + + if (!lip) { + /* The list is empty, so just splice and return. 
*/ + if (cur) + cur->item = NULL; + list_splice(list, &ailp->xa_ail); + return; + } } - list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { - if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0) - break; + /* + * Our cursor points to the item we want to insert _after_, so we have + * to update the cursor to point to the end of the list we are splicing + * in so that it points to the correct location for the next splice. + * i.e. before the splice + * + * lsn -> lsn -> lsn + x -> lsn + x ... + * ^ + * | cursor points here + * + * After the splice we have: + * + * lsn -> lsn -> lsn -> lsn -> .... -> lsn -> lsn + x -> lsn + x ... + * ^ ^ + * | cursor points here | needs to move here + * + * So we set the cursor to the last item in the list to be spliced + * before we execute the splice, resulting in the cursor pointing to + * the correct item after the splice occurs. + */ + if (cur) { + next_lip = list_entry(list->prev, struct xfs_log_item, li_ail); + cur->item = next_lip; } - - ASSERT(&next_lip->li_ail == &ailp->xa_ail || - XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0); - - list_splice_init(list, &next_lip->li_ail); + list_splice(list, &lip->li_ail); } /* @@ -645,6 +711,7 @@ xfs_trans_unlocked_item( void xfs_trans_ail_update_bulk( struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, struct xfs_log_item **log_items, int nr_items, xfs_lsn_t lsn) __releases(ailp->xa_lock) @@ -674,7 +741,7 @@ xfs_trans_ail_update_bulk( list_add(&lip->li_ail, &tmp); } - xfs_ail_splice(ailp, &tmp, lsn); + xfs_ail_splice(ailp, cur, &tmp, lsn); if (!mlip_changed) { spin_unlock(&ailp->xa_lock); diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 6b164e9..c0cb408 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -82,6 +82,7 @@ struct xfs_ail { extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, struct xfs_log_item **log_items, int nr_items, xfs_lsn_t lsn) 
__releases(ailp->xa_lock); static inline void @@ -90,7 +91,7 @@ xfs_trans_ail_update( struct xfs_log_item *lip, xfs_lsn_t lsn) __releases(ailp->xa_lock) { - xfs_trans_ail_update_bulk(ailp, &lip, 1, lsn); + xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn); } void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp, @@ -111,10 +112,13 @@ xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp); void xfs_trans_unlocked_item(struct xfs_ail *, xfs_log_item_t *); -struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp, +struct xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp, struct xfs_ail_cursor *cur, xfs_lsn_t lsn); -struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp, +struct xfs_log_item * xfs_trans_ail_cursor_last(struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, + xfs_lsn_t lsn); +struct xfs_log_item * xfs_trans_ail_cursor_next(struct xfs_ail *ailp, struct xfs_ail_cursor *cur); void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, struct xfs_ail_cursor *cur); commit 37b652ec6445be99d0193047d1eda129a1a315d3 Author: Dave Chinner Date: Thu Aug 25 07:17:01 2011 +0000 xfs: don't serialise direct IO reads on page cache checks There is no need to grab the i_mutex of the IO lock in exclusive mode if we don't need to invalidate the page cache. Taking these locks on every direct IO effectively serialises them as taking the IO lock in exclusive mode has to wait for all shared holders to drop the lock. That only happens when IO is complete, so effectively it prevents dispatch of concurrent direct IO reads to the same inode. Fix this by taking the IO lock shared to check the page cache state, and only then drop it and take the IO lock exclusively if there is work to be done. Hence for the normal direct IO case, no exclusive locking will occur. 
Signed-off-by: Dave Chinner Tested-by: Joern Engel Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 7f7b424..8fd4a07 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -317,7 +317,19 @@ xfs_file_aio_read( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - if (unlikely(ioflags & IO_ISDIRECT)) { + /* + * Locking is a bit tricky here. If we take an exclusive lock + * for direct IO, we effectively serialise all new concurrent + * read IO to this file and block it behind IO that is currently in + * progress because IO in progress holds the IO lock shared. We only + * need to hold the lock exclusive to blow away the page cache, so + * only take lock exclusively if the page cache needs invalidation. + * This allows the normal direct IO case of no page cache pages to + * proceeed concurrently without serialisation. + */ + xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); + if ((ioflags & IO_ISDIRECT) && inode->i_mapping->nrpages) { + xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); if (inode->i_mapping->nrpages) { @@ -330,8 +342,7 @@ xfs_file_aio_read( } } xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); - } else - xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); + } trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); commit 9e975cc291d80d5e4562d6bed15ec171e896d69b Author: Matt Carlson Date: Wed Jul 20 10:20:50 2011 +0000 tg3: Fix io failures after chip reset Commit f2096f94b514d88593355995d5dd276961e88af1, entitled "tg3: Add 5720 H2BMC support", needed to add code to preserve some bits set by firmware. Unfortunately the new code causes throughput to stop after a chip reset because it enables state machines before they are ready. This patch undoes the problematic code. The bits will be restored later in the init sequence. Signed-off-by: Matt Carlson Reviewed-by: Michael Chan Signed-off-by: David S. 
Miller diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index a5ff82d..5bf7671 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -7412,16 +7412,11 @@ static int tg3_chip_reset(struct tg3 *tp) tw32(TG3PCI_CLOCK_CTRL, tp->pci_clock_ctrl); } - if (tg3_flag(tp, ENABLE_APE)) - tp->mac_mode = MAC_MODE_APE_TX_EN | - MAC_MODE_APE_RX_EN | - MAC_MODE_TDE_ENABLE; - if (tp->phy_flags & TG3_PHYFLG_PHY_SERDES) { - tp->mac_mode |= MAC_MODE_PORT_MODE_TBI; + tp->mac_mode = MAC_MODE_PORT_MODE_TBI; val = tp->mac_mode; } else if (tp->phy_flags & TG3_PHYFLG_MII_SERDES) { - tp->mac_mode |= MAC_MODE_PORT_MODE_GMII; + tp->mac_mode = MAC_MODE_PORT_MODE_GMII; val = tp->mac_mode; } else val = 0; @@ -8559,12 +8554,11 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy) udelay(10); } - if (tg3_flag(tp, ENABLE_APE)) - tp->mac_mode = MAC_MODE_APE_TX_EN | MAC_MODE_APE_RX_EN; - else - tp->mac_mode = 0; tp->mac_mode |= MAC_MODE_TXSTAT_ENABLE | MAC_MODE_RXSTAT_ENABLE | - MAC_MODE_TDE_ENABLE | MAC_MODE_RDE_ENABLE | MAC_MODE_FHDE_ENABLE; + MAC_MODE_TDE_ENABLE | MAC_MODE_RDE_ENABLE | + MAC_MODE_FHDE_ENABLE; + if (tg3_flag(tp, ENABLE_APE)) + tp->mac_mode |= MAC_MODE_APE_TX_EN | MAC_MODE_APE_RX_EN; if (!tg3_flag(tp, 5705_PLUS) && !(tp->phy_flags & TG3_PHYFLG_PHY_SERDES) && GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5700)