1 --- linux-2.6.32/drivers/infiniband/Kconfig~ 2009-12-05 00:26:03.663774916 +0100
2 +++ linux-2.6.32/drivers/infiniband/Kconfig 2009-12-05 00:26:05.914179759 +0100
4 config INFINIBAND_ADDR_TRANS
7 - depends on !(INFINIBAND = y && IPV6 = m)
10 source "drivers/infiniband/hw/mthca/Kconfig"
11 --- linux-2.6.33/scripts/mod/modpost.c~ 2010-02-24 19:52:17.000000000 +0100
12 +++ linux-2.6.33/scripts/mod/modpost.c 2010-03-07 14:26:47.242168558 +0100
17 -#include "../../include/generated/autoconf.h"
18 +// PLD architectures don't use CONFIG_SYMBOL_PREFIX
19 +//#include "../../include/generated/autoconf.h"
20 #include "../../include/linux/license.h"
22 /* Some toolchains use a `_' prefix for all user symbols. */
24 commit 87b09f1f25cd1e01d7c50bf423c7fe33027d7511
25 Author: stephen hemminger <shemminger@vyatta.com>
26 Date: Fri Feb 12 06:58:00 2010 +0000
28 sky2: don't enable PME legacy mode
30 This bit is not changed by vendor driver, and should be left alone.
31 The documentation implies this is a debug bit.
32 0 = WAKE# only asserted when VMAIN not available
33 1 = WAKE# is dependent on wake events and independent of VMAIN.
35 Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
36 Signed-off-by: David S. Miller <davem@davemloft.net>
38 diff --git b/drivers/net/sky2.c a/drivers/net/sky2.c
39 index 2494842..edf37aa 100644
40 --- b/drivers/net/sky2.c
41 +++ a/drivers/net/sky2.c
42 @@ -733,6 +733,7 @@ static void sky2_wol_init(struct sky2_port *sky2)
43 unsigned port = sky2->port;
44 enum flow_control save_mode;
48 /* Bring hardware out of reset */
49 sky2_write16(hw, B0_CTST, CS_RST_CLR);
50 @@ -786,6 +787,11 @@ static void sky2_wol_init(struct sky2_port *sky2)
51 /* Disable PiG firmware */
52 sky2_write16(hw, B0_CTST, Y2_HW_WOL_OFF);
54 + /* Turn on legacy PCI-Express PME mode */
55 + reg1 = sky2_pci_read32(hw, PCI_DEV_REG1);
56 + reg1 |= PCI_Y2_PME_LEGACY;
57 + sky2_pci_write32(hw, PCI_DEV_REG1, reg1);
60 sky2_write8(hw, SK_REG(port, RX_GMF_CTRL_T), GMF_RST_SET);
62 Date: Mon, 11 Jul 2011 09:59:57 -0400
63 From: Christoph Hellwig <hch@infradead.org>
66 Subject: [PATCH] xfs: start periodic workers later
67 Message-ID: <20110711135957.GA23737@infradead.org>
69 Content-Type: text/plain;
71 Content-Disposition: inline
72 User-Agent: Mutt/1.5.21 (2010-09-15)
74 Start the periodic sync workers only after we have finished xfs_mountfs
75 and thus fully set up the filesystem structures. Without this we can
76 call into xfs_qm_sync before the quotainfo structure is set up if the
77 mount takes unusually long, and probably hit other incomplete states
80 Also clean up the xfs_fs_fill_super error path by using consistent
81 label names, and removing an impossible to reach case.
83 Reported-by: Arkadiusz Miskiewicz <arekm@maven.pl>
84 Signed-off-by: Christoph Hellwig <hch@lst.de>
86 Index: xfs/fs/xfs/linux-2.6/xfs_super.c
87 ===================================================================
88 --- xfs.orig/fs/xfs/linux-2.6/xfs_super.c 2011-07-11 12:02:56.762758869 +0200
89 +++ xfs/fs/xfs/linux-2.6/xfs_super.c 2011-07-11 12:09:20.817344934 +0200
90 @@ -1411,37 +1411,35 @@ xfs_fs_fill_super(
92 set_posix_acl_flag(sb);
94 - error = xfs_syncd_init(mp);
96 - goto out_filestream_unmount;
98 xfs_inode_shrinker_register(mp);
100 error = xfs_mountfs(mp);
102 - goto out_syncd_stop;
103 + goto out_filestream_unmount;
105 + error = xfs_syncd_init(mp);
109 root = igrab(VFS_I(mp->m_rootip));
113 + goto out_syncd_stop;
115 if (is_bad_inode(root)) {
118 + goto out_syncd_stop;
120 sb->s_root = d_alloc_root(root);
130 - xfs_inode_shrinker_unregister(mp);
131 - xfs_syncd_stop(mp);
132 out_filestream_unmount:
133 + xfs_inode_shrinker_unregister(mp);
134 xfs_filestream_unmount(mp);
137 @@ -1455,17 +1453,12 @@ xfs_fs_fill_super(
150 - xfs_inode_shrinker_unregister(mp);
156 + xfs_inode_shrinker_unregister(mp);
159 * Blow away any referenced inode in the filestreams cache.
160 On Sat, 2 Jul 2011, Andi Kleen wrote:
162 > > The problem is that blk_peek_request() calls scsi_prep_fn(), which
165 > > struct scsi_device *sdev = q->queuedata;
166 > > int ret = BLKPREP_KILL;
168 > > if (req->cmd_type == REQ_TYPE_BLOCK_PC)
169 > > ret = scsi_setup_blk_pc_cmnd(sdev, req);
170 > > return scsi_prep_return(q, req, ret);
172 > > It doesn't check to see if sdev is NULL, nor does
173 > > scsi_setup_blk_pc_cmnd(). That accounts for this error:
175 > I actually added a NULL check in scsi_setup_blk_pc_cmnd early on,
176 > but that just caused RCU CPU stalls afterwards and then eventually
179 The RCU problem is likely to be a separate issue. It might even be a
180 result of the use-after-free problem with the elevator.
182 At any rate, it's clear that the crash in the refcounting log you
183 posted occurred because scsi_setup_blk_pc_cmnd() called
184 scsi_prep_state_check(), which tried to dereference the NULL pointer.
186 Would you like to try this patch to see if it fixes the problem? As I
187 said before, I'm not certain it's the best thing to do, but it worked
195 Index: usb-3.0/drivers/scsi/scsi_lib.c
196 ===================================================================
197 --- usb-3.0.orig/drivers/scsi/scsi_lib.c
198 +++ usb-3.0/drivers/scsi/scsi_lib.c
199 @@ -1247,6 +1247,8 @@ int scsi_prep_fn(struct request_queue *q
200 struct scsi_device *sdev = q->queuedata;
201 int ret = BLKPREP_KILL;
205 if (req->cmd_type == REQ_TYPE_BLOCK_PC)
206 ret = scsi_setup_blk_pc_cmnd(sdev, req);
207 return scsi_prep_return(q, req, ret);
208 Index: usb-3.0/drivers/scsi/scsi_sysfs.c
209 ===================================================================
210 --- usb-3.0.orig/drivers/scsi/scsi_sysfs.c
211 +++ usb-3.0/drivers/scsi/scsi_sysfs.c
212 @@ -322,6 +322,8 @@ static void scsi_device_dev_release_user
216 + /* Freeing the queue signals to block that we're done */
217 + scsi_free_queue(sdev->request_queue);
218 blk_put_queue(sdev->request_queue);
219 /* NULL queue means the device can't be used */
220 sdev->request_queue = NULL;
221 @@ -936,8 +938,6 @@ void __scsi_remove_device(struct scsi_de
222 /* cause the request function to reject all I/O requests */
223 sdev->request_queue->queuedata = NULL;
225 - /* Freeing the queue signals to block that we're done */
226 - scsi_free_queue(sdev->request_queue);
233 To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
234 the body of a message to majordomo@vger.kernel.org
235 More majordomo info at http://vger.kernel.org/majordomo-info.html
236 Please read the FAQ at http://www.tux.org/lkml/
237 commit 3326c784c9f492e988617d93f647ae0cfd4c8d09
238 Author: Jiri Pirko <jpirko@redhat.com>
239 Date: Wed Jul 20 04:54:38 2011 +0000
241 forcedeth: do vlan cleanup
243 - unify vlan and nonvlan rx path
244 - kill np->vlangrp and nv_vlan_rx_register
245 - allow to turn on/off rx vlan accel via ethtool (set_features)
247 Signed-off-by: Jiri Pirko <jpirko@redhat.com>
248 Signed-off-by: David S. Miller <davem@davemloft.net>
250 diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
251 index 537b695..e64cd9c 100644
252 --- a/drivers/net/forcedeth.c
253 +++ b/drivers/net/forcedeth.c
254 @@ -820,9 +820,6 @@ struct fe_priv {
255 struct nv_skb_map *tx_end_flip;
259 - struct vlan_group *vlangrp;
261 /* msi/msi-x fields */
263 struct msix_entry msi_x_entry[NV_MSI_X_MAX_VECTORS];
264 @@ -2766,17 +2763,13 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit)
265 skb->protocol = eth_type_trans(skb, dev);
268 - if (likely(!np->vlangrp)) {
269 - napi_gro_receive(&np->napi, skb);
271 - vlanflags = le32_to_cpu(np->get_rx.ex->buflow);
272 - if (vlanflags & NV_RX3_VLAN_TAG_PRESENT) {
273 - vlan_gro_receive(&np->napi, np->vlangrp,
274 - vlanflags & NV_RX3_VLAN_TAG_MASK, skb);
276 - napi_gro_receive(&np->napi, skb);
278 + vlanflags = le32_to_cpu(np->get_rx.ex->buflow);
279 + if (vlanflags & NV_RX3_VLAN_TAG_PRESENT) {
280 + u16 vid = vlanflags & NV_RX3_VLAN_TAG_MASK;
282 + __vlan_hwaccel_put_tag(skb, vid);
284 + napi_gro_receive(&np->napi, skb);
286 dev->stats.rx_packets++;
287 dev->stats.rx_bytes += len;
288 @@ -4484,6 +4477,27 @@ static u32 nv_fix_features(struct net_device *dev, u32 features)
292 +static void nv_vlan_mode(struct net_device *dev, u32 features)
294 + struct fe_priv *np = get_nvpriv(dev);
296 + spin_lock_irq(&np->lock);
298 + if (features & NETIF_F_HW_VLAN_RX)
299 + np->txrxctl_bits |= NVREG_TXRXCTL_VLANSTRIP;
301 + np->txrxctl_bits &= ~NVREG_TXRXCTL_VLANSTRIP;
303 + if (features & NETIF_F_HW_VLAN_TX)
304 + np->txrxctl_bits |= NVREG_TXRXCTL_VLANINS;
306 + np->txrxctl_bits &= ~NVREG_TXRXCTL_VLANINS;
308 + writel(np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl);
310 + spin_unlock_irq(&np->lock);
313 static int nv_set_features(struct net_device *dev, u32 features)
315 struct fe_priv *np = netdev_priv(dev);
316 @@ -4504,6 +4518,9 @@ static int nv_set_features(struct net_device *dev, u32 features)
317 spin_unlock_irq(&np->lock);
320 + if (changed & (NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX))
321 + nv_vlan_mode(dev, features);
326 @@ -4879,29 +4896,6 @@ static const struct ethtool_ops ops = {
327 .self_test = nv_self_test,
330 -static void nv_vlan_rx_register(struct net_device *dev, struct vlan_group *grp)
332 - struct fe_priv *np = get_nvpriv(dev);
334 - spin_lock_irq(&np->lock);
336 - /* save vlan group */
340 - /* enable vlan on MAC */
341 - np->txrxctl_bits |= NVREG_TXRXCTL_VLANSTRIP | NVREG_TXRXCTL_VLANINS;
343 - /* disable vlan on MAC */
344 - np->txrxctl_bits &= ~NVREG_TXRXCTL_VLANSTRIP;
345 - np->txrxctl_bits &= ~NVREG_TXRXCTL_VLANINS;
348 - writel(np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl);
350 - spin_unlock_irq(&np->lock);
353 /* The mgmt unit and driver use a semaphore to access the phy during init */
354 static int nv_mgmt_acquire_sema(struct net_device *dev)
356 @@ -5208,7 +5202,6 @@ static const struct net_device_ops nv_netdev_ops = {
357 .ndo_validate_addr = eth_validate_addr,
358 .ndo_set_mac_address = nv_set_mac_address,
359 .ndo_set_multicast_list = nv_set_multicast,
360 - .ndo_vlan_rx_register = nv_vlan_rx_register,
361 #ifdef CONFIG_NET_POLL_CONTROLLER
362 .ndo_poll_controller = nv_poll_controller,
364 @@ -5226,7 +5219,6 @@ static const struct net_device_ops nv_netdev_ops_optimized = {
365 .ndo_validate_addr = eth_validate_addr,
366 .ndo_set_mac_address = nv_set_mac_address,
367 .ndo_set_multicast_list = nv_set_multicast,
368 - .ndo_vlan_rx_register = nv_vlan_rx_register,
369 #ifdef CONFIG_NET_POLL_CONTROLLER
370 .ndo_poll_controller = nv_poll_controller,
372 commit 0891b0e08937aaec2c4734acb94c5ff8042313bb
373 Author: Jiri Pirko <jpirko@redhat.com>
374 Date: Tue Jul 26 10:19:28 2011 +0000
378 For some reason, when rxaccel is disabled, NV_RX3_VLAN_TAG_PRESENT is
379 still set and some pseudorandom vids appear. So check for
380 NETIF_F_HW_VLAN_RX as well. Also set correctly hw_features and set vlan
383 Signed-off-by: Jiri Pirko <jpirko@redhat.com>
384 Signed-off-by: David S. Miller <davem@davemloft.net>
386 diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
387 index e64cd9c..e55df30 100644
388 --- a/drivers/net/forcedeth.c
389 +++ b/drivers/net/forcedeth.c
390 @@ -2764,7 +2764,14 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit)
393 vlanflags = le32_to_cpu(np->get_rx.ex->buflow);
394 - if (vlanflags & NV_RX3_VLAN_TAG_PRESENT) {
397 + * There's a need to check for NETIF_F_HW_VLAN_RX here.
398 + * Even if vlan rx accel is disabled,
399 + * NV_RX3_VLAN_TAG_PRESENT is pseudo randomly set.
401 + if (dev->features & NETIF_F_HW_VLAN_RX &&
402 + vlanflags & NV_RX3_VLAN_TAG_PRESENT) {
403 u16 vid = vlanflags & NV_RX3_VLAN_TAG_MASK;
405 __vlan_hwaccel_put_tag(skb, vid);
406 @@ -5331,15 +5338,16 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
407 np->txrxctl_bits |= NVREG_TXRXCTL_RXCHECK;
408 dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_SG |
409 NETIF_F_TSO | NETIF_F_RXCSUM;
410 - dev->features |= dev->hw_features;
413 np->vlanctl_bits = 0;
414 if (id->driver_data & DEV_HAS_VLAN) {
415 np->vlanctl_bits = NVREG_VLANCONTROL_ENABLE;
416 - dev->features |= NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX;
417 + dev->hw_features |= NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX;
420 + dev->features |= dev->hw_features;
422 np->pause_flags = NV_PAUSEFRAME_RX_CAPABLE | NV_PAUSEFRAME_RX_REQ | NV_PAUSEFRAME_AUTONEG;
423 if ((id->driver_data & DEV_HAS_PAUSEFRAME_TX_V1) ||
424 (id->driver_data & DEV_HAS_PAUSEFRAME_TX_V2) ||
425 @@ -5607,6 +5615,8 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
429 + nv_vlan_mode(dev, dev->features);
431 netif_carrier_off(dev);
433 dev_info(&pci_dev->dev, "ifname %s, PHY OUI 0x%x @ %d, addr %pM\n",
434 --- linux-3.0/scripts/kconfig/lxdialog/check-lxdialog.sh~ 2011-07-22 04:17:23.000000000 +0200
435 +++ linux-3.0/scripts/kconfig/lxdialog/check-lxdialog.sh 2011-08-25 21:26:04.799150642 +0200
437 $cc -print-file-name=lib${lib}.${ext} | grep -q /
438 if [ $? -eq 0 ]; then
440 + for libt in tinfow tinfo ; do
441 + $cc -print-file-name=lib${libt}.${ext} | grep -q /
442 + if [ $? -eq 0 ]; then
449 commit 1d8c95a363bf8cd4d4182dd19c01693b635311c2
450 Author: Dave Chinner <dchinner@redhat.com>
451 Date: Mon Jul 18 03:40:16 2011 +0000
453 xfs: use a cursor for bulk AIL insertion
455 Delayed logging can insert tens of thousands of log items into the
456 AIL at the same LSN. When the committing of log commit records
457 occur, we can get insertions occurring at an LSN that is not at the
458 end of the AIL. If there are thousands of items in the AIL on the
459 tail LSN, each insertion has to walk the AIL to find the correct
460 place to insert the new item into the AIL. This can consume large
461 amounts of CPU time and block other operations from occurring while
462 the traversals are in progress.
464 To avoid this repeated walk, use an AIL cursor to record
465 where we should be inserting the new items into the AIL without
466 having to repeat the walk. The cursor infrastructure already
467 provides this functionality for push walks, so is a simple extension
468 of existing code. While this will not avoid the initial walk, it
469 will avoid repeating it tens of thousands of times during a single
472 This version includes logic improvements from Christoph Hellwig.
474 Signed-off-by: Dave Chinner <dchinner@redhat.com>
475 Reviewed-by: Christoph Hellwig <hch@lst.de>
476 Signed-off-by: Alex Elder <aelder@sgi.com>
478 diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
479 index c83f63b..efc147f 100644
480 --- a/fs/xfs/xfs_trans.c
481 +++ b/fs/xfs/xfs_trans.c
482 @@ -1426,6 +1426,7 @@ xfs_trans_committed(
484 xfs_log_item_batch_insert(
485 struct xfs_ail *ailp,
486 + struct xfs_ail_cursor *cur,
487 struct xfs_log_item **log_items,
489 xfs_lsn_t commit_lsn)
490 @@ -1434,7 +1435,7 @@ xfs_log_item_batch_insert(
492 spin_lock(&ailp->xa_lock);
493 /* xfs_trans_ail_update_bulk drops ailp->xa_lock */
494 - xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn);
495 + xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn);
497 for (i = 0; i < nr_items; i++)
498 IOP_UNPIN(log_items[i], 0);
499 @@ -1452,6 +1453,13 @@ xfs_log_item_batch_insert(
500 * as an iclog write error even though we haven't started any IO yet. Hence in
501 * this case all we need to do is IOP_COMMITTED processing, followed by an
502 * IOP_UNPIN(aborted) call.
504 + * The AIL cursor is used to optimise the insert process. If commit_lsn is not
505 + * at the end of the AIL, the insert cursor avoids the need to walk
506 + * the AIL to find the insertion point on every xfs_log_item_batch_insert()
507 + * call. This saves a lot of needless list walking and is a net win, even
508 + * though it slightly increases the amount of AIL lock traffic to set it up
509 + * and tear it down.
512 xfs_trans_committed_bulk(
513 @@ -1463,8 +1471,13 @@ xfs_trans_committed_bulk(
514 #define LOG_ITEM_BATCH_SIZE 32
515 struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE];
516 struct xfs_log_vec *lv;
517 + struct xfs_ail_cursor cur;
520 + spin_lock(&ailp->xa_lock);
521 + xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn);
522 + spin_unlock(&ailp->xa_lock);
524 /* unpin all the log items */
525 for (lv = log_vector; lv; lv = lv->lv_next ) {
526 struct xfs_log_item *lip = lv->lv_item;
527 @@ -1493,7 +1506,9 @@ xfs_trans_committed_bulk(
529 * Not a bulk update option due to unusual item_lsn.
530 * Push into AIL immediately, rechecking the lsn once
531 - * we have the ail lock. Then unpin the item.
532 + * we have the ail lock. Then unpin the item. This does
533 + * not affect the AIL cursor the bulk insert path is
536 spin_lock(&ailp->xa_lock);
537 if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0)
538 @@ -1507,7 +1522,7 @@ xfs_trans_committed_bulk(
539 /* Item is a candidate for bulk AIL insert. */
540 log_items[i++] = lv->lv_item;
541 if (i >= LOG_ITEM_BATCH_SIZE) {
542 - xfs_log_item_batch_insert(ailp, log_items,
543 + xfs_log_item_batch_insert(ailp, &cur, log_items,
544 LOG_ITEM_BATCH_SIZE, commit_lsn);
547 @@ -1515,7 +1530,11 @@ xfs_trans_committed_bulk(
549 /* make sure we insert the remainder! */
551 - xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn);
552 + xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn);
554 + spin_lock(&ailp->xa_lock);
555 + xfs_trans_ail_cursor_done(ailp, &cur);
556 + spin_unlock(&ailp->xa_lock);
560 diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
561 index 5fc2380..9a69dc0 100644
562 --- a/fs/xfs/xfs_trans_ail.c
563 +++ b/fs/xfs/xfs_trans_ail.c
564 @@ -272,9 +272,9 @@ xfs_trans_ail_cursor_clear(
568 - * Return the item in the AIL with the current lsn.
569 - * Return the current tree generation number for use
570 - * in calls to xfs_trans_next_ail().
571 + * Initialise the cursor to the first item in the AIL with the given @lsn.
572 + * This searches the list from lowest LSN to highest. Pass a @lsn of zero
573 + * to initialise the cursor to the first item in the AIL.
576 xfs_trans_ail_cursor_first(
577 @@ -300,31 +300,97 @@ out:
581 - * splice the log item list into the AIL at the given LSN.
582 + * Initialise the cursor to the last item in the AIL with the given @lsn.
583 + * This searches the list from highest LSN to lowest. If there is no item with
584 + * the value of @lsn, then it sets the cursor to the last item with an LSN lower
587 +static struct xfs_log_item *
588 +__xfs_trans_ail_cursor_last(
589 + struct xfs_ail *ailp,
592 + xfs_log_item_t *lip;
594 + list_for_each_entry_reverse(lip, &ailp->xa_ail, li_ail) {
595 + if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0)
602 + * Initialise the cursor to the last item in the AIL with the given @lsn.
603 + * This searches the list from highest LSN to lowest.
605 +struct xfs_log_item *
606 +xfs_trans_ail_cursor_last(
607 + struct xfs_ail *ailp,
608 + struct xfs_ail_cursor *cur,
611 + xfs_trans_ail_cursor_init(ailp, cur);
612 + cur->item = __xfs_trans_ail_cursor_last(ailp, lsn);
617 + * splice the log item list into the AIL at the given LSN. We splice to the
618 + * tail of the given LSN to maintain insert order for push traversals. The
619 + * cursor is optional, allowing repeated updates to the same LSN to avoid
620 + * repeated traversals.
624 - struct xfs_ail *ailp,
625 - struct list_head *list,
627 + struct xfs_ail *ailp,
628 + struct xfs_ail_cursor *cur,
629 + struct list_head *list,
632 - xfs_log_item_t *next_lip;
633 + struct xfs_log_item *lip = cur ? cur->item : NULL;
634 + struct xfs_log_item *next_lip;
636 - /* If the list is empty, just insert the item. */
637 - if (list_empty(&ailp->xa_ail)) {
638 - list_splice(list, &ailp->xa_ail);
641 + * Get a new cursor if we don't have a placeholder or the existing one
642 + * has been invalidated.
644 + if (!lip || (__psint_t)lip & 1) {
645 + lip = __xfs_trans_ail_cursor_last(ailp, lsn);
648 + /* The list is empty, so just splice and return. */
651 + list_splice(list, &ailp->xa_ail);
656 - list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) {
657 - if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)
660 + * Our cursor points to the item we want to insert _after_, so we have
661 + * to update the cursor to point to the end of the list we are splicing
662 + * in so that it points to the correct location for the next splice.
663 + * i.e. before the splice
665 + * lsn -> lsn -> lsn + x -> lsn + x ...
667 + * | cursor points here
669 + * After the splice we have:
671 + * lsn -> lsn -> lsn -> lsn -> .... -> lsn -> lsn + x -> lsn + x ...
673 + * | cursor points here | needs to move here
675 + * So we set the cursor to the last item in the list to be spliced
676 + * before we execute the splice, resulting in the cursor pointing to
677 + * the correct item after the splice occurs.
680 + next_lip = list_entry(list->prev, struct xfs_log_item, li_ail);
681 + cur->item = next_lip;
684 - ASSERT(&next_lip->li_ail == &ailp->xa_ail ||
685 - XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0);
687 - list_splice_init(list, &next_lip->li_ail);
688 + list_splice(list, &lip->li_ail);
692 @@ -645,6 +711,7 @@ xfs_trans_unlocked_item(
694 xfs_trans_ail_update_bulk(
695 struct xfs_ail *ailp,
696 + struct xfs_ail_cursor *cur,
697 struct xfs_log_item **log_items,
699 xfs_lsn_t lsn) __releases(ailp->xa_lock)
700 @@ -674,7 +741,7 @@ xfs_trans_ail_update_bulk(
701 list_add(&lip->li_ail, &tmp);
704 - xfs_ail_splice(ailp, &tmp, lsn);
705 + xfs_ail_splice(ailp, cur, &tmp, lsn);
708 spin_unlock(&ailp->xa_lock);
709 diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
710 index 6b164e9..c0cb408 100644
711 --- a/fs/xfs/xfs_trans_priv.h
712 +++ b/fs/xfs/xfs_trans_priv.h
713 @@ -82,6 +82,7 @@ struct xfs_ail {
714 extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */
716 void xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
717 + struct xfs_ail_cursor *cur,
718 struct xfs_log_item **log_items, int nr_items,
719 xfs_lsn_t lsn) __releases(ailp->xa_lock);
721 @@ -90,7 +91,7 @@ xfs_trans_ail_update(
722 struct xfs_log_item *lip,
723 xfs_lsn_t lsn) __releases(ailp->xa_lock)
725 - xfs_trans_ail_update_bulk(ailp, &lip, 1, lsn);
726 + xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn);
729 void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp,
730 @@ -111,10 +112,13 @@ xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp);
731 void xfs_trans_unlocked_item(struct xfs_ail *,
734 -struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
735 +struct xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
736 struct xfs_ail_cursor *cur,
738 -struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
739 +struct xfs_log_item * xfs_trans_ail_cursor_last(struct xfs_ail *ailp,
740 + struct xfs_ail_cursor *cur,
742 +struct xfs_log_item * xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
743 struct xfs_ail_cursor *cur);
744 void xfs_trans_ail_cursor_done(struct xfs_ail *ailp,
745 struct xfs_ail_cursor *cur);
746 commit 37b652ec6445be99d0193047d1eda129a1a315d3
747 Author: Dave Chinner <dchinner@redhat.com>
748 Date: Thu Aug 25 07:17:01 2011 +0000
750 xfs: don't serialise direct IO reads on page cache checks
752 There is no need to grab the i_mutex of the IO lock in exclusive
753 mode if we don't need to invalidate the page cache. Taking these
754 locks on every direct IO effective serialises them as taking the IO
755 lock in exclusive mode has to wait for all shared holders to drop
756 the lock. That only happens when IO is complete, so effectively it
757 prevents dispatch of concurrent direct IO reads to the same inode.
759 Fix this by taking the IO lock shared to check the page cache state,
760 and only then drop it and take the IO lock exclusively if there is
761 work to be done. Hence for the normal direct IO case, no exclusive
764 Signed-off-by: Dave Chinner <dchinner@redhat.com>
765 Tested-by: Joern Engel <joern@logfs.org>
766 Reviewed-by: Christoph Hellwig <hch@lst.de>
767 Signed-off-by: Alex Elder <aelder@sgi.com>
769 diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
770 index 7f7b424..8fd4a07 100644
771 --- a/fs/xfs/linux-2.6/xfs_file.c
772 +++ b/fs/xfs/linux-2.6/xfs_file.c
773 @@ -317,7 +317,19 @@ xfs_file_aio_read(
774 if (XFS_FORCED_SHUTDOWN(mp))
777 - if (unlikely(ioflags & IO_ISDIRECT)) {
779 + * Locking is a bit tricky here. If we take an exclusive lock
780 + * for direct IO, we effectively serialise all new concurrent
781 + * read IO to this file and block it behind IO that is currently in
782 + * progress because IO in progress holds the IO lock shared. We only
783 + * need to hold the lock exclusive to blow away the page cache, so
784 + * only take lock exclusively if the page cache needs invalidation.
785 + * This allows the normal direct IO case of no page cache pages to
786 + * proceed concurrently without serialisation.
788 + xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
789 + if ((ioflags & IO_ISDIRECT) && inode->i_mapping->nrpages) {
790 + xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
791 xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
793 if (inode->i_mapping->nrpages) {
794 @@ -330,8 +342,7 @@ xfs_file_aio_read(
797 xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
799 - xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
802 trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
805 From: Christoph Hellwig <hch@infradead.org>
806 Subject: [PATCH 1/4] xfs: do not update xa_last_pushed_lsn for locked items
808 If an item was locked we should not update xa_last_pushed_lsn and thus skip
809 it when restarting the AIL scan as we need to be able to lock and write it
810 out as soon as possible. Otherwise heavy lock contention might starve AIL
811 pushing too easily, especially given the larger backoff once we moved
812 xa_last_pushed_lsn all the way to the target lsn.
814 Signed-off-by: Christoph Hellwig <hch@lst.de>
815 Reported-by: Stefan Priebe <s.priebe@profihost.ag>
816 Tested-by: Stefan Priebe <s.priebe@profihost.ag>
818 Index: xfs/fs/xfs/xfs_trans_ail.c
819 ===================================================================
820 --- xfs.orig/fs/xfs/xfs_trans_ail.c 2011-10-11 15:48:49.302003241 +0200
821 +++ xfs/fs/xfs/xfs_trans_ail.c 2011-10-11 15:49:10.307505812 +0200
822 @@ -440,7 +440,6 @@ xfs_ail_worker(
824 case XFS_ITEM_LOCKED:
825 XFS_STATS_INC(xs_push_ail_locked);
826 - ailp->xa_last_pushed_lsn = lsn;
831 _______________________________________________
834 http://oss.sgi.com/mailman/listinfo/xfs
836 From: Christoph Hellwig <hch@infradead.org>
837 Subject: [PATCH 2/4] xfs: force the log if we encounter pinned buffers in
840 We need to check for pinned buffers even in .iop_pushbuf given that inode
841 items flush into the same buffers that may be pinned directly due to operations
842 on the unlinked inode list operating directly on buffers. To do this add a
843 return value to .iop_pushbuf that tells the AIL push about this and use
844 the existing log force mechanisms to unpin it.
846 Signed-off-by: Christoph Hellwig <hch@lst.de>
847 Reported-by: Stefan Priebe <s.priebe@profihost.ag>
848 Tested-by: Stefan Priebe <s.priebe@profihost.ag>
850 Index: xfs/fs/xfs/quota/xfs_dquot_item.c
851 ===================================================================
852 --- xfs.orig/fs/xfs/quota/xfs_dquot_item.c 2011-10-11 15:48:49.290003546 +0200
853 +++ xfs/fs/xfs/quota/xfs_dquot_item.c 2011-10-11 15:49:17.727006849 +0200
854 @@ -183,13 +183,14 @@ xfs_qm_dqunpin_wait(
855 * search the buffer cache can be a time consuming thing, and AIL lock is a
860 xfs_qm_dquot_logitem_pushbuf(
861 struct xfs_log_item *lip)
863 struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
864 struct xfs_dquot *dqp = qlip->qli_dquot;
868 ASSERT(XFS_DQ_IS_LOCKED(dqp));
870 @@ -201,17 +202,20 @@ xfs_qm_dquot_logitem_pushbuf(
871 if (completion_done(&dqp->q_flush) ||
872 !(lip->li_flags & XFS_LI_IN_AIL)) {
878 bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno,
879 dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
884 if (XFS_BUF_ISDELAYWRITE(bp))
885 xfs_buf_delwri_promote(bp);
886 + if (XFS_BUF_ISPINNED(bp))
893 Index: xfs/fs/xfs/xfs_buf_item.c
894 ===================================================================
895 --- xfs.orig/fs/xfs/xfs_buf_item.c 2011-10-11 15:48:49.286004461 +0200
896 +++ xfs/fs/xfs/xfs_buf_item.c 2011-10-11 15:49:17.727006849 +0200
897 @@ -629,7 +629,7 @@ xfs_buf_item_push(
898 * the xfsbufd to get this buffer written. We have to unlock the buffer
899 * to allow the xfsbufd to write it, too.
903 xfs_buf_item_pushbuf(
904 struct xfs_log_item *lip)
906 @@ -643,6 +643,7 @@ xfs_buf_item_pushbuf(
908 xfs_buf_delwri_promote(bp);
914 Index: xfs/fs/xfs/xfs_inode_item.c
915 ===================================================================
916 --- xfs.orig/fs/xfs/xfs_inode_item.c 2011-10-11 15:48:40.750005198 +0200
917 +++ xfs/fs/xfs/xfs_inode_item.c 2011-10-11 15:49:17.735004729 +0200
918 @@ -708,13 +708,14 @@ xfs_inode_item_committed(
919 * marked delayed write. If that's the case, we'll promote it and that will
920 * allow the caller to write the buffer by triggering the xfsbufd to run.
924 xfs_inode_item_pushbuf(
925 struct xfs_log_item *lip)
927 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
928 struct xfs_inode *ip = iip->ili_inode;
932 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
934 @@ -725,7 +726,7 @@ xfs_inode_item_pushbuf(
935 if (completion_done(&ip->i_flush) ||
936 !(lip->li_flags & XFS_LI_IN_AIL)) {
937 xfs_iunlock(ip, XFS_ILOCK_SHARED);
942 bp = xfs_incore(ip->i_mount->m_ddev_targp, iip->ili_format.ilf_blkno,
943 @@ -733,10 +734,13 @@ xfs_inode_item_pushbuf(
945 xfs_iunlock(ip, XFS_ILOCK_SHARED);
949 if (XFS_BUF_ISDELAYWRITE(bp))
950 xfs_buf_delwri_promote(bp);
951 + if (XFS_BUF_ISPINNED(bp))
958 Index: xfs/fs/xfs/xfs_trans.h
959 ===================================================================
960 --- xfs.orig/fs/xfs/xfs_trans.h 2011-10-11 15:48:40.758004637 +0200
961 +++ xfs/fs/xfs/xfs_trans.h 2011-10-11 15:49:17.743032550 +0200
962 @@ -350,7 +350,7 @@ typedef struct xfs_item_ops {
963 void (*iop_unlock)(xfs_log_item_t *);
964 xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
965 void (*iop_push)(xfs_log_item_t *);
966 - void (*iop_pushbuf)(xfs_log_item_t *);
967 + bool (*iop_pushbuf)(xfs_log_item_t *);
968 void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
971 Index: xfs/fs/xfs/xfs_trans_ail.c
972 ===================================================================
973 --- xfs.orig/fs/xfs/xfs_trans_ail.c 2011-10-11 15:49:10.307505812 +0200
974 +++ xfs/fs/xfs/xfs_trans_ail.c 2011-10-11 17:07:49.826504898 +0200
975 @@ -427,8 +427,13 @@ xfs_ail_worker(
977 case XFS_ITEM_PUSHBUF:
978 XFS_STATS_INC(xs_push_ail_pushbuf);
980 - ailp->xa_last_pushed_lsn = lsn;
982 + if (!IOP_PUSHBUF(lip)) {
986 + ailp->xa_last_pushed_lsn = lsn;
992 _______________________________________________
995 http://oss.sgi.com/mailman/listinfo/xfs
997 From: Christoph Hellwig <hch@infradead.org>
998 Subject: [PATCH 3/4] xfs: revert to using a kthread for AIL pushing
1000 Currently we have a few issues with the way the workqueue code is used to
1001 implement AIL pushing:
1003 - it accidentally uses the same workqueue as the syncer action, and thus
1004 can be prevented from running if there are enough sync actions active
1006 - it doesn't use the HIGHPRI flag to queue at the head of the queue of
1009 At this point I'm not confident enough in getting all the workqueue flags and
1010 tweaks right to provide a perfectly reliable execution context for AIL
1011 pushing, which is the most important piece in XFS to make forward progress
1014 Revert back to use a kthread per filesystem which fixes all the above issues
1015 at the cost of having a task struct and stack around for each mounted
1016 filesystem. In addition this also gives us much better ways to diagnose
1017 any issues involving hung AIL pushing and removes a small amount of code.
1019 Signed-off-by: Christoph Hellwig <hch@lst.de>
1020 Reported-by: Stefan Priebe <s.priebe@profihost.ag>
1021 Tested-by: Stefan Priebe <s.priebe@profihost.ag>
1023 Index: xfs/fs/xfs/linux-2.6/xfs_super.c
1024 ===================================================================
1025 --- xfs.orig/fs/xfs/linux-2.6/xfs_super.c 2011-10-11 15:48:49.000000000 +0200
1026 +++ xfs/fs/xfs/linux-2.6/xfs_super.c 2011-10-11 15:52:13.383505329 +0200
1027 @@ -1652,24 +1652,13 @@ xfs_init_workqueues(void)
1029 xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8);
1033 - xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8);
1035 - goto out_destroy_syncd;
1041 - destroy_workqueue(xfs_syncd_wq);
1047 xfs_destroy_workqueues(void)
1049 - destroy_workqueue(xfs_ail_wq);
1050 destroy_workqueue(xfs_syncd_wq);
1053 Index: xfs/fs/xfs/xfs_trans_ail.c
1054 ===================================================================
1055 --- xfs.orig/fs/xfs/xfs_trans_ail.c 2011-10-11 15:51:58.546005158 +0200
1056 +++ xfs/fs/xfs/xfs_trans_ail.c 2011-10-11 15:52:13.383505329 +0200
1058 #include "xfs_trans_priv.h"
1059 #include "xfs_error.h"
1061 -struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */
1065 * Check that the list is sorted as it should be.
1066 @@ -356,16 +354,10 @@ xfs_ail_delete(
1067 xfs_trans_ail_cursor_clear(ailp, lip);
1071 - * xfs_ail_worker does the work of pushing on the AIL. It will requeue itself
1072 - * to run at a later time if there is more work to do to complete the push.
1076 - struct work_struct *work)
1079 + struct xfs_ail *ailp)
1081 - struct xfs_ail *ailp = container_of(to_delayed_work(work),
1082 - struct xfs_ail, xa_work);
1083 xfs_mount_t *mp = ailp->xa_mount;
1084 struct xfs_ail_cursor cur;
1085 xfs_log_item_t *lip;
1086 @@ -505,20 +497,6 @@ out_done:
1087 /* We're past our target or empty, so idle */
1088 ailp->xa_last_pushed_lsn = 0;
1091 - * We clear the XFS_AIL_PUSHING_BIT first before checking
1092 - * whether the target has changed. If the target has changed,
1093 - * this pushes the requeue race directly onto the result of the
1094 - * atomic test/set bit, so we are guaranteed that either the
1095 - * the pusher that changed the target or ourselves will requeue
1096 - * the work (but not both).
1098 - clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags);
1100 - if (XFS_LSN_CMP(ailp->xa_target, target) == 0 ||
1101 - test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
1105 } else if (XFS_LSN_CMP(lsn, target) >= 0) {
1107 @@ -541,9 +519,30 @@ out_done:
1111 - /* There is more to do, requeue us. */
1112 - queue_delayed_work(xfs_syncd_wq, &ailp->xa_work,
1113 - msecs_to_jiffies(tout));
1121 + struct xfs_ail *ailp = data;
1122 + long tout = 0; /* milliseconds */
1124 + while (!kthread_should_stop()) {
1125 + if (tout && tout <= 20)
1126 + __set_current_state(TASK_KILLABLE);
1128 + __set_current_state(TASK_INTERRUPTIBLE);
1129 + schedule_timeout(tout ?
1130 + msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
1134 + tout = xfsaild_push(ailp);
1141 @@ -578,8 +577,9 @@ xfs_ail_push(
1144 xfs_trans_ail_copy_lsn(ailp, &ailp->xa_target, &threshold_lsn);
1145 - if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
1146 - queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0);
1149 + wake_up_process(ailp->xa_task);
1153 @@ -817,9 +817,18 @@ xfs_trans_ail_init(
1154 INIT_LIST_HEAD(&ailp->xa_ail);
1155 INIT_LIST_HEAD(&ailp->xa_cursors);
1156 spin_lock_init(&ailp->xa_lock);
1157 - INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker);
1159 + ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
1160 + ailp->xa_mount->m_fsname);
1161 + if (IS_ERR(ailp->xa_task))
1162 + goto out_free_ailp;
1173 @@ -828,6 +837,6 @@ xfs_trans_ail_destroy(
1175 struct xfs_ail *ailp = mp->m_ail;
1177 - cancel_delayed_work_sync(&ailp->xa_work);
1178 + kthread_stop(ailp->xa_task);
1181 Index: xfs/fs/xfs/xfs_trans_priv.h
1182 ===================================================================
1183 --- xfs.orig/fs/xfs/xfs_trans_priv.h 2011-10-11 15:48:40.000000000 +0200
1184 +++ xfs/fs/xfs/xfs_trans_priv.h 2011-10-11 15:53:35.382504829 +0200
1185 @@ -64,23 +64,17 @@ struct xfs_ail_cursor {
1188 struct xfs_mount *xa_mount;
1189 + struct task_struct *xa_task;
1190 struct list_head xa_ail;
1191 xfs_lsn_t xa_target;
1192 struct xfs_ail_cursor xa_cursors;
1194 - struct delayed_work xa_work;
1195 xfs_lsn_t xa_last_pushed_lsn;
1196 - unsigned long xa_flags;
1199 -#define XFS_AIL_PUSHING_BIT 0
1202 * From xfs_trans_ail.c
1205 -extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */
1207 void xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
1208 struct xfs_ail_cursor *cur,
1209 struct xfs_log_item **log_items, int nr_items,
1210 Index: xfs/fs/xfs/linux-2.6/xfs_linux.h
1211 ===================================================================
1212 --- xfs.orig/fs/xfs/linux-2.6/xfs_linux.h 2011-10-11 15:48:49.000000000 +0200
1213 +++ xfs/fs/xfs/linux-2.6/xfs_linux.h 2011-10-11 15:52:13.383505329 +0200
1215 #include <linux/ctype.h>
1216 #include <linux/writeback.h>
1217 #include <linux/capability.h>
1218 +#include <linux/kthread.h>
1219 +#include <linux/freezer.h>
1220 #include <linux/list_sort.h>
1222 #include <asm/page.h>
1224 _______________________________________________
1227 http://oss.sgi.com/mailman/listinfo/xfs
1229 --- linux-3.0/include/linux/vermagic.h~ 2011-10-14 19:55:36.000000000 +0200
1230 +++ linux-3.0/include/linux/vermagic.h 2011-10-16 15:45:11.251098590 +0200
1234 #ifdef CONSTIFY_PLUGIN
1235 -#define MODULE_CONSTIFY_PLUGIN "CONSTIFY_PLUGIN "
1236 +#define MODULE_CONSTIFY_PLUGIN ""
1238 #define MODULE_CONSTIFY_PLUGIN ""