--- linux-2.6.32/drivers/infiniband/Kconfig~ 2009-12-05 00:26:03.663774916 +0100
+++ linux-2.6.32/drivers/infiniband/Kconfig 2009-12-05 00:26:05.914179759 +0100
config INFINIBAND_ADDR_TRANS
- depends on !(INFINIBAND = y && IPV6 = m)
source "drivers/infiniband/hw/mthca/Kconfig"
--- linux-2.6.33/scripts/mod/modpost.c~ 2010-02-24 19:52:17.000000000 +0100
+++ linux-2.6.33/scripts/mod/modpost.c 2010-03-07 14:26:47.242168558 +0100
-#include "../../include/generated/autoconf.h"
+// PLD architectures don't use CONFIG_SYMBOL_PREFIX
+//#include "../../include/generated/autoconf.h"
#include "../../include/linux/license.h"
/* Some toolchains use a `_' prefix for all user symbols. */
commit 87b09f1f25cd1e01d7c50bf423c7fe33027d7511
Author: stephen hemminger <shemminger@vyatta.com>
Date: Fri Feb 12 06:58:00 2010 +0000

sky2: dont enable PME legacy mode

This bit is not changed by the vendor driver and should be left alone.
The documentation implies this is a debug bit:
0 = WAKE# only asserted when VMAIN not available
1 = WAKE# depends on wake events and is independent of VMAIN.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
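Note that the diff below is expressed in reverse (b/a order), so applying it removes the legacy-PME hunk. As a rough sketch, the behaviour being reverted amounts to the following; sky2_set_legacy_pme() is a hypothetical helper, only the register accessors come from the driver:

/* Sketch of the reverted behaviour: force WAKE# to follow wake events
 * regardless of VMAIN by setting the legacy PME bit. */
static void sky2_set_legacy_pme(struct sky2_hw *hw, bool enable)
{
	u32 reg1 = sky2_pci_read32(hw, PCI_DEV_REG1);

	if (enable)
		reg1 |= PCI_Y2_PME_LEGACY;	/* WAKE# follows wake events */
	else
		reg1 &= ~PCI_Y2_PME_LEGACY;	/* WAKE# only without VMAIN */
	sky2_pci_write32(hw, PCI_DEV_REG1, reg1);
}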
diff --git b/drivers/net/sky2.c a/drivers/net/sky2.c
index 2494842..edf37aa 100644
--- b/drivers/net/sky2.c
+++ a/drivers/net/sky2.c
@@ -733,6 +733,7 @@ static void sky2_wol_init(struct sky2_port *sky2)
unsigned port = sky2->port;
enum flow_control save_mode;
/* Bring hardware out of reset */
sky2_write16(hw, B0_CTST, CS_RST_CLR);
@@ -786,6 +787,11 @@ static void sky2_wol_init(struct sky2_port *sky2)
/* Disable PiG firmware */
sky2_write16(hw, B0_CTST, Y2_HW_WOL_OFF);
+ /* Turn on legacy PCI-Express PME mode */
+ reg1 = sky2_pci_read32(hw, PCI_DEV_REG1);
+ reg1 |= PCI_Y2_PME_LEGACY;
+ sky2_pci_write32(hw, PCI_DEV_REG1, reg1);
sky2_write8(hw, SK_REG(port, RX_GMF_CTRL_T), GMF_RST_SET);
On Sat, 2 Jul 2011, Andi Kleen wrote:

> > The problem is that blk_peek_request() calls scsi_prep_fn(), which
> > struct scsi_device *sdev = q->queuedata;
> > int ret = BLKPREP_KILL;
> >
> > if (req->cmd_type == REQ_TYPE_BLOCK_PC)
> > ret = scsi_setup_blk_pc_cmnd(sdev, req);
> > return scsi_prep_return(q, req, ret);
> >
> > It doesn't check to see if sdev is NULL, nor does
> > scsi_setup_blk_pc_cmnd(). That accounts for this error:
>
> I actually added a NULL check in scsi_setup_blk_pc_cmnd early on,
> but that just caused RCU CPU stalls afterwards and then eventually

The RCU problem is likely to be a separate issue. It might even be a
result of the use-after-free problem with the elevator.

At any rate, it's clear that the crash in the refcounting log you
posted occurred because scsi_setup_blk_pc_cmnd() called
scsi_prep_state_check(), which tried to dereference the NULL pointer.

Would you like to try this patch to see if it fixes the problem? As I
said before, I'm not certain it's the best thing to do, but it worked
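For context, the scsi_lib.c hunk below only shows its context lines; the added lines presumably guard against a NULL queuedata, along these lines (a sketch, not the exact patch):

static int scsi_prep_fn(struct request_queue *q, struct request *req)
{
	struct scsi_device *sdev = q->queuedata;
	int ret = BLKPREP_KILL;

	/* queuedata is NULL once __scsi_remove_device() has cleared it,
	 * so fail the request instead of dereferencing sdev below. */
	if (unlikely(!sdev))
		return scsi_prep_return(q, req, ret);

	if (req->cmd_type == REQ_TYPE_BLOCK_PC)
		ret = scsi_setup_blk_pc_cmnd(sdev, req);
	return scsi_prep_return(q, req, ret);
}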
Index: usb-3.0/drivers/scsi/scsi_lib.c
===================================================================
--- usb-3.0.orig/drivers/scsi/scsi_lib.c
+++ usb-3.0/drivers/scsi/scsi_lib.c
@@ -1247,6 +1247,8 @@ int scsi_prep_fn(struct request_queue *q
struct scsi_device *sdev = q->queuedata;
int ret = BLKPREP_KILL;
if (req->cmd_type == REQ_TYPE_BLOCK_PC)
ret = scsi_setup_blk_pc_cmnd(sdev, req);
return scsi_prep_return(q, req, ret);
Index: usb-3.0/drivers/scsi/scsi_sysfs.c
===================================================================
--- usb-3.0.orig/drivers/scsi/scsi_sysfs.c
+++ usb-3.0/drivers/scsi/scsi_sysfs.c
@@ -322,6 +322,8 @@ static void scsi_device_dev_release_user
+ /* Freeing the queue signals to block that we're done */
+ scsi_free_queue(sdev->request_queue);
blk_put_queue(sdev->request_queue);
/* NULL queue means the device can't be used */
sdev->request_queue = NULL;
@@ -936,8 +938,6 @@ void __scsi_remove_device(struct scsi_de
/* cause the request function to reject all I/O requests */
sdev->request_queue->queuedata = NULL;

- /* Freeing the queue signals to block that we're done */
- scsi_free_queue(sdev->request_queue);
commit 3326c784c9f492e988617d93f647ae0cfd4c8d09
Author: Jiri Pirko <jpirko@redhat.com>
Date: Wed Jul 20 04:54:38 2011 +0000

forcedeth: do vlan cleanup

- unify vlan and nonvlan rx path
- kill np->vlangrp and nv_vlan_rx_register
- allow turning rx vlan accel on/off via ethtool (set_features)

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
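The unified receive path boils down to the following sketch; nv_rx_deliver() is a hypothetical extraction of the hunk below, and the 3.0-era __vlan_hwaccel_put_tag() takes just the skb and the tag:

/* Tag the skb if the NIC flagged a VLAN tag, then hand every frame,
 * tagged or not, to the same GRO entry point. */
static void nv_rx_deliver(struct fe_priv *np, struct sk_buff *skb,
			  u32 vlanflags)
{
	if (vlanflags & NV_RX3_VLAN_TAG_PRESENT) {
		u16 vid = vlanflags & NV_RX3_VLAN_TAG_MASK;

		__vlan_hwaccel_put_tag(skb, vid);
	}
	napi_gro_receive(&np->napi, skb);
}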
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index 537b695..e64cd9c 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -820,9 +820,6 @@ struct fe_priv {
struct nv_skb_map *tx_end_flip;
- struct vlan_group *vlangrp;
/* msi/msi-x fields */
struct msix_entry msi_x_entry[NV_MSI_X_MAX_VECTORS];
@@ -2766,17 +2763,13 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit)
skb->protocol = eth_type_trans(skb, dev);
- if (likely(!np->vlangrp)) {
- napi_gro_receive(&np->napi, skb);
- vlanflags = le32_to_cpu(np->get_rx.ex->buflow);
- if (vlanflags & NV_RX3_VLAN_TAG_PRESENT) {
- vlan_gro_receive(&np->napi, np->vlangrp,
- vlanflags & NV_RX3_VLAN_TAG_MASK, skb);
- napi_gro_receive(&np->napi, skb);
+ vlanflags = le32_to_cpu(np->get_rx.ex->buflow);
+ if (vlanflags & NV_RX3_VLAN_TAG_PRESENT) {
+ u16 vid = vlanflags & NV_RX3_VLAN_TAG_MASK;
+ __vlan_hwaccel_put_tag(skb, vid);
+ napi_gro_receive(&np->napi, skb);
dev->stats.rx_packets++;
dev->stats.rx_bytes += len;
@@ -4484,6 +4477,27 @@ static u32 nv_fix_features(struct net_device *dev, u32 features)
+static void nv_vlan_mode(struct net_device *dev, u32 features)
+ struct fe_priv *np = get_nvpriv(dev);
+ spin_lock_irq(&np->lock);
+ if (features & NETIF_F_HW_VLAN_RX)
+ np->txrxctl_bits |= NVREG_TXRXCTL_VLANSTRIP;
+ np->txrxctl_bits &= ~NVREG_TXRXCTL_VLANSTRIP;
+ if (features & NETIF_F_HW_VLAN_TX)
+ np->txrxctl_bits |= NVREG_TXRXCTL_VLANINS;
+ np->txrxctl_bits &= ~NVREG_TXRXCTL_VLANINS;
+ writel(np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl);
+ spin_unlock_irq(&np->lock);
static int nv_set_features(struct net_device *dev, u32 features)
struct fe_priv *np = netdev_priv(dev);
@@ -4504,6 +4518,9 @@ static int nv_set_features(struct net_device *dev, u32 features)
spin_unlock_irq(&np->lock);
+ if (changed & (NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX))
+ nv_vlan_mode(dev, features);
@@ -4879,29 +4896,6 @@ static const struct ethtool_ops ops = {
.self_test = nv_self_test,
-static void nv_vlan_rx_register(struct net_device *dev, struct vlan_group *grp)
- struct fe_priv *np = get_nvpriv(dev);
- spin_lock_irq(&np->lock);
- /* save vlan group */
- /* enable vlan on MAC */
- np->txrxctl_bits |= NVREG_TXRXCTL_VLANSTRIP | NVREG_TXRXCTL_VLANINS;
- /* disable vlan on MAC */
- np->txrxctl_bits &= ~NVREG_TXRXCTL_VLANSTRIP;
- np->txrxctl_bits &= ~NVREG_TXRXCTL_VLANINS;
- writel(np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl);
- spin_unlock_irq(&np->lock);
/* The mgmt unit and driver use a semaphore to access the phy during init */
static int nv_mgmt_acquire_sema(struct net_device *dev)
@@ -5208,7 +5202,6 @@ static const struct net_device_ops nv_netdev_ops = {
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = nv_set_mac_address,
.ndo_set_multicast_list = nv_set_multicast,
- .ndo_vlan_rx_register = nv_vlan_rx_register,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = nv_poll_controller,
@@ -5226,7 +5219,6 @@ static const struct net_device_ops nv_netdev_ops_optimized = {
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = nv_set_mac_address,
.ndo_set_multicast_list = nv_set_multicast,
- .ndo_vlan_rx_register = nv_vlan_rx_register,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = nv_poll_controller,
commit 0891b0e08937aaec2c4734acb94c5ff8042313bb
Author: Jiri Pirko <jpirko@redhat.com>
Date: Tue Jul 26 10:19:28 2011 +0000

For some reason, when rxaccel is disabled, NV_RX3_VLAN_TAG_PRESENT is
still set and some pseudorandom vids appear. So check for
NETIF_F_HW_VLAN_RX as well. Also correctly set hw_features and set vlan

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index e64cd9c..e55df30 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -2764,7 +2764,14 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit)
vlanflags = le32_to_cpu(np->get_rx.ex->buflow);
- if (vlanflags & NV_RX3_VLAN_TAG_PRESENT) {
+ * There's need to check for NETIF_F_HW_VLAN_RX here.
+ * Even if vlan rx accel is disabled,
+ * NV_RX3_VLAN_TAG_PRESENT is pseudo randomly set.
+ if (dev->features & NETIF_F_HW_VLAN_RX &&
+ vlanflags & NV_RX3_VLAN_TAG_PRESENT) {
u16 vid = vlanflags & NV_RX3_VLAN_TAG_MASK;
__vlan_hwaccel_put_tag(skb, vid);
@@ -5331,15 +5338,16 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
np->txrxctl_bits |= NVREG_TXRXCTL_RXCHECK;
dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_SG |
NETIF_F_TSO | NETIF_F_RXCSUM;
- dev->features |= dev->hw_features;
np->vlanctl_bits = 0;
if (id->driver_data & DEV_HAS_VLAN) {
np->vlanctl_bits = NVREG_VLANCONTROL_ENABLE;
- dev->features |= NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX;
+ dev->hw_features |= NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX;
+ dev->features |= dev->hw_features;
np->pause_flags = NV_PAUSEFRAME_RX_CAPABLE | NV_PAUSEFRAME_RX_REQ | NV_PAUSEFRAME_AUTONEG;
if ((id->driver_data & DEV_HAS_PAUSEFRAME_TX_V1) ||
(id->driver_data & DEV_HAS_PAUSEFRAME_TX_V2) ||
@@ -5607,6 +5615,8 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
+ nv_vlan_mode(dev, dev->features);
netif_carrier_off(dev);
dev_info(&pci_dev->dev, "ifname %s, PHY OUI 0x%x @ %d, addr %pM\n",
--- linux-3.0/scripts/kconfig/lxdialog/check-lxdialog.sh~ 2011-07-22 04:17:23.000000000 +0200
+++ linux-3.0/scripts/kconfig/lxdialog/check-lxdialog.sh 2011-08-25 21:26:04.799150642 +0200
$cc -print-file-name=lib${lib}.${ext} | grep -q /
if [ $? -eq 0 ]; then
+ for libt in tinfow tinfo ; do
+ $cc -print-file-name=lib${libt}.${ext} | grep -q /
+ if [ $? -eq 0 ]; then
commit 37b652ec6445be99d0193047d1eda129a1a315d3
Author: Dave Chinner <dchinner@redhat.com>
Date: Thu Aug 25 07:17:01 2011 +0000

xfs: don't serialise direct IO reads on page cache checks

There is no need to grab the i_mutex of the IO lock in exclusive
mode if we don't need to invalidate the page cache. Taking these
locks on every direct IO effectively serialises them, as taking the IO
lock in exclusive mode has to wait for all shared holders to drop
the lock. That only happens when IO is complete, so effectively it
prevents dispatch of concurrent direct IO reads to the same inode.

Fix this by taking the IO lock shared to check the page cache state,
and only then drop it and take the IO lock exclusively if there is
work to be done. Hence for the normal direct IO case, no exclusive

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Tested-by: Joern Engel <joern@logfs.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
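Condensed from the hunks below, the new locking pattern is:

xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
if ((ioflags & IO_ISDIRECT) && inode->i_mapping->nrpages) {
	/* Rare case: upgrade to exclusive only to invalidate the
	 * page cache, then demote straight back to shared. */
	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
	xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
	/* ... write back and invalidate cached pages ... */
	xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
}
/* Common case: stay shared for the whole read. */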
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 7f7b424..8fd4a07 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -317,7 +317,19 @@ xfs_file_aio_read(
if (XFS_FORCED_SHUTDOWN(mp))
- if (unlikely(ioflags & IO_ISDIRECT)) {
+ * Locking is a bit tricky here. If we take an exclusive lock
+ * for direct IO, we effectively serialise all new concurrent
+ * read IO to this file and block it behind IO that is currently in
+ * progress because IO in progress holds the IO lock shared. We only
+ * need to hold the lock exclusive to blow away the page cache, so
+ * only take lock exclusively if the page cache needs invalidation.
+ * This allows the normal direct IO case of no page cache pages to
+ * proceeed concurrently without serialisation.
+ xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+ if ((ioflags & IO_ISDIRECT) && inode->i_mapping->nrpages) {
+ xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
if (inode->i_mapping->nrpages) {
@@ -330,8 +342,7 @@ xfs_file_aio_read(
xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
- xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
Start the periodic sync workers only after we have finished xfs_mountfs
and thus fully set up the filesystem structures. Without this we can
call into xfs_qm_sync before the quotainfo structure is set up if the
mount takes unusually long, and probably hit other incomplete states

Also clean up the xfs_fs_fill_super error path by using consistent
label names, and removing an impossible-to-reach case.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reported-by: Arkadiusz Miskiewicz <arekm@maven.pl>
Reviewed-by: Alex Elder <aelder@sgi.com>
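In outline, the reordered setup becomes the following; the second error label is an assumption, as the corresponding lines are not shown in the hunk below:

/* Filesystem structures are fully set up before any worker runs. */
error = xfs_mountfs(mp);
if (error)
	goto out_filestream_unmount;

/* Only now start the periodic sync workers. */
error = xfs_syncd_init(mp);
if (error)
	goto out_unmount;	/* illustrative label */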
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index a1a881e..3ebb458 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1412,37 +1412,35 @@ xfs_fs_fill_super(
set_posix_acl_flag(sb);
- error = xfs_syncd_init(mp);
- goto out_filestream_unmount;
xfs_inode_shrinker_register(mp);
error = xfs_mountfs(mp);
- goto out_syncd_stop;
+ goto out_filestream_unmount;
+ error = xfs_syncd_init(mp);
root = igrab(VFS_I(mp->m_rootip));
+ goto out_syncd_stop;
if (is_bad_inode(root)) {
+ goto out_syncd_stop;
sb->s_root = d_alloc_root(root);
- xfs_inode_shrinker_unregister(mp);
- xfs_syncd_stop(mp);
out_filestream_unmount:
+ xfs_inode_shrinker_unregister(mp);
xfs_filestream_unmount(mp);
@@ -1456,17 +1454,12 @@ xfs_fs_fill_super(
- xfs_inode_shrinker_unregister(mp);
+ xfs_inode_shrinker_unregister(mp);
* Blow away any referenced inode in the filestreams cache.
From: Dave Chinner <dchinner@redhat.com>

commit 1d8c95a363bf8cd4d4182dd19c01693b635311c2 upstream

xfs: use a cursor for bulk AIL insertion

Delayed logging can insert tens of thousands of log items into the
AIL at the same LSN. When the committing of log commit records
occurs, we can get insertions occurring at an LSN that is not at the
end of the AIL. If there are thousands of items in the AIL on the
tail LSN, each insertion has to walk the AIL to find the correct
place to insert the new item into the AIL. This can consume large
amounts of CPU time and block other operations from occurring while
the traversals are in progress.

To avoid this repeated walk, use an AIL cursor to record
where we should be inserting the new items into the AIL without
having to repeat the walk. The cursor infrastructure already
provides this functionality for push walks, so this is a simple extension
of existing code. While this will not avoid the initial walk, it
will avoid repeating it tens of thousands of times during a single

This version includes logic improvements from Christoph Hellwig.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
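Condensed from the hunks below, the cursor usage pattern is:

struct xfs_ail_cursor cur;

/* One walk up front to find the insertion point for commit_lsn. */
spin_lock(&ailp->xa_lock);
xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn);
spin_unlock(&ailp->xa_lock);

/* Each batch reuses the cursor instead of re-walking the AIL;
 * xfs_trans_ail_update_bulk() drops xa_lock itself. */
xfs_trans_ail_update_bulk(ailp, &cur, log_items, nr_items, commit_lsn);

/* Tear the cursor down once all batches are inserted. */
spin_lock(&ailp->xa_lock);
xfs_trans_ail_cursor_done(ailp, &cur);
spin_unlock(&ailp->xa_lock);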
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index c83f63b..efc147f 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1426,6 +1426,7 @@ xfs_trans_committed(
xfs_log_item_batch_insert(
struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur,
struct xfs_log_item **log_items,
xfs_lsn_t commit_lsn)
@@ -1434,7 +1435,7 @@ xfs_log_item_batch_insert(
spin_lock(&ailp->xa_lock);
/* xfs_trans_ail_update_bulk drops ailp->xa_lock */
- xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn);
+ xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn);
for (i = 0; i < nr_items; i++)
IOP_UNPIN(log_items[i], 0);
@@ -1452,6 +1453,13 @@ xfs_log_item_batch_insert(
* as an iclog write error even though we haven't started any IO yet. Hence in
* this case all we need to do is IOP_COMMITTED processing, followed by an
* IOP_UNPIN(aborted) call.
+ * The AIL cursor is used to optimise the insert process. If commit_lsn is not
+ * at the end of the AIL, the insert cursor avoids the need to walk
+ * the AIL to find the insertion point on every xfs_log_item_batch_insert()
+ * call. This saves a lot of needless list walking and is a net win, even
+ * though it slightly increases that amount of AIL lock traffic to set it up
+ * and tear it down.
xfs_trans_committed_bulk(
@@ -1463,8 +1471,13 @@ xfs_trans_committed_bulk(
#define LOG_ITEM_BATCH_SIZE 32
struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE];
struct xfs_log_vec *lv;
+ struct xfs_ail_cursor cur;
+ spin_lock(&ailp->xa_lock);
+ xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn);
+ spin_unlock(&ailp->xa_lock);
/* unpin all the log items */
for (lv = log_vector; lv; lv = lv->lv_next ) {
struct xfs_log_item *lip = lv->lv_item;
@@ -1493,7 +1506,9 @@ xfs_trans_committed_bulk(
* Not a bulk update option due to unusual item_lsn.
* Push into AIL immediately, rechecking the lsn once
- * we have the ail lock. Then unpin the item.
+ * we have the ail lock. Then unpin the item. This does
+ * not affect the AIL cursor the bulk insert path is
spin_lock(&ailp->xa_lock);
if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0)
@@ -1507,7 +1522,7 @@ xfs_trans_committed_bulk(
/* Item is a candidate for bulk AIL insert. */
log_items[i++] = lv->lv_item;
if (i >= LOG_ITEM_BATCH_SIZE) {
- xfs_log_item_batch_insert(ailp, log_items,
+ xfs_log_item_batch_insert(ailp, &cur, log_items,
LOG_ITEM_BATCH_SIZE, commit_lsn);
@@ -1515,7 +1530,11 @@ xfs_trans_committed_bulk(
/* make sure we insert the remainder! */
- xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn);
+ xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn);
+ spin_lock(&ailp->xa_lock);
+ xfs_trans_ail_cursor_done(ailp, &cur);
+ spin_unlock(&ailp->xa_lock);
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 5fc2380..9a69dc0 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -272,9 +272,9 @@ xfs_trans_ail_cursor_clear(
- * Return the item in the AIL with the current lsn.
- * Return the current tree generation number for use
- * in calls to xfs_trans_next_ail().
+ * Initialise the cursor to the first item in the AIL with the given @lsn.
+ * This searches the list from lowest LSN to highest. Pass a @lsn of zero
+ * to initialise the cursor to the first item in the AIL.
xfs_trans_ail_cursor_first(
@@ -300,31 +300,97 @@ out:
- * splice the log item list into the AIL at the given LSN.
+ * Initialise the cursor to the last item in the AIL with the given @lsn.
+ * This searches the list from highest LSN to lowest. If there is no item with
+ * the value of @lsn, then it sets the cursor to the last item with an LSN lower
+static struct xfs_log_item *
+__xfs_trans_ail_cursor_last(
+ struct xfs_ail *ailp,
+ xfs_log_item_t *lip;
+ list_for_each_entry_reverse(lip, &ailp->xa_ail, li_ail) {
+ if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0)
+ * Initialise the cursor to the last item in the AIL with the given @lsn.
+ * This searches the list from highest LSN to lowest.
+struct xfs_log_item *
+xfs_trans_ail_cursor_last(
+ struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur,
+ xfs_trans_ail_cursor_init(ailp, cur);
+ cur->item = __xfs_trans_ail_cursor_last(ailp, lsn);
+ * splice the log item list into the AIL at the given LSN. We splice to the
+ * tail of the given LSN to maintain insert order for push traversals. The
+ * cursor is optional, allowing repeated updates to the same LSN to avoid
+ * repeated traversals.
- struct xfs_ail *ailp,
- struct list_head *list,
+ struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur,
+ struct list_head *list,
- xfs_log_item_t *next_lip;
+ struct xfs_log_item *lip = cur ? cur->item : NULL;
+ struct xfs_log_item *next_lip;
- /* If the list is empty, just insert the item. */
- if (list_empty(&ailp->xa_ail)) {
- list_splice(list, &ailp->xa_ail);
+ * Get a new cursor if we don't have a placeholder or the existing one
+ * has been invalidated.
+ if (!lip || (__psint_t)lip & 1) {
+ lip = __xfs_trans_ail_cursor_last(ailp, lsn);
+ /* The list is empty, so just splice and return. */
+ list_splice(list, &ailp->xa_ail);
- list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) {
- if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)
+ * Our cursor points to the item we want to insert _after_, so we have
+ * to update the cursor to point to the end of the list we are splicing
+ * in so that it points to the correct location for the next splice.
+ * i.e. before the splice
+ * lsn -> lsn -> lsn + x -> lsn + x ...
+ * | cursor points here
+ * After the splice we have:
+ * lsn -> lsn -> lsn -> lsn -> .... -> lsn -> lsn + x -> lsn + x ...
+ * | cursor points here | needs to move here
+ * So we set the cursor to the last item in the list to be spliced
+ * before we execute the splice, resulting in the cursor pointing to
+ * the correct item after the splice occurs.
+ next_lip = list_entry(list->prev, struct xfs_log_item, li_ail);
+ cur->item = next_lip;
- ASSERT(&next_lip->li_ail == &ailp->xa_ail ||
- XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0);
- list_splice_init(list, &next_lip->li_ail);
+ list_splice(list, &lip->li_ail);
@@ -645,6 +711,7 @@ xfs_trans_unlocked_item(
xfs_trans_ail_update_bulk(
struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur,
struct xfs_log_item **log_items,
xfs_lsn_t lsn) __releases(ailp->xa_lock)
@@ -674,7 +741,7 @@ xfs_trans_ail_update_bulk(
list_add(&lip->li_ail, &tmp);
- xfs_ail_splice(ailp, &tmp, lsn);
+ xfs_ail_splice(ailp, cur, &tmp, lsn);
spin_unlock(&ailp->xa_lock);
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 6b164e9..c0cb408 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -82,6 +82,7 @@ struct xfs_ail {
extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */
void xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur,
struct xfs_log_item **log_items, int nr_items,
xfs_lsn_t lsn) __releases(ailp->xa_lock);
@@ -90,7 +91,7 @@ xfs_trans_ail_update(
struct xfs_log_item *lip,
xfs_lsn_t lsn) __releases(ailp->xa_lock)
- xfs_trans_ail_update_bulk(ailp, &lip, 1, lsn);
+ xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn);
void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp,
@@ -111,10 +112,13 @@ xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp);
void xfs_trans_unlocked_item(struct xfs_ail *,
-struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
+struct xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
struct xfs_ail_cursor *cur,
-struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
+struct xfs_log_item * xfs_trans_ail_cursor_last(struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur,
+struct xfs_log_item * xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
struct xfs_ail_cursor *cur);
void xfs_trans_ail_cursor_done(struct xfs_ail *ailp,
struct xfs_ail_cursor *cur);
commit bc6e588a8971aa74c02e42db4d6e0248679f3738 upstream

If an item was locked we should not update xa_last_pushed_lsn and thus skip
it when restarting the AIL scan, as we need to be able to lock and write it
out as soon as possible. Otherwise heavy lock contention might starve AIL
pushing too easily, especially given the larger backoff once we moved
xa_last_pushed_lsn all the way to the target lsn.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reported-by: Stefan Priebe <s.priebe@profihost.ag>
Tested-by: Stefan Priebe <s.priebe@profihost.ag>
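After the one-line removal below, the locked case leaves xa_last_pushed_lsn untouched, roughly as follows; the stuck++ accounting is assumed from surrounding code that is not shown:

case XFS_ITEM_LOCKED:
	XFS_STATS_INC(xs_push_ail_locked);
	/* Do not advance xa_last_pushed_lsn: the next scan must
	 * retry this item rather than skip past it. */
	stuck++;
	break;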
Index: xfs/fs/xfs/xfs_trans_ail.c
===================================================================
--- xfs.orig/fs/xfs/xfs_trans_ail.c 2011-10-14 14:42:03.004395373 +0200
+++ xfs/fs/xfs/xfs_trans_ail.c 2011-10-14 14:42:22.687898198 +0200
@@ -491,7 +491,6 @@ xfs_ail_worker(
case XFS_ITEM_LOCKED:
XFS_STATS_INC(xs_push_ail_locked);
- ailp->xa_last_pushed_lsn = lsn;
commit 17b38471c3c07a49f0bbc2ecc2e92050c164e226 upstream

We need to check for pinned buffers even in .iop_pushbuf, given that inode
items flush into the same buffers that may be pinned directly due to
operations on the unlinked inode list operating directly on buffers. To do
this add a return value to .iop_pushbuf that tells the AIL push about this
and use the existing log force mechanisms to unpin it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reported-by: Stefan Priebe <s.priebe@profihost.ag>
Tested-by: Stefan Priebe <s.priebe@profihost.ag>
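On the caller side, xfs_ail_worker() then handles the new return value roughly as follows; the stuck/flush_log/push_xfsbufd bookkeeping is assumed from surrounding code that the hunks below elide:

case XFS_ITEM_PUSHBUF:
	XFS_STATS_INC(xs_push_ail_pushbuf);
	if (!IOP_PUSHBUF(lip)) {
		/* Buffer was pinned (or gone): remember to force
		 * the log so it can be unpinned. */
		stuck++;
		flush_log = 1;
	} else {
		ailp->xa_last_pushed_lsn = lsn;
	}
	push_xfsbufd = 1;
	break;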
Index: xfs/fs/xfs/quota/xfs_dquot_item.c
===================================================================
--- xfs.orig/fs/xfs/quota/xfs_dquot_item.c 2011-10-14 14:41:41.036231498 +0200
+++ xfs/fs/xfs/quota/xfs_dquot_item.c 2011-10-14 14:44:09.276394842 +0200
@@ -183,13 +183,14 @@ xfs_qm_dqunpin_wait(
* search the buffer cache can be a time consuming thing, and AIL lock is a
xfs_qm_dquot_logitem_pushbuf(
struct xfs_log_item *lip)
struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
struct xfs_dquot *dqp = qlip->qli_dquot;
ASSERT(XFS_DQ_IS_LOCKED(dqp));
@@ -201,17 +202,20 @@ xfs_qm_dquot_logitem_pushbuf(
if (completion_done(&dqp->q_flush) ||
!(lip->li_flags & XFS_LI_IN_AIL)) {
bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno,
dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
if (XFS_BUF_ISDELAYWRITE(bp))
xfs_buf_delwri_promote(bp);
+ if (XFS_BUF_ISPINNED(bp))
Index: xfs/fs/xfs/xfs_buf_item.c
===================================================================
--- xfs.orig/fs/xfs/xfs_buf_item.c 2011-10-14 14:41:41.000000000 +0200
+++ xfs/fs/xfs/xfs_buf_item.c 2011-10-14 14:44:24.367895813 +0200
@@ -632,7 +632,7 @@ xfs_buf_item_push(
* the xfsbufd to get this buffer written. We have to unlock the buffer
* to allow the xfsbufd to write it, too.
xfs_buf_item_pushbuf(
struct xfs_log_item *lip)
@@ -646,6 +646,7 @@ xfs_buf_item_pushbuf(
xfs_buf_delwri_promote(bp);
Index: xfs/fs/xfs/xfs_inode_item.c
===================================================================
--- xfs.orig/fs/xfs/xfs_inode_item.c 2011-10-14 14:41:41.000000000 +0200
+++ xfs/fs/xfs/xfs_inode_item.c 2011-10-14 14:44:19.323950541 +0200
@@ -713,13 +713,14 @@ xfs_inode_item_committed(
* marked delayed write. If that's the case, we'll promote it and that will
* allow the caller to write the buffer by triggering the xfsbufd to run.
xfs_inode_item_pushbuf(
struct xfs_log_item *lip)
struct xfs_inode_log_item *iip = INODE_ITEM(lip);
struct xfs_inode *ip = iip->ili_inode;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
@@ -730,7 +731,7 @@ xfs_inode_item_pushbuf(
if (completion_done(&ip->i_flush) ||
!(lip->li_flags & XFS_LI_IN_AIL)) {
xfs_iunlock(ip, XFS_ILOCK_SHARED);
bp = xfs_incore(ip->i_mount->m_ddev_targp, iip->ili_format.ilf_blkno,
@@ -738,10 +739,13 @@ xfs_inode_item_pushbuf(
xfs_iunlock(ip, XFS_ILOCK_SHARED);
if (XFS_BUF_ISDELAYWRITE(bp))
xfs_buf_delwri_promote(bp);
+ if (XFS_BUF_ISPINNED(bp))
Index: xfs/fs/xfs/xfs_trans.h
===================================================================
--- xfs.orig/fs/xfs/xfs_trans.h 2011-10-14 14:41:41.000000000 +0200
+++ xfs/fs/xfs/xfs_trans.h 2011-10-14 14:43:45.308394072 +0200
@@ -350,7 +350,7 @@ typedef struct xfs_item_ops {
void (*iop_unlock)(xfs_log_item_t *);
xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
void (*iop_push)(xfs_log_item_t *);
- void (*iop_pushbuf)(xfs_log_item_t *);
+ bool (*iop_pushbuf)(xfs_log_item_t *);
void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
Index: xfs/fs/xfs/xfs_trans_ail.c
===================================================================
--- xfs.orig/fs/xfs/xfs_trans_ail.c 2011-10-14 14:42:22.000000000 +0200
+++ xfs/fs/xfs/xfs_trans_ail.c 2011-10-14 14:43:45.316393949 +0200
@@ -478,8 +478,13 @@ xfs_ail_worker(
case XFS_ITEM_PUSHBUF:
XFS_STATS_INC(xs_push_ail_pushbuf);
- ailp->xa_last_pushed_lsn = lsn;
+ if (!IOP_PUSHBUF(lip)) {
+ ailp->xa_last_pushed_lsn = lsn;
commit 0030807c66f058230bcb20d2573bcaf28852e804 upstream

Currently we have a few issues with the way the workqueue code is used to
implement AIL pushing:

- it accidentally uses the same workqueue as the syncer action, and thus
can be prevented from running if there are enough sync actions active

- it doesn't use the HIGHPRI flag to queue at the head of the queue of

At this point I'm not confident enough in getting all the workqueue flags and
tweaks right to provide a perfectly reliable execution context for AIL
pushing, which is the most important piece in XFS to make forward progress

Revert back to using a kthread per filesystem, which fixes all the above issues
at the cost of having a task struct and stack around for each mounted
filesystem. In addition this also gives us much better ways to diagnose
any issues involving hung AIL pushing and removes a small amount of code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reported-by: Stefan Priebe <s.priebe@profihost.ag>
Tested-by: Stefan Priebe <s.priebe@profihost.ag>
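Pieced together from the hunks below, the replacement thread is essentially this; the try_to_freeze() placement is assumed, as that line is not shown:

static int
xfsaild(
	void		*data)
{
	struct xfs_ail	*ailp = data;
	long		tout = 0;	/* milliseconds */

	while (!kthread_should_stop()) {
		/* Short timeouts are busy periods: sleep killably so
		 * ordinary signals can't cut the backoff short; otherwise
		 * sleep interruptibly until xfs_ail_push() wakes us. */
		if (tout && tout <= 20)
			__set_current_state(TASK_KILLABLE);
		else
			__set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout(tout ?
				 msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);

		try_to_freeze();

		tout = xfsaild_push(ailp);	/* next sleep interval in ms */
	}
	return 0;
}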
Index: xfs/fs/xfs/xfs_trans_ail.c
===================================================================
--- xfs.orig/fs/xfs/xfs_trans_ail.c 2011-10-14 14:43:45.316393949 +0200
+++ xfs/fs/xfs/xfs_trans_ail.c 2011-10-14 14:45:11.937395278 +0200
#include "xfs_trans_priv.h"
#include "xfs_error.h"
-struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */
* Check that the list is sorted as it should be.
@@ -406,16 +404,10 @@ xfs_ail_delete(
xfs_trans_ail_cursor_clear(ailp, lip);
- * xfs_ail_worker does the work of pushing on the AIL. It will requeue itself
- * to run at a later time if there is more work to do to complete the push.
- struct work_struct *work)
+ struct xfs_ail *ailp)
- struct xfs_ail *ailp = container_of(to_delayed_work(work),
- struct xfs_ail, xa_work);
xfs_mount_t *mp = ailp->xa_mount;
struct xfs_ail_cursor *cur = &ailp->xa_cursors;
xfs_log_item_t *lip;
@@ -556,20 +548,6 @@ out_done:
/* We're past our target or empty, so idle */
ailp->xa_last_pushed_lsn = 0;
- * We clear the XFS_AIL_PUSHING_BIT first before checking
- * whether the target has changed. If the target has changed,
- * this pushes the requeue race directly onto the result of the
- * atomic test/set bit, so we are guaranteed that either the
- * the pusher that changed the target or ourselves will requeue
- * the work (but not both).
- clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags);
- if (XFS_LSN_CMP(ailp->xa_target, target) == 0 ||
- test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
} else if (XFS_LSN_CMP(lsn, target) >= 0) {
@@ -592,9 +570,30 @@ out_done:
- /* There is more to do, requeue us. */
- queue_delayed_work(xfs_syncd_wq, &ailp->xa_work,
- msecs_to_jiffies(tout));
+ struct xfs_ail *ailp = data;
+ long tout = 0; /* milliseconds */
+ while (!kthread_should_stop()) {
+ if (tout && tout <= 20)
+ __set_current_state(TASK_KILLABLE);
+ __set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(tout ?
+ msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
+ tout = xfsaild_push(ailp);
@@ -629,8 +628,9 @@ xfs_ail_push(
xfs_trans_ail_copy_lsn(ailp, &ailp->xa_target, &threshold_lsn);
- if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
- queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0);
+ wake_up_process(ailp->xa_task);
@@ -865,9 +865,18 @@ xfs_trans_ail_init(
ailp->xa_mount = mp;
INIT_LIST_HEAD(&ailp->xa_ail);
spin_lock_init(&ailp->xa_lock);
- INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker);
+ ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
+ ailp->xa_mount->m_fsname);
+ if (IS_ERR(ailp->xa_task))
+ goto out_free_ailp;
@@ -876,6 +885,6 @@ xfs_trans_ail_destroy(
struct xfs_ail *ailp = mp->m_ail;
- cancel_delayed_work_sync(&ailp->xa_work);
+ kthread_stop(ailp->xa_task);
Index: xfs/fs/xfs/xfs_trans_priv.h
===================================================================
--- xfs.orig/fs/xfs/xfs_trans_priv.h 2011-10-14 14:42:03.000000000 +0200
+++ xfs/fs/xfs/xfs_trans_priv.h 2011-10-14 14:45:38.191895324 +0200
@@ -64,23 +64,17 @@ struct xfs_ail_cursor {
struct xfs_mount *xa_mount;
+ struct task_struct *xa_task;
struct list_head xa_ail;
xfs_lsn_t xa_target;
struct xfs_ail_cursor xa_cursors;
- struct delayed_work xa_work;
xfs_lsn_t xa_last_pushed_lsn;
- unsigned long xa_flags;
-#define XFS_AIL_PUSHING_BIT 0
* From xfs_trans_ail.c
-extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */
void xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
struct xfs_ail_cursor *cur,
struct xfs_log_item **log_items, int nr_items,
Index: xfs/fs/xfs/linux-2.6/xfs_linux.h
===================================================================
--- xfs.orig/fs/xfs/linux-2.6/xfs_linux.h 2011-10-14 14:41:41.000000000 +0200
+++ xfs/fs/xfs/linux-2.6/xfs_linux.h 2011-10-14 14:45:11.941411722 +0200
#include <linux/ctype.h>
#include <linux/writeback.h>
#include <linux/capability.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
#include <linux/list_sort.h>
#include <asm/page.h>
Index: xfs/fs/xfs/linux-2.6/xfs_super.c
===================================================================
--- xfs.orig/fs/xfs/linux-2.6/xfs_super.c 2011-10-14 14:46:38.497394866 +0200
+++ xfs/fs/xfs/linux-2.6/xfs_super.c 2011-10-14 14:46:49.047894210 +0200
@@ -1660,24 +1660,13 @@ xfs_init_workqueues(void)
xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8);
- xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8);
- goto out_destroy_syncd;
- destroy_workqueue(xfs_syncd_wq);
xfs_destroy_workqueues(void)
- destroy_workqueue(xfs_ail_wq);
destroy_workqueue(xfs_syncd_wq);
Fixes a possible memory corruption when the link is larger than
MAXPATHLEN and XFS_DEBUG is not enabled. This also removes the
S_ISLNK assert, since the inode mode is checked previously in
xfs_readlink_by_handle() and via the VFS.

Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>

fs/xfs/xfs_vnodeops.c | 11 ++++++++---
1 files changed, 8 insertions(+), 3 deletions(-)
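In outline, the guard added below turns the debug-only assert into a runtime check; the goto-based unwinding here is an assumption, since the surrounding error path is not shown:

pathlen = ip->i_d.di_size;

if (pathlen > MAXPATHLEN) {
	/* On-disk corruption: report it instead of overrunning the
	 * MAXPATHLEN-sized buffer the caller handed us. */
	xfs_alert(mp, "%s: inode (%llu) symlink length (%d) too long",
		  __func__, (unsigned long long)ip->i_ino, pathlen);
	error = XFS_ERROR(EFSCORRUPTED);
	goto out;	/* illustrative label */
}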
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 51fc429..c3288be 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -123,13 +123,18 @@ xfs_readlink(
xfs_ilock(ip, XFS_ILOCK_SHARED);
- ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFLNK);
- ASSERT(ip->i_d.di_size <= MAXPATHLEN);
pathlen = ip->i_d.di_size;
+ if (pathlen > MAXPATHLEN) {
+ xfs_alert(mp, "%s: inode (%llu) symlink length (%d) too long",
+ __func__, (unsigned long long)ip->i_ino, pathlen);
+ return XFS_ERROR(EFSCORRUPTED);
if (ip->i_df.if_flags & XFS_IFINLINE) {
memcpy(link, ip->i_df.if_u1.if_data, pathlen);
link[pathlen] = '\0';