--- linux-2.6.32/drivers/infiniband/Kconfig~ 2009-12-05 00:26:03.663774916 +0100
+++ linux-2.6.32/drivers/infiniband/Kconfig 2009-12-05 00:26:05.914179759 +0100
config INFINIBAND_ADDR_TRANS
- depends on !(INFINIBAND = y && IPV6 = m)
source "drivers/infiniband/hw/mthca/Kconfig"
--- linux-2.6.33/scripts/mod/modpost.c~ 2010-02-24 19:52:17.000000000 +0100
+++ linux-2.6.33/scripts/mod/modpost.c 2010-03-07 14:26:47.242168558 +0100
-#include "../../include/generated/autoconf.h"
+// PLD architectures don't use CONFIG_SYMBOL_PREFIX
+//#include "../../include/generated/autoconf.h"
#include "../../include/linux/license.h"
/* Some toolchains use a `_' prefix for all user symbols. */
commit 87b09f1f25cd1e01d7c50bf423c7fe33027d7511
Author: stephen hemminger <shemminger@vyatta.com>
Date: Fri Feb 12 06:58:00 2010 +0000

sky2: dont enable PME legacy mode

This bit is not changed by the vendor driver and should be left alone.
The documentation implies this is a debug bit:
0 = WAKE# only asserted when VMAIN not available
1 = WAKE# depends on wake events and is independent of VMAIN.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
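Note that the diff below is expressed in reverse (b/a order), so applying it removes the legacy-PME hunk. As a rough sketch, the behaviour being reverted amounts to the following; sky2_set_legacy_pme() is a hypothetical helper, only the register accessors come from the driver:

/* Sketch of the reverted behaviour: force WAKE# to follow wake events
 * regardless of VMAIN by setting the legacy PME bit. */
static void sky2_set_legacy_pme(struct sky2_hw *hw, bool enable)
{
	u32 reg1 = sky2_pci_read32(hw, PCI_DEV_REG1);

	if (enable)
		reg1 |= PCI_Y2_PME_LEGACY;	/* WAKE# follows wake events */
	else
		reg1 &= ~PCI_Y2_PME_LEGACY;	/* WAKE# only without VMAIN */
	sky2_pci_write32(hw, PCI_DEV_REG1, reg1);
}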
diff --git b/drivers/net/sky2.c a/drivers/net/sky2.c
index 2494842..edf37aa 100644
--- b/drivers/net/sky2.c
+++ a/drivers/net/sky2.c
@@ -733,6 +733,7 @@ static void sky2_wol_init(struct sky2_port *sky2)
unsigned port = sky2->port;
enum flow_control save_mode;
/* Bring hardware out of reset */
sky2_write16(hw, B0_CTST, CS_RST_CLR);
@@ -786,6 +787,11 @@ static void sky2_wol_init(struct sky2_port *sky2)
/* Disable PiG firmware */
sky2_write16(hw, B0_CTST, Y2_HW_WOL_OFF);
+ /* Turn on legacy PCI-Express PME mode */
+ reg1 = sky2_pci_read32(hw, PCI_DEV_REG1);
+ reg1 |= PCI_Y2_PME_LEGACY;
+ sky2_pci_write32(hw, PCI_DEV_REG1, reg1);
sky2_write8(hw, SK_REG(port, RX_GMF_CTRL_T), GMF_RST_SET);
On Sat, 2 Jul 2011, Andi Kleen wrote:

> > The problem is that blk_peek_request() calls scsi_prep_fn(), which
> > struct scsi_device *sdev = q->queuedata;
> > int ret = BLKPREP_KILL;
> >
> > if (req->cmd_type == REQ_TYPE_BLOCK_PC)
> > ret = scsi_setup_blk_pc_cmnd(sdev, req);
> > return scsi_prep_return(q, req, ret);
> >
> > It doesn't check to see if sdev is NULL, nor does
> > scsi_setup_blk_pc_cmnd(). That accounts for this error:
>
> I actually added a NULL check in scsi_setup_blk_pc_cmnd early on,
> but that just caused RCU CPU stalls afterwards and then eventually

The RCU problem is likely to be a separate issue. It might even be a
result of the use-after-free problem with the elevator.

At any rate, it's clear that the crash in the refcounting log you
posted occurred because scsi_setup_blk_pc_cmnd() called
scsi_prep_state_check(), which tried to dereference the NULL pointer.

Would you like to try this patch to see if it fixes the problem? As I
said before, I'm not certain it's the best thing to do, but it worked
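For context, the scsi_lib.c hunk below only shows its context lines; the added lines presumably guard against a NULL queuedata, along these lines (a sketch, not the exact patch):

static int scsi_prep_fn(struct request_queue *q, struct request *req)
{
	struct scsi_device *sdev = q->queuedata;
	int ret = BLKPREP_KILL;

	/* queuedata is NULL once __scsi_remove_device() has cleared it,
	 * so fail the request instead of dereferencing sdev below. */
	if (unlikely(!sdev))
		return scsi_prep_return(q, req, ret);

	if (req->cmd_type == REQ_TYPE_BLOCK_PC)
		ret = scsi_setup_blk_pc_cmnd(sdev, req);
	return scsi_prep_return(q, req, ret);
}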
Index: usb-3.0/drivers/scsi/scsi_lib.c
===================================================================
--- usb-3.0.orig/drivers/scsi/scsi_lib.c
+++ usb-3.0/drivers/scsi/scsi_lib.c
@@ -1247,6 +1247,8 @@ int scsi_prep_fn(struct request_queue *q
struct scsi_device *sdev = q->queuedata;
int ret = BLKPREP_KILL;
if (req->cmd_type == REQ_TYPE_BLOCK_PC)
ret = scsi_setup_blk_pc_cmnd(sdev, req);
return scsi_prep_return(q, req, ret);
Index: usb-3.0/drivers/scsi/scsi_sysfs.c
===================================================================
--- usb-3.0.orig/drivers/scsi/scsi_sysfs.c
+++ usb-3.0/drivers/scsi/scsi_sysfs.c
@@ -322,6 +322,8 @@ static void scsi_device_dev_release_user
+ /* Freeing the queue signals to block that we're done */
+ scsi_free_queue(sdev->request_queue);
blk_put_queue(sdev->request_queue);
/* NULL queue means the device can't be used */
sdev->request_queue = NULL;
@@ -936,8 +938,6 @@ void __scsi_remove_device(struct scsi_de
/* cause the request function to reject all I/O requests */
sdev->request_queue->queuedata = NULL;

- /* Freeing the queue signals to block that we're done */
- scsi_free_queue(sdev->request_queue);
commit 3326c784c9f492e988617d93f647ae0cfd4c8d09
Author: Jiri Pirko <jpirko@redhat.com>
Date: Wed Jul 20 04:54:38 2011 +0000

forcedeth: do vlan cleanup

- unify vlan and nonvlan rx path
- kill np->vlangrp and nv_vlan_rx_register
- allow turning rx vlan accel on/off via ethtool (set_features)

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
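The unified receive path boils down to the following sketch; nv_rx_deliver() is a hypothetical extraction of the hunk below, and the 3.0-era __vlan_hwaccel_put_tag() takes just the skb and the tag:

/* Tag the skb if the NIC flagged a VLAN tag, then hand every frame,
 * tagged or not, to the same GRO entry point. */
static void nv_rx_deliver(struct fe_priv *np, struct sk_buff *skb,
			  u32 vlanflags)
{
	if (vlanflags & NV_RX3_VLAN_TAG_PRESENT) {
		u16 vid = vlanflags & NV_RX3_VLAN_TAG_MASK;

		__vlan_hwaccel_put_tag(skb, vid);
	}
	napi_gro_receive(&np->napi, skb);
}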
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index 537b695..e64cd9c 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -820,9 +820,6 @@ struct fe_priv {
struct nv_skb_map *tx_end_flip;
- struct vlan_group *vlangrp;
/* msi/msi-x fields */
struct msix_entry msi_x_entry[NV_MSI_X_MAX_VECTORS];
@@ -2766,17 +2763,13 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit)
skb->protocol = eth_type_trans(skb, dev);
- if (likely(!np->vlangrp)) {
- napi_gro_receive(&np->napi, skb);
- vlanflags = le32_to_cpu(np->get_rx.ex->buflow);
- if (vlanflags & NV_RX3_VLAN_TAG_PRESENT) {
- vlan_gro_receive(&np->napi, np->vlangrp,
- vlanflags & NV_RX3_VLAN_TAG_MASK, skb);
- napi_gro_receive(&np->napi, skb);
+ vlanflags = le32_to_cpu(np->get_rx.ex->buflow);
+ if (vlanflags & NV_RX3_VLAN_TAG_PRESENT) {
+ u16 vid = vlanflags & NV_RX3_VLAN_TAG_MASK;
+ __vlan_hwaccel_put_tag(skb, vid);
+ napi_gro_receive(&np->napi, skb);
dev->stats.rx_packets++;
dev->stats.rx_bytes += len;
@@ -4484,6 +4477,27 @@ static u32 nv_fix_features(struct net_device *dev, u32 features)
+static void nv_vlan_mode(struct net_device *dev, u32 features)
+ struct fe_priv *np = get_nvpriv(dev);
+ spin_lock_irq(&np->lock);
+ if (features & NETIF_F_HW_VLAN_RX)
+ np->txrxctl_bits |= NVREG_TXRXCTL_VLANSTRIP;
+ np->txrxctl_bits &= ~NVREG_TXRXCTL_VLANSTRIP;
+ if (features & NETIF_F_HW_VLAN_TX)
+ np->txrxctl_bits |= NVREG_TXRXCTL_VLANINS;
+ np->txrxctl_bits &= ~NVREG_TXRXCTL_VLANINS;
+ writel(np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl);
+ spin_unlock_irq(&np->lock);
static int nv_set_features(struct net_device *dev, u32 features)
struct fe_priv *np = netdev_priv(dev);
@@ -4504,6 +4518,9 @@ static int nv_set_features(struct net_device *dev, u32 features)
spin_unlock_irq(&np->lock);
+ if (changed & (NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX))
+ nv_vlan_mode(dev, features);
@@ -4879,29 +4896,6 @@ static const struct ethtool_ops ops = {
.self_test = nv_self_test,
-static void nv_vlan_rx_register(struct net_device *dev, struct vlan_group *grp)
- struct fe_priv *np = get_nvpriv(dev);
- spin_lock_irq(&np->lock);
- /* save vlan group */
- /* enable vlan on MAC */
- np->txrxctl_bits |= NVREG_TXRXCTL_VLANSTRIP | NVREG_TXRXCTL_VLANINS;
- /* disable vlan on MAC */
- np->txrxctl_bits &= ~NVREG_TXRXCTL_VLANSTRIP;
- np->txrxctl_bits &= ~NVREG_TXRXCTL_VLANINS;
- writel(np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl);
- spin_unlock_irq(&np->lock);
/* The mgmt unit and driver use a semaphore to access the phy during init */
static int nv_mgmt_acquire_sema(struct net_device *dev)
@@ -5208,7 +5202,6 @@ static const struct net_device_ops nv_netdev_ops = {
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = nv_set_mac_address,
.ndo_set_multicast_list = nv_set_multicast,
- .ndo_vlan_rx_register = nv_vlan_rx_register,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = nv_poll_controller,
@@ -5226,7 +5219,6 @@ static const struct net_device_ops nv_netdev_ops_optimized = {
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = nv_set_mac_address,
.ndo_set_multicast_list = nv_set_multicast,
- .ndo_vlan_rx_register = nv_vlan_rx_register,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = nv_poll_controller,
commit 0891b0e08937aaec2c4734acb94c5ff8042313bb
Author: Jiri Pirko <jpirko@redhat.com>
Date: Tue Jul 26 10:19:28 2011 +0000

For some reason, when rxaccel is disabled, NV_RX3_VLAN_TAG_PRESENT is
still set and some pseudorandom vids appear. So check for
NETIF_F_HW_VLAN_RX as well. Also correctly set hw_features and set vlan

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index e64cd9c..e55df30 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -2764,7 +2764,14 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit)
vlanflags = le32_to_cpu(np->get_rx.ex->buflow);
- if (vlanflags & NV_RX3_VLAN_TAG_PRESENT) {
+ * There's need to check for NETIF_F_HW_VLAN_RX here.
+ * Even if vlan rx accel is disabled,
+ * NV_RX3_VLAN_TAG_PRESENT is pseudo randomly set.
+ if (dev->features & NETIF_F_HW_VLAN_RX &&
+ vlanflags & NV_RX3_VLAN_TAG_PRESENT) {
u16 vid = vlanflags & NV_RX3_VLAN_TAG_MASK;
__vlan_hwaccel_put_tag(skb, vid);
@@ -5331,15 +5338,16 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
np->txrxctl_bits |= NVREG_TXRXCTL_RXCHECK;
dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_SG |
NETIF_F_TSO | NETIF_F_RXCSUM;
- dev->features |= dev->hw_features;
np->vlanctl_bits = 0;
if (id->driver_data & DEV_HAS_VLAN) {
np->vlanctl_bits = NVREG_VLANCONTROL_ENABLE;
- dev->features |= NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX;
+ dev->hw_features |= NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX;
+ dev->features |= dev->hw_features;
np->pause_flags = NV_PAUSEFRAME_RX_CAPABLE | NV_PAUSEFRAME_RX_REQ | NV_PAUSEFRAME_AUTONEG;
if ((id->driver_data & DEV_HAS_PAUSEFRAME_TX_V1) ||
(id->driver_data & DEV_HAS_PAUSEFRAME_TX_V2) ||
@@ -5607,6 +5615,8 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
+ nv_vlan_mode(dev, dev->features);
netif_carrier_off(dev);
dev_info(&pci_dev->dev, "ifname %s, PHY OUI 0x%x @ %d, addr %pM\n",
--- linux-3.0/scripts/kconfig/lxdialog/check-lxdialog.sh~ 2011-07-22 04:17:23.000000000 +0200
+++ linux-3.0/scripts/kconfig/lxdialog/check-lxdialog.sh 2011-08-25 21:26:04.799150642 +0200
$cc -print-file-name=lib${lib}.${ext} | grep -q /
if [ $? -eq 0 ]; then
+ for libt in tinfow tinfo ; do
+ $cc -print-file-name=lib${libt}.${ext} | grep -q /
+ if [ $? -eq 0 ]; then
commit 37b652ec6445be99d0193047d1eda129a1a315d3
Author: Dave Chinner <dchinner@redhat.com>
Date: Thu Aug 25 07:17:01 2011 +0000

xfs: don't serialise direct IO reads on page cache checks

There is no need to grab the i_mutex of the IO lock in exclusive
mode if we don't need to invalidate the page cache. Taking these
locks on every direct IO effectively serialises them, as taking the IO
lock in exclusive mode has to wait for all shared holders to drop
the lock. That only happens when IO is complete, so effectively it
prevents dispatch of concurrent direct IO reads to the same inode.

Fix this by taking the IO lock shared to check the page cache state,
and only then drop it and take the IO lock exclusively if there is
work to be done. Hence for the normal direct IO case, no exclusive

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Tested-by: Joern Engel <joern@logfs.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
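Condensed from the hunks below, the new locking pattern is:

xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
if ((ioflags & IO_ISDIRECT) && inode->i_mapping->nrpages) {
	/* Rare case: upgrade to exclusive only to invalidate the
	 * page cache, then demote straight back to shared. */
	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
	xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
	/* ... write back and invalidate cached pages ... */
	xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
}
/* Common case: stay shared for the whole read. */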
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 7f7b424..8fd4a07 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -317,7 +317,19 @@ xfs_file_aio_read(
if (XFS_FORCED_SHUTDOWN(mp))
- if (unlikely(ioflags & IO_ISDIRECT)) {
+ * Locking is a bit tricky here. If we take an exclusive lock
+ * for direct IO, we effectively serialise all new concurrent
+ * read IO to this file and block it behind IO that is currently in
+ * progress because IO in progress holds the IO lock shared. We only
+ * need to hold the lock exclusive to blow away the page cache, so
+ * only take lock exclusively if the page cache needs invalidation.
+ * This allows the normal direct IO case of no page cache pages to
+ * proceeed concurrently without serialisation.
+ xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+ if ((ioflags & IO_ISDIRECT) && inode->i_mapping->nrpages) {
+ xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
if (inode->i_mapping->nrpages) {
@@ -330,8 +342,7 @@ xfs_file_aio_read(
xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
- xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
Start the periodic sync workers only after we have finished xfs_mountfs
and thus fully set up the filesystem structures. Without this we can
call into xfs_qm_sync before the quotainfo structure is set up if the
mount takes unusually long, and probably hit other incomplete states

Also clean up the xfs_fs_fill_super error path by using consistent
label names, and removing an impossible-to-reach case.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reported-by: Arkadiusz Miskiewicz <arekm@maven.pl>
Reviewed-by: Alex Elder <aelder@sgi.com>
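In outline, the reordered setup becomes the following; the second error label is an assumption, as the corresponding lines are not shown in the hunk below:

/* Filesystem structures are fully set up before any worker runs. */
error = xfs_mountfs(mp);
if (error)
	goto out_filestream_unmount;

/* Only now start the periodic sync workers. */
error = xfs_syncd_init(mp);
if (error)
	goto out_unmount;	/* illustrative label */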
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index a1a881e..3ebb458 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1412,37 +1412,35 @@ xfs_fs_fill_super(
set_posix_acl_flag(sb);
- error = xfs_syncd_init(mp);
- goto out_filestream_unmount;
xfs_inode_shrinker_register(mp);
error = xfs_mountfs(mp);
- goto out_syncd_stop;
+ goto out_filestream_unmount;
+ error = xfs_syncd_init(mp);
root = igrab(VFS_I(mp->m_rootip));
+ goto out_syncd_stop;
if (is_bad_inode(root)) {
+ goto out_syncd_stop;
sb->s_root = d_alloc_root(root);
- xfs_inode_shrinker_unregister(mp);
- xfs_syncd_stop(mp);
out_filestream_unmount:
+ xfs_inode_shrinker_unregister(mp);
xfs_filestream_unmount(mp);
@@ -1456,17 +1454,12 @@ xfs_fs_fill_super(
- xfs_inode_shrinker_unregister(mp);
+ xfs_inode_shrinker_unregister(mp);
* Blow away any referenced inode in the filestreams cache.
From: Dave Chinner <dchinner@redhat.com>

commit 1d8c95a363bf8cd4d4182dd19c01693b635311c2 upstream

xfs: use a cursor for bulk AIL insertion

Delayed logging can insert tens of thousands of log items into the
AIL at the same LSN. When the committing of log commit records
occurs, we can get insertions occurring at an LSN that is not at the
end of the AIL. If there are thousands of items in the AIL on the
tail LSN, each insertion has to walk the AIL to find the correct
place to insert the new item into the AIL. This can consume large
amounts of CPU time and block other operations from occurring while
the traversals are in progress.

To avoid this repeated walk, use an AIL cursor to record
where we should be inserting the new items into the AIL without
having to repeat the walk. The cursor infrastructure already
provides this functionality for push walks, so this is a simple extension
of existing code. While this will not avoid the initial walk, it
will avoid repeating it tens of thousands of times during a single

This version includes logic improvements from Christoph Hellwig.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
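Condensed from the hunks below, the cursor usage pattern is:

struct xfs_ail_cursor cur;

/* One walk up front to find the insertion point for commit_lsn. */
spin_lock(&ailp->xa_lock);
xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn);
spin_unlock(&ailp->xa_lock);

/* Each batch reuses the cursor instead of re-walking the AIL;
 * xfs_trans_ail_update_bulk() drops xa_lock itself. */
xfs_trans_ail_update_bulk(ailp, &cur, log_items, nr_items, commit_lsn);

/* Tear the cursor down once all batches are inserted. */
spin_lock(&ailp->xa_lock);
xfs_trans_ail_cursor_done(ailp, &cur);
spin_unlock(&ailp->xa_lock);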
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index c83f63b..efc147f 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1426,6 +1426,7 @@ xfs_trans_committed(
xfs_log_item_batch_insert(
struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur,
struct xfs_log_item **log_items,
xfs_lsn_t commit_lsn)
@@ -1434,7 +1435,7 @@ xfs_log_item_batch_insert(
spin_lock(&ailp->xa_lock);
/* xfs_trans_ail_update_bulk drops ailp->xa_lock */
- xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn);
+ xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn);
for (i = 0; i < nr_items; i++)
IOP_UNPIN(log_items[i], 0);
@@ -1452,6 +1453,13 @@ xfs_log_item_batch_insert(
* as an iclog write error even though we haven't started any IO yet. Hence in
* this case all we need to do is IOP_COMMITTED processing, followed by an
* IOP_UNPIN(aborted) call.
+ * The AIL cursor is used to optimise the insert process. If commit_lsn is not
+ * at the end of the AIL, the insert cursor avoids the need to walk
+ * the AIL to find the insertion point on every xfs_log_item_batch_insert()
+ * call. This saves a lot of needless list walking and is a net win, even
+ * though it slightly increases that amount of AIL lock traffic to set it up
+ * and tear it down.
xfs_trans_committed_bulk(
@@ -1463,8 +1471,13 @@ xfs_trans_committed_bulk(
#define LOG_ITEM_BATCH_SIZE 32
struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE];
struct xfs_log_vec *lv;
+ struct xfs_ail_cursor cur;
+ spin_lock(&ailp->xa_lock);
+ xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn);
+ spin_unlock(&ailp->xa_lock);
/* unpin all the log items */
for (lv = log_vector; lv; lv = lv->lv_next ) {
struct xfs_log_item *lip = lv->lv_item;
@@ -1493,7 +1506,9 @@ xfs_trans_committed_bulk(
* Not a bulk update option due to unusual item_lsn.
* Push into AIL immediately, rechecking the lsn once
- * we have the ail lock. Then unpin the item.
+ * we have the ail lock. Then unpin the item. This does
+ * not affect the AIL cursor the bulk insert path is
spin_lock(&ailp->xa_lock);
if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0)
@@ -1507,7 +1522,7 @@ xfs_trans_committed_bulk(
/* Item is a candidate for bulk AIL insert. */
log_items[i++] = lv->lv_item;
if (i >= LOG_ITEM_BATCH_SIZE) {
- xfs_log_item_batch_insert(ailp, log_items,
+ xfs_log_item_batch_insert(ailp, &cur, log_items,
LOG_ITEM_BATCH_SIZE, commit_lsn);
@@ -1515,7 +1530,11 @@ xfs_trans_committed_bulk(
/* make sure we insert the remainder! */
- xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn);
+ xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn);
+ spin_lock(&ailp->xa_lock);
+ xfs_trans_ail_cursor_done(ailp, &cur);
+ spin_unlock(&ailp->xa_lock);
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 5fc2380..9a69dc0 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -272,9 +272,9 @@ xfs_trans_ail_cursor_clear(
- * Return the item in the AIL with the current lsn.
- * Return the current tree generation number for use
- * in calls to xfs_trans_next_ail().
+ * Initialise the cursor to the first item in the AIL with the given @lsn.
+ * This searches the list from lowest LSN to highest. Pass a @lsn of zero
+ * to initialise the cursor to the first item in the AIL.
xfs_trans_ail_cursor_first(
@@ -300,31 +300,97 @@ out:
- * splice the log item list into the AIL at the given LSN.
+ * Initialise the cursor to the last item in the AIL with the given @lsn.
+ * This searches the list from highest LSN to lowest. If there is no item with
+ * the value of @lsn, then it sets the cursor to the last item with an LSN lower
+static struct xfs_log_item *
+__xfs_trans_ail_cursor_last(
+ struct xfs_ail *ailp,
+ xfs_log_item_t *lip;
+ list_for_each_entry_reverse(lip, &ailp->xa_ail, li_ail) {
+ if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0)
+ * Initialise the cursor to the last item in the AIL with the given @lsn.
+ * This searches the list from highest LSN to lowest.
+struct xfs_log_item *
+xfs_trans_ail_cursor_last(
+ struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur,
+ xfs_trans_ail_cursor_init(ailp, cur);
+ cur->item = __xfs_trans_ail_cursor_last(ailp, lsn);
+ * splice the log item list into the AIL at the given LSN. We splice to the
+ * tail of the given LSN to maintain insert order for push traversals. The
+ * cursor is optional, allowing repeated updates to the same LSN to avoid
+ * repeated traversals.
- struct xfs_ail *ailp,
- struct list_head *list,
+ struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur,
+ struct list_head *list,
- xfs_log_item_t *next_lip;
+ struct xfs_log_item *lip = cur ? cur->item : NULL;
+ struct xfs_log_item *next_lip;
- /* If the list is empty, just insert the item. */
- if (list_empty(&ailp->xa_ail)) {
- list_splice(list, &ailp->xa_ail);
+ * Get a new cursor if we don't have a placeholder or the existing one
+ * has been invalidated.
+ if (!lip || (__psint_t)lip & 1) {
+ lip = __xfs_trans_ail_cursor_last(ailp, lsn);
+ /* The list is empty, so just splice and return. */
+ list_splice(list, &ailp->xa_ail);
- list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) {
- if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)
+ * Our cursor points to the item we want to insert _after_, so we have
+ * to update the cursor to point to the end of the list we are splicing
+ * in so that it points to the correct location for the next splice.
+ * i.e. before the splice
+ * lsn -> lsn -> lsn + x -> lsn + x ...
+ * | cursor points here
+ * After the splice we have:
+ * lsn -> lsn -> lsn -> lsn -> .... -> lsn -> lsn + x -> lsn + x ...
+ * | cursor points here | needs to move here
+ * So we set the cursor to the last item in the list to be spliced
+ * before we execute the splice, resulting in the cursor pointing to
+ * the correct item after the splice occurs.
+ next_lip = list_entry(list->prev, struct xfs_log_item, li_ail);
+ cur->item = next_lip;
- ASSERT(&next_lip->li_ail == &ailp->xa_ail ||
- XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0);
- list_splice_init(list, &next_lip->li_ail);
+ list_splice(list, &lip->li_ail);
@@ -645,6 +711,7 @@ xfs_trans_unlocked_item(
xfs_trans_ail_update_bulk(
struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur,
struct xfs_log_item **log_items,
xfs_lsn_t lsn) __releases(ailp->xa_lock)
@@ -674,7 +741,7 @@ xfs_trans_ail_update_bulk(
list_add(&lip->li_ail, &tmp);
- xfs_ail_splice(ailp, &tmp, lsn);
+ xfs_ail_splice(ailp, cur, &tmp, lsn);
spin_unlock(&ailp->xa_lock);
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 6b164e9..c0cb408 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -82,6 +82,7 @@ struct xfs_ail {
extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */
void xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur,
struct xfs_log_item **log_items, int nr_items,
xfs_lsn_t lsn) __releases(ailp->xa_lock);
@@ -90,7 +91,7 @@ xfs_trans_ail_update(
struct xfs_log_item *lip,
xfs_lsn_t lsn) __releases(ailp->xa_lock)
- xfs_trans_ail_update_bulk(ailp, &lip, 1, lsn);
+ xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn);
void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp,
@@ -111,10 +112,13 @@ xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp);
void xfs_trans_unlocked_item(struct xfs_ail *,
-struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
+struct xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
struct xfs_ail_cursor *cur,
-struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
+struct xfs_log_item * xfs_trans_ail_cursor_last(struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur,
+struct xfs_log_item * xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
struct xfs_ail_cursor *cur);
void xfs_trans_ail_cursor_done(struct xfs_ail *ailp,
struct xfs_ail_cursor *cur);
commit bc6e588a8971aa74c02e42db4d6e0248679f3738 upstream

If an item was locked we should not update xa_last_pushed_lsn and thus skip
it when restarting the AIL scan, as we need to be able to lock and write it
out as soon as possible. Otherwise heavy lock contention might starve AIL
pushing too easily, especially given the larger backoff once we moved
xa_last_pushed_lsn all the way to the target lsn.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reported-by: Stefan Priebe <s.priebe@profihost.ag>
Tested-by: Stefan Priebe <s.priebe@profihost.ag>
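After the one-line removal below, the locked case leaves xa_last_pushed_lsn untouched, roughly as follows; the stuck++ accounting is assumed from surrounding code that is not shown:

case XFS_ITEM_LOCKED:
	XFS_STATS_INC(xs_push_ail_locked);
	/* Do not advance xa_last_pushed_lsn: the next scan must
	 * retry this item rather than skip past it. */
	stuck++;
	break;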
Index: xfs/fs/xfs/xfs_trans_ail.c
===================================================================
--- xfs.orig/fs/xfs/xfs_trans_ail.c 2011-10-14 14:42:03.004395373 +0200
+++ xfs/fs/xfs/xfs_trans_ail.c 2011-10-14 14:42:22.687898198 +0200
@@ -491,7 +491,6 @@ xfs_ail_worker(
case XFS_ITEM_LOCKED:
XFS_STATS_INC(xs_push_ail_locked);
- ailp->xa_last_pushed_lsn = lsn;
commit 17b38471c3c07a49f0bbc2ecc2e92050c164e226 upstream

We need to check for pinned buffers even in .iop_pushbuf, given that inode
items flush into the same buffers that may be pinned directly due to
operations on the unlinked inode list operating directly on buffers. To do
this add a return value to .iop_pushbuf that tells the AIL push about this
and use the existing log force mechanisms to unpin it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reported-by: Stefan Priebe <s.priebe@profihost.ag>
Tested-by: Stefan Priebe <s.priebe@profihost.ag>
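On the caller side, xfs_ail_worker() then handles the new return value roughly as follows; the stuck/flush_log/push_xfsbufd bookkeeping is assumed from surrounding code that the hunks below elide:

case XFS_ITEM_PUSHBUF:
	XFS_STATS_INC(xs_push_ail_pushbuf);
	if (!IOP_PUSHBUF(lip)) {
		/* Buffer was pinned (or gone): remember to force
		 * the log so it can be unpinned. */
		stuck++;
		flush_log = 1;
	} else {
		ailp->xa_last_pushed_lsn = lsn;
	}
	push_xfsbufd = 1;
	break;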
Index: xfs/fs/xfs/quota/xfs_dquot_item.c
===================================================================
--- xfs.orig/fs/xfs/quota/xfs_dquot_item.c 2011-10-14 14:41:41.036231498 +0200
+++ xfs/fs/xfs/quota/xfs_dquot_item.c 2011-10-14 14:44:09.276394842 +0200
@@ -183,13 +183,14 @@ xfs_qm_dqunpin_wait(
* search the buffer cache can be a time consuming thing, and AIL lock is a
xfs_qm_dquot_logitem_pushbuf(
struct xfs_log_item *lip)
struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
struct xfs_dquot *dqp = qlip->qli_dquot;
ASSERT(XFS_DQ_IS_LOCKED(dqp));
@@ -201,17 +202,20 @@ xfs_qm_dquot_logitem_pushbuf(
if (completion_done(&dqp->q_flush) ||
!(lip->li_flags & XFS_LI_IN_AIL)) {
bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno,
dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
if (XFS_BUF_ISDELAYWRITE(bp))
xfs_buf_delwri_promote(bp);
+ if (XFS_BUF_ISPINNED(bp))
Index: xfs/fs/xfs/xfs_buf_item.c
===================================================================
--- xfs.orig/fs/xfs/xfs_buf_item.c 2011-10-14 14:41:41.000000000 +0200
+++ xfs/fs/xfs/xfs_buf_item.c 2011-10-14 14:44:24.367895813 +0200
@@ -632,7 +632,7 @@ xfs_buf_item_push(
* the xfsbufd to get this buffer written. We have to unlock the buffer
* to allow the xfsbufd to write it, too.
xfs_buf_item_pushbuf(
struct xfs_log_item *lip)
@@ -646,6 +646,7 @@ xfs_buf_item_pushbuf(
xfs_buf_delwri_promote(bp);
Index: xfs/fs/xfs/xfs_inode_item.c
===================================================================
--- xfs.orig/fs/xfs/xfs_inode_item.c 2011-10-14 14:41:41.000000000 +0200
+++ xfs/fs/xfs/xfs_inode_item.c 2011-10-14 14:44:19.323950541 +0200
@@ -713,13 +713,14 @@ xfs_inode_item_committed(
* marked delayed write. If that's the case, we'll promote it and that will
* allow the caller to write the buffer by triggering the xfsbufd to run.
xfs_inode_item_pushbuf(
struct xfs_log_item *lip)
struct xfs_inode_log_item *iip = INODE_ITEM(lip);
struct xfs_inode *ip = iip->ili_inode;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
@@ -730,7 +731,7 @@ xfs_inode_item_pushbuf(
if (completion_done(&ip->i_flush) ||
!(lip->li_flags & XFS_LI_IN_AIL)) {
xfs_iunlock(ip, XFS_ILOCK_SHARED);
bp = xfs_incore(ip->i_mount->m_ddev_targp, iip->ili_format.ilf_blkno,
@@ -738,10 +739,13 @@ xfs_inode_item_pushbuf(
xfs_iunlock(ip, XFS_ILOCK_SHARED);
if (XFS_BUF_ISDELAYWRITE(bp))
xfs_buf_delwri_promote(bp);
+ if (XFS_BUF_ISPINNED(bp))
Index: xfs/fs/xfs/xfs_trans.h
===================================================================
--- xfs.orig/fs/xfs/xfs_trans.h 2011-10-14 14:41:41.000000000 +0200
+++ xfs/fs/xfs/xfs_trans.h 2011-10-14 14:43:45.308394072 +0200
@@ -350,7 +350,7 @@ typedef struct xfs_item_ops {
void (*iop_unlock)(xfs_log_item_t *);
xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
void (*iop_push)(xfs_log_item_t *);
- void (*iop_pushbuf)(xfs_log_item_t *);
+ bool (*iop_pushbuf)(xfs_log_item_t *);
void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
Index: xfs/fs/xfs/xfs_trans_ail.c
===================================================================
--- xfs.orig/fs/xfs/xfs_trans_ail.c 2011-10-14 14:42:22.000000000 +0200
+++ xfs/fs/xfs/xfs_trans_ail.c 2011-10-14 14:43:45.316393949 +0200
@@ -478,8 +478,13 @@ xfs_ail_worker(
case XFS_ITEM_PUSHBUF:
XFS_STATS_INC(xs_push_ail_pushbuf);
- ailp->xa_last_pushed_lsn = lsn;
+ if (!IOP_PUSHBUF(lip)) {
+ ailp->xa_last_pushed_lsn = lsn;
commit 0030807c66f058230bcb20d2573bcaf28852e804 upstream

Currently we have a few issues with the way the workqueue code is used to
implement AIL pushing:

- it accidentally uses the same workqueue as the syncer action, and thus
can be prevented from running if there are enough sync actions active

- it doesn't use the HIGHPRI flag to queue at the head of the queue of

At this point I'm not confident enough in getting all the workqueue flags and
tweaks right to provide a perfectly reliable execution context for AIL
pushing, which is the most important piece in XFS to make forward progress

Revert back to using a kthread per filesystem, which fixes all the above issues
at the cost of having a task struct and stack around for each mounted
filesystem. In addition this also gives us much better ways to diagnose
any issues involving hung AIL pushing and removes a small amount of code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reported-by: Stefan Priebe <s.priebe@profihost.ag>
Tested-by: Stefan Priebe <s.priebe@profihost.ag>
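Pieced together from the hunks below, the replacement thread is essentially this; the try_to_freeze() placement is assumed, as that line is not shown:

static int
xfsaild(
	void		*data)
{
	struct xfs_ail	*ailp = data;
	long		tout = 0;	/* milliseconds */

	while (!kthread_should_stop()) {
		/* Short timeouts are busy periods: sleep killably so
		 * ordinary signals can't cut the backoff short; otherwise
		 * sleep interruptibly until xfs_ail_push() wakes us. */
		if (tout && tout <= 20)
			__set_current_state(TASK_KILLABLE);
		else
			__set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout(tout ?
				 msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);

		try_to_freeze();

		tout = xfsaild_push(ailp);	/* next sleep interval in ms */
	}
	return 0;
}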
Index: xfs/fs/xfs/xfs_trans_ail.c
===================================================================
--- xfs.orig/fs/xfs/xfs_trans_ail.c 2011-10-14 14:43:45.316393949 +0200
+++ xfs/fs/xfs/xfs_trans_ail.c 2011-10-14 14:45:11.937395278 +0200
#include "xfs_trans_priv.h"
#include "xfs_error.h"
-struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */
* Check that the list is sorted as it should be.
@@ -406,16 +404,10 @@ xfs_ail_delete(
xfs_trans_ail_cursor_clear(ailp, lip);
- * xfs_ail_worker does the work of pushing on the AIL. It will requeue itself
- * to run at a later time if there is more work to do to complete the push.
- struct work_struct *work)
+ struct xfs_ail *ailp)
- struct xfs_ail *ailp = container_of(to_delayed_work(work),
- struct xfs_ail, xa_work);
xfs_mount_t *mp = ailp->xa_mount;
struct xfs_ail_cursor *cur = &ailp->xa_cursors;
xfs_log_item_t *lip;
@@ -556,20 +548,6 @@ out_done:
/* We're past our target or empty, so idle */
ailp->xa_last_pushed_lsn = 0;
- * We clear the XFS_AIL_PUSHING_BIT first before checking
- * whether the target has changed. If the target has changed,
- * this pushes the requeue race directly onto the result of the
- * atomic test/set bit, so we are guaranteed that either the
- * the pusher that changed the target or ourselves will requeue
- * the work (but not both).
- clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags);
- if (XFS_LSN_CMP(ailp->xa_target, target) == 0 ||
- test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
} else if (XFS_LSN_CMP(lsn, target) >= 0) {
@@ -592,9 +570,30 @@ out_done:
- /* There is more to do, requeue us. */
- queue_delayed_work(xfs_syncd_wq, &ailp->xa_work,
- msecs_to_jiffies(tout));
+ struct xfs_ail *ailp = data;
+ long tout = 0; /* milliseconds */
+ while (!kthread_should_stop()) {
+ if (tout && tout <= 20)
+ __set_current_state(TASK_KILLABLE);
+ __set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(tout ?
+ msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
+ tout = xfsaild_push(ailp);
@@ -629,8 +628,9 @@ xfs_ail_push(
xfs_trans_ail_copy_lsn(ailp, &ailp->xa_target, &threshold_lsn);
- if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
- queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0);
+ wake_up_process(ailp->xa_task);
@@ -865,9 +865,18 @@ xfs_trans_ail_init(
ailp->xa_mount = mp;
INIT_LIST_HEAD(&ailp->xa_ail);
spin_lock_init(&ailp->xa_lock);
- INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker);
+ ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
+ ailp->xa_mount->m_fsname);
+ if (IS_ERR(ailp->xa_task))
+ goto out_free_ailp;
@@ -876,6 +885,6 @@ xfs_trans_ail_destroy(
struct xfs_ail *ailp = mp->m_ail;
- cancel_delayed_work_sync(&ailp->xa_work);
+ kthread_stop(ailp->xa_task);
Index: xfs/fs/xfs/xfs_trans_priv.h
===================================================================
--- xfs.orig/fs/xfs/xfs_trans_priv.h 2011-10-14 14:42:03.000000000 +0200
+++ xfs/fs/xfs/xfs_trans_priv.h 2011-10-14 14:45:38.191895324 +0200
@@ -64,23 +64,17 @@ struct xfs_ail_cursor {
struct xfs_mount *xa_mount;
+ struct task_struct *xa_task;
struct list_head xa_ail;
xfs_lsn_t xa_target;
struct xfs_ail_cursor xa_cursors;
- struct delayed_work xa_work;
xfs_lsn_t xa_last_pushed_lsn;
- unsigned long xa_flags;
-#define XFS_AIL_PUSHING_BIT 0
* From xfs_trans_ail.c
-extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */
void xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
struct xfs_ail_cursor *cur,
struct xfs_log_item **log_items, int nr_items,
Index: xfs/fs/xfs/linux-2.6/xfs_linux.h
===================================================================
--- xfs.orig/fs/xfs/linux-2.6/xfs_linux.h 2011-10-14 14:41:41.000000000 +0200
+++ xfs/fs/xfs/linux-2.6/xfs_linux.h 2011-10-14 14:45:11.941411722 +0200
#include <linux/ctype.h>
#include <linux/writeback.h>
#include <linux/capability.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
#include <linux/list_sort.h>
#include <asm/page.h>
Index: xfs/fs/xfs/linux-2.6/xfs_super.c
===================================================================
--- xfs.orig/fs/xfs/linux-2.6/xfs_super.c 2011-10-14 14:46:38.497394866 +0200
+++ xfs/fs/xfs/linux-2.6/xfs_super.c 2011-10-14 14:46:49.047894210 +0200
@@ -1660,24 +1660,13 @@ xfs_init_workqueues(void)
xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8);
- xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8);
- goto out_destroy_syncd;
- destroy_workqueue(xfs_syncd_wq);
xfs_destroy_workqueues(void)
- destroy_workqueue(xfs_ail_wq);
destroy_workqueue(xfs_syncd_wq);
Fixes a possible memory corruption when the link is larger than
MAXPATHLEN and XFS_DEBUG is not enabled. This also removes the
S_ISLNK assert, since the inode mode is checked previously in
xfs_readlink_by_handle() and via the VFS.

Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>

fs/xfs/xfs_vnodeops.c | 11 ++++++++---
1 files changed, 8 insertions(+), 3 deletions(-)
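In outline, the guard added below turns the debug-only assert into a runtime check; the goto-based unwinding here is an assumption, since the surrounding error path is not shown:

pathlen = ip->i_d.di_size;

if (pathlen > MAXPATHLEN) {
	/* On-disk corruption: report it instead of overrunning the
	 * MAXPATHLEN-sized buffer the caller handed us. */
	xfs_alert(mp, "%s: inode (%llu) symlink length (%d) too long",
		  __func__, (unsigned long long)ip->i_ino, pathlen);
	error = XFS_ERROR(EFSCORRUPTED);
	goto out;	/* illustrative label */
}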
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 51fc429..c3288be 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -123,13 +123,18 @@ xfs_readlink(
xfs_ilock(ip, XFS_ILOCK_SHARED);
- ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFLNK);
- ASSERT(ip->i_d.di_size <= MAXPATHLEN);
pathlen = ip->i_d.di_size;
+ if (pathlen > MAXPATHLEN) {
+ xfs_alert(mp, "%s: inode (%llu) symlink length (%d) too long",
+ __func__, (unsigned long long)ip->i_ino, pathlen);
+ return XFS_ERROR(EFSCORRUPTED);
if (ip->i_df.if_flags & XFS_IFINLINE) {
memcpy(link, ip->i_df.if_u1.if_data, pathlen);
link[pathlen] = '\0';