1 # Make the VFS call down into the FS on flock calls.
2 diff -urN -p linux-2.6.7/fs/locks.c linux/fs/locks.c
3 --- linux-2.6.7/fs/locks.c 2004-06-16 12:00:44.567463632 -0500
4 +++ linux/fs/locks.c 2004-06-16 12:01:58.844205936 -0500
5 @@ -1294,6 +1294,27 @@ out_unlock:
10 + * Wrapper function around the file_operations lock routine when called for
11 + * flock(). The lock routine is called for both fcntl() and flock(), so
12 + * the flock parameters must be translated to an equivalent fcntl()-like
15 + * Don't use locks_alloc_lock() (or flock_make_lock()) here, as
16 + * this is just a temporary lock structure. We especially don't
17 + * want to fail because we couldn't allocate a lock structure if
18 + * this is an unlock operation.
20 +int flock_fs_file(struct file *filp, int type, int wait)
22 + struct file_lock fl = { .fl_flags = FL_FLOCK,
25 + return filp->f_op->lock(filp,
26 + (wait) ? F_SETLKW : F_SETLK,
31 * sys_flock: - flock() system call.
32 * @fd: the file descriptor to lock.
33 @@ -1342,6 +1363,50 @@ asmlinkage long sys_flock(unsigned int f
38 + * Execute any filesystem-specific flock routines. The filesystem may
39 + * maintain supplemental locks. This code allows the supplemental locks
40 + * to be kept in sync with the vfs flock lock. If flock() is called on
41 + * a lock already held for the given filp, the current flock lock is
42 + * dropped before obtaining the requested lock. This unlock operation
43 + * must be completed for any filesystem-specific locks and the vfs
44 + * flock lock before proceeding with obtaining the requested lock. When
45 + * the filesystem routine drops a lock for such a request, it must
46 + * return -EDEADLK, allowing the vfs lock to be dropped, and the
47 + * filesystem code is then re-executed to obtain the lock.
49 + * A non-blocking request that returns EWOULDBLOCK also causes any vfs
50 + * flock lock to be released, but then returns the error to the caller.
52 + if (filp->f_op && filp->f_op->lock) {
54 + error = flock_fs_file(filp, lock->fl_type, can_sleep);
57 + * We may have dropped a lock. We need to
58 + * finish unlocking before returning or
59 + * continuing with lock acquisition.
61 + if (error != -ENOLCK)
62 + flock_lock_file(filp, &(struct file_lock){.fl_type = F_UNLCK});
65 + * We already held the lock in some mode, and
66 + * had to drop filesystem-specific locks before
67 + * proceeding. We come back through this
68 + * routine to unlock the vfs flock lock. Now go
69 + * back and try again. Using EAGAIN as the
70 + * error here would be better, but the one valid
71 + * error value defined for flock(), EWOULDBLOCK,
72 + * is defined as EAGAIN.
74 + if (error == -EDEADLK)
82 error = flock_lock_file(filp, lock);
83 if ((error != -EAGAIN) || !can_sleep)
84 @@ -1354,6 +1419,13 @@ asmlinkage long sys_flock(unsigned int f
89 + * If we failed to get the vfs flock, we need to clean up any
90 + * filesystem-specific lock state that we previously obtained.
92 + if (error && filp->f_op && filp->f_op->lock)
93 + flock_fs_file(filp, F_UNLCK, 1);
96 if (list_empty(&lock->fl_link)) {
97 locks_free_lock(lock);
98 @@ -1714,6 +1786,8 @@ void locks_remove_flock(struct file *fil
99 if (fl->fl_file == filp) {
101 locks_delete_lock(before);
102 + if (filp->f_op && filp->f_op->lock)
103 + flock_fs_file(filp, F_UNLCK, 1);
107 # Add lock harness to the build system.
108 diff -urN -p linux-2.6.7/fs/Kconfig linux/fs/Kconfig
109 --- linux-2.6.7/fs/Kconfig 2004-06-16 12:00:44.558465722 -0500
110 +++ linux/fs/Kconfig 2004-06-16 12:02:02.401379449 -0500
111 @@ -1669,6 +1669,14 @@ config AFS_FS
116 + tristate "GFS Lock Harness"
118 + The module that connects GFS to the modules that provide
121 + If you want to use GFS (a cluster filesystem) say Y here.
125 menu "Partition Types"
126 diff -urN -p linux-2.6.7/fs/Makefile linux/fs/Makefile
127 --- linux-2.6.7/fs/Makefile 2004-06-16 12:00:44.558465722 -0500
128 +++ linux/fs/Makefile 2004-06-16 12:02:02.402379216 -0500
129 @@ -91,3 +91,4 @@ obj-$(CONFIG_JFS_FS) += jfs/
130 obj-$(CONFIG_XFS_FS) += xfs/
131 obj-$(CONFIG_AFS_FS) += afs/
132 obj-$(CONFIG_BEFS_FS) += befs/
133 +obj-$(CONFIG_LOCK_HARNESS) += gfs_locking/
134 diff -urN -p linux-2.6.7/fs/gfs_locking/Makefile linux/fs/gfs_locking/Makefile
135 --- linux-2.6.7/fs/gfs_locking/Makefile 1969-12-31 18:00:00.000000000 -0600
136 +++ linux/fs/gfs_locking/Makefile 2004-06-16 12:02:02.402379216 -0500
138 +###############################################################################
139 +###############################################################################
141 +## Copyright (C) 2004 Red Hat, Inc. All rights reserved.
143 +## This copyrighted material is made available to anyone wishing to use,
144 +## modify, copy, or redistribute it subject to the terms and conditions
145 +## of the GNU General Public License v.2.
147 +###############################################################################
148 +###############################################################################
150 +obj-$(CONFIG_LOCK_HARNESS) += lock_harness/
152 diff -urN -p linux-2.6.7/fs/gfs_locking/lock_harness/Makefile linux/fs/gfs_locking/lock_harness/Makefile
153 --- linux-2.6.7/fs/gfs_locking/lock_harness/Makefile 1969-12-31 18:00:00.000000000 -0600
154 +++ linux/fs/gfs_locking/lock_harness/Makefile 2004-06-16 12:02:02.402379216 -0500
156 +###############################################################################
157 +###############################################################################
159 +## Copyright (C) 2004 Red Hat, Inc. All rights reserved.
161 +## This copyrighted material is made available to anyone wishing to use,
162 +## modify, copy, or redistribute it subject to the terms and conditions
163 +## of the GNU General Public License v.2.
165 +###############################################################################
166 +###############################################################################
168 +obj-$(CONFIG_LOCK_HARNESS) += lock_harness.o
170 +lock_harness-y := main.o
172 # Add GFS to the build system.
173 diff -urN -p linux-2.6.7/fs/Kconfig linux/fs/Kconfig
174 --- linux-2.6.7/fs/Kconfig 2004-06-25 13:57:24.435829621 -0500
175 +++ linux/fs/Kconfig 2004-06-25 13:59:16.786347614 -0500
176 @@ -316,13 +316,13 @@ config JFS_STATISTICS
177 to be made available to the user in the /proc/fs/jfs/ directory.
180 -# Posix ACL utility routines (for now, only ext2/ext3/jfs/reiserfs)
181 +# Posix ACL utility routines (for now, only ext2/ext3/jfs/reiserfs/GFS)
183 # NOTE: you can implement Posix ACLs without these helpers (XFS does).
184 # Never use this symbol for ifdefs.
187 - depends on EXT2_FS_POSIX_ACL || EXT3_FS_POSIX_ACL || JFS_POSIX_ACL || REISERFS_FS_POSIX_ACL
188 + depends on EXT2_FS_POSIX_ACL || EXT3_FS_POSIX_ACL || JFS_POSIX_ACL || REISERFS_FS_POSIX_ACL || GFS_FS
192 @@ -1677,6 +1677,20 @@ config LOCK_HARNESS
194 If you want to use GFS (a cluster filesystem) say Y here.
197 + tristate "GFS file system support"
198 + depends on LOCK_HARNESS
200 + A cluster filesystem.
202 + Allows a cluster of computers to simultaneously use a block device
203 + that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads
204 + and writes to the block device like a local filesystem, but also uses
205 + a lock module to allow the computers to coordinate their I/O so
206 + filesystem consistency is maintained. One of the nifty features of
207 + GFS is perfect consistency -- changes made to the filesystem on one
208 + machine show up immediately on all other machines in the cluster.
212 menu "Partition Types"
213 diff -urN -p linux-2.6.7/fs/Makefile linux/fs/Makefile
214 --- linux-2.6.7/fs/Makefile 2004-06-25 13:57:24.436829391 -0500
215 +++ linux/fs/Makefile 2004-06-25 13:57:24.447826863 -0500
216 @@ -92,3 +92,4 @@ obj-$(CONFIG_XFS_FS) += xfs/
217 obj-$(CONFIG_AFS_FS) += afs/
218 obj-$(CONFIG_BEFS_FS) += befs/
219 obj-$(CONFIG_LOCK_HARNESS) += gfs_locking/
220 +obj-$(CONFIG_GFS_FS) += gfs/
221 diff -urN -p linux-2.6.7/fs/gfs/Makefile linux/fs/gfs/Makefile
222 --- linux-2.6.7/fs/gfs/Makefile 1969-12-31 18:00:00.000000000 -0600
223 +++ linux/fs/gfs/Makefile 2004-06-25 13:57:24.448826633 -0500
225 +###############################################################################
226 +###############################################################################
228 +## Copyright (C) 2004 Red Hat, Inc. All rights reserved.
230 +## This copyrighted material is made available to anyone wishing to use,
231 +## modify, copy, or redistribute it subject to the terms and conditions
232 +## of the GNU General Public License v.2.
234 +###############################################################################
235 +###############################################################################
237 +obj-$(CONFIG_GFS_FS) += gfs.o
276 # Add lock_nolock to the build system.
277 diff -urN -p linux-2.6.7/fs/Kconfig linux/fs/Kconfig
278 --- linux-2.6.7/fs/Kconfig 2004-06-16 12:02:09.563715325 -0500
279 +++ linux/fs/Kconfig 2004-06-16 12:02:09.574712769 -0500
280 @@ -1691,6 +1691,12 @@ config GFS_FS
281 GFS is perfect consistency -- changes made to the filesystem on one
282 machine show up immediately on all other machines in the cluster.
285 + tristate "Lock Nolock"
286 + depends on LOCK_HARNESS
288 + A "fake" lock module that allows GFS to run as a local filesystem.
292 menu "Partition Types"
293 diff -urN -p linux-2.6.7/fs/gfs_locking/Makefile linux/fs/gfs_locking/Makefile
294 --- linux-2.6.7/fs/gfs_locking/Makefile 2004-06-16 12:02:05.985546690 -0500
295 +++ linux/fs/gfs_locking/Makefile 2004-06-16 12:02:09.574712769 -0500
297 ###############################################################################
299 obj-$(CONFIG_LOCK_HARNESS) += lock_harness/
300 +obj-$(CONFIG_LOCK_NOLOCK) += lock_nolock/
302 diff -urN -p linux-2.6.7/fs/gfs_locking/lock_nolock/Makefile linux/fs/gfs_locking/lock_nolock/Makefile
303 --- linux-2.6.7/fs/gfs_locking/lock_nolock/Makefile 1969-12-31 18:00:00.000000000 -0600
304 +++ linux/fs/gfs_locking/lock_nolock/Makefile 2004-06-16 12:02:09.575712537 -0500
306 +###############################################################################
307 +###############################################################################
309 +## Copyright (C) 2004 Red Hat, Inc. All rights reserved.
311 +## This copyrighted material is made available to anyone wishing to use,
312 +## modify, copy, or redistribute it subject to the terms and conditions
313 +## of the GNU General Public License v.2.
315 +###############################################################################
316 +###############################################################################
318 +obj-$(CONFIG_LOCK_NOLOCK) += lock_nolock.o
320 +lock_nolock-y := main.o
322 # Add lock_dlm to the build system.
323 diff -urN -p linux-2.6.7/fs/Kconfig linux/fs/Kconfig
324 --- linux-2.6.7/fs/Kconfig 2004-06-16 12:02:13.145883030 -0500
325 +++ linux/fs/Kconfig 2004-06-16 12:02:13.157880243 -0500
326 @@ -1697,6 +1697,12 @@ config LOCK_NOLOCK
328 A "fake" lock module that allows GFS to run as a local filesystem.
331 + tristate "Lock DLM"
332 + depends on LOCK_HARNESS
334 + A lock module that allows GFS to use a Distributed Lock Manager.
338 menu "Partition Types"
339 diff -urN -p linux-2.6.7/fs/gfs_locking/Makefile linux/fs/gfs_locking/Makefile
340 --- linux-2.6.7/fs/gfs_locking/Makefile 2004-06-16 12:02:13.146882798 -0500
341 +++ linux/fs/gfs_locking/Makefile 2004-06-16 12:02:13.157880243 -0500
344 obj-$(CONFIG_LOCK_HARNESS) += lock_harness/
345 obj-$(CONFIG_LOCK_NOLOCK) += lock_nolock/
346 +obj-$(CONFIG_LOCK_DLM) += lock_dlm/
348 diff -urN -p linux-2.6.7/fs/gfs_locking/lock_dlm/Makefile linux/fs/gfs_locking/lock_dlm/Makefile
349 --- linux-2.6.7/fs/gfs_locking/lock_dlm/Makefile 1969-12-31 18:00:00.000000000 -0600
350 +++ linux/fs/gfs_locking/lock_dlm/Makefile 2004-06-16 12:02:13.157880243 -0500
352 +###############################################################################
353 +###############################################################################
355 +## Copyright (C) 2004 Red Hat, Inc. All rights reserved.
357 +## This copyrighted material is made available to anyone wishing to use,
358 +## modify, copy, or redistribute it subject to the terms and conditions
359 +## of the GNU General Public License v.2.
361 +###############################################################################
362 +###############################################################################
364 +obj-$(CONFIG_LOCK_DLM) += lock_dlm.o
366 +lock_dlm-y := main.o group.o lock.o mount.o thread.o plock.o
368 # Add lock_gulm to the build system.
369 diff -urN -p linux-2.6.7/fs/Kconfig linux/fs/Kconfig
370 --- linux-2.6.7/fs/Kconfig 2004-06-16 12:02:16.816030294 -0500
371 +++ linux/fs/Kconfig 2004-06-16 12:02:16.827027739 -0500
372 @@ -1703,6 +1703,12 @@ config LOCK_DLM
374 A lock module that allows GFS to use a Distributed Lock Manager.
377 + tristate "Lock GULM"
378 + depends on LOCK_HARNESS
380 + A lock module that allows GFS to use a Failover Lock Manager.
384 menu "Partition Types"
385 diff -urN -p linux-2.6.7/fs/gfs_locking/Makefile linux/fs/gfs_locking/Makefile
386 --- linux-2.6.7/fs/gfs_locking/Makefile 2004-06-16 12:02:16.817030062 -0500
387 +++ linux/fs/gfs_locking/Makefile 2004-06-16 12:02:16.828027507 -0500
389 obj-$(CONFIG_LOCK_HARNESS) += lock_harness/
390 obj-$(CONFIG_LOCK_NOLOCK) += lock_nolock/
391 obj-$(CONFIG_LOCK_DLM) += lock_dlm/
392 +obj-$(CONFIG_LOCK_GULM) += lock_gulm/
394 diff -urN -p linux-2.6.7/fs/gfs_locking/lock_gulm/Makefile linux/fs/gfs_locking/lock_gulm/Makefile
395 --- linux-2.6.7/fs/gfs_locking/lock_gulm/Makefile 1969-12-31 18:00:00.000000000 -0600
396 +++ linux/fs/gfs_locking/lock_gulm/Makefile 2004-06-16 12:02:16.828027507 -0500
398 +###############################################################################
399 +###############################################################################
401 +## Copyright (C) 2004 Red Hat, Inc. All rights reserved.
403 +## This copyrighted material is made available to anyone wishing to use,
404 +## modify, copy, or redistribute it subject to the terms and conditions
405 +## of the GNU General Public License v.2.
407 +###############################################################################
408 +###############################################################################
410 +obj-$(CONFIG_LOCK_GULM) += lock_gulm.o
412 +lock_gulm-y := gulm_core.o \
421 + linux_gulm_main.o \
426 + utils_verb_flags.o \
431 diff -urN linux-orig/fs/gfs/acl.c linux-patched/fs/gfs/acl.c
432 --- linux-orig/fs/gfs/acl.c 1969-12-31 18:00:00.000000000 -0600
433 +++ linux-patched/fs/gfs/acl.c 2004-06-30 13:27:49.332713682 -0500
435 +/******************************************************************************
436 +*******************************************************************************
438 +** Copyright (C) Sistina Software, Inc. 2003 All rights reserved.
439 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
441 +** This copyrighted material is made available to anyone wishing to use,
442 +** modify, copy, or redistribute it subject to the terms and conditions
443 +** of the GNU General Public License v.2.
445 +*******************************************************************************
446 +******************************************************************************/
448 +#include <linux/sched.h>
449 +#include <linux/slab.h>
450 +#include <linux/smp_lock.h>
451 +#include <linux/spinlock.h>
452 +#include <asm/semaphore.h>
453 +#include <linux/completion.h>
454 +#include <linux/buffer_head.h>
455 +#include <linux/xattr_acl.h>
466 + * Check to make sure that the acl is actually valid
469 +gfs_validate_acl(struct gfs_inode *ip, const char *value, int size, int access)
472 + struct posix_acl *acl = NULL;
473 + struct gfs_sbd *sdp = ip->i_sbd;
475 + if ((current->fsuid != ip->i_di.di_uid) && !capable(CAP_FOWNER))
477 + if (ip->i_di.di_type == GFS_FILE_LNK)
478 + return -EOPNOTSUPP;
479 + if (!access && ip->i_di.di_type != GFS_FILE_DIR)
481 + if (!sdp->sd_args.ar_posixacls)
482 + return -EOPNOTSUPP;
485 + acl = posix_acl_from_xattr(value, size);
487 + return PTR_ERR(acl);
489 + err = posix_acl_valid(acl);
490 + posix_acl_release(acl);
497 +gfs_acl_set_mode(struct gfs_inode *ip, struct posix_acl *acl)
499 + struct inode *inode;
502 + inode = gfs_iget(ip, NO_CREATE);
503 + mode = inode->i_mode;
504 + posix_acl_equiv_mode(acl, &mode);
505 + inode->i_mode = mode;
507 + gfs_inode_attr_out(ip);
512 + * gfs_replace_acl - replace the value of the ea to the value of the acl
514 + * NOTE: The new value must be the same size as the old one.
517 +gfs_replace_acl(struct inode *inode, struct posix_acl *acl, int access,
518 + struct gfs_ea_location location)
520 + struct gfs_inode *ip = vn2ip(inode);
521 + struct gfs_easet_io req;
526 + size = posix_acl_to_xattr(acl, NULL, 0);
527 + GFS_ASSERT(size == GFS_EA_DATA_LEN(location.ea),
528 + printk("new acl size = %d, ea size = %u\n", size,
529 + GFS_EA_DATA_LEN(location.ea)););
531 + data = gmalloc(size);
533 + posix_acl_to_xattr(acl, data, size);
535 + req.es_data = data;
536 + req.es_name = (access) ? GFS_POSIX_ACL_ACCESS : GFS_POSIX_ACL_DEFAULT;
537 + req.es_data_len = size;
538 + req.es_name_len = (access) ? GFS_POSIX_ACL_ACCESS_LEN : GFS_POSIX_ACL_DEFAULT_LEN;
539 + req.es_cmd = GFS_EACMD_REPLACE;
540 + req.es_type = GFS_EATYPE_SYS;
542 + error = replace_ea(ip->i_sbd, ip, location.ea, &req);
544 + gfs_trans_add_bh(ip->i_gl, location.bh);
552 + * gfs_findacl - returns the requested posix acl
554 + * this function does not log the inode. It assumes that a lock is already
558 +gfs_findacl(struct gfs_inode *ip, int access, struct posix_acl **acl_ptr,
559 + struct gfs_ea_location *location)
561 + struct gfs_sbd *sdp = ip->i_sbd;
562 + struct posix_acl *acl;
563 + uint32_t avail_size;
567 + avail_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs_meta_header);
570 + if (!ip->i_di.di_eattr)
573 + error = find_eattr(ip,
574 + (access) ? GFS_POSIX_ACL_ACCESS : GFS_POSIX_ACL_DEFAULT,
575 + (access) ? GFS_POSIX_ACL_ACCESS_LEN : GFS_POSIX_ACL_DEFAULT_LEN,
576 + GFS_EATYPE_SYS, location);
580 + data = gmalloc(GFS_EA_DATA_LEN(location->ea));
583 + if (GFS_EA_IS_UNSTUFFED(location->ea))
584 + error = read_unstuffed(data, ip, sdp, location->ea, avail_size,
587 + gfs_ea_memcpy(data, GFS_EA_DATA(location->ea),
588 + GFS_EA_DATA_LEN(location->ea));
592 + acl = posix_acl_from_xattr(data, GFS_EA_DATA_LEN(location->ea));
594 + error = PTR_ERR(acl);
601 + brelse(location->bh);
607 +gfs_getacl(struct inode *inode, int access, struct posix_acl **acl_ptr)
609 + struct gfs_inode *ip = vn2ip(inode);
610 + struct gfs_sbd *sdp = ip->i_sbd;
611 + struct gfs_eaget_io req;
612 + struct posix_acl *acl;
619 + if (!sdp->sd_args.ar_posixacls)
622 + req.eg_name = (access) ? GFS_POSIX_ACL_ACCESS : GFS_POSIX_ACL_DEFAULT;
623 + req.eg_name_len = (access) ? GFS_POSIX_ACL_ACCESS_LEN : GFS_POSIX_ACL_DEFAULT_LEN;
624 + req.eg_type = GFS_EATYPE_SYS;
626 + req.eg_data = NULL;
627 + req.eg_data_len = 0;
629 + error = gfs_ea_read_permission(&req, ip);
633 + if (!ip->i_di.di_eattr)
636 + size = get_ea(sdp, ip, &req, gfs_ea_memcpy);
638 + if (size != -ENODATA)
643 + data = gmalloc(size);
645 + req.eg_data = data;
646 + req.eg_data_len = size;
648 + size = get_ea(sdp, ip, &req, gfs_ea_memcpy);
654 + acl = posix_acl_from_xattr(data, size);
656 + error = PTR_ERR(acl);
667 +gfs_setup_new_acl(struct gfs_inode *dip,
668 + unsigned int type, unsigned int *mode,
669 + struct posix_acl **acl_ptr)
671 + struct gfs_ea_location location;
672 + struct posix_acl *acl = NULL;
673 + mode_t access_mode = *mode;
676 + if (type == GFS_FILE_LNK)
679 + error = gfs_findacl(dip, FALSE, &acl, &location);
683 + (*mode) &= ~current->fs->umask;
686 + brelse(location.bh);
688 + if (type == GFS_FILE_DIR) {
693 + error = posix_acl_create_masq(acl, &access_mode);
694 + *mode = access_mode;
700 + posix_acl_release(acl);
706 + * gfs_create_default_acl - initializes the default acl
708 + * NOTE: gfs_init_access_acl must be called first
711 +gfs_create_default_acl(struct gfs_inode *dip, struct gfs_inode *ip, void *data,
714 + struct gfs_easet_io req;
715 + struct gfs_ea_location avail;
718 + memset(&avail, 0, sizeof(struct gfs_ea_location));
720 + req.es_data = data;
721 + req.es_name = GFS_POSIX_ACL_DEFAULT;
722 + req.es_data_len = size;
723 + req.es_name_len = GFS_POSIX_ACL_DEFAULT_LEN;
724 + req.es_cmd = GFS_EACMD_CREATE;
725 + req.es_type = GFS_EATYPE_SYS;
727 + error = find_sys_space(dip, ip, size, &avail);
731 + avail.ea = prep_ea(avail.ea);
733 + error = write_ea(ip->i_sbd, dip, ip, avail.ea, &req);
735 + gfs_trans_add_bh(ip->i_gl, avail.bh); /* Huh!?! */
743 + * gfs_init_access_acl - initializes the access acl
745 + * NOTE: This must be the first extended attribute that is created for
749 +gfs_init_access_acl(struct gfs_inode *dip, struct gfs_inode *ip, void *data,
752 + struct gfs_easet_io req;
754 + req.es_data = data;
755 + req.es_name = GFS_POSIX_ACL_ACCESS;
756 + req.es_data_len = size;
757 + req.es_name_len = GFS_POSIX_ACL_ACCESS_LEN;
758 + req.es_cmd = GFS_EACMD_CREATE;
759 + req.es_type = GFS_EATYPE_SYS;
761 + return init_new_inode_eattr(dip, ip, &req);
765 +gfs_init_acl(struct gfs_inode *dip, struct gfs_inode *ip, unsigned int type,
766 + struct posix_acl *acl)
768 + struct buffer_head *dibh;
773 + size = posix_acl_to_xattr(acl, NULL, 0);
775 + data = gmalloc(size);
777 + posix_acl_to_xattr(acl, data, size);
779 + error = gfs_get_inode_buffer(ip, &dibh);
783 + error = gfs_init_access_acl(dip, ip, data, size);
787 + if (type == GFS_FILE_DIR) {
788 + error = gfs_create_default_acl(dip, ip, data, size);
793 + gfs_trans_add_bh(ip->i_gl, dibh);
794 + gfs_dinode_out(&ip->i_di, dibh->b_data);
801 + posix_acl_release(acl);
807 +gfs_acl_setattr(struct inode *inode)
809 + struct gfs_inode *ip = vn2ip(inode);
810 + struct posix_acl *acl;
811 + struct gfs_ea_location location;
814 + if (S_ISLNK(inode->i_mode))
817 + memset(&location, 0, sizeof(struct gfs_ea_location));
819 + error = gfs_findacl(ip, TRUE, &acl, &location); /* Check error here? */
823 + error = posix_acl_chmod_masq(acl, inode->i_mode);
825 + error = gfs_replace_acl(inode, acl, TRUE, location);
827 + posix_acl_release(acl);
828 + brelse(location.bh);
832 diff -urN linux-orig/fs/gfs/acl.h linux-patched/fs/gfs/acl.h
833 --- linux-orig/fs/gfs/acl.h 1969-12-31 18:00:00.000000000 -0600
834 +++ linux-patched/fs/gfs/acl.h 2004-06-30 13:27:49.332713682 -0500
836 +/******************************************************************************
837 +*******************************************************************************
839 +** Copyright (C) Sistina Software, Inc. 2003 All rights reserved.
840 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
842 +** This copyrighted material is made available to anyone wishing to use,
843 +** modify, copy, or redistribute it subject to the terms and conditions
844 +** of the GNU General Public License v.2.
846 +*******************************************************************************
847 +******************************************************************************/
849 +#ifndef __ACL_DOT_H__
850 +#define __ACL_DOT_H__
852 +int gfs_setup_new_acl(struct gfs_inode *dip,
853 + unsigned int type, unsigned int *mode,
854 + struct posix_acl **acl_ptr);
855 +int gfs_getacl(struct inode *inode, int access, struct posix_acl **acl_ptr);
856 +int gfs_init_acl(struct gfs_inode *dip, struct gfs_inode *ip, unsigned int type,
857 + struct posix_acl *acl);
858 +int gfs_acl_setattr(struct inode *inode);
859 +int gfs_validate_acl(struct gfs_inode *ip, const char *value, int size,
861 +void gfs_acl_set_mode(struct gfs_inode *ip, struct posix_acl *acl);
863 +#endif /* __ACL_DOT_H__ */
864 diff -urN linux-orig/fs/gfs/bits.c linux-patched/fs/gfs/bits.c
865 --- linux-orig/fs/gfs/bits.c 1969-12-31 18:00:00.000000000 -0600
866 +++ linux-patched/fs/gfs/bits.c 2004-06-30 13:27:49.332713682 -0500
868 +/******************************************************************************
869 +*******************************************************************************
871 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
872 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
874 +** This copyrighted material is made available to anyone wishing to use,
875 +** modify, copy, or redistribute it subject to the terms and conditions
876 +** of the GNU General Public License v.2.
878 +*******************************************************************************
879 +******************************************************************************/
882 + * These routines are used by the resource group routines (rgrp.c)
883 + * to keep track of block allocation. Each block is represented by two
884 + * bits. One bit indicates whether or not the block is used. (1=used,
885 + * 0=free) The other bit indicates whether or not the block contains a
886 + * dinode or not. (1=dinode, 0=data block) So, each byte represents
887 + * GFS_NBBY (i.e. 4) blocks.
890 +#include <linux/sched.h>
891 +#include <linux/slab.h>
892 +#include <linux/smp_lock.h>
893 +#include <linux/spinlock.h>
894 +#include <asm/semaphore.h>
895 +#include <linux/completion.h>
896 +#include <linux/buffer_head.h>
901 +static const char valid_change[16] = {
904 + /* n */ 0, 1, 1, 1,
905 + /* e */ 1, 0, 0, 0,
906 + /* w */ 1, 0, 0, 1,
911 + * gfs_setbit - Set a bit in the bitmaps
912 + * @buffer: the buffer that holds the bitmaps
913 + * @buflen: the length (in bytes) of the buffer
914 + * @block: the block to set
915 + * @new_state: the new state of the block
920 +gfs_setbit(struct gfs_rgrpd *rgd,
921 + unsigned char *buffer, unsigned int buflen,
922 + uint32_t block, unsigned char new_state)
924 + unsigned char *byte, *end, cur_state;
927 + byte = buffer + (block / GFS_NBBY);
928 + bit = (block % GFS_NBBY) * GFS_BIT_SIZE;
929 + end = buffer + buflen;
931 + GFS_ASSERT_RGRPD(byte < end, rgd,);
933 + cur_state = (*byte >> bit) & GFS_BIT_MASK;
934 + GFS_ASSERT_RGRPD(valid_change[new_state * 4 + cur_state], rgd,
935 + printk("cur_state = %u, new_state = %u\n",
936 + cur_state, new_state););
938 + *byte ^= cur_state << bit;
939 + *byte |= new_state << bit;
943 + * gfs_testbit - test a bit in the bitmaps
944 + * @buffer: the buffer that holds the bitmaps
945 + * @buflen: the length (in bytes) of the buffer
946 + * @block: the block to read
951 +gfs_testbit(struct gfs_rgrpd *rgd,
952 + unsigned char *buffer, unsigned int buflen, uint32_t block)
954 + unsigned char *byte, *end, cur_state;
957 + byte = buffer + (block / GFS_NBBY);
958 + bit = (block % GFS_NBBY) * GFS_BIT_SIZE;
959 + end = buffer + buflen;
961 + GFS_ASSERT_RGRPD(byte < end, rgd,);
963 + cur_state = (*byte >> bit) & GFS_BIT_MASK;
969 + * gfs_bitfit - Find a free block in the bitmaps
970 + * @buffer: the buffer that holds the bitmaps
971 + * @buflen: the length (in bytes) of the buffer
972 + * @goal: the block to try to allocate
973 + * @old_state: the state of the block we're looking for
975 + * Returns: the block number that was allocated
979 +gfs_bitfit(struct gfs_rgrpd *rgd,
980 + unsigned char *buffer, unsigned int buflen,
981 + uint32_t goal, unsigned char old_state)
983 + unsigned char *byte, *end, alloc;
984 + uint32_t blk = goal;
987 + byte = buffer + (goal / GFS_NBBY);
988 + bit = (goal % GFS_NBBY) * GFS_BIT_SIZE;
989 + end = buffer + buflen;
990 + alloc = (old_state & 1) ? 0 : 0x55;
992 + while (byte < end) {
993 + if ((*byte & 0x55) == alloc) {
994 + blk += (8 - bit) >> 1;
1002 + if (((*byte >> bit) & GFS_BIT_MASK) == old_state)
1005 + bit += GFS_BIT_SIZE;
1018 + * gfs_bitcount - count the number of bits in a certain state
1019 + * @buffer: the buffer that holds the bitmaps
1020 + * @buflen: the length (in bytes) of the buffer
1021 + * @state: the state of the block we're looking for
1023 + * Returns: The number of bits
1027 +gfs_bitcount(struct gfs_rgrpd *rgd,
1028 + unsigned char *buffer, unsigned int buflen,
1029 + unsigned char state)
1031 + unsigned char *byte = buffer;
1032 + unsigned char *end = buffer + buflen;
1033 + unsigned char state1 = state << 2;
1034 + unsigned char state2 = state << 4;
1035 + unsigned char state3 = state << 6;
1036 + uint32_t count = 0;
1038 + for (; byte < end; byte++) {
1039 + if (((*byte) & 0x03) == state)
1041 + if (((*byte) & 0x0C) == state1)
1043 + if (((*byte) & 0x30) == state2)
1045 + if (((*byte) & 0xC0) == state3)
1051 diff -urN linux-orig/fs/gfs/bits.h linux-patched/fs/gfs/bits.h
1052 --- linux-orig/fs/gfs/bits.h 1969-12-31 18:00:00.000000000 -0600
1053 +++ linux-patched/fs/gfs/bits.h 2004-06-30 13:27:49.332713682 -0500
1055 +/******************************************************************************
1056 +*******************************************************************************
1058 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
1059 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
1061 +** This copyrighted material is made available to anyone wishing to use,
1062 +** modify, copy, or redistribute it subject to the terms and conditions
1063 +** of the GNU General Public License v.2.
1065 +*******************************************************************************
1066 +******************************************************************************/
1068 +#ifndef __BITS_DOT_H__
1069 +#define __BITS_DOT_H__
1071 +#define BFITNOENT (0xFFFFFFFF)
1073 +void gfs_setbit(struct gfs_rgrpd *rgd,
1074 + unsigned char *buffer, unsigned int buflen,
1075 + uint32_t block, unsigned char new_state);
1076 +unsigned char gfs_testbit(struct gfs_rgrpd *rgd,
1077 + unsigned char *buffer, unsigned int buflen,
1079 +uint32_t gfs_bitfit(struct gfs_rgrpd *rgd,
1080 + unsigned char *buffer, unsigned int buflen,
1081 + uint32_t goal, unsigned char old_state);
1082 +uint32_t gfs_bitcount(struct gfs_rgrpd *rgd,
1083 + unsigned char *buffer, unsigned int buflen,
1084 + unsigned char state);
1086 +#endif /* __BITS_DOT_H__ */
1087 diff -urN linux-orig/fs/gfs/bmap.c linux-patched/fs/gfs/bmap.c
1088 --- linux-orig/fs/gfs/bmap.c 1969-12-31 18:00:00.000000000 -0600
1089 +++ linux-patched/fs/gfs/bmap.c 2004-06-30 13:27:49.333713450 -0500
1091 +/******************************************************************************
1092 +*******************************************************************************
1094 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
1095 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
1097 +** This copyrighted material is made available to anyone wishing to use,
1098 +** modify, copy, or redistribute it subject to the terms and conditions
1099 +** of the GNU General Public License v.2.
1101 +*******************************************************************************
1102 +******************************************************************************/
1104 +#include <linux/sched.h>
1105 +#include <linux/slab.h>
1106 +#include <linux/smp_lock.h>
1107 +#include <linux/spinlock.h>
1108 +#include <asm/semaphore.h>
1109 +#include <linux/completion.h>
1110 +#include <linux/buffer_head.h>
1123 + unsigned int mp_list[GFS_MAX_META_HEIGHT];
1126 +typedef int (*block_call_t) (struct gfs_inode *ip, struct buffer_head *dibh,
1127 + struct buffer_head *bh, uint64_t *top,
1128 + uint64_t *bottom, unsigned int height,
1131 +struct strip_mine {
1133 + unsigned int sm_height;
1137 + * gfs_unstuffer_sync - unstuff a dinode synchronously
1139 + * @dibh: the dinode buffer
1140 + * @block: the block number that was allocated
1141 + * @private: not used
1143 + * Returns: 0 on success, -EXXX on failure
1147 +gfs_unstuffer_sync(struct gfs_inode *ip, struct buffer_head *dibh,
1148 + uint64_t block, void *private)
1150 + struct gfs_sbd *sdp = ip->i_sbd;
1151 + struct buffer_head *bh;
1154 + error = gfs_get_data_buffer(ip, block, TRUE, &bh);
1158 + gfs_buffer_copy_tail(bh, 0, dibh, sizeof(struct gfs_dinode));
1160 + error = gfs_dwrite(sdp, bh, DIO_DIRTY | DIO_START | DIO_WAIT);
1168 + * gfs_unstuffer_async - unstuff a dinode asynchronously
1170 + * @dibh: the dinode buffer
1171 + * @block: the block number that was allocated
1172 + * @private: not used
1174 + * Returns: 0 on success, -EXXX on failure
1178 +gfs_unstuffer_async(struct gfs_inode *ip, struct buffer_head *dibh,
1179 + uint64_t block, void *private)
1181 + struct gfs_sbd *sdp = ip->i_sbd;
1182 + struct buffer_head *bh;
1185 + error = gfs_get_data_buffer(ip, block, TRUE, &bh);
1189 + gfs_buffer_copy_tail(bh, 0, dibh, sizeof(struct gfs_dinode));
1191 + error = gfs_dwrite(sdp, bh, DIO_DIRTY);
1199 + * gfs_unstuff_dinode - Unstuff a dinode when the data has grown too big
1200 + * @ip: The GFS inode to unstuff
1201 + * @unstuffer: the routine that handles unstuffing a non-zero length file
1202 + * @private: private data for the unstuffer
1204 + * This routine unstuffs a dinode and returns it to a "normal" state such
1205 + * that the height can be grown in the traditional way.
1207 + * Returns: 0 on success, -EXXXX on failure
1211 +gfs_unstuff_dinode(struct gfs_inode *ip, gfs_unstuffer_t unstuffer,
1214 + struct buffer_head *bh, *dibh;
1215 + uint64_t block = 0;
1216 + int journaled = gfs_is_jdata(ip);
1219 + GFS_ASSERT_INODE(gfs_is_stuffed(ip), ip,);
1221 + error = gfs_get_inode_buffer(ip, &dibh);
1225 + if (ip->i_di.di_size) {
1226 + /* Get a free block, fill it with the stuffed data,
1227 + and write it out to disk */
1230 + error = gfs_metaalloc(ip, &block);
1234 + error = gfs_get_data_buffer(ip, block, TRUE, &bh);
1238 + gfs_buffer_copy_tail(bh, sizeof(struct gfs_meta_header),
1239 + dibh, sizeof(struct gfs_dinode));
1243 + gfs_blkalloc(ip, &block);
1245 + error = unstuffer(ip, dibh, block, private);
1251 + /* Set up the pointer to the new block */
1253 + gfs_trans_add_bh(ip->i_gl, dibh);
1255 + gfs_buffer_clear_tail(dibh, sizeof(struct gfs_dinode));
1257 + if (ip->i_di.di_size) {
1258 + *(uint64_t *)(dibh->b_data + sizeof(struct gfs_dinode)) = cpu_to_gfs64(block);
1259 + ip->i_di.di_blocks++;
1262 + ip->i_di.di_height = 1;
1264 + gfs_dinode_out(&ip->i_di, dibh->b_data);
1276 + * calc_tree_height - Calculate the height of a metadata tree
1277 + * @ip: The GFS inode
1278 + * @size: The proposed size of the file
1280 + * Work out how tall a metadata tree needs to be in order to accommodate a
1281 + * file of a particular size. If size is less than the current size of
1282 + * the inode, then the current size of the inode is used instead of the
1285 + * Returns: the height the tree should be
1288 +static unsigned int
1289 +calc_tree_height(struct gfs_inode *ip, uint64_t size)
1291 + struct gfs_sbd *sdp = ip->i_sbd;
1293 + unsigned int max, height;
1295 + if (ip->i_di.di_size > size)
1296 + size = ip->i_di.di_size;
1298 + if (gfs_is_jdata(ip)) {
1299 + arr = sdp->sd_jheightsize;
1300 + max = sdp->sd_max_jheight;
1302 + arr = sdp->sd_heightsize;
1303 + max = sdp->sd_max_height;
1306 + for (height = 0; height < max; height++)
1307 + if (arr[height] >= size)
1314 + * build_height - Build a metadata tree of the requested height
1315 + * @ip: The GFS inode
1316 + * @height: The height to build to
1318 + * This routine makes sure that the metadata tree is tall enough to hold
1319 + * "size" bytes of data.
1321 + * Returns: 0 on success, -EXXXX on failure
1325 +build_height(struct gfs_inode *ip, int height)
1327 + struct gfs_sbd *sdp = ip->i_sbd;
1328 + struct buffer_head *bh, *dibh;
1329 + uint64_t block, *bp;
1334 + while (ip->i_di.di_height < height) {
1335 + error = gfs_get_inode_buffer(ip, &dibh);
1339 + new_block = FALSE;
1340 + bp = (uint64_t *)(dibh->b_data + sizeof(struct gfs_dinode));
1341 + for (x = 0; x < sdp->sd_diptrs; x++, bp++)
1348 + /* Get a new block, fill it with the old direct pointers,
1349 + and write it out */
1351 + error = gfs_metaalloc(ip, &block);
1355 + error = gfs_dread(sdp, block, ip->i_gl,
1356 + DIO_NEW | DIO_START | DIO_WAIT, &bh);
1360 + gfs_trans_add_bh(ip->i_gl, bh);
1361 + gfs_metatype_set(sdp, bh, GFS_METATYPE_IN,
1363 + memset(bh->b_data + sizeof(struct gfs_meta_header),
1365 + sizeof(struct gfs_indirect) -
1366 + sizeof(struct gfs_meta_header));
1367 + gfs_buffer_copy_tail(bh, sizeof(struct gfs_indirect),
1368 + dibh, sizeof(struct gfs_dinode));
1373 + /* Set up the new direct pointer and write it out to disk */
1375 + gfs_trans_add_bh(ip->i_gl, dibh);
1377 + gfs_buffer_clear_tail(dibh, sizeof(struct gfs_dinode));
1380 + *(uint64_t *)(dibh->b_data + sizeof(struct gfs_dinode)) = cpu_to_gfs64(block);
1381 + ip->i_di.di_blocks++;
1384 + ip->i_di.di_height++;
1386 + gfs_dinode_out(&ip->i_di, dibh->b_data);
1399 + * find_metapath - Find path through the metadata tree
1400 + * @ip: The inode pointer
1401 + * @mp: The metapath to return the result in
1402 + * @block: The disk block to look up
1404 + * This routine returns a struct metapath structure that defines a path through
1405 + * the metadata of inode "ip" to get to block "block".
1408 + * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a
1409 + * filesystem with a blocksize of 4096.
1411 + * find_metapath() would return a struct metapath structure set to:
1412 + * mp_offset = 101342453, mp_height = 3, mp_list[0] = 0, mp_list[1] = 48,
1413 + * and mp_list[2] = 165.
1415 + * That means that in order to get to the block containing the byte at
1416 + * offset 101342453, we would load the indirect block pointed to by pointer
1417 + * 0 in the dinode. We would then load the indirect block pointed to by
1418 + * pointer 48 in that indirect block. We would then load the data block
1419 + * pointed to by pointer 165 in that indirect block.
1421 + * ----------------------------------------
1424 + * | |0 1 2 3 4 5 9|
1426 + * ----------------------------------------
1430 + * ----------------------------------------
1431 + * | Indirect Block |
1433 + * | 4 4 4 4 4 5 5 1|
1434 + * |0 5 6 7 8 9 0 1 2|
1435 + * ----------------------------------------
1439 + * ----------------------------------------
1440 + * | Indirect Block |
1444 + * ----------------------------------------
1448 + * ----------------------------------------
1449 + * | Data block containing offset |
1453 + * ----------------------------------------
1457 +static struct metapath *
1458 +find_metapath(struct gfs_inode *ip, uint64_t block)
1460 + struct gfs_sbd *sdp = ip->i_sbd;
1461 + struct metapath *mp;
1462 + uint64_t b = block;
1465 + mp = gmalloc(sizeof(struct metapath));
1466 + memset(mp, 0, sizeof(struct metapath));
1468 + for (i = ip->i_di.di_height; i--;)
1469 + mp->mp_list[i] = do_div(b, sdp->sd_inptrs);
1475 + * metapointer - Return pointer to start of metadata in a buffer
1477 + * @height: The metadata height (0 = dinode)
1478 + * @mp: The metapath
1480 + * Return a pointer to the block number of the next height of the metadata
1481 + * tree given a buffer containing the pointer to the current height of the
1485 +static __inline__ uint64_t *
1486 +metapointer(struct buffer_head *bh, unsigned int height, struct metapath *mp)
1488 + unsigned int head_size = (height > 0) ?
1489 + sizeof(struct gfs_indirect) : sizeof(struct gfs_dinode);
1491 + return ((uint64_t *)(bh->b_data + head_size)) + mp->mp_list[height];
1495 + * get_metablock - Get the next metadata block in metadata tree
1496 + * @ip: The GFS inode
1497 + * @bh: Buffer containing the pointers to metadata blocks
1498 + * @height: The height of the tree (0 = dinode)
1499 + * @mp: The metapath
1500 + * @create: Non-zero if we may create a new metadata block
1501 + * @new: Used to indicate if we did create a new metadata block
1502 + * @block: the returned disk block number
1504 + * Given a metatree, complete to a particular height, checks to see if the next
1505 + * height of the tree exists. If not the next height of the tree is created.
1506 + * The block number of the next height of the metadata tree is returned.
1508 + * Returns: 0 on success, -EXXX on failure
1512 +get_metablock(struct gfs_inode *ip,
1513 + struct buffer_head *bh, unsigned int height, struct metapath *mp,
1514 + int create, int *new, uint64_t *block)
1516 + uint64_t *ptr = metapointer(bh, height, mp);
1520 + *block = gfs64_to_cpu(*ptr);
1529 + error = gfs_metaalloc(ip, block);
1533 + gfs_trans_add_bh(ip->i_gl, bh);
1535 + *ptr = cpu_to_gfs64(*block);
1536 + ip->i_di.di_blocks++;
1544 + * get_datablock - Get datablock number from metadata block
1545 + * @ip: The GFS inode
1546 + * @bh: The buffer containing pointers to datablocks
1547 + * @mp: The metapath
1548 + * @create: Non-zero if we may create a new data block
1549 + * @new: Used to indicate if we created a new data block
1550 + * @block: the returned disk block number
1552 + * Given a fully built metadata tree, checks to see if a particular data
1553 + * block exists. It is created if it does not exist and the block number
1554 + * on disk is returned.
1556 + * Returns: 0 on success, -EXXX on failure
1560 +get_datablock(struct gfs_inode *ip,
1561 + struct buffer_head *bh, struct metapath *mp,
1562 + int create, int *new, uint64_t *block)
1564 + uint64_t *ptr = metapointer(bh, ip->i_di.di_height - 1, mp);
1567 + *block = gfs64_to_cpu(*ptr);
1576 + if (gfs_is_jdata(ip)) {
1578 + error = gfs_metaalloc(ip, block);
1582 + gfs_blkalloc(ip, block);
1584 + gfs_trans_add_bh(ip->i_gl, bh);
1586 + *ptr = cpu_to_gfs64(*block);
1587 + ip->i_di.di_blocks++;
1595 + * gfs_block_map - Map a block from an inode to a disk block
1596 + * @ip: The GFS inode
1597 + * @lblock: The logical block number
1598 + * @new: Value/Result argument (1 = may create/did create new blocks)
1599 + * @dblock: the disk block number of the start of an extent
1600 + * @extlen: the size of the extent
1602 + * Find the block number on the current device which corresponds to an
1603 + * inode's block. If the block had to be created, "new" will be set.
1605 + * Returns: 0 on success, -EXXX on failure
1609 +gfs_block_map(struct gfs_inode *ip,
1610 + uint64_t lblock, int *new,
1611 + uint64_t *dblock, uint32_t *extlen)
1613 + struct gfs_sbd *sdp = ip->i_sbd;
1614 + struct buffer_head *bh;
1615 + struct metapath *mp;
1616 + int create = *new;
1617 + unsigned int bsize;
1618 + unsigned int height;
1619 + unsigned int end_of_metadata;
1628 + if (gfs_is_stuffed(ip)) {
1630 + *dblock = ip->i_num.no_addr;
1637 + bsize = (gfs_is_jdata(ip)) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize;
1639 + height = calc_tree_height(ip, (lblock + 1) * bsize);
1640 + if (ip->i_di.di_height < height) {
1644 + error = build_height(ip, height);
1649 + mp = find_metapath(ip, lblock);
1650 + end_of_metadata = ip->i_di.di_height - 1;
1652 + error = gfs_get_inode_buffer(ip, &bh);
1656 + for (x = 0; x < end_of_metadata; x++) {
1657 + error = get_metablock(ip, bh, x, mp, create, new, dblock);
1659 + if (error || !*dblock)
1662 + error = gfs_get_meta_buffer(ip, x + 1, *dblock, *new, &bh);
1667 + error = get_datablock(ip, bh, mp, create, new, dblock);
1673 + if (extlen && *dblock) {
1677 + uint64_t tmp_dblock;
1679 + unsigned int nptrs;
1681 + nptrs = (end_of_metadata) ? sdp->sd_inptrs : sdp->sd_diptrs;
1683 + while (++mp->mp_list[end_of_metadata] < nptrs) {
1684 + get_datablock(ip, bh, mp,
1688 + if (*dblock + *extlen != tmp_dblock)
1699 + error = gfs_get_inode_buffer(ip, &bh);
1701 + gfs_trans_add_bh(ip->i_gl, bh);
1702 + gfs_dinode_out(&ip->i_di, bh->b_data);
1714 + * do_grow - Make a file look bigger than it is
1716 + * @size: the size to set the file to
1718 + * Called with an exclusive lock on @ip.
1720 + * Returns: 0 on success, -EXXX on failure
1724 +do_grow(struct gfs_inode *ip, uint64_t size)
1726 + struct gfs_sbd *sdp = ip->i_sbd;
1727 + struct gfs_alloc *al;
1728 + struct buffer_head *dibh;
1730 + int journaled = gfs_is_jdata(ip);
1733 + al = gfs_alloc_get(ip);
1735 + error = gfs_quota_lock_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1739 + error = gfs_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
1741 + goto fail_gunlock_q;
1744 + al->al_requested_meta = sdp->sd_max_height + 1;
1746 + al->al_requested_meta = sdp->sd_max_height;
1747 + al->al_requested_data = 1;
1750 + error = gfs_inplace_reserve(ip);
1752 + goto fail_gunlock_q;
1754 + /* Trans may require:
1755 + Full extension of the metadata tree, block allocation,
1756 + a dinode modification, and a quota change */
1758 + error = gfs_trans_begin(sdp,
1759 + sdp->sd_max_height + al->al_rgd->rd_ri.ri_length +
1765 + if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode)) {
1766 + if (gfs_is_stuffed(ip)) {
1767 + error = gfs_unstuff_dinode(ip, gfs_unstuffer_sync, NULL);
1769 + goto fail_end_trans;
1772 + h = calc_tree_height(ip, size);
1773 + if (ip->i_di.di_height < h) {
1774 + error = build_height(ip, h);
1776 + goto fail_end_trans;
1780 + ip->i_di.di_size = size;
1781 + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
1783 + error = gfs_get_inode_buffer(ip, &dibh);
1785 + goto fail_end_trans;
1787 + gfs_trans_add_bh(ip->i_gl, dibh);
1788 + gfs_dinode_out(&ip->i_di, dibh->b_data);
1791 + gfs_trans_end(sdp);
1793 + gfs_inplace_release(ip);
1794 + gfs_quota_unlock_m(ip);
1795 + gfs_alloc_put(ip);
1800 + gfs_trans_end(sdp);
1803 + gfs_inplace_release(ip);
1806 + gfs_quota_unlock_m(ip);
1809 + gfs_alloc_put(ip);
1815 + * recursive_scan - recursively scan through the end of a file
1817 + * @dibh: the dinode buffer
1818 + * @mp: the path through the metadata to the point to start
1819 + * @height: the height the recursion is at
1820 + * @block: the indirect block to look at
1821 + * @first: TRUE if this is the first block
1822 + * @bc: the call to make for each piece of metadata
1823 + * @data: data opaque to this function to pass to @bc
1825 + * When this is first called @height and @block should be zero and
1826 + * @first should be TRUE.
1828 + * Returns: 0 on success, -EXXX on failure
1832 +recursive_scan(struct gfs_inode *ip, struct buffer_head *dibh,
1833 + struct metapath *mp, unsigned int height, uint64_t block,
1834 + int first, block_call_t bc, void *data)
1836 + struct gfs_sbd *sdp = ip->i_sbd;
1837 + struct buffer_head *bh = NULL;
1838 + uint64_t *top, *bottom;
1843 + error = gfs_get_inode_buffer(ip, &bh);
1848 + top = (uint64_t *)(bh->b_data + sizeof(struct gfs_dinode)) +
1850 + bottom = (uint64_t *)(bh->b_data + sizeof(struct gfs_dinode)) +
1853 + error = gfs_get_meta_buffer(ip, height, block, FALSE, &bh);
1857 + top = (uint64_t *)(bh->b_data + sizeof(struct gfs_indirect)) +
1858 + ((first) ? mp->mp_list[height] : 0);
1859 + bottom = (uint64_t *)(bh->b_data + sizeof(struct gfs_indirect)) +
1863 + error = bc(ip, dibh, bh, top, bottom, height, data);
1867 + if (height < ip->i_di.di_height - 1)
1868 + for (; top < bottom; top++, first = FALSE) {
1872 + bn = gfs64_to_cpu(*top);
1874 + error = recursive_scan(ip, dibh, mp,
1875 + height + 1, bn, first,
1893 + * do_strip - Look for a particular layer of the file and strip it off
1895 + * @dibh: the dinode buffer
1896 + * @bh: A buffer of pointers
1897 + * @top: The first pointer in the buffer
1898 + * @bottom: One more than the last pointer
1899 + * @height: the height this buffer is at
1900 + * @data: a pointer to a struct strip_mine
1902 + * Returns: 0 on success, -EXXX on failure
1906 +do_strip(struct gfs_inode *ip, struct buffer_head *dibh,
1907 + struct buffer_head *bh, uint64_t *top, uint64_t *bottom,
1908 + unsigned int height, void *data)
1910 + struct strip_mine *sm = (struct strip_mine *)data;
1911 + struct gfs_sbd *sdp = ip->i_sbd;
1912 + struct gfs_holder ri_gh;
1913 + struct gfs_rgrp_list rlist;
1914 + uint64_t bn, bstart;
1917 + unsigned int rg_blocks = 0;
1923 + sm->sm_first = FALSE;
1925 + if (height != sm->sm_height)
1928 + if (sm->sm_first) {
1930 + sm->sm_first = FALSE;
1933 + metadata = (height != ip->i_di.di_height - 1) || gfs_is_jdata(ip);
1935 + error = gfs_rindex_hold(sdp, &ri_gh);
1939 + memset(&rlist, 0, sizeof(struct gfs_rgrp_list));
1943 + for (p = top; p < bottom; p++) {
1947 + bn = gfs64_to_cpu(*p);
1949 + if (bstart + blen == bn)
1953 + gfs_rlist_add(sdp, &rlist, bstart);
1961 + gfs_rlist_add(sdp, &rlist, bstart);
1963 + goto out; /* Nothing to do */
1965 + gfs_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
1967 + error = gfs_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
1971 + for (x = 0; x < rlist.rl_rgrps; x++) {
1972 + struct gfs_rgrpd *rgd;
1973 + rgd = gl2rgd(rlist.rl_ghs[x].gh_gl);
1974 + rg_blocks += rgd->rd_ri.ri_length;
1977 + /* Trans may require:
1978 + All the bitmaps that were reserved.
1979 + One block for the dinode.
1980 + One block for the indirect block being cleared.
1981 + One block for a quota change. */
1983 + error = gfs_trans_begin(sdp, rg_blocks + 2, 1);
1985 + goto fail_rg_gunlock;
1987 + gfs_trans_add_bh(ip->i_gl, dibh);
1988 + gfs_trans_add_bh(ip->i_gl, bh);
1993 + for (p = top; p < bottom; p++) {
1997 + bn = gfs64_to_cpu(*p);
1999 + if (bstart + blen == bn)
2004 + gfs_metafree(ip, bstart, blen);
2006 + gfs_blkfree(ip, bstart, blen);
2014 + ip->i_di.di_blocks--;
2019 + gfs_metafree(ip, bstart, blen);
2021 + gfs_blkfree(ip, bstart, blen);
2024 + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
2026 + gfs_dinode_out(&ip->i_di, dibh->b_data);
2028 + gfs_trans_end(sdp);
2030 + gfs_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
2031 + gfs_rlist_free(&rlist);
2034 + gfs_glock_dq_uninit(&ri_gh);
2039 + gfs_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
2042 + gfs_rlist_free(&rlist);
2044 + gfs_glock_dq_uninit(&ri_gh);
2050 + * gfs_truncator_default - truncate a partial data block
2052 + * @size: the size the file should be
2054 + * Returns: 0 on success, -EXXX on failure
2058 +gfs_truncator_default(struct gfs_inode *ip, uint64_t size)
2060 + struct gfs_sbd *sdp = ip->i_sbd;
2061 + struct buffer_head *bh;
2066 + error = gfs_block_map(ip, size >> sdp->sd_sb.sb_bsize_shift, ¬_new,
2073 + error = gfs_get_data_buffer(ip, bn, FALSE, &bh);
2077 + gfs_buffer_clear_tail(bh, size & (sdp->sd_sb.sb_bsize - 1));
2079 + error = gfs_dwrite(sdp, bh, DIO_DIRTY);
2087 + * truncator_journaled - truncate a partial data block
2089 + * @size: the size the file should be
2091 + * Returns: 0 on success, -EXXX on failure
2095 +truncator_journaled(struct gfs_inode *ip, uint64_t size)
2097 + struct gfs_sbd *sdp = ip->i_sbd;
2098 + struct buffer_head *bh;
2099 + uint64_t lbn, dbn;
2105 + off = do_div(lbn, sdp->sd_jbsize);
2107 + error = gfs_block_map(ip, lbn, ¬_new, &dbn, NULL);
2113 + error = gfs_trans_begin(sdp, 1, 0);
2117 + error = gfs_get_data_buffer(ip, dbn, FALSE, &bh);
2119 + gfs_trans_add_bh(ip->i_gl, bh);
2120 + gfs_buffer_clear_tail(bh,
2121 + sizeof(struct gfs_meta_header) +
2126 + gfs_trans_end(sdp);
2132 + * gfs_shrink - make a file smaller
2134 + * @size: the size to make the file
2135 + * @truncator: function to truncate the last partial block
2137 + * Called with an exclusive lock on @ip.
2139 + * Returns: 0 on success, -EXXX on failure
2143 +gfs_shrink(struct gfs_inode *ip, uint64_t size, gfs_truncator_t truncator)
2145 + struct gfs_sbd *sdp = ip->i_sbd;
2146 + struct gfs_holder ri_gh;
2147 + struct gfs_rgrpd *rgd;
2148 + struct buffer_head *dibh;
2150 + unsigned int height;
2151 + int journaled = gfs_is_jdata(ip);
2156 + else if (journaled) {
2158 + do_div(block, sdp->sd_jbsize);
2161 + block = (size - 1) >> sdp->sd_sb.sb_bsize_shift;
2163 + /* Get rid of all the data/metadata blocks */
2165 + height = ip->i_di.di_height;
2167 + struct metapath *mp = find_metapath(ip, block);
2168 + gfs_alloc_get(ip);
2170 + error = gfs_quota_hold_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
2172 + gfs_alloc_put(ip);
2177 + while (height--) {
2178 + struct strip_mine sm;
2180 + sm.sm_first = (size) ? TRUE : FALSE;
2181 + sm.sm_height = height;
2183 + error = recursive_scan(ip, NULL, mp, 0, 0, TRUE,
2186 + gfs_quota_unhold_m(ip);
2187 + gfs_alloc_put(ip);
2193 + gfs_quota_unhold_m(ip);
2194 + gfs_alloc_put(ip);
2198 + /* If we truncated in the middle of a block, zero out the leftovers. */
2200 + if (gfs_is_stuffed(ip)) {
2202 + } else if (journaled) {
2203 + if (do_mod(size, sdp->sd_jbsize)) {
2204 + error = truncator_journaled(ip, size);
2208 + } else if (size & (uint64_t)(sdp->sd_sb.sb_bsize - 1)) {
2209 + error = truncator(ip, size);
2214 + /* Set the new size (and possibly the height) */
2217 + error = gfs_rindex_hold(sdp, &ri_gh);
2222 + error = gfs_trans_begin(sdp, 1, 0);
2226 + error = gfs_get_inode_buffer(ip, &dibh);
2228 + goto out_end_trans;
2231 + ip->i_di.di_height = 0;
2233 + rgd = gfs_blk2rgrpd(sdp, ip->i_num.no_addr);
2234 + GFS_ASSERT_INODE(rgd, ip,);
2236 + ip->i_di.di_goal_rgrp = rgd->rd_ri.ri_addr;
2237 + ip->i_di.di_goal_dblk =
2238 + ip->i_di.di_goal_mblk =
2239 + ip->i_num.no_addr - rgd->rd_ri.ri_data1;
2242 + ip->i_di.di_size = size;
2243 + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
2245 + gfs_trans_add_bh(ip->i_gl, dibh);
2247 + if (!ip->i_di.di_height &&
2248 + size < sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode))
2249 + gfs_buffer_clear_tail(dibh, sizeof(struct gfs_dinode) + size);
2251 + gfs_dinode_out(&ip->i_di, dibh->b_data);
2255 + gfs_trans_end(sdp);
2259 + gfs_glock_dq_uninit(&ri_gh);
2265 + * do_same - truncate to same size (update time stamps)
2272 +do_same(struct gfs_inode *ip)
2274 + struct gfs_sbd *sdp = ip->i_sbd;
2275 + struct buffer_head *dibh;
2278 + error = gfs_trans_begin(sdp, 1, 0);
2282 + error = gfs_get_inode_buffer(ip, &dibh);
2286 + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
2288 + gfs_trans_add_bh(ip->i_gl, dibh);
2289 + gfs_dinode_out(&ip->i_di, dibh->b_data);
2294 + gfs_trans_end(sdp);
2300 + * gfs_truncatei - make a file a given size
2302 + * @size: the size to make the file
2303 + * @truncator: function to truncate the last partial block
2305 + * The file size can grow, shrink, or stay the same size.
2307 + * Returns: 0 on success, -EXXX on failure
2311 +gfs_truncatei(struct gfs_inode *ip, uint64_t size,
2312 + gfs_truncator_t truncator)
2314 + GFS_ASSERT_INODE(ip->i_di.di_type == GFS_FILE_REG, ip,);
2316 + if (size == ip->i_di.di_size)
2317 + return do_same(ip);
2318 + else if (size > ip->i_di.di_size)
2319 + return do_grow(ip, size);
2321 + return gfs_shrink(ip, size, truncator);
2325 + * gfs_write_calc_reserv - calculate the number of blocks needed to write to a file
2327 + * @len: the number of bytes to be written to the file
2328 + * @data_blocks: returns the number of data blocks required
2329 + * @ind_blocks: returns the number of indirect blocks required
2334 +gfs_write_calc_reserv(struct gfs_inode *ip, unsigned int len,
2335 + unsigned int *data_blocks, unsigned int *ind_blocks)
2337 + struct gfs_sbd *sdp = ip->i_sbd;
2340 + if (gfs_is_jdata(ip)) {
2341 + *data_blocks = DIV_RU(len, sdp->sd_jbsize) + 2;
2342 + *ind_blocks = 3 * (sdp->sd_max_jheight - 1);
2344 + *data_blocks = (len >> sdp->sd_sb.sb_bsize_shift) + 3;
2345 + *ind_blocks = 3 * (sdp->sd_max_height - 1);
2348 + for (tmp = *data_blocks; tmp > sdp->sd_diptrs;) {
2349 + tmp = DIV_RU(tmp, sdp->sd_inptrs);
2350 + *ind_blocks += tmp;
2355 + * gfs_write_alloc_required - figure out if a write is going to require an allocation
2356 + * @ip: the file being written to
2357 + * @offset: the offset to write to
2358 + * @len: the number of bytes being written
2359 + * @alloc_required: the int is set to TRUE if an alloc is required, FALSE otherwise
2361 + * Returns: 0 on success, -EXXX on error
2365 +gfs_write_alloc_required(struct gfs_inode *ip,
2366 + uint64_t offset, unsigned int len,
2367 + int *alloc_required)
2369 + struct gfs_sbd *sdp = ip->i_sbd;
2370 + uint64_t lblock, lblock_stop, dblock;
2372 + int not_new = FALSE;
2375 + *alloc_required = FALSE;
2380 + if (gfs_is_stuffed(ip)) {
2381 + if (offset + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode))
2382 + *alloc_required = TRUE;
2386 + if (gfs_is_jdata(ip)) {
2387 + unsigned int bsize = sdp->sd_jbsize;
2389 + do_div(lblock, bsize);
2390 + lblock_stop = offset + len + bsize - 1;
2391 + do_div(lblock_stop, bsize);
2393 + unsigned int shift = sdp->sd_sb.sb_bsize_shift;
2394 + lblock = offset >> shift;
2395 + lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
2398 + for (; lblock < lblock_stop; lblock += extlen) {
2399 + error = gfs_block_map(ip, lblock, ¬_new, &dblock, &extlen);
2404 + *alloc_required = TRUE;
2413 + * do_gfm - Copy out the dinode/indirect blocks of a file
2415 + * @dibh: the dinode buffer
2416 + * @bh: the indirect buffer we're looking at
2417 + * @top: the first pointer in the block
2418 + * @bottom: one more than the last pointer in the block
2419 + * @height: the height the block is at
2420 + * @data: a pointer to a struct gfs_user_buffer structure
2422 + * If this is a journaled file, copy out the data too.
2424 + * Returns: 0 on success, -EXXX on failure
2428 +do_gfm(struct gfs_inode *ip, struct buffer_head *dibh,
2429 + struct buffer_head *bh, uint64_t *top, uint64_t *bottom,
2430 + unsigned int height, void *data)
2432 + struct gfs_sbd *sdp = ip->i_sbd;
2433 + struct gfs_user_buffer *ub = (struct gfs_user_buffer *)data;
2434 + struct buffer_head *data_bh;
2438 + error = gfs_add_bh_to_ub(ub, bh);
2442 + if (ip->i_di.di_type != GFS_FILE_DIR ||
2443 + height + 1 != ip->i_di.di_height)
2446 + for (bp = top; bp < bottom; bp++)
2448 + bn = gfs64_to_cpu(*bp);
2450 + error = gfs_dread(sdp, bn, ip->i_gl,
2451 + DIO_START | DIO_WAIT, &data_bh);
2455 + error = gfs_add_bh_to_ub(ub, data_bh);
2467 + * gfs_get_file_meta - return all the metadata for a file
2469 + * @ub: the structure representing the meta
2471 + * Returns: 0 on success, -EXXX on failure
2475 +gfs_get_file_meta(struct gfs_inode *ip, struct gfs_user_buffer *ub)
2477 + struct buffer_head *dibh;
2478 + struct metapath *mp;
2481 + if (gfs_is_stuffed(ip)) {
2482 + error = gfs_get_inode_buffer(ip, &dibh);
2484 + error = gfs_add_bh_to_ub(ub, dibh);
2488 + mp = find_metapath(ip, 0);
2489 + error = recursive_scan(ip, NULL, mp, 0, 0, TRUE, do_gfm, ub);
2495 diff -urN linux-orig/fs/gfs/bmap.h linux-patched/fs/gfs/bmap.h
2496 --- linux-orig/fs/gfs/bmap.h 1969-12-31 18:00:00.000000000 -0600
2497 +++ linux-patched/fs/gfs/bmap.h 2004-06-30 13:27:49.333713450 -0500
2499 +/******************************************************************************
2500 +*******************************************************************************
2502 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
2503 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
2505 +** This copyrighted material is made available to anyone wishing to use,
2506 +** modify, copy, or redistribute it subject to the terms and conditions
2507 +** of the GNU General Public License v.2.
2509 +*******************************************************************************
2510 +******************************************************************************/
2512 +#ifndef __BMAP_DOT_H__
2513 +#define __BMAP_DOT_H__
2515 +typedef int (*gfs_unstuffer_t) (struct gfs_inode * ip,
2516 + struct buffer_head * dibh, uint64_t block,
2519 +int gfs_unstuffer_sync(struct gfs_inode *ip, struct buffer_head *dibh,
2520 + uint64_t block, void *private);
2521 +int gfs_unstuffer_async(struct gfs_inode *ip, struct buffer_head *dibh,
2522 + uint64_t block, void *private);
2524 +int gfs_unstuff_dinode(struct gfs_inode *ip, gfs_unstuffer_t unstuffer,
2527 +int gfs_block_map(struct gfs_inode *ip,
2528 + uint64_t lblock, int *new,
2529 + uint64_t *dblock, uint32_t *extlen);
2531 +typedef int (*gfs_truncator_t) (struct gfs_inode * ip, uint64_t size);
2533 +int gfs_truncator_default(struct gfs_inode *ip, uint64_t size);
2535 +int gfs_shrink(struct gfs_inode *ip, uint64_t size, gfs_truncator_t truncator);
2536 +int gfs_truncatei(struct gfs_inode *ip, uint64_t size,
2537 + gfs_truncator_t truncator);
2539 +void gfs_write_calc_reserv(struct gfs_inode *ip, unsigned int len,
2540 + unsigned int *data_blocks, unsigned int *ind_blocks);
2541 +int gfs_write_alloc_required(struct gfs_inode *ip, uint64_t offset,
2542 + unsigned int len, int *alloc_required);
2544 +int gfs_get_file_meta(struct gfs_inode *ip, struct gfs_user_buffer *ub);
2546 +#endif /* __BMAP_DOT_H__ */
2547 diff -urN linux-orig/fs/gfs/daemon.c linux-patched/fs/gfs/daemon.c
2548 --- linux-orig/fs/gfs/daemon.c 1969-12-31 18:00:00.000000000 -0600
2549 +++ linux-patched/fs/gfs/daemon.c 2004-06-30 13:27:49.333713450 -0500
2551 +/******************************************************************************
2552 +*******************************************************************************
2554 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
2555 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
2557 +** This copyrighted material is made available to anyone wishing to use,
2558 +** modify, copy, or redistribute it subject to the terms and conditions
2559 +** of the GNU General Public License v.2.
2561 +*******************************************************************************
2562 +******************************************************************************/
2564 +#include <linux/sched.h>
2565 +#include <linux/slab.h>
2566 +#include <linux/smp_lock.h>
2567 +#include <linux/spinlock.h>
2568 +#include <asm/semaphore.h>
2569 +#include <linux/completion.h>
2570 +#include <linux/buffer_head.h>
2573 +#include "daemon.h"
2577 +#include "recovery.h"
2579 +#include "unlinked.h"
2582 + * gfs_scand - Writing of cached scan changes into the scan file
2583 + * @sdp: Pointer to GFS superblock
2588 +gfs_scand(void *data)
2590 + struct gfs_sbd *sdp = (struct gfs_sbd *)data;
2592 + daemonize("gfs_scand");
2593 + sdp->sd_scand_process = current;
2594 + set_bit(SDF_SCAND_RUN, &sdp->sd_flags);
2595 + complete(&sdp->sd_thread_completion);
2598 + gfs_scand_internal(sdp);
2600 + if (!test_bit(SDF_SCAND_RUN, &sdp->sd_flags))
2603 + current->state = TASK_INTERRUPTIBLE;
2604 + schedule_timeout(sdp->sd_tune.gt_scand_secs * HZ);
2607 + down(&sdp->sd_thread_lock);
2608 + up(&sdp->sd_thread_lock);
2610 + complete(&sdp->sd_thread_completion);
2616 + * gfs_glockd - Reclaim unused glock structures
2617 + * @sdp: Pointer to GFS superblock
2622 +gfs_glockd(void *data)
2624 + struct gfs_sbd *sdp = (struct gfs_sbd *)data;
2626 + daemonize("gfs_glockd");
2627 + set_bit(SDF_GLOCKD_RUN, &sdp->sd_flags);
2628 + complete(&sdp->sd_thread_completion);
2631 + while (atomic_read(&sdp->sd_reclaim_count))
2632 + gfs_reclaim_glock(sdp);
2634 + if (!test_bit(SDF_GLOCKD_RUN, &sdp->sd_flags))
2638 + DECLARE_WAITQUEUE(__wait_chan, current);
2639 + current->state = TASK_INTERRUPTIBLE;
2640 + add_wait_queue(&sdp->sd_reclaim_wchan, &__wait_chan);
2641 + if (!atomic_read(&sdp->sd_reclaim_count)
2642 + && test_bit(SDF_GLOCKD_RUN, &sdp->sd_flags))
2644 + remove_wait_queue(&sdp->sd_reclaim_wchan, &__wait_chan);
2645 + current->state = TASK_RUNNING;
2649 + complete(&sdp->sd_thread_completion);
2655 + * gfs_recoverd - Recovery of dead machine's journals
2656 + * @sdp: Pointer to GFS superblock
2661 +gfs_recoverd(void *data)
2663 + struct gfs_sbd *sdp = (struct gfs_sbd *)data;
2665 + daemonize("gfs_recoverd");
2666 + sdp->sd_recoverd_process = current;
2667 + set_bit(SDF_RECOVERD_RUN, &sdp->sd_flags);
2668 + complete(&sdp->sd_thread_completion);
2671 + gfs_check_journals(sdp);
2673 + if (!test_bit(SDF_RECOVERD_RUN, &sdp->sd_flags))
2676 + current->state = TASK_INTERRUPTIBLE;
2677 + schedule_timeout(sdp->sd_tune.gt_recoverd_secs * HZ);
2680 + down(&sdp->sd_thread_lock);
2681 + up(&sdp->sd_thread_lock);
2683 + complete(&sdp->sd_thread_completion);
2689 + * gfs_logd - Writing of cached log changes into the log file
2690 + * @sdp: Pointer to GFS superblock
2695 +gfs_logd(void *data)
2697 + struct gfs_sbd *sdp = (struct gfs_sbd *)data;
2698 + struct gfs_holder ji_gh;
2700 + daemonize("gfs_logd");
2701 + sdp->sd_logd_process = current;
2702 + set_bit(SDF_LOGD_RUN, &sdp->sd_flags);
2703 + complete(&sdp->sd_thread_completion);
2706 + gfs_ail_empty(sdp);
2708 + if (time_after_eq(jiffies,
2709 + sdp->sd_jindex_refresh_time +
2710 + sdp->sd_tune.gt_jindex_refresh_secs * HZ)) {
2711 + if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags) &&
2712 + !gfs_jindex_hold(sdp, &ji_gh))
2713 + gfs_glock_dq_uninit(&ji_gh);
2714 + sdp->sd_jindex_refresh_time = jiffies;
2717 + if (!test_bit(SDF_LOGD_RUN, &sdp->sd_flags))
2720 + current->state = TASK_INTERRUPTIBLE;
2721 + schedule_timeout(sdp->sd_tune.gt_logd_secs * HZ);
2724 + down(&sdp->sd_thread_lock);
2725 + up(&sdp->sd_thread_lock);
2727 + complete(&sdp->sd_thread_completion);
2733 + * gfs_quotad - Writing of cached quota changes into the quota file
2734 + * @sdp: Pointer to GFS superblock
2739 +gfs_quotad(void *data)
2741 + struct gfs_sbd *sdp = (struct gfs_sbd *)data;
2744 + daemonize("gfs_quotad");
2745 + sdp->sd_quotad_process = current;
2746 + set_bit(SDF_QUOTAD_RUN, &sdp->sd_flags);
2747 + complete(&sdp->sd_thread_completion);
2750 + if (time_after_eq(jiffies,
2751 + sdp->sd_quota_sync_time +
2752 + sdp->sd_tune.gt_quota_quantum * HZ)) {
2753 + error = gfs_quota_sync(sdp);
2754 + if (error && error != -EROFS)
2755 + printk("GFS: fsid=%s: quotad: error = %d\n",
2756 + sdp->sd_fsname, error);
2757 + sdp->sd_quota_sync_time = jiffies;
2760 + gfs_quota_scan(sdp);
2762 + if (!test_bit(SDF_QUOTAD_RUN, &sdp->sd_flags))
2765 + current->state = TASK_INTERRUPTIBLE;
2766 + schedule_timeout(sdp->sd_tune.gt_quotad_secs * HZ);
2769 + down(&sdp->sd_thread_lock);
2770 + up(&sdp->sd_thread_lock);
2772 + complete(&sdp->sd_thread_completion);
2778 + * gfs_inoded - Deallocation of unlinked inodes
2779 + * @sdp: Pointer to GFS superblock
2784 +gfs_inoded(void *data)
2786 + struct gfs_sbd *sdp = (struct gfs_sbd *)data;
2788 + daemonize("gfs_inoded");
2789 + sdp->sd_inoded_process = current;
2790 + set_bit(SDF_INODED_RUN, &sdp->sd_flags);
2791 + complete(&sdp->sd_thread_completion);
2794 + gfs_unlinked_dealloc(sdp);
2796 + if (!test_bit(SDF_INODED_RUN, &sdp->sd_flags))
2799 + current->state = TASK_INTERRUPTIBLE;
2800 + schedule_timeout(sdp->sd_tune.gt_inoded_secs * HZ);
2803 + down(&sdp->sd_thread_lock);
2804 + up(&sdp->sd_thread_lock);
2806 + complete(&sdp->sd_thread_completion);
2810 diff -urN linux-orig/fs/gfs/daemon.h linux-patched/fs/gfs/daemon.h
2811 --- linux-orig/fs/gfs/daemon.h 1969-12-31 18:00:00.000000000 -0600
2812 +++ linux-patched/fs/gfs/daemon.h 2004-06-30 13:27:49.334713218 -0500
2814 +/******************************************************************************
2815 +*******************************************************************************
2817 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
2818 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
2820 +** This copyrighted material is made available to anyone wishing to use,
2821 +** modify, copy, or redistribute it subject to the terms and conditions
2822 +** of the GNU General Public License v.2.
2824 +*******************************************************************************
2825 +******************************************************************************/
2827 +#ifndef __DAEMON_DOT_H__
2828 +#define __DAEMON_DOT_H__
2830 +int gfs_scand(void *data);
2831 +int gfs_glockd(void *data);
2832 +int gfs_recoverd(void *data);
2833 +int gfs_logd(void *data);
2834 +int gfs_quotad(void *data);
2835 +int gfs_inoded(void *data);
2837 +#endif /* __DAEMON_DOT_H__ */
2838 diff -urN linux-orig/fs/gfs/dio.c linux-patched/fs/gfs/dio.c
2839 --- linux-orig/fs/gfs/dio.c 1969-12-31 18:00:00.000000000 -0600
2840 +++ linux-patched/fs/gfs/dio.c 2004-06-30 13:27:49.334713218 -0500
2842 +/******************************************************************************
2843 +*******************************************************************************
2845 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
2846 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
2848 +** This copyrighted material is made available to anyone wishing to use,
2849 +** modify, copy, or redistribute it subject to the terms and conditions
2850 +** of the GNU General Public License v.2.
2852 +*******************************************************************************
2853 +******************************************************************************/
2855 +#include <linux/sched.h>
2856 +#include <linux/slab.h>
2857 +#include <linux/smp_lock.h>
2858 +#include <linux/spinlock.h>
2859 +#include <asm/semaphore.h>
2860 +#include <linux/completion.h>
2861 +#include <linux/buffer_head.h>
2862 +#include <linux/mm.h>
2863 +#include <linux/pagemap.h>
2864 +#include <linux/writeback.h>
2876 +#define buffer_busy(bh) ((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock)))
2879 + * aspace_get_block -
2885 + * Returns: 0 on success, -EXXX on failure
2889 +aspace_get_block(struct inode *inode, sector_t lblock,
2890 + struct buffer_head *bh_result, int create)
2892 + struct gfs_sbd *sdp = vfs2sdp(inode->i_sb);
2893 + GFS_ASSERT_SBD(FALSE, sdp,);
2897 + * gfs_aspace_writepage - write an aspace page
2901 + * Returns: 0 on success, -EXXX on failure
2905 +gfs_aspace_writepage(struct page *page, struct writeback_control *wbc)
2907 + return block_write_full_page(page, aspace_get_block, wbc);
2911 + * stuck_releasepage - We're stuck in gfs_releasepage(). Print stuff out.
2912 + * @bh: the buffer we're stuck on
2917 +stuck_releasepage(struct buffer_head *bh)
2919 + struct gfs_sbd *sdp = vfs2sdp(bh->b_page->mapping->host->i_sb);
2920 + struct gfs_bufdata *bd = bh2bd(bh);
2922 + printk("GFS: fsid=%s: stuck in gfs_releasepage()...\n", sdp->sd_fsname);
2923 + printk("GFS: fsid=%s: blkno = %"PRIu64", bh->b_count = %d\n",
2925 + (uint64_t)bh->b_blocknr,
2926 + atomic_read(&bh->b_count));
2927 + printk("GFS: fsid=%s: bh2bd(bh) = %s\n",
2929 + (bd) ? "!NULL" : "NULL");
2932 + struct gfs_glock *gl = bd->bd_gl;
2934 + printk("GFS: fsid=%s: gl = (%u, %"PRIu64")\n",
2936 + gl->gl_name.ln_type,
2937 + gl->gl_name.ln_number);
2939 + printk("GFS: fsid=%s: bd_new_le.le_trans = %s\n",
2941 + (bd->bd_new_le.le_trans) ? "!NULL" : "NULL");
2942 + printk("GFS: fsid=%s: bd_incore_le.le_trans = %s\n",
2944 + (bd->bd_incore_le.le_trans) ? "!NULL" : "NULL");
2945 + printk("GFS: fsid=%s: bd_frozen = %s\n",
2947 + (bd->bd_frozen) ? "!NULL" : "NULL");
2948 + printk("GFS: fsid=%s: bd_pinned = %u\n",
2949 + sdp->sd_fsname, bd->bd_pinned);
2950 + printk("GFS: fsid=%s: bd_ail_tr_list = %s\n",
2952 + (list_empty(&bd->bd_ail_tr_list)) ? "Empty" : "!Empty");
2954 + if (gl->gl_ops == &gfs_inode_glops) {
2955 + struct gfs_inode *ip = gl2ip(gl);
2960 + printk("GFS: fsid=%s: ip = %"PRIu64"/%"PRIu64"\n",
2962 + ip->i_num.no_formal_ino,
2963 + ip->i_num.no_addr);
2964 + printk("GFS: fsid=%s: ip->i_count = %d, ip->i_vnode = %s\n",
2966 + atomic_read(&ip->i_count),
2967 + (ip->i_vnode) ? "!NULL" : "NULL");
2968 + for (x = 0; x < GFS_MAX_META_HEIGHT; x++)
2969 + printk("GFS: fsid=%s: ip->i_cache[%u] = %s\n",
2970 + sdp->sd_fsname, x,
2971 + (ip->i_cache[x]) ? "!NULL" : "NULL");
2978 + * gfs_aspace_releasepage - free the metadata associated with a page
2979 + * @page: the page that's being released
2980 + * @gfp_mask: the memory allocation flags (appears unused in this implementation)
2982 + * Call try_to_free_buffers() if the buffers in this page can be
2989 +gfs_aspace_releasepage(struct page *page, int gfp_mask)
2991 + struct inode *aspace = page->mapping->host;
2992 + struct gfs_sbd *sdp = vfs2sdp(aspace->i_sb);
2993 + struct buffer_head *bh, *head;
2994 + struct gfs_bufdata *bd;
2997 + if (!page_has_buffers(page))
3000 + head = bh = page_buffers(page);
3004 + while (atomic_read(&bh->b_count)) {
3005 + if (atomic_read(&aspace->i_writecount)) {
3006 + if (time_after_eq(jiffies,
3008 + sdp->sd_tune.gt_stall_secs * HZ)) {
3009 + stuck_releasepage(bh);
3022 + GFS_ASSERT_SBD(bd->bd_bh == bh, sdp,);
3023 + GFS_ASSERT_SBD(!bd->bd_new_le.le_trans, sdp,);
3024 + GFS_ASSERT_SBD(!bd->bd_incore_le.le_trans, sdp,);
3025 + GFS_ASSERT_SBD(!bd->bd_frozen, sdp,);
3026 + GFS_ASSERT_SBD(!bd->bd_pinned, sdp,);
3027 + GFS_ASSERT_SBD(list_empty(&bd->bd_ail_tr_list), sdp,);
3028 + kmem_cache_free(gfs_bufdata_cachep, bd);
3029 + atomic_dec(&sdp->sd_bufdata_count);
3033 + bh = bh->b_this_page;
3035 + while (bh != head);
3038 + return try_to_free_buffers(page);
3041 +static struct address_space_operations aspace_aops = {
3042 + .writepage = gfs_aspace_writepage,
3043 + .releasepage = gfs_aspace_releasepage,
3047 + * gfs_aspace_get - Get and initialize a struct inode structure
3048 + * @sdp: the filesystem the aspace is in
3050 + * Right now a struct inode is just a struct inode. Maybe Linux
3051 + * will supply a more lightweight address space construct (that works)
3054 + * Make sure pages/buffers in this aspace aren't in high memory.
3056 + * Returns: the aspace
3060 +gfs_aspace_get(struct gfs_sbd *sdp)
3062 + struct inode *aspace;
3064 + aspace = new_inode(sdp->sd_vfs);
3066 + mapping_set_gfp_mask(aspace->i_mapping, GFP_KERNEL);
3067 + aspace->i_mapping->a_ops = &aspace_aops;
3068 + aspace->i_size = ~0ULL;
3069 + vn2ip(aspace) = NULL;
3070 + insert_inode_hash(aspace);
3077 + * gfs_aspace_put - get rid of an aspace
3083 +gfs_aspace_put(struct inode *aspace)
3085 + remove_inode_hash(aspace);
3090 + * gfs_ail_start_trans - Start I/O on a part of the AIL
3091 + * @sdp: the filesystem
3092 + * @tr: the part of the AIL
3097 +gfs_ail_start_trans(struct gfs_sbd *sdp, struct gfs_trans *tr)
3099 + struct list_head *head, *tmp, *prev;
3100 + struct gfs_bufdata *bd;
3101 + struct buffer_head *bh;
3107 + spin_lock(&sdp->sd_ail_lock);
3109 + for (head = &tr->tr_ail_bufs, tmp = head->prev, prev = tmp->prev;
3111 + tmp = prev, prev = tmp->prev) {
3112 + bd = list_entry(tmp, struct gfs_bufdata, bd_ail_tr_list);
3115 + if (gfs_trylock_buffer(bh))
3118 + if (bd->bd_pinned) {
3119 + gfs_unlock_buffer(bh);
3123 + if (!buffer_busy(bh)) {
3124 + if (!buffer_uptodate(bh))
3125 + gfs_io_error_bh(sdp, bh);
3127 + list_del_init(&bd->bd_ail_tr_list);
3128 + list_del(&bd->bd_ail_gl_list);
3130 + gfs_unlock_buffer(bh);
3135 + if (buffer_dirty(bh)) {
3136 + list_move(&bd->bd_ail_tr_list, head);
3138 + spin_unlock(&sdp->sd_ail_lock);
3139 + wait_on_buffer(bh);
3140 + ll_rw_block(WRITE, 1, &bh);
3141 + spin_lock(&sdp->sd_ail_lock);
3143 + gfs_unlock_buffer(bh);
3148 + gfs_unlock_buffer(bh);
3151 + spin_unlock(&sdp->sd_ail_lock);
3156 + * gfs_ail_empty_trans - Check whether or not a trans in the AIL has been synced
3157 + * @sdp: the filesystem
3158 + * @tr: the transaction
3163 +gfs_ail_empty_trans(struct gfs_sbd *sdp, struct gfs_trans *tr)
3165 + struct list_head *head, *tmp, *prev;
3166 + struct gfs_bufdata *bd;
3167 + struct buffer_head *bh;
3170 + spin_lock(&sdp->sd_ail_lock);
3172 + for (head = &tr->tr_ail_bufs, tmp = head->prev, prev = tmp->prev;
3174 + tmp = prev, prev = tmp->prev) {
3175 + bd = list_entry(tmp, struct gfs_bufdata, bd_ail_tr_list);
3178 + if (gfs_trylock_buffer(bh))
3181 + if (bd->bd_pinned || buffer_busy(bh)) {
3182 + gfs_unlock_buffer(bh);
3186 + if (!buffer_uptodate(bh))
3187 + gfs_io_error_bh(sdp, bh);
3189 + list_del_init(&bd->bd_ail_tr_list);
3190 + list_del(&bd->bd_ail_gl_list);
3192 + gfs_unlock_buffer(bh);
3196 + ret = list_empty(head);
3198 + spin_unlock(&sdp->sd_ail_lock);
3204 + * ail_empty_gl - remove all buffers for a given lock from the AIL
3207 + * None of the buffers should be dirty, locked, or pinned.
3211 +ail_empty_gl(struct gfs_glock *gl)
3213 + struct gfs_sbd *sdp = gl->gl_sbd;
3214 + struct gfs_bufdata *bd;
3215 + struct buffer_head *bh;
3217 + spin_lock(&sdp->sd_ail_lock);
3219 + while (!list_empty(&gl->gl_ail_bufs)) {
3220 + bd = list_entry(gl->gl_ail_bufs.next,
3221 + struct gfs_bufdata, bd_ail_gl_list);
3224 + GFS_ASSERT_GLOCK(!bd->bd_pinned && !buffer_busy(bh), gl,
3225 + printk("%u %.8lX\n", bd->bd_pinned, bh->b_state););
3226 + if (!buffer_uptodate(bh))
3227 + gfs_io_error_bh(sdp, bh);
3229 + list_del_init(&bd->bd_ail_tr_list);
3230 + list_del(&bd->bd_ail_gl_list);
3235 + spin_unlock(&sdp->sd_ail_lock);
3239 + * gfs_inval_buf - Invalidate all buffers associated with a glock
3245 +gfs_inval_buf(struct gfs_glock *gl)
3247 + struct inode *aspace = gl->gl_aspace;
3248 + struct address_space *mapping = gl->gl_aspace->i_mapping;
3252 + atomic_inc(&aspace->i_writecount);
3253 + truncate_inode_pages(mapping, 0);
3254 + atomic_dec(&aspace->i_writecount);
3256 + GFS_ASSERT_GLOCK(!mapping->nrpages, gl,);
3260 + * gfs_sync_buf - Sync all buffers associated with a glock
3262 + * @flags: DIO_START | DIO_WAIT
3267 +gfs_sync_buf(struct gfs_glock *gl, int flags)
3269 + struct address_space *mapping = gl->gl_aspace->i_mapping;
3272 + if (flags & DIO_START)
3273 + error = filemap_fdatawrite(mapping);
3274 + if (!error && (flags & DIO_WAIT))
3275 + error = filemap_fdatawait(mapping);
3276 + if (!error && (flags & (DIO_INVISIBLE | DIO_CHECK)) == DIO_CHECK)
3280 + gfs_io_error(gl->gl_sbd);
3284 + * getbuf - Get a buffer with a given address space
3285 + * @sdp: the filesystem
3286 + * @aspace: the address space
3287 + * @blkno: the block number
3288 + * @create: TRUE if the buffer should be created
3290 + * Returns: the buffer
3293 +static struct buffer_head *
3294 +getbuf(struct gfs_sbd *sdp, struct inode *aspace, uint64_t blkno, int create)
3296 + struct page *page;
3297 + struct buffer_head *bh;
3298 + unsigned int shift;
3299 + unsigned long index;
3300 + unsigned int bufnum;
3302 + shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift;
3303 + index = blkno >> shift;
3304 + bufnum = blkno - (index << shift);
3307 + RETRY_MALLOC(page = grab_cache_page(aspace->i_mapping, index), page);
3309 + page = find_lock_page(aspace->i_mapping, index);
3314 + if (!page_has_buffers(page))
3315 + create_empty_buffers(page, sdp->sd_sb.sb_bsize, 0);
3317 + for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
3321 + if (!buffer_mapped(bh))
3322 + map_bh(bh, sdp->sd_vfs, blkno);
3324 + GFS_ASSERT_SBD(bh->b_bdev == sdp->sd_vfs->s_bdev &&
3325 + bh->b_blocknr == blkno,
3328 + unlock_page(page);
3329 + page_cache_release(page);
3335 + * gfs_dgetblk - Get a block
3336 + * @sdp: The GFS superblock
3337 + * @blkno: The block number
3338 + * @gl: The glock associated with this block
3340 + * Returns: The buffer
3343 +struct buffer_head *
3344 +gfs_dgetblk(struct gfs_sbd *sdp, uint64_t blkno, struct gfs_glock *gl)
3346 + struct buffer_head *bh;
3349 + bh = getbuf(sdp, gl->gl_aspace, blkno, CREATE);
3351 + bh = sb_getblk(sdp->sd_vfs, blkno);
3357 + * gfs_dread - Read a block from disk
3358 + * @sdp: The GFS superblock
3359 + * @blkno: The block number
3360 + * @gl: The glock covering the block
3361 + * @flags: flags to gfs_dreread()
3362 + * @bhp: the place where the buffer is returned
3364 + * Returns: 0 on success, -EXXX on failure
3368 +gfs_dread(struct gfs_sbd *sdp, uint64_t blkno, struct gfs_glock *gl, int flags,
3369 + struct buffer_head **bhp)
3373 + *bhp = gfs_dgetblk(sdp, blkno, gl);
3374 + error = gfs_dreread(sdp, *bhp, flags);
3382 + * gfs_prep_new_buffer - Mark a new buffer we just gfs_dgetblk()ed uptodate
3388 +gfs_prep_new_buffer(struct buffer_head *bh)
3390 + wait_on_buffer(bh);
3391 + clear_buffer_dirty(bh);
3392 + set_buffer_uptodate(bh);
3396 + * gfs_dreread - Reread a block from disk
3397 + * @sdp: the filesystem
3398 + * @bh: The block to read
3399 + * @flags: Flags that control the read
3401 + * Returns: 0 on success, -EXXX on failure
3405 +gfs_dreread(struct gfs_sbd *sdp, struct buffer_head *bh, int flags)
3409 + if (flags & DIO_NEW) {
3410 + if (gfs_mhc_fish(sdp, bh))
3412 + clear_buffer_uptodate(bh);
3415 + if (flags & DIO_FORCE)
3416 + clear_buffer_uptodate(bh);
3418 + if ((flags & DIO_START) && !buffer_uptodate(bh))
3419 + ll_rw_block(READ, 1, &bh);
3421 + if (flags & DIO_WAIT) {
3422 + wait_on_buffer(bh);
3424 + if (!buffer_uptodate(bh)) {
3425 + gfs_io_error_bh(sdp, bh);
3434 + * gfs_dwrite - Write a buffer
3435 + * @sdp: the filesystem
3436 + * @bh: The buffer to write
3437 + * @flags: The type of write operation to do
3439 + * Returns: 0 on success, -EXXX on failure
3443 +gfs_dwrite(struct gfs_sbd *sdp, struct buffer_head *bh, int flags)
3447 + GFS_ASSERT_SBD(buffer_uptodate(bh), sdp,);
3448 + GFS_ASSERT_SBD(!test_bit(SDF_ROFS, &sdp->sd_flags), sdp,);
3450 + if (flags & DIO_CLEAN) {
3452 + clear_buffer_dirty(bh);
3453 + unlock_buffer(bh);
3456 + if (flags & DIO_DIRTY)
3457 + mark_buffer_dirty(bh);
3459 + if ((flags & DIO_START) && buffer_dirty(bh)) {
3460 + wait_on_buffer(bh);
3461 + ll_rw_block(WRITE, 1, &bh);
3464 + if (flags & DIO_WAIT) {
3465 + wait_on_buffer(bh);
3467 + if (!buffer_uptodate(bh) || buffer_dirty(bh)) {
3468 + gfs_io_error_bh(sdp, bh);
3477 + * gfs_attach_bufdata - attach a struct gfs_bufdata structure to a buffer
3478 + * @bh: The buffer to be attached to
3479 + * @gl: the glock the buffer belongs to
3484 +gfs_attach_bufdata(struct buffer_head *bh, struct gfs_glock *gl)
3486 + struct gfs_bufdata *bd;
3488 + lock_page(bh->b_page);
3491 + unlock_page(bh->b_page);
3495 + RETRY_MALLOC(bd = kmem_cache_alloc(gfs_bufdata_cachep, GFP_KERNEL), bd);
3496 + atomic_inc(&gl->gl_sbd->sd_bufdata_count);
3498 + memset(bd, 0, sizeof(struct gfs_bufdata));
3503 + INIT_LE(&bd->bd_new_le, &gfs_buf_lops);
3504 + INIT_LE(&bd->bd_incore_le, &gfs_buf_lops);
3506 + init_MUTEX(&bd->bd_lock);
3508 + INIT_LIST_HEAD(&bd->bd_ail_tr_list);
3512 + unlock_page(bh->b_page);
3516 + * gfs_is_pinned - Figure out if a buffer is pinned or not
3517 + * @sdp: the filesystem the buffer belongs to
3518 + * @bh: The buffer to be pinned
3520 + * Returns: TRUE if the buffer is pinned, FALSE otherwise
3524 +gfs_is_pinned(struct gfs_sbd *sdp, struct buffer_head *bh)
3526 + struct gfs_bufdata *bd = bh2bd(bh);
3530 + gfs_lock_buffer(bh);
3531 + if (bd->bd_pinned)
3533 + gfs_unlock_buffer(bh);
3540 + * gfs_dpin - Pin a metadata buffer in memory
3541 + * @sdp: the filesystem the buffer belongs to
3542 + * @bh: The buffer to be pinned
3547 +gfs_dpin(struct gfs_sbd *sdp, struct buffer_head *bh)
3549 + struct gfs_bufdata *bd;
3552 + GFS_ASSERT_SBD(buffer_uptodate(bh), sdp,);
3553 + GFS_ASSERT_SBD(!test_bit(SDF_ROFS, &sdp->sd_flags), sdp,);
3556 + GFS_ASSERT_SBD(bd, sdp,);
3558 + gfs_lock_buffer(bh);
3560 + GFS_ASSERT_GLOCK(!bd->bd_frozen, bd->bd_gl,);
3562 + if (!bd->bd_pinned++) {
3563 + wait_on_buffer(bh);
3565 + /* If this buffer is in the AIL and it has already been written,
3566 + remove it from the AIL. */
3568 + spin_lock(&sdp->sd_ail_lock);
3569 + if (!list_empty(&bd->bd_ail_tr_list) && !buffer_busy(bh)) {
3570 + list_del_init(&bd->bd_ail_tr_list);
3571 + list_del(&bd->bd_ail_gl_list);
3574 + spin_unlock(&sdp->sd_ail_lock);
3576 + clear_buffer_dirty(bh);
3577 + wait_on_buffer(bh);
3579 + if (!buffer_uptodate(bh))
3580 + gfs_io_error_bh(sdp, bh);
3582 + gfs_unlock_buffer(bh);
3584 + data = gmalloc(sdp->sd_sb.sb_bsize);
3586 + gfs_lock_buffer(bh);
3587 + if (bd->bd_pinned > 1) {
3588 + memcpy(data, bh->b_data, sdp->sd_sb.sb_bsize);
3589 + bd->bd_frozen = data;
3594 + gfs_unlock_buffer(bh);
3600 + * gfs_dunpin - Unpin a buffer
3601 + * @sdp: the filesystem the buffer belongs to
3602 + * @bh: The buffer to unpin
3603 + * @tr: The transaction in the AIL that contains this buffer
3608 +gfs_dunpin(struct gfs_sbd *sdp, struct buffer_head *bh, struct gfs_trans *tr)
3610 + struct gfs_bufdata *bd;
3612 + GFS_ASSERT_SBD(buffer_uptodate(bh), sdp,);
3615 + GFS_ASSERT_SBD(bd, sdp,);
3617 + gfs_lock_buffer(bh);
3619 + GFS_ASSERT_GLOCK(bd->bd_pinned, bd->bd_gl,);
3621 + if (bd->bd_pinned == 1)
3622 + mark_buffer_dirty(bh);
3626 + gfs_unlock_buffer(bh);
3628 + /* Add the buffer to the AIL
3629 + and get rid of an old reference if there is one */
3632 + spin_lock(&sdp->sd_ail_lock);
3634 + if (list_empty(&bd->bd_ail_tr_list))
3635 + list_add(&bd->bd_ail_gl_list, &bd->bd_gl->gl_ail_bufs);
3637 + list_del_init(&bd->bd_ail_tr_list);
3640 + list_add(&bd->bd_ail_tr_list, &tr->tr_ail_bufs);
3642 + spin_unlock(&sdp->sd_ail_lock);
3648 + * logbh_end_io - called at the end of a logbh write
3650 + * @uptodate: whether or not the write succeeded
3652 + * Don't do ENTER() AND EXIT() here.
3657 +logbh_end_io(struct buffer_head *bh, int uptodate)
3660 + set_buffer_uptodate(bh);
3662 + clear_buffer_uptodate(bh);
3663 + unlock_buffer(bh);
3667 + * gfs_logbh_init - Initialize a fake buffer head
3668 + * @sdp: the filesystem
3669 + * @bh: the buffer to initialize
3670 + * @blkno: the block address of the buffer
3671 + * @data: the data to be written
3676 +gfs_logbh_init(struct gfs_sbd *sdp, struct buffer_head *bh,
3677 + uint64_t blkno, char *data)
3679 + memset(bh, 0, sizeof(struct buffer_head));
3680 + bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate) | (1 << BH_Lock);
3681 + atomic_set(&bh->b_count, 1);
3682 + set_bh_page(bh, virt_to_page(data), ((unsigned long)data) & (PAGE_SIZE - 1));
3683 + bh->b_blocknr = blkno;
3684 + bh->b_size = sdp->sd_sb.sb_bsize;
3685 + bh->b_bdev = sdp->sd_vfs->s_bdev;
3686 + init_buffer(bh, logbh_end_io, NULL);
3687 + INIT_LIST_HEAD(&bh->b_assoc_buffers);
3691 + * gfs_logbh_uninit - Clean up a fake buffer head
3692 + * @sdp: the filesystem
3693 + * @bh: the buffer to clean
3698 +gfs_logbh_uninit(struct gfs_sbd *sdp, struct buffer_head *bh)
3700 + GFS_ASSERT_SBD(!buffer_busy(bh) &&
3701 + atomic_read(&bh->b_count) == 1,
3706 + * gfs_logbh_start - Start writing a fake buffer head
3707 + * @sdp: the filesystem
3708 + * @bh: the buffer to write
3710 + * Returns: 0 on success, -EXXX on error;
3714 +gfs_logbh_start(struct gfs_sbd *sdp, struct buffer_head *bh)
3716 + submit_bh(WRITE, bh);
3721 + * gfs_logbh_wait - Wait for the write of a fake buffer head to complete
3722 + * @sdp: the filesystem
3723 + * @bh: the buffer to write
3725 + * Returns: 0 on success, -EXXX on error;
3729 +gfs_logbh_wait(struct gfs_sbd *sdp, struct buffer_head *bh)
3733 + wait_on_buffer(bh);
3735 + if (!buffer_uptodate(bh) || buffer_dirty(bh)) {
3736 + gfs_io_error_bh(sdp, bh);
3744 + * gfs_replay_buf - write a log buffer to its inplace location
3745 + * @gl: the journal's glock
3748 + * Returns: 0 on success, -EXXX on failure
3752 +gfs_replay_buf(struct gfs_glock *gl, struct buffer_head *bh)
3754 + struct gfs_sbd *sdp = gl->gl_sbd;
3755 + struct gfs_bufdata *bd;
3759 + gfs_attach_bufdata(bh, gl);
3763 + mark_buffer_dirty(bh);
3765 + if (list_empty(&bd->bd_ail_tr_list)) {
3767 + list_add(&bd->bd_ail_tr_list, &sdp->sd_recovery_bufs);
3774 + * gfs_replay_check - Check up on journal replay
3775 + * @sdp: the filesystem
3780 +gfs_replay_check(struct gfs_sbd *sdp)
3782 + struct buffer_head *bh;
3783 + struct gfs_bufdata *bd;
3785 + while (!list_empty(&sdp->sd_recovery_bufs)) {
3786 + bd = list_entry(sdp->sd_recovery_bufs.prev,
3787 + struct gfs_bufdata, bd_ail_tr_list);
3790 + if (buffer_busy(bh)) {
3791 + list_move(&bd->bd_ail_tr_list,
3792 + &sdp->sd_recovery_bufs);
3795 + list_del_init(&bd->bd_ail_tr_list);
3796 + if (!buffer_uptodate(bh))
3797 + gfs_io_error_bh(sdp, bh);
3804 + * gfs_replay_wait - Wait for all replayed buffers to hit the disk
3805 + * @sdp: the filesystem
3810 +gfs_replay_wait(struct gfs_sbd *sdp)
3812 + struct list_head *head, *tmp, *prev;
3813 + struct buffer_head *bh;
3814 + struct gfs_bufdata *bd;
3816 + for (head = &sdp->sd_recovery_bufs, tmp = head->prev, prev = tmp->prev;
3818 + tmp = prev, prev = tmp->prev) {
3819 + bd = list_entry(tmp, struct gfs_bufdata, bd_ail_tr_list);
3822 + if (!buffer_busy(bh)) {
3823 + list_del_init(&bd->bd_ail_tr_list);
3824 + if (!buffer_uptodate(bh))
3825 + gfs_io_error_bh(sdp, bh);
3830 + if (buffer_dirty(bh)) {
3831 + wait_on_buffer(bh);
3832 + ll_rw_block(WRITE, 1, &bh);
3836 + while (!list_empty(head)) {
3837 + bd = list_entry(head->prev, struct gfs_bufdata, bd_ail_tr_list);
3840 + wait_on_buffer(bh);
3842 + GFS_ASSERT_SBD(!buffer_busy(bh), sdp,);
3844 + list_del_init(&bd->bd_ail_tr_list);
3845 + if (!buffer_uptodate(bh))
3846 + gfs_io_error_bh(sdp, bh);
3852 + * gfs_wipe_buffers - make buffers so they aren't dirty/pinned anymore
3853 + * @ip: the inode who owns the buffers
3854 + * @bstart: the first buffer in the run
3855 + * @blen: the number of buffers in the run
3860 +gfs_wipe_buffers(struct gfs_inode *ip, struct gfs_rgrpd *rgd,
3861 + uint64_t bstart, uint32_t blen)
3863 + struct gfs_sbd *sdp = ip->i_sbd;
3864 + struct inode *aspace = ip->i_gl->gl_aspace;
3865 + struct buffer_head *bh;
3866 + struct gfs_bufdata *bd;
3871 + bh = getbuf(sdp, aspace, bstart, NO_CREATE);
3876 + if (buffer_uptodate(bh)) {
3878 + gfs_lock_buffer(bh);
3879 + gfs_mhc_add(rgd, &bh, 1);
3880 + busy = bd->bd_pinned || buffer_busy(bh);
3881 + gfs_unlock_buffer(bh);
3886 + spin_lock(&sdp->sd_ail_lock);
3887 + if (!list_empty(&bd->bd_ail_tr_list)) {
3888 + list_del_init(&bd->bd_ail_tr_list);
3889 + list_del(&bd->bd_ail_gl_list);
3892 + spin_unlock(&sdp->sd_ail_lock);
3895 + GFS_ASSERT_INODE(!buffer_dirty(bh), ip,);
3896 + wait_on_buffer(bh);
3897 + GFS_ASSERT_INODE(!buffer_busy(bh), ip,);
3898 + gfs_mhc_add(rgd, &bh, 1);
3901 + GFS_ASSERT_INODE(!bd || !bd->bd_pinned, ip,);
3902 + GFS_ASSERT_INODE(!buffer_dirty(bh), ip,);
3903 + wait_on_buffer(bh);
3904 + GFS_ASSERT_INODE(!buffer_busy(bh), ip,);
3915 + gfs_depend_add(rgd, ip->i_num.no_formal_ino);
3919 + * gfs_sync_meta - sync all the buffers in a filesystem
3920 + * @sdp: the filesystem
3925 +gfs_sync_meta(struct gfs_sbd *sdp)
3927 + gfs_log_flush(sdp);
3929 + gfs_ail_start(sdp, DIO_ALL);
3930 + if (gfs_ail_empty(sdp))
3933 + current->state = TASK_UNINTERRUPTIBLE;
3934 + schedule_timeout(HZ / 10);
3939 + * gfs_flush_meta_cache - get rid of any references on buffers for this inode
3940 + * @ip: The GFS inode
3945 +gfs_flush_meta_cache(struct gfs_inode *ip)
3947 + struct buffer_head **bh_slot;
3950 + spin_lock(&ip->i_lock);
3952 + for (x = 0; x < GFS_MAX_META_HEIGHT; x++) {
3953 + bh_slot = &ip->i_cache[x];
3960 + spin_unlock(&ip->i_lock);
3964 + * gfs_get_meta_buffer - Get a metadata buffer
3965 + * @ip: The GFS inode
3966 + * @depth: The depth in the metadata tree
3967 + * @num: The block number (device relative) of the buffer
3968 + * @new: Non-zero if we may create a new buffer
3969 + * @bhp: the buffer is returned here
3971 + * Returns: 0 on success, -EXXX on failure
3975 +gfs_get_meta_buffer(struct gfs_inode *ip, int height, uint64_t num, int new,
3976 + struct buffer_head **bhp)
3978 + struct gfs_sbd *sdp = ip->i_sbd;
3979 + struct buffer_head *bh, **bh_slot = &ip->i_cache[height];
3980 + int flags = ((new) ? DIO_NEW : 0) | DIO_START | DIO_WAIT;
3983 + spin_lock(&ip->i_lock);
3986 + if (bh->b_blocknr == num)
3991 + spin_unlock(&ip->i_lock);
3994 + error = gfs_dreread(sdp, bh, flags);
4000 + error = gfs_dread(sdp, num, ip->i_gl, flags, &bh);
4004 + spin_lock(&ip->i_lock);
4005 + if (*bh_slot != bh) {
4011 + spin_unlock(&ip->i_lock);
4015 + GFS_ASSERT_INODE(height, ip,);
4017 + gfs_trans_add_bh(ip->i_gl, bh);
4018 + gfs_metatype_set(sdp, bh, GFS_METATYPE_IN, GFS_FORMAT_IN);
4019 + gfs_buffer_clear_tail(bh, sizeof(struct gfs_meta_header));
4021 + gfs_metatype_check(sdp, bh,
4022 + (height) ? GFS_METATYPE_IN : GFS_METATYPE_DI);
4030 + * gfs_get_data_buffer - Get a data buffer
4031 + * @ip: The GFS inode
4032 + * @num: The block number (device relative) of the data block
4033 + * @new: Non-zero if this is a new allocation
4034 + * @bhp: the buffer is returned here
4036 + * Returns: 0 on success, -EXXX on failure
4040 +gfs_get_data_buffer(struct gfs_inode *ip, uint64_t block, int new,
4041 + struct buffer_head **bhp)
4043 + struct gfs_sbd *sdp = ip->i_sbd;
4044 + struct buffer_head *bh;
4047 + if (block == ip->i_num.no_addr) {
4048 + GFS_ASSERT_INODE(!new, ip,);
4050 + error = gfs_dread(sdp, block, ip->i_gl, DIO_START | DIO_WAIT, &bh);
4053 + gfs_metatype_check(sdp, bh, GFS_METATYPE_DI);
4054 + } else if (gfs_is_jdata(ip)) {
4056 + error = gfs_dread(sdp, block, ip->i_gl,
4057 + DIO_NEW | DIO_START | DIO_WAIT, &bh);
4060 + gfs_trans_add_bh(ip->i_gl, bh);
4061 + gfs_metatype_set(sdp, bh, GFS_METATYPE_JD, GFS_FORMAT_JD);
4062 + gfs_buffer_clear_tail(bh, sizeof(struct gfs_meta_header));
4064 + error = gfs_dread(sdp, block, ip->i_gl,
4065 + DIO_START | DIO_WAIT, &bh);
4068 + gfs_metatype_check(sdp, bh, GFS_METATYPE_JD);
4072 + bh = gfs_dgetblk(sdp, block, ip->i_gl);
4073 + gfs_prep_new_buffer(bh);
4075 + error = gfs_dread(sdp, block, ip->i_gl,
4076 + DIO_START | DIO_WAIT, &bh);
4088 + * gfs_start_ra - start readahead on an extent of a file
4089 + * @gl: the glock the blocks belong to
4090 + * @dblock: the starting disk block
4091 + * @extlen: the number of blocks in the extent
4096 +gfs_start_ra(struct gfs_glock *gl, uint64_t dblock, uint32_t extlen)
4098 + struct gfs_sbd *sdp = gl->gl_sbd;
4099 + struct inode *aspace = gl->gl_aspace;
4100 + struct buffer_head *first_bh, *bh;
4101 + uint32_t max_ra = sdp->sd_tune.gt_max_readahead >> sdp->sd_sb.sb_bsize_shift;
4104 + GFS_ASSERT_GLOCK(extlen, gl,);
4107 + if (extlen > max_ra)
4110 + first_bh = getbuf(sdp, aspace, dblock, CREATE);
4112 + if (buffer_uptodate(first_bh))
4114 + if (!buffer_locked(first_bh)) {
4115 + error = gfs_dreread(sdp, first_bh, DIO_START);
4124 + bh = getbuf(sdp, aspace, dblock, CREATE);
4126 + if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
4127 + error = gfs_dreread(sdp, bh, DIO_START);
4137 + if (buffer_uptodate(first_bh))
4144 diff -urN linux-orig/fs/gfs/dio.h linux-patched/fs/gfs/dio.h
4145 --- linux-orig/fs/gfs/dio.h 1969-12-31 18:00:00.000000000 -0600
4146 +++ linux-patched/fs/gfs/dio.h 2004-06-30 13:27:49.335712986 -0500
4148 +/******************************************************************************
4149 +*******************************************************************************
4151 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
4152 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
4154 +** This copyrighted material is made available to anyone wishing to use,
4155 +** modify, copy, or redistribute it subject to the terms and conditions
4156 +** of the GNU General Public License v.2.
4158 +*******************************************************************************
4159 +******************************************************************************/
4161 +#ifndef __DIO_DOT_H__
4162 +#define __DIO_DOT_H__
4164 +void gfs_ail_start_trans(struct gfs_sbd *sdp, struct gfs_trans *tr);
4165 +int gfs_ail_empty_trans(struct gfs_sbd *sdp, struct gfs_trans *tr);
4167 +/* Asynchronous I/O Routines */
4169 +struct buffer_head *gfs_dgetblk(struct gfs_sbd *sdp, uint64_t blkno,
4170 + struct gfs_glock *gl);
4171 +int gfs_dread(struct gfs_sbd *sdp, uint64_t blkno, struct gfs_glock *gl,
4172 + int flags, struct buffer_head **bhp);
4174 +void gfs_prep_new_buffer(struct buffer_head *bh);
4175 +int gfs_dreread(struct gfs_sbd *sdp, struct buffer_head *bh, int flags);
4176 +int gfs_dwrite(struct gfs_sbd *sdp, struct buffer_head *bh, int flags);
4178 +void gfs_attach_bufdata(struct buffer_head *bh, struct gfs_glock *gl);
4179 +int gfs_is_pinned(struct gfs_sbd *sdp, struct buffer_head *bh);
4180 +void gfs_dpin(struct gfs_sbd *sdp, struct buffer_head *bh);
4181 +void gfs_dunpin(struct gfs_sbd *sdp, struct buffer_head *bh,
4182 + struct gfs_trans *tr);
4185 +void gfs_lock_buffer(struct buffer_head *bh)
4187 + struct gfs_bufdata *bd = bh2bd(bh);
4188 + down(&bd->bd_lock);
4191 +int gfs_trylock_buffer(struct buffer_head *bh)
4193 + struct gfs_bufdata *bd = bh2bd(bh);
4194 + return down_trylock(&bd->bd_lock);
4197 +void gfs_unlock_buffer(struct buffer_head *bh)
4199 + struct gfs_bufdata *bd = bh2bd(bh);
4203 +void gfs_logbh_init(struct gfs_sbd *sdp, struct buffer_head *bh, uint64_t blkno,
4205 +void gfs_logbh_uninit(struct gfs_sbd *sdp, struct buffer_head *bh);
4206 +int gfs_logbh_start(struct gfs_sbd *sdp, struct buffer_head *bh);
4207 +int gfs_logbh_wait(struct gfs_sbd *sdp, struct buffer_head *bh);
4209 +int gfs_replay_buf(struct gfs_glock *gl, struct buffer_head *bh);
4210 +void gfs_replay_check(struct gfs_sbd *sdp);
4211 +void gfs_replay_wait(struct gfs_sbd *sdp);
4213 +void gfs_wipe_buffers(struct gfs_inode *ip, struct gfs_rgrpd *rgd,
4214 + uint64_t bstart, uint32_t blen);
4216 +void gfs_sync_meta(struct gfs_sbd *sdp);
4218 +/* Buffer Caching routines */
4220 +int gfs_get_meta_buffer(struct gfs_inode *ip, int height, uint64_t num, int new,
4221 + struct buffer_head **bhp);
4222 +int gfs_get_data_buffer(struct gfs_inode *ip, uint64_t block, int new,
4223 + struct buffer_head **bhp);
4224 +void gfs_start_ra(struct gfs_glock *gl, uint64_t dblock, uint32_t extlen);
4226 +static __inline__ int
4227 +gfs_get_inode_buffer(struct gfs_inode *ip, struct buffer_head **bhp)
4229 + return gfs_get_meta_buffer(ip, 0, ip->i_num.no_addr, FALSE, bhp);
4232 +struct inode *gfs_aspace_get(struct gfs_sbd *sdp);
4233 +void gfs_aspace_put(struct inode *aspace);
4235 +void gfs_inval_buf(struct gfs_glock *gl);
4236 +void gfs_sync_buf(struct gfs_glock *gl, int flags);
4238 +void gfs_flush_meta_cache(struct gfs_inode *ip);
4240 +/* Buffer Content Functions */
4243 + * gfs_buffer_clear - Zeros out a buffer
4244 + * @ip: The GFS inode
4245 + * @bh: The buffer to zero
4249 +static __inline__ void
4250 +gfs_buffer_clear(struct buffer_head *bh)
4252 + memset(bh->b_data, 0, bh->b_size);
4256 + * gfs_buffer_clear_tail - Clear buffer beyond the dinode
4257 + * @bh: The buffer containing the on-disk inode
4258 + * @head: the size of the head of the buffer
4260 + * Clears the remaining part of an on-disk inode that is not a dinode.
4261 + * i.e. The data part of a stuffed inode, or the top level of metadata
4262 + * of a non-stuffed inode.
4265 +static __inline__ void
4266 +gfs_buffer_clear_tail(struct buffer_head *bh, int head)
4268 + memset(bh->b_data + head, 0, bh->b_size - head);
4272 + * gfs_buffer_clear_ends - Zero out any bits of a buffer which are not being written
4274 + * @offset: Offset in buffer where write starts
4275 + * @amount: Amount of data being written
4276 + * @journaled: TRUE if this is a journaled buffer
4280 +static __inline__ void
4281 +gfs_buffer_clear_ends(struct buffer_head *bh, int offset, int amount,
4284 + int z_off1 = (journaled) ? sizeof(struct gfs_meta_header) : 0;
4285 + int z_len1 = offset - z_off1;
4286 + int z_off2 = offset + amount;
4287 + int z_len2 = (bh)->b_size - z_off2;
4290 + memset(bh->b_data + z_off1, 0, z_len1);
4293 + memset(bh->b_data + z_off2, 0, z_len2);
4297 + * gfs_buffer_copy_tail - copies the tail of one buffer to another
4298 + * @to_bh: the buffer to copy to
4299 + * @to_head: the size of the head of to_bh
4300 + * @from_bh: the buffer to copy from
4301 + * @from_head: the size of the head of from_bh
 4303 + * from_head is guaranteed to be bigger than to_head
4306 +static __inline__ void
4307 +gfs_buffer_copy_tail(struct buffer_head *to_bh, int to_head,
4308 + struct buffer_head *from_bh, int from_head)
4310 + memcpy(to_bh->b_data + to_head,
4311 + from_bh->b_data + from_head,
4312 + from_bh->b_size - from_head);
4313 + memset(to_bh->b_data + to_bh->b_size + to_head - from_head,
4315 + from_head - to_head);
4319 + * gfs_buffer_print - print a buffer to the debug console
4321 + * @string: what to print before the contents of the buffer
4325 +static __inline__ void
4326 +gfs_buffer_print(struct buffer_head *bh, char *string)
4328 + unsigned int x, size = (bh)->b_size;
4329 + unsigned char *c = (bh)->b_data;
4331 + printk("%s\n", string);
4333 + for (x = 0; x < size; x++) {
4334 + printk("%.2X ", c[x]);
4342 +#endif /* __DIO_DOT_H__ */
4343 diff -urN linux-orig/fs/gfs/dir.c linux-patched/fs/gfs/dir.c
4344 --- linux-orig/fs/gfs/dir.c 1969-12-31 18:00:00.000000000 -0600
4345 +++ linux-patched/fs/gfs/dir.c 2004-06-30 13:27:49.335712986 -0500
4347 +/******************************************************************************
4348 +*******************************************************************************
4350 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
4351 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
4353 +** This copyrighted material is made available to anyone wishing to use,
4354 +** modify, copy, or redistribute it subject to the terms and conditions
4355 +** of the GNU General Public License v.2.
4357 +*******************************************************************************
4358 +******************************************************************************/
4361 +* Implements Extendible Hashing as described in:
4362 +* "Extendible Hashing" by Fagin, et al in
4363 +* __ACM Trans. on Database Systems__, Sept 1979.
4366 +* Here's the layout of dirents which is essentially the same as that of ext2
4367 +* within a single block. The field de_name_len is the number of bytes
4368 +* actually required for the name (no null terminator). The field de_rec_len
4369 +* is the number of bytes allocated to the dirent. The offset of the next
4370 +* dirent in the block is (dirent + dirent->de_rec_len). When a dirent is
4371 +* deleted, the preceding dirent inherits its allocated space, ie
4372 +* prev->de_rec_len += deleted->de_rec_len. Since the next dirent is obtained
4373 +* by adding de_rec_len to the current dirent, this essentially causes the
4374 +* deleted dirent to get jumped over when iterating through all the dirents.
4375 +* When deleting the first dirent in a block, there is no previous dirent so
4376 +* the field de_ino is set to zero to designate it as deleted. When allocating
4377 +* a dirent, gfs_dirent_alloc iterates through the dirents in a block. If the
4378 +* first dirent has (de_ino == 0) and de_rec_len is large enough, this first
4379 +* dirent is allocated. Otherwise it must go through all the 'used' dirents
4380 +* searching for one in which the amount of total space minus the amount of
4381 +* used space will provide enough space for the new dirent.
4382 +* There are two types of blocks in which dirents reside. In a stuffed dinode,
4383 +* the dirents begin at offset sizeof(struct gfs_dinode) from the beginning of the block.
4384 +* In leaves, they begin at offset sizeof (struct gfs_leaf) from the beginning of the
4385 +* leaf block. The dirents reside in leaves when
4387 +* dip->i_di.di_regime == GFS_DIR_EXHASH.
4389 +* The dirents are in the stuffed dinode when dip->i_di.di_regime == GFS_DIR_LINEAR.
4390 +* When the dirents are in leaves, the actual contents of the directory file are
4391 +* used as an array of 64-bit block pointers pointing to the leaf blocks. The
4392 +* dirents are NOT in the directory file itself. There can be more than one block
4393 +* pointer in the array that points to the same leaf. In fact, when a directory is
4394 +* first converted from linear to exhash, all of the pointers point to the same
4395 +* leaf. When a leaf is completely full, the size of the hash table can be doubled
4396 +* unless it is already at the maximum size which is hard coded into
4397 +* GFS_DIR_MAX_DEPTH. After that, leaves are chained together in a linked list but
4398 +* never before the maximum hash table size has been reached.
4401 +#include <linux/sched.h>
4402 +#include <linux/slab.h>
4403 +#include <linux/smp_lock.h>
4404 +#include <linux/spinlock.h>
4405 +#include <asm/semaphore.h>
4406 +#include <linux/completion.h>
4407 +#include <linux/buffer_head.h>
4420 +#define IS_LEAF (1)
4421 +#define IS_DINODE (2)
4424 +#define gfs_dir_hash2offset(h) (((uint64_t)(h)) >> 1)
4425 +#define gfs_dir_offset2hash(p) ((uint32_t)(((uint64_t)(p)) << 1))
4427 +#define gfs_dir_hash2offset(h) (((uint64_t)(h)))
4428 +#define gfs_dir_offset2hash(p) ((uint32_t)(((uint64_t)(p))))
4431 +typedef int (*leaf_call_t) (struct gfs_inode *dip,
4432 + uint32_t index, uint32_t len, uint64_t leaf_no,
4436 + * int gfs_filecmp - Compare two filenames
4437 + * @file1: The first filename
4438 + * @file2: The second filename
4439 + * @len_of_file2: The length of the second file
4441 + * This routine compares two filenames and returns TRUE if they are equal.
4443 + * Returns: TRUE (!=0) if the files are the same, otherwise FALSE (0).
4447 +gfs_filecmp(struct qstr *file1, char *file2, int len_of_file2)
4449 + if (file1->len != len_of_file2)
4451 + if (memcmp(file1->name, file2, file1->len))
4457 + * dirent_first - Return the first dirent
4458 + * @dip: the directory
4460 + * @dent: Pointer to list of dirents
4462 + * return first dirent whether bh points to leaf or stuffed dinode
4464 + * Returns: IS_LEAF or IS_DINODE
4468 +dirent_first(struct gfs_inode *dip, struct buffer_head *bh,
4469 + struct gfs_dirent **dent)
4471 + struct gfs_meta_header *h = (struct gfs_meta_header *)bh->b_data;
4473 + if (gfs32_to_cpu(h->mh_type) == GFS_METATYPE_LF) {
4474 + gfs_meta_check(dip->i_sbd, bh);
4475 + *dent = (struct gfs_dirent *)(bh->b_data + sizeof(struct gfs_leaf));
4478 + gfs_metatype_check(dip->i_sbd, bh, GFS_METATYPE_DI);
4479 + *dent = (struct gfs_dirent *)(bh->b_data + sizeof(struct gfs_dinode));
4485 + * dirent_next - Next dirent
4486 + * @dip: the directory
4488 + * @dent: Pointer to list of dirents
4490 + * Returns: 0 on success, error code otherwise
4494 +dirent_next(struct gfs_inode *dip, struct buffer_head *bh,
4495 + struct gfs_dirent **dent)
4497 + struct gfs_dirent *tmp, *cur;
4499 + uint32_t cur_rec_len;
4502 + bh_end = bh->b_data + bh->b_size;
4504 + cur_rec_len = gfs16_to_cpu(cur->de_rec_len);
4506 + if ((char *)cur + cur_rec_len >= bh_end) {
4507 + GFS_ASSERT_INODE((char *)cur + cur_rec_len == bh_end, dip,);
4511 + tmp = (struct gfs_dirent *)((char *)cur + cur_rec_len);
4513 + GFS_ASSERT_INODE((char *)tmp + gfs16_to_cpu(tmp->de_rec_len) <= bh_end,
4515 + /* Only the first dent could ever have de_ino == 0 */
4516 + GFS_ASSERT_INODE(tmp->de_inum.no_formal_ino, dip,);
4524 + * dirent_del - Delete a dirent
4525 + * @dip: The GFS inode
4527 + * @prev: The previous dirent
4528 + * @cur: The current dirent
4533 +dirent_del(struct gfs_inode *dip, struct buffer_head *bh,
4534 + struct gfs_dirent *prev, struct gfs_dirent *cur)
4536 + uint32_t cur_rec_len, prev_rec_len;
4538 + GFS_ASSERT_INODE(cur->de_inum.no_formal_ino, dip,);
4540 + gfs_trans_add_bh(dip->i_gl, bh);
4542 + /* If there is no prev entry, this is the first entry in the block.
4543 + The de_rec_len is already as big as it needs to be. Just zero
4544 + out the inode number and return. */
4547 + cur->de_inum.no_formal_ino = 0; /* No endianess worries */
4551 + /* Combine this dentry with the previous one. */
4553 + prev_rec_len = gfs16_to_cpu(prev->de_rec_len);
4554 + cur_rec_len = gfs16_to_cpu(cur->de_rec_len);
4556 + GFS_ASSERT_INODE((char *)prev + prev_rec_len == (char *)cur, dip,);
4557 + GFS_ASSERT_INODE((char *)cur + cur_rec_len <=
4558 + bh->b_data + bh->b_size, dip,);
4560 + prev_rec_len += cur_rec_len;
4561 + prev->de_rec_len = cpu_to_gfs16(prev_rec_len);
4565 + * gfs_dirent_alloc - Allocate a directory entry
4566 + * @dip: The GFS inode
4568 + * @name_len: The length of the name
4569 + * @dent_out: Pointer to list of dirents
4571 + * Returns: 0 on success, error code otherwise
4575 +gfs_dirent_alloc(struct gfs_inode *dip, struct buffer_head *bh, int name_len,
4576 + struct gfs_dirent **dent_out)
4578 + struct gfs_dirent *dent, *new;
4579 + unsigned int rec_len = GFS_DIRENT_SIZE(name_len);
4580 + unsigned int entries = 0, offset = 0, x = 0;
4583 + type = dirent_first(dip, bh, &dent);
4585 + if (type == IS_LEAF) {
4586 + struct gfs_leaf *leaf = (struct gfs_leaf *)bh->b_data;
4587 + entries = gfs16_to_cpu(leaf->lf_entries);
4588 + offset = sizeof(struct gfs_leaf);
4590 + struct gfs_dinode *dinode = (struct gfs_dinode *)bh->b_data;
4591 + entries = gfs32_to_cpu(dinode->di_entries);
4592 + offset = sizeof(struct gfs_dinode);
4596 + gfs_trans_add_bh(dip->i_gl, bh);
4598 + dent->de_rec_len = bh->b_size - offset;
4599 + dent->de_rec_len = cpu_to_gfs16(dent->de_rec_len);
4600 + dent->de_name_len = cpu_to_gfs16(name_len);
4607 + uint32_t cur_rec_len, cur_name_len;
4609 + cur_rec_len = gfs16_to_cpu(dent->de_rec_len);
4610 + cur_name_len = gfs16_to_cpu(dent->de_name_len);
4612 + if ((!dent->de_inum.no_formal_ino && cur_rec_len >= rec_len) ||
4613 + (cur_rec_len >= GFS_DIRENT_SIZE(cur_name_len) + rec_len)) {
4614 + gfs_trans_add_bh(dip->i_gl, bh);
4616 + if (dent->de_inum.no_formal_ino) {
4617 + new = (struct gfs_dirent *)((char *)dent +
4618 + GFS_DIRENT_SIZE(cur_name_len));
4619 + memset(new, 0, sizeof(struct gfs_dirent));
4621 + new->de_rec_len = cpu_to_gfs16(cur_rec_len -
4622 + GFS_DIRENT_SIZE(cur_name_len));
4623 + new->de_name_len = cpu_to_gfs16(name_len);
4625 + dent->de_rec_len = cur_rec_len - gfs16_to_cpu(new->de_rec_len);
4626 + dent->de_rec_len = cpu_to_gfs16(dent->de_rec_len);
4632 + dent->de_name_len = cpu_to_gfs16(name_len);
4638 + GFS_ASSERT_INODE(x < entries, dip,);
4640 + if (dent->de_inum.no_formal_ino)
4643 + while (dirent_next(dip, bh, &dent) == 0);
 4649 + * dirent_fits - See if we can fit an entry in this buffer
4650 + * @dip: The GFS inode
4652 + * @name_len: The length of the name
4654 + * Returns: TRUE if it can fit, FALSE otherwise
4658 +dirent_fits(struct gfs_inode *dip, struct buffer_head *bh, int name_len)
4660 + struct gfs_dirent *dent;
4661 + unsigned int rec_len = GFS_DIRENT_SIZE(name_len);
4662 + unsigned int entries = 0, x = 0;
4665 + type = dirent_first(dip, bh, &dent);
4667 + if (type == IS_LEAF) {
4668 + struct gfs_leaf *leaf = (struct gfs_leaf *)bh->b_data;
4669 + entries = gfs16_to_cpu(leaf->lf_entries);
4671 + struct gfs_dinode *dinode = (struct gfs_dinode *)bh->b_data;
4672 + entries = gfs32_to_cpu(dinode->di_entries);
4679 + uint32_t cur_rec_len, cur_name_len;
4681 + cur_rec_len = gfs16_to_cpu(dent->de_rec_len);
4682 + cur_name_len = gfs16_to_cpu(dent->de_name_len);
4684 + if ((!dent->de_inum.no_formal_ino && cur_rec_len >= rec_len) ||
4685 + (cur_rec_len >= GFS_DIRENT_SIZE(cur_name_len) + rec_len))
4688 + GFS_ASSERT_INODE(x < entries, dip,);
4690 + if (dent->de_inum.no_formal_ino)
4693 + while (dirent_next(dip, bh, &dent) == 0);
4709 +leaf_search(struct gfs_inode *dip,
4710 + struct buffer_head *bh, struct qstr *filename,
4711 + struct gfs_dirent **dent_out, struct gfs_dirent **dent_prev)
4714 + struct gfs_dirent *dent, *prev = NULL;
4715 + unsigned int entries = 0, x = 0;
4718 + type = dirent_first(dip, bh, &dent);
4720 + if (type == IS_LEAF) {
4721 + struct gfs_leaf *leaf = (struct gfs_leaf *)bh->b_data;
4722 + entries = gfs16_to_cpu(leaf->lf_entries);
4723 + } else if (type == IS_DINODE) {
4724 + struct gfs_dinode *dinode = (struct gfs_dinode *)bh->b_data;
4725 + entries = gfs32_to_cpu(dinode->di_entries);
4728 + hash = gfs_dir_hash(filename->name, filename->len);
4731 + if (!dent->de_inum.no_formal_ino) {
4736 + if (gfs32_to_cpu(dent->de_hash) == hash &&
4737 + gfs_filecmp(filename, (char *)(dent + 1),
4738 + gfs16_to_cpu(dent->de_name_len))) {
4741 + *dent_prev = prev;
4746 + GFS_ASSERT_INODE(x < entries, dip,);
4750 + while (dirent_next(dip, bh, &dent) == 0);
4756 + * get_leaf - Get leaf
4761 + * Returns: 0 on success, error code otherwise
4765 +get_leaf(struct gfs_inode *dip, uint64_t leaf_no, struct buffer_head **bhp)
4767 + struct gfs_sbd *sdp = dip->i_sbd;
4770 + error = gfs_dread(sdp, leaf_no, dip->i_gl, DIO_START | DIO_WAIT, bhp);
4772 + gfs_metatype_check(sdp, *bhp, GFS_METATYPE_LF);
4778 + * get_leaf_nr - Get a leaf number associated with the index
4779 + * @dip: The GFS inode
4783 + * Returns: 0 on success, error code otherwise
4787 +get_leaf_nr(struct gfs_inode *dip, uint32_t index, uint64_t *leaf_out)
4792 + error = gfs_internal_read(dip, (char *)&leaf_no,
4793 + index * sizeof(uint64_t),
4794 + sizeof(uint64_t));
4795 + if (error != sizeof(uint64_t))
4796 + return (error < 0) ? error : -EIO;
4798 + *leaf_out = gfs64_to_cpu(leaf_no);
4804 + * get_first_leaf - Get first leaf
4805 + * @dip: The GFS inode
4809 + * Returns: 0 on success, error code otherwise
4813 +get_first_leaf(struct gfs_inode *dip, uint32_t index,
4814 + struct buffer_head **bh_out)
4819 + error = get_leaf_nr(dip, index, &leaf_no);
4821 + error = get_leaf(dip, leaf_no, bh_out);
4827 + * get_next_leaf - Get next leaf
4828 + * @dip: The GFS inode
4829 + * @bh_in: The buffer
4832 + * Returns: 0 on success, error code otherwise
4836 +get_next_leaf(struct gfs_inode *dip, struct buffer_head *bh_in,
4837 + struct buffer_head **bh_out)
4839 + struct gfs_leaf *leaf;
4842 + leaf = (struct gfs_leaf *)bh_in->b_data;
4844 + if (!leaf->lf_next)
4847 + error = get_leaf(dip, gfs64_to_cpu(leaf->lf_next), bh_out);
4853 + * linked_leaf_search - Linked leaf search
4854 + * @dip: The GFS inode
4855 + * @filename: The filename to search for
 4860 + * Returns: 0 on success, error code otherwise
4864 +linked_leaf_search(struct gfs_inode *dip, struct qstr *filename,
4865 + struct gfs_dirent **dent_out, struct gfs_dirent **dent_prev,
4866 + struct buffer_head **bh_out)
4868 + struct buffer_head *bh = NULL, *bh_next;
4869 + uint32_t hsize, index;
4873 + hsize = 1 << dip->i_di.di_depth;
4874 + GFS_ASSERT_INODE(hsize * sizeof(uint64_t) == dip->i_di.di_size, dip,);
4876 + /* Figure out the address of the leaf node. */
4878 + hash = gfs_dir_hash(filename->name, filename->len);
4879 + index = hash >> (32 - dip->i_di.di_depth);
4881 + error = get_first_leaf(dip, index, &bh_next);
4885 + /* Find the entry */
4893 + error = leaf_search(dip, bh, filename, dent_out, dent_prev);
4907 + error = get_next_leaf(dip, bh, &bh_next);
 4917 + * dir_make_exhash - Convert a stuffed directory into an ExHash directory
4918 + * @dip: The GFS inode
4920 + * Returns: 0 on success, error code otherwise
4924 +dir_make_exhash(struct gfs_inode *dip)
4926 + struct gfs_sbd *sdp = dip->i_sbd;
4927 + struct gfs_dirent *dent;
4928 + struct buffer_head *bh, *dibh;
4929 + struct gfs_leaf *leaf;
4935 + error = gfs_get_inode_buffer(dip, &dibh);
4939 + /* Allocate a new block for the first leaf node */
4941 + error = gfs_metaalloc(dip, &bn);
4945 + /* Turn over a new leaf */
4947 + error = gfs_dread(sdp, bn, dip->i_gl, DIO_NEW | DIO_START | DIO_WAIT, &bh);
4951 + gfs_trans_add_bh(dip->i_gl, bh);
4952 + gfs_metatype_set(sdp, bh, GFS_METATYPE_LF, GFS_FORMAT_LF);
4953 + gfs_buffer_clear_tail(bh, sizeof(struct gfs_meta_header));
4955 + /* Fill in the leaf structure */
4957 + leaf = (struct gfs_leaf *)bh->b_data;
4959 + GFS_ASSERT_INODE(dip->i_di.di_entries < (1 << 16), dip,);
4961 + leaf->lf_dirent_format = cpu_to_gfs32(GFS_FORMAT_DE);
4962 + leaf->lf_entries = cpu_to_gfs16(dip->i_di.di_entries);
4964 + /* Copy dirents */
4966 + gfs_buffer_copy_tail(bh, sizeof(struct gfs_leaf), dibh,
4967 + sizeof(struct gfs_dinode));
4969 + /* Find last entry */
4972 + dirent_first(dip, bh, &dent);
4975 + if (!dent->de_inum.no_formal_ino)
4977 + if (++x == dip->i_di.di_entries)
4980 + while (dirent_next(dip, bh, &dent) == 0);
4982 + /* Adjust the last dirent's record length
4983 + (Remember that dent still points to the last entry.) */
4985 + dent->de_rec_len = gfs16_to_cpu(dent->de_rec_len) +
4986 + sizeof(struct gfs_dinode) -
4987 + sizeof(struct gfs_leaf);
4988 + dent->de_rec_len = cpu_to_gfs16(dent->de_rec_len);
4992 + /* We're done with the new leaf block, now setup the new
4995 + gfs_trans_add_bh(dip->i_gl, dibh);
4996 + gfs_buffer_clear_tail(dibh, sizeof (struct gfs_dinode));
4998 + lp = (uint64_t *)(dibh->b_data + sizeof(struct gfs_dinode));
5000 + for (x = sdp->sd_hash_ptrs; x--; lp++)
5001 + *lp = cpu_to_gfs64(bn);
5003 + dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2;
5004 + dip->i_di.di_blocks++;
5005 + dip->i_di.di_flags |= GFS_DIF_EXHASH;
5006 + dip->i_di.di_payload_format = 0;
5008 + for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ;
5009 + dip->i_di.di_depth = y;
5011 + gfs_dinode_out(&dip->i_di, dibh->b_data);
5023 + * dir_split_leaf - Split a leaf block into two
5024 + * @dip: The GFS inode
5028 + * Returns: 0 on success, error code on failure
5032 +dir_split_leaf(struct gfs_inode *dip, uint32_t index, uint64_t leaf_no)
5034 + struct gfs_sbd *sdp = dip->i_sbd;
5035 + struct buffer_head *nbh, *obh, *dibh;
5036 + struct gfs_leaf *nleaf, *oleaf;
5037 + struct gfs_dirent *dent, *prev = NULL, *next = NULL, *new;
5038 + uint32_t start, len, half_len, divider;
5040 + uint32_t name_len;
5041 + int x, moved = FALSE;
5044 + /* Allocate the new leaf block */
5046 + error = gfs_metaalloc(dip, &bn);
5050 + /* Get the new leaf block */
5052 + error = gfs_dread(sdp, bn, dip->i_gl,
5053 + DIO_NEW | DIO_START | DIO_WAIT, &nbh);
5057 + gfs_trans_add_bh(dip->i_gl, nbh);
5058 + gfs_metatype_set(sdp, nbh, GFS_METATYPE_LF, GFS_FORMAT_LF);
5059 + gfs_buffer_clear_tail(nbh, sizeof (struct gfs_meta_header));
5061 + nleaf = (struct gfs_leaf *)nbh->b_data;
5063 + nleaf->lf_dirent_format = cpu_to_gfs32(GFS_FORMAT_DE);
5065 + /* Get the old leaf block */
5067 + error = get_leaf(dip, leaf_no, &obh);
5071 + gfs_trans_add_bh(dip->i_gl, obh);
5073 + oleaf = (struct gfs_leaf *)obh->b_data;
5075 + /* Compute the start and len of leaf pointers in the hash table. */
5077 + len = 1 << (dip->i_di.di_depth - gfs16_to_cpu(oleaf->lf_depth));
5078 + GFS_ASSERT_INODE(len != 1, dip,);
5079 + half_len = len >> 1;
5081 + start = (index & ~(len - 1));
5083 + /* Change the pointers.
5084 + Don't bother distinguishing stuffed from non-stuffed.
5085 + This code is complicated enough already. */
5087 + lp = gmalloc(half_len * sizeof(uint64_t));
5089 + error = gfs_internal_read(dip, (char *)lp, start * sizeof(uint64_t),
5090 + half_len * sizeof(uint64_t));
5091 + if (error != half_len * sizeof(uint64_t)) {
5097 + /* Change the pointers */
5099 + for (x = 0; x < half_len; x++)
5100 + lp[x] = cpu_to_gfs64(bn);
5102 + error = gfs_internal_write(dip, (char *)lp, start * sizeof(uint64_t),
5103 + half_len * sizeof(uint64_t));
5104 + if (error != half_len * sizeof(uint64_t)) {
5112 + /* Compute the divider */
5114 + divider = (start + half_len) << (32 - dip->i_di.di_depth);
5116 + /* Copy the entries */
5118 + dirent_first(dip, obh, &dent);
5122 + if (dirent_next(dip, obh, &next))
5125 + if (dent->de_inum.no_formal_ino &&
5126 + gfs32_to_cpu(dent->de_hash) < divider) {
5127 + name_len = gfs16_to_cpu(dent->de_name_len);
5129 + error = gfs_dirent_alloc(dip, nbh, name_len, &new);
5130 + GFS_ASSERT_INODE(!error, dip,);
5132 + new->de_inum = dent->de_inum; /* No endianness worries */
5133 + new->de_hash = dent->de_hash; /* No endianness worries */
5134 + new->de_type = dent->de_type; /* No endianness worries */
5135 + memcpy((char *)(new + 1), (char *)(dent + 1),
5138 + nleaf->lf_entries = gfs16_to_cpu(nleaf->lf_entries) + 1;
5139 + nleaf->lf_entries = cpu_to_gfs16(nleaf->lf_entries);
5141 + dirent_del(dip, obh, prev, dent);
5143 + GFS_ASSERT_INODE(gfs16_to_cpu(oleaf->lf_entries), dip,);
5144 + oleaf->lf_entries = gfs16_to_cpu(oleaf->lf_entries) - 1;
5145 + oleaf->lf_entries = cpu_to_gfs16(oleaf->lf_entries);
5158 + /* If none of the entries got moved into the new leaf,
5159 + artificially fill in the first entry. */
5162 + error = gfs_dirent_alloc(dip, nbh, 0, &new);
5163 + GFS_ASSERT_INODE(!error, dip,);
5164 + new->de_inum.no_formal_ino = 0;
5167 + oleaf->lf_depth = gfs16_to_cpu(oleaf->lf_depth) + 1;
5168 + oleaf->lf_depth = cpu_to_gfs16(oleaf->lf_depth);
5169 + nleaf->lf_depth = oleaf->lf_depth;
5171 + error = gfs_get_inode_buffer(dip, &dibh);
5172 + GFS_ASSERT_INODE(!error, dip,); /* Pinned in gfs_internal_write() */
5174 + dip->i_di.di_blocks++;
5176 + gfs_dinode_out(&dip->i_di, dibh->b_data);
5195 + * dir_double_exhash - Double size of ExHash table
5196 + * @dip: The GFS dinode
5198 + * Returns: 0 on success, error code on failure
5202 +dir_double_exhash(struct gfs_inode *dip)
5204 + struct gfs_sbd *sdp = dip->i_sbd;
5205 + struct buffer_head *dibh;
5208 + uint64_t *from, *to;
5213 + hsize = 1 << dip->i_di.di_depth;
5214 + GFS_ASSERT_INODE(hsize * sizeof(uint64_t) == dip->i_di.di_size, dip,);
5216 + /* Allocate both the "from" and "to" buffers in one big chunk */
5218 + buf = gmalloc(3 * sdp->sd_hash_bsize);
5220 + for (block = dip->i_di.di_size >> sdp->sd_hash_bsize_shift; block--;) {
5221 + error = gfs_internal_read(dip, (char *)buf,
5222 + block * sdp->sd_hash_bsize,
5223 + sdp->sd_hash_bsize);
5224 + if (error != sdp->sd_hash_bsize) {
5231 + to = (uint64_t *)((char *)buf + sdp->sd_hash_bsize);
5233 + for (x = sdp->sd_hash_ptrs; x--; from++) {
5234 + *to++ = *from; /* No endianess worries */
5238 + error = gfs_internal_write(dip, (char *)buf + sdp->sd_hash_bsize,
5239 + block * sdp->sd_sb.sb_bsize,
5240 + sdp->sd_sb.sb_bsize);
5241 + if (error != sdp->sd_sb.sb_bsize) {
5250 + error = gfs_get_inode_buffer(dip, &dibh);
5251 + GFS_ASSERT_INODE(!error, dip,); /* Pinned in gfs_internal_write() */
5253 + dip->i_di.di_depth++;
5255 + gfs_dinode_out(&dip->i_di, dibh->b_data);
5267 + * compare_dents - compare directory entries by hash value
5271 + * When comparing the hash entries of @a to @b:
5278 +compare_dents(const void *a, const void *b)
5280 + struct gfs_dirent *dent_a, *dent_b;
5281 + uint32_t hash_a, hash_b;
5284 + dent_a = *(struct gfs_dirent **)a;
5285 + hash_a = dent_a->de_hash;
5286 + hash_a = gfs32_to_cpu(hash_a);
5288 + dent_b = *(struct gfs_dirent **)b;
5289 + hash_b = dent_b->de_hash;
5290 + hash_b = gfs32_to_cpu(hash_b);
5292 + if (hash_a > hash_b)
5294 + else if (hash_a < hash_b)
5297 + unsigned int len_a = gfs16_to_cpu(dent_a->de_name_len);
5298 + unsigned int len_b = gfs16_to_cpu(dent_b->de_name_len);
5300 + if (len_a > len_b)
5302 + else if (len_a < len_b)
5305 + ret = memcmp((char *)(dent_a + 1),
5306 + (char *)(dent_b + 1),
5314 + * do_filldir_main - read out directory entries
5315 + * @dip: The GFS inode
5316 + * @offset: The offset in the file to read from
5317 + * @opaque: opaque data to pass to filldir
5318 + * @filldir: The function to pass entries to
5319 + * @darr: an array of struct gfs_dirent pointers to read
5320 + * @entries: the number of entries in darr
 5321 + * @copied: pointer to int that's non-zero if an entry has been copied out
 5323 + * Jump through some hoops to make sure that if there are hash collisions,
5324 + * they are read out at the beginning of a buffer. We want to minimize
5325 + * the possibility that they will fall into different readdir buffers or
5326 + * that someone will want to seek to that location.
5328 + * Returns: 0 on success, -EXXX on failure, >0 on exception from filldir
5332 +do_filldir_main(struct gfs_inode *dip, uint64_t *offset,
5333 + void *opaque, gfs_filldir_t filldir,
5334 + struct gfs_dirent **darr, uint32_t entries, int *copied)
5336 + struct gfs_dirent *dent, *dent_next;
5337 + struct gfs_inum inum;
5338 + uint64_t off, off_next;
5339 + unsigned int x, y;
5343 + gfs_sort(darr, entries, sizeof(struct gfs_dirent *), compare_dents);
5345 + dent_next = darr[0];
5346 + off_next = gfs32_to_cpu(dent_next->de_hash);
5347 + off_next = gfs_dir_hash2offset(off_next);
5349 + for (x = 0, y = 1; x < entries; x++, y++) {
5353 + if (y < entries) {
5354 + dent_next = darr[y];
5355 + off_next = gfs32_to_cpu(dent_next->de_hash);
5356 + off_next = gfs_dir_hash2offset(off_next);
5358 + if (off < *offset)
5362 + if (off_next == off) {
5363 + if (*copied && !run)
5369 + if (off < *offset)
5374 + gfs_inum_in(&inum, (char *)&dent->de_inum);
5376 + error = filldir(opaque, (char *)(dent + 1),
5377 + gfs16_to_cpu(dent->de_name_len),
5379 + gfs16_to_cpu(dent->de_type));
5386 + /* Increment the *offset by one, so the next time we come into the do_filldir fxn,
5387 + we get the next entry instead of the last one in the current leaf */
5395 + * do_filldir_single - Read directory entries out of a single block
5396 + * @dip: The GFS inode
5397 + * @offset: The offset in the file to read from
5398 + * @opaque: opaque data to pass to filldir
5399 + * @filldir: The function to pass entries to
5401 + * @entries: the number of entries in the block
 5402 + * @copied: pointer to int that's non-zero if an entry has been copied out
5404 + * Returns: 0 on success, -EXXX on failure, >0 on exception from filldir
5408 +do_filldir_single(struct gfs_inode *dip, uint64_t *offset,
5409 + void *opaque, gfs_filldir_t filldir,
5410 + struct buffer_head *bh, uint32_t entries, int *copied)
5412 + struct gfs_dirent **darr;
5413 + struct gfs_dirent *de;
5414 + unsigned int e = 0;
5420 + darr = gmalloc(entries * sizeof(struct gfs_dirent *));
5422 + dirent_first(dip, bh, &de);
5424 + if (!de->de_inum.no_formal_ino)
5428 + while (dirent_next(dip, bh, &de) == 0);
5430 + GFS_ASSERT_INODE(e == entries, dip,);
5432 + error = do_filldir_main(dip, offset, opaque, filldir, darr,
5441 + * do_filldir_multi - Read directory entries out of a linked leaf list
5442 + * @dip: The GFS inode
5443 + * @offset: The offset in the file to read from
5444 + * @opaque: opaque data to pass to filldir
5445 + * @filldir: The function to pass entries to
5446 + * @bh: the first leaf in the list
 5447 + * @copied: pointer to int that's non-zero if an entry has been copied out
5449 + * Returns: 0 on success, -EXXX on failure, >0 on exception from filldir
5453 +do_filldir_multi(struct gfs_inode *dip, uint64_t *offset,
5454 + void *opaque, gfs_filldir_t filldir,
5455 + struct buffer_head *bh, int *copied)
5457 + struct buffer_head **larr = NULL;
5458 + struct gfs_dirent **darr;
5459 + struct gfs_leaf *leaf;
5460 + struct buffer_head *tmp_bh;
5461 + struct gfs_dirent *de;
5462 + unsigned int entries, e = 0;
5463 + unsigned int leaves = 0, l = 0;
5468 + /* Count leaves and entries */
5470 + leaf = (struct gfs_leaf *)bh->b_data;
5471 + entries = gfs16_to_cpu(leaf->lf_entries);
5472 + ln = leaf->lf_next;
5475 + ln = gfs64_to_cpu(ln);
5477 + error = get_leaf(dip, ln, &tmp_bh);
5481 + leaf = (struct gfs_leaf *)tmp_bh->b_data;
5482 + if (leaf->lf_entries) {
5483 + entries += gfs16_to_cpu(leaf->lf_entries);
5486 + ln = leaf->lf_next;
5491 + /* Bail out if there's nothing to do */
5496 + /* Alloc arrays */
5499 + larr = gmalloc(leaves * sizeof(struct buffer_head *));
5501 + darr = gmalloc(entries * sizeof(struct gfs_dirent *));
5503 + /* Fill in arrays */
5505 + leaf = (struct gfs_leaf *)bh->b_data;
5506 + if (leaf->lf_entries) {
5507 + dirent_first(dip, bh, &de);
5509 + if (!de->de_inum.no_formal_ino)
5513 + while (dirent_next(dip, bh, &de) == 0);
5515 + ln = leaf->lf_next;
5518 + ln = gfs64_to_cpu(ln);
5520 + error = get_leaf(dip, ln, &tmp_bh);
5524 + leaf = (struct gfs_leaf *)tmp_bh->b_data;
5525 + if (leaf->lf_entries) {
5526 + dirent_first(dip, tmp_bh, &de);
5528 + if (!de->de_inum.no_formal_ino)
5532 + while (dirent_next(dip, tmp_bh, &de) == 0);
5534 + larr[l++] = tmp_bh;
5536 + ln = leaf->lf_next;
5538 + ln = leaf->lf_next;
5543 + GFS_ASSERT_INODE(l == leaves, dip,);
5544 + GFS_ASSERT_INODE(e == entries, dip,);
5548 + error = do_filldir_main(dip, offset, opaque, filldir, darr,
5556 + for (x = 0; x < l; x++)
5567 + * @dip: The GFS inode
5575 +dir_e_search(struct gfs_inode *dip, struct qstr *filename,
5576 + struct gfs_inum *inum, unsigned int *type)
5578 + struct buffer_head *bh;
5579 + struct gfs_dirent *dent;
5582 + error = linked_leaf_search(dip, filename, &dent, NULL, &bh);
5587 + gfs_inum_in(inum, (char *)&dent->de_inum);
5589 + *type = gfs16_to_cpu(dent->de_type);
5598 + * @dip: The GFS inode
5606 +dir_e_add(struct gfs_inode *dip, struct qstr *filename,
5607 + struct gfs_inum *inum, unsigned int type)
5609 + struct gfs_sbd *sdp = dip->i_sbd;
5610 + struct buffer_head *bh, *nbh, *dibh;
5611 + struct gfs_leaf *leaf, *nleaf;
5612 + struct gfs_dirent *dent;
5613 + uint32_t hsize, index;
5615 + uint64_t leaf_no, bn;
5619 + hsize = 1 << dip->i_di.di_depth;
5620 + GFS_ASSERT_INODE(hsize * sizeof(uint64_t) == dip->i_di.di_size, dip,);
5622 + /* Figure out the address of the leaf node. */
5624 + hash = gfs_dir_hash(filename->name, filename->len);
5625 + index = hash >> (32 - dip->i_di.di_depth);
5627 + error = get_leaf_nr(dip, index, &leaf_no);
5631 + /* Add entry to the leaf */
5634 + error = get_leaf(dip, leaf_no, &bh);
5638 + leaf = (struct gfs_leaf *)bh->b_data;
5640 + if (gfs_dirent_alloc(dip, bh, filename->len, &dent)) {
5642 + if (gfs16_to_cpu(leaf->lf_depth) < dip->i_di.di_depth) {
5643 + /* Can we split the leaf? */
5647 + error = dir_split_leaf(dip, index, leaf_no);
5653 + } else if (dip->i_di.di_depth < GFS_DIR_MAX_DEPTH) {
5654 + /* Can we double the hash table? */
5658 + error = dir_double_exhash(dip);
5664 + } else if (leaf->lf_next) {
5665 + /* Can we try the next leaf in the list? */
5666 + leaf_no = gfs64_to_cpu(leaf->lf_next);
5671 + /* Create a new leaf and add it to the list. */
5673 + error = gfs_metaalloc(dip, &bn);
5679 + error = gfs_dread(sdp, bn, dip->i_gl,
5680 + DIO_NEW | DIO_START | DIO_WAIT,
5687 + gfs_trans_add_bh(dip->i_gl, nbh);
5688 + gfs_metatype_set(sdp, nbh, GFS_METATYPE_LF,
5690 + gfs_buffer_clear_tail(nbh,
5691 + sizeof(struct gfs_meta_header));
5693 + gfs_trans_add_bh(dip->i_gl, bh);
5694 + leaf->lf_next = cpu_to_gfs64(bn);
5696 + nleaf = (struct gfs_leaf *)nbh->b_data;
5697 + nleaf->lf_depth = leaf->lf_depth;
5698 + nleaf->lf_dirent_format = cpu_to_gfs32(GFS_FORMAT_DE);
5700 + if (gfs_dirent_alloc(dip, nbh, filename->len, &dent))
5701 + GFS_ASSERT_INODE(FALSE, dip,);
5703 + dip->i_di.di_blocks++;
5712 + /* If the gfs_dirent_alloc() succeeded, it pinned the "bh". */
5714 + gfs_inum_out(inum, (char *)&dent->de_inum);
5715 + dent->de_hash = cpu_to_gfs32(hash);
5716 + dent->de_type = cpu_to_gfs16(type);
5717 + memcpy((char *)(dent + 1), filename->name, filename->len);
5719 + leaf->lf_entries = gfs16_to_cpu(leaf->lf_entries) + 1;
5720 + leaf->lf_entries = cpu_to_gfs16(leaf->lf_entries);
5724 + error = gfs_get_inode_buffer(dip, &dibh);
5728 + dip->i_di.di_entries++;
5729 + dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
5731 + gfs_trans_add_bh(dip->i_gl, dibh);
5732 + gfs_dinode_out(&dip->i_di, dibh->b_data);
5743 + * @dip: The GFS inode
5750 +dir_e_del(struct gfs_inode *dip, struct qstr *filename)
5752 + struct buffer_head *bh, *dibh;
5753 + struct gfs_dirent *dent, *prev;
5754 + struct gfs_leaf *leaf;
5755 + unsigned int entries;
5758 + error = linked_leaf_search(dip, filename, &dent, &prev, &bh);
5759 + GFS_ASSERT_INODE(error != -ENOENT, dip,);
5763 + dirent_del(dip, bh, prev, dent); /* Pins bh */
5765 + leaf = (struct gfs_leaf *)bh->b_data;
5766 + entries = gfs16_to_cpu(leaf->lf_entries);
5767 + GFS_ASSERT_INODE(entries, dip,);
5769 + leaf->lf_entries = cpu_to_gfs16(entries);
5773 + error = gfs_get_inode_buffer(dip, &dibh);
5777 + GFS_ASSERT_INODE(dip->i_di.di_entries, dip,);
5778 + dip->i_di.di_entries--;
5779 + dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
5781 + gfs_trans_add_bh(dip->i_gl, dibh);
5782 + gfs_dinode_out(&dip->i_di, dibh->b_data);
5789 + * dir_e_read - Reads the entries from a directory into a filldir buffer
5790 + * @dip: dinode pointer
5791 + * @offset: the hash of the last entry read shifted to the right once
5792 + * @opaque: buffer for the filldir function to fill
5793 + * @filldir: points to the filldir function to use
5798 +dir_e_read(struct gfs_inode *dip, uint64_t *offset, void *opaque,
5799 + gfs_filldir_t filldir)
5801 + struct gfs_sbd *sdp = dip->i_sbd;
5802 + struct buffer_head *bh;
5803 + struct gfs_leaf leaf;
5804 + uint32_t hsize, len;
5805 + uint32_t ht_offset, lp_offset, ht_offset_cur = -1;
5806 + uint32_t hash, index;
5808 + int copied = FALSE;
5811 + hsize = 1 << dip->i_di.di_depth;
5812 + GFS_ASSERT_INODE(hsize * sizeof(uint64_t) == dip->i_di.di_size, dip,);
5814 + hash = gfs_dir_offset2hash(*offset);
5815 + index = hash >> (32 - dip->i_di.di_depth);
5817 + lp = gmalloc(sdp->sd_hash_bsize);
5819 + while (index < hsize) {
5820 + lp_offset = index & (sdp->sd_hash_ptrs - 1);
5821 + ht_offset = index - lp_offset;
5823 + if (ht_offset_cur != ht_offset) {
5824 + error = gfs_internal_read(dip, (char *)lp,
5825 + ht_offset * sizeof(uint64_t),
5826 + sdp->sd_hash_bsize);
5827 + if (error != sdp->sd_hash_bsize) {
5832 + ht_offset_cur = ht_offset;
5835 + error = get_leaf(dip, gfs64_to_cpu(lp[lp_offset]), &bh);
5839 + gfs_leaf_in(&leaf, bh->b_data);
5842 + error = do_filldir_multi(dip, offset,
5846 + error = do_filldir_single(dip, offset,
5848 + bh, leaf.lf_entries,
5859 + len = 1 << (dip->i_di.di_depth - leaf.lf_depth);
5860 + index = (index & ~(len - 1)) + len;
5871 + * @dip: The GFS inode
5879 +dir_e_mvino(struct gfs_inode *dip, struct qstr *filename,
5880 + struct gfs_inum *inum, unsigned int new_type)
5882 + struct buffer_head *bh, *dibh;
5883 + struct gfs_dirent *dent;
5886 + error = linked_leaf_search(dip, filename, &dent, NULL, &bh);
5887 + GFS_ASSERT_INODE(error != -ENOENT, dip,);
5891 + gfs_trans_add_bh(dip->i_gl, bh);
5893 + gfs_inum_out(inum, (char *)&dent->de_inum);
5894 + dent->de_type = cpu_to_gfs16(new_type);
5898 + error = gfs_get_inode_buffer(dip, &dibh);
5902 + dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
5904 + gfs_trans_add_bh(dip->i_gl, dibh);
5905 + gfs_dinode_out(&dip->i_di, dibh->b_data);
5913 + * @dip: The GFS inode
5921 +dir_l_search(struct gfs_inode *dip, struct qstr *filename,
5922 + struct gfs_inum *inum, unsigned int *type)
5924 + struct buffer_head *dibh;
5925 + struct gfs_dirent *dent;
5928 + GFS_ASSERT_INODE(gfs_is_stuffed(dip), dip,);
5930 + error = gfs_get_inode_buffer(dip, &dibh);
5934 + error = leaf_search(dip, dibh, filename, &dent, NULL);
5937 + gfs_inum_in(inum, (char *)&dent->de_inum);
5939 + *type = gfs16_to_cpu(dent->de_type);
5949 + * @dip: The GFS inode
5958 +dir_l_add(struct gfs_inode *dip, struct qstr *filename,
5959 + struct gfs_inum *inum, unsigned int type)
5961 + struct buffer_head *dibh;
5962 + struct gfs_dirent *dent;
5965 + GFS_ASSERT_INODE(gfs_is_stuffed(dip), dip,);
5967 + error = gfs_get_inode_buffer(dip, &dibh);
5971 + if (gfs_dirent_alloc(dip, dibh, filename->len, &dent)) {
5974 + error = dir_make_exhash(dip);
5976 + error = dir_e_add(dip, filename, inum, type);
5981 + /* gfs_dirent_alloc() pins */
5983 + gfs_inum_out(inum, (char *)&dent->de_inum);
5984 + dent->de_hash = gfs_dir_hash(filename->name, filename->len);
5985 + dent->de_hash = cpu_to_gfs32(dent->de_hash);
5986 + dent->de_type = cpu_to_gfs16(type);
5987 + memcpy((char *)(dent + 1), filename->name, filename->len);
5989 + dip->i_di.di_entries++;
5990 + dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
5992 + gfs_dinode_out(&dip->i_di, dibh->b_data);
6000 + * @dip: The GFS inode
6007 +dir_l_del(struct gfs_inode *dip, struct qstr *filename)
6009 + struct buffer_head *dibh;
6010 + struct gfs_dirent *dent, *prev;
6013 + GFS_ASSERT_INODE(gfs_is_stuffed(dip), dip,);
6015 + error = gfs_get_inode_buffer(dip, &dibh);
6019 + error = leaf_search(dip, dibh, filename, &dent, &prev);
6020 + GFS_ASSERT_INODE(!error, dip,);
6022 + dirent_del(dip, dibh, prev, dent);
6024 + /* dirent_del() pins */
6026 + GFS_ASSERT_INODE(dip->i_di.di_entries, dip,);
6027 + dip->i_di.di_entries--;
6029 + dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
6031 + gfs_dinode_out(&dip->i_di, dibh->b_data);
6049 +dir_l_read(struct gfs_inode *dip, uint64_t *offset, void *opaque,
6050 + gfs_filldir_t filldir)
6052 + struct buffer_head *dibh;
6053 + int copied = FALSE;
6056 + GFS_ASSERT_INODE(gfs_is_stuffed(dip), dip,);
6058 + if (!dip->i_di.di_entries)
6061 + error = gfs_get_inode_buffer(dip, &dibh);
6065 + error = do_filldir_single(dip, offset,
6067 + dibh, dip->i_di.di_entries,
6087 +dir_l_mvino(struct gfs_inode *dip, struct qstr *filename,
6088 + struct gfs_inum *inum, unsigned int new_type)
6090 + struct buffer_head *dibh;
6091 + struct gfs_dirent *dent;
6094 + GFS_ASSERT_INODE(gfs_is_stuffed(dip), dip,);
6096 + error = gfs_get_inode_buffer(dip, &dibh);
6100 + error = leaf_search(dip, dibh, filename, &dent, NULL);
6101 + GFS_ASSERT_INODE(!error, dip,);
6103 + gfs_trans_add_bh(dip->i_gl, dibh);
6105 + gfs_inum_out(inum, (char *)&dent->de_inum);
6106 + dent->de_type = cpu_to_gfs16(new_type);
6108 + dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
6110 + gfs_dinode_out(&dip->i_di, dibh->b_data);
6118 + * gfs_dir_search - Search a directory
6119 + * @dip: The GFS inode
6123 + * This routine searches a directory for a file or another directory.
6124 + * Assumes a glock is held on dip.
6126 + * Returns: 0 if found (inum/type filled in), -EXXXX on failure.
6130 +gfs_dir_search(struct gfs_inode *dip, struct qstr *filename,
6131 + struct gfs_inum *inum, unsigned int *type)
6135 + GFS_ASSERT_INODE(dip->i_di.di_type == GFS_FILE_DIR, dip,);
6137 + if (dip->i_di.di_flags & GFS_DIF_EXHASH)
6138 + error = dir_e_search(dip, filename, inum, type);
6140 + error = dir_l_search(dip, filename, inum, type);
6146 + * gfs_dir_add - Add new filename into directory
6147 + * @dip: The GFS inode
6148 + * @filename: The new name
6149 + * @inode: The inode number of the entry
6150 + * @type: The type of the entry
6152 + * Returns: 0 on success, error code on failure
6156 +gfs_dir_add(struct gfs_inode *dip, struct qstr *filename,
6157 + struct gfs_inum *inum, unsigned int type)
6161 + GFS_ASSERT_INODE(dip->i_di.di_type == GFS_FILE_DIR, dip,);
6163 + if (dip->i_di.di_flags & GFS_DIF_EXHASH)
6164 + error = dir_e_add(dip, filename, inum, type);
6166 + error = dir_l_add(dip, filename, inum, type);
6172 + * gfs_dir_del - Delete a directory entry
6173 + * @dip: The GFS inode
6174 + * @filename: The filename
6176 + * Returns: 0 on success, error code on failure
6180 +gfs_dir_del(struct gfs_inode *dip, struct qstr *filename)
6184 + GFS_ASSERT_INODE(dip->i_di.di_type == GFS_FILE_DIR, dip,);
6186 + if (dip->i_di.di_flags & GFS_DIF_EXHASH)
6187 + error = dir_e_del(dip, filename);
6189 + error = dir_l_del(dip, filename);
6195 + * gfs_dir_read - Read entries from a directory into a filldir buffer
6196 + * @dip: The GFS inode
6201 + * Returns: 0 on success, error code otherwise
6205 +gfs_dir_read(struct gfs_inode *dip, uint64_t * offset, void *opaque,
6206 + gfs_filldir_t filldir)
6210 + GFS_ASSERT_INODE(dip->i_di.di_type == GFS_FILE_DIR, dip,);
6212 + if (dip->i_di.di_flags & GFS_DIF_EXHASH)
6213 + error = dir_e_read(dip, offset, opaque, filldir);
6215 + error = dir_l_read(dip, offset, opaque, filldir);
6221 + * gfs_dir_mvino - Change inode number of directory entry
6222 + * @dip: The GFS inode
6226 + * This routine changes the inode number of a directory entry. It's used
6227 + * by rename to change ".." when a directory is moved.
6228 + * Assumes a glock is held on dvp.
6230 + * Returns: 0 on success, -EXXXX on failure
6234 +gfs_dir_mvino(struct gfs_inode *dip, struct qstr *filename,
6235 + struct gfs_inum *inum, unsigned int new_type)
6239 + GFS_ASSERT_INODE(dip->i_di.di_type == GFS_FILE_DIR, dip,);
6241 + if (dip->i_di.di_flags & GFS_DIF_EXHASH)
6242 + error = dir_e_mvino(dip, filename, inum, new_type);
6244 + error = dir_l_mvino(dip, filename, inum, new_type);
6250 + * foreach_leaf - call a function for each leaf in a directory
6251 + * @dip: the directory
6252 + * @lc: the function to call for each leaf
6253 + * @data: private data to pass to it
6255 + * Returns: 0 on success, -EXXX on failure
6259 +foreach_leaf(struct gfs_inode *dip, leaf_call_t lc, void *data)
6261 + struct gfs_sbd *sdp = dip->i_sbd;
6262 + struct buffer_head *bh;
6263 + struct gfs_leaf leaf;
6264 + uint32_t hsize, len;
6265 + uint32_t ht_offset, lp_offset, ht_offset_cur = -1;
6266 + uint32_t index = 0;
6271 + GFS_ASSERT_INODE(dip->i_di.di_flags & GFS_DIF_EXHASH, dip,);
6272 + hsize = 1 << dip->i_di.di_depth;
6273 + GFS_ASSERT_INODE(hsize * sizeof(uint64_t) == dip->i_di.di_size, dip,);
6275 + lp = gmalloc(sdp->sd_hash_bsize);
6277 + while (index < hsize) {
6278 + lp_offset = index & (sdp->sd_hash_ptrs - 1);
6279 + ht_offset = index - lp_offset;
6281 + if (ht_offset_cur != ht_offset) {
6282 + error = gfs_internal_read(dip, (char *)lp,
6283 + ht_offset * sizeof(uint64_t),
6284 + sdp->sd_hash_bsize);
6285 + if (error != sdp->sd_hash_bsize) {
6290 + ht_offset_cur = ht_offset;
6293 + leaf_no = gfs64_to_cpu(lp[lp_offset]);
6295 + error = get_leaf(dip, leaf_no, &bh);
6298 + gfs_leaf_in(&leaf, bh->b_data);
6301 + len = 1 << (dip->i_di.di_depth - leaf.lf_depth);
6303 + error = lc(dip, index, len, leaf_no, data);
6307 + index = (index & ~(len - 1)) + len;
6312 + GFS_ASSERT_INODE(index == hsize, dip,);
6321 + * leaf_free - Deallocate a directory leaf
6322 + * @dip: the directory
6323 + * @index: the hash table offset in the directory
6324 + * @len: the number of pointers to this leaf
6325 + * @leaf_no: the leaf number
6328 + * Returns: 0 on success, -EXXX on failure
6332 +leaf_free(struct gfs_inode *dip,
6333 + uint32_t index, uint32_t len,
6334 + uint64_t leaf_no, void *data)
6336 + struct gfs_sbd *sdp = dip->i_sbd;
6337 + struct gfs_holder ri_gh;
6338 + struct gfs_leaf tmp_leaf;
6339 + struct gfs_rgrp_list rlist;
6340 + struct buffer_head *bh, *dibh;
6342 + unsigned int rg_blocks = 0;
6344 + unsigned int x, size = len * sizeof(uint64_t);
6347 + memset(&rlist, 0, sizeof(struct gfs_rgrp_list));
6349 + ht = gmalloc(size);
6350 + memset(ht, 0, size);
6352 + gfs_alloc_get(dip);
6354 + error = gfs_quota_hold_m(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
6358 + error = gfs_rindex_hold(sdp, &ri_gh);
6362 + /* Count the number of leaves */
6364 + for (blk = leaf_no; blk; blk = tmp_leaf.lf_next) {
6365 + error = get_leaf(dip, blk, &bh);
6368 + gfs_leaf_in(&tmp_leaf, (bh)->b_data);
6371 + gfs_rlist_add(sdp, &rlist, blk);
6374 + gfs_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
6376 + error = gfs_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
6380 + for (x = 0; x < rlist.rl_rgrps; x++) {
6381 + struct gfs_rgrpd *rgd;
6382 + rgd = gl2rgd(rlist.rl_ghs[x].gh_gl);
6383 + rg_blocks += rgd->rd_ri.ri_length;
6386 + /* Trans may require:
6387 + All the bitmaps that were reserved.
6388 + One block for the dinode.
6389 + All the hash blocks that will be changed.
6390 + One block for a quota change. */
6392 + error = gfs_trans_begin(sdp,
6393 + rg_blocks + 1 + (DIV_RU(size, sdp->sd_jbsize) + 1),
6396 + goto fail_rg_gunlock;
6398 + for (blk = leaf_no; blk; blk = tmp_leaf.lf_next) {
6399 + error = get_leaf(dip, blk, &bh);
6401 + goto fail_end_trans;
6402 + gfs_leaf_in(&tmp_leaf, bh->b_data);
6405 + gfs_metafree(dip, blk, 1);
6407 + dip->i_di.di_blocks--;
6410 + error = gfs_internal_write(dip, ht, index * sizeof(uint64_t), size);
6411 + if (error != size) {
6414 + goto fail_end_trans;
6417 + error = gfs_get_inode_buffer(dip, &dibh);
6419 + goto fail_end_trans;
6421 + gfs_trans_add_bh(dip->i_gl, dibh);
6422 + gfs_dinode_out(&dip->i_di, dibh->b_data);
6425 + gfs_trans_end(sdp);
6427 + gfs_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
6428 + gfs_rlist_free(&rlist);
6429 + gfs_glock_dq_uninit(&ri_gh);
6430 + gfs_quota_unhold_m(dip);
6431 + gfs_alloc_put(dip);
6437 + gfs_trans_end(sdp);
6440 + gfs_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
6443 + gfs_rlist_free(&rlist);
6444 + gfs_glock_dq_uninit(&ri_gh);
6447 + gfs_quota_unhold_m(dip);
6450 + gfs_alloc_put(dip);
6457 + * gfs_dir_exhash_free - free all the leaf blocks in a directory
6458 + * @dip: the directory
6460 + * Returns: 0 on success, -EXXX on failure
6464 +gfs_dir_exhash_free(struct gfs_inode *dip)
6466 + struct gfs_sbd *sdp = dip->i_sbd;
6467 + struct buffer_head *bh;
6470 + GFS_ASSERT_INODE(dip->i_di.di_type == GFS_FILE_DIR, dip,);
6472 + error = foreach_leaf(dip, leaf_free, NULL);
6476 + /* Make this a regular file in case we crash.
6477 + (We don't want to free these blocks a second time.) */
6479 + error = gfs_trans_begin(sdp, 1, 0);
6483 + error = gfs_get_inode_buffer(dip, &bh);
6487 + gfs_trans_add_bh(dip->i_gl, bh);
6488 + ((struct gfs_dinode *)bh->b_data)->di_type = cpu_to_gfs16(GFS_FILE_REG);
6492 + gfs_trans_end(sdp);
6497 + gfs_trans_end(sdp);
6502 + * gfs_diradd_alloc_required - figure out if an entry addition is going to require an allocation
6503 + * @ip: the file being written to
6504 + * @filename: the filename that's going to be added
6505 + * @alloc_required: the int is set to TRUE if an alloc is required, FALSE otherwise
6507 + * Returns: 0 on success, -EXXX on error
6511 +gfs_diradd_alloc_required(struct gfs_inode *dip, struct qstr *filename,
6512 + int *alloc_required)
6514 + struct buffer_head *bh = NULL, *bh_next;
6515 + uint32_t hsize, hash, index;
6518 + *alloc_required = FALSE;
6520 + GFS_ASSERT_INODE(dip->i_di.di_type == GFS_FILE_DIR, dip,);
6522 + if (dip->i_di.di_flags & GFS_DIF_EXHASH) {
6523 + hsize = 1 << dip->i_di.di_depth;
6524 + GFS_ASSERT_INODE(hsize * sizeof(uint64_t) == dip->i_di.di_size,
6527 + hash = gfs_dir_hash(filename->name, filename->len);
6528 + index = hash >> (32 - dip->i_di.di_depth);
6530 + error = get_first_leaf(dip, index, &bh_next);
6540 + if (dirent_fits(dip, bh, filename->len))
6543 + error = get_next_leaf(dip, bh, &bh_next);
6544 + if (error == -ENOENT) {
6545 + *alloc_required = TRUE;
6554 + error = gfs_get_inode_buffer(dip, &bh);
6558 + if (!dirent_fits(dip, bh, filename->len))
6559 + *alloc_required = TRUE;
6568 + * do_gdm - copy out one leaf (or list of leaves)
6569 + * @dip: the directory
6570 + * @index: the hash table offset in the directory
6571 + * @len: the number of pointers to this leaf
6572 + * @leaf_no: the leaf number
6573 + * @data: a pointer to a struct gfs_user_buffer structure
6575 + * Returns: 0 on success, -EXXX on failure
6579 +do_gdm(struct gfs_inode *dip, uint32_t index, uint32_t len, uint64_t leaf_no,
6582 + struct gfs_user_buffer *ub = (struct gfs_user_buffer *)data;
6583 + struct gfs_leaf leaf;
6584 + struct buffer_head *bh;
6588 + for (blk = leaf_no; blk; blk = leaf.lf_next) {
6589 + error = get_leaf(dip, blk, &bh);
6593 + gfs_leaf_in(&leaf, bh->b_data);
6595 + error = gfs_add_bh_to_ub(ub, bh);
6607 + * gfs_get_dir_meta - return all the leaf blocks of a directory
6608 + * @dip: the directory
6609 + * @ub: the structure representing the meta
6611 + * Returns: 0 on success, -EXXX on failure
6615 +gfs_get_dir_meta(struct gfs_inode *dip, struct gfs_user_buffer *ub)
6617 + GFS_ASSERT_INODE(dip->i_di.di_type == GFS_FILE_DIR, dip,);
6618 + return foreach_leaf(dip, do_gdm, ub);
6620 diff -urN linux-orig/fs/gfs/dir.h linux-patched/fs/gfs/dir.h
6621 --- linux-orig/fs/gfs/dir.h 1969-12-31 18:00:00.000000000 -0600
6622 +++ linux-patched/fs/gfs/dir.h 2004-06-30 13:27:49.335712986 -0500
6624 +/******************************************************************************
6625 +*******************************************************************************
6627 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
6628 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
6630 +** This copyrighted material is made available to anyone wishing to use,
6631 +** modify, copy, or redistribute it subject to the terms and conditions
6632 +** of the GNU General Public License v.2.
6634 +*******************************************************************************
6635 +******************************************************************************/
6637 +#ifndef __DIR_DOT_H__
6638 +#define __DIR_DOT_H__
6641 + * gfs_filldir_t - Report a directory entry to the caller of gfs_dir_read()
6642 + * @opaque: opaque data used by the function
6643 + * @name: the name of the directory entry
6644 + * @length: the length of the name
6645 + * @offset: the entry's offset in the directory
6646 + * @inum: the inode number the entry points to
6647 + * @type: the type of inode the entry points to
6649 + * Returns: 0 on success, 1 if buffer full
6652 +typedef int (*gfs_filldir_t) (void *opaque,
6653 + const char *name, unsigned int length,
6655 + struct gfs_inum *inum, unsigned int type);
6657 +int gfs_filecmp(struct qstr *file1, char *file2, int len_of_file2);
6658 +int gfs_dirent_alloc(struct gfs_inode *dip, struct buffer_head *bh,
6659 + int name_len, struct gfs_dirent **dent_out);
6661 +int gfs_dir_search(struct gfs_inode *dip, struct qstr *filename,
6662 + struct gfs_inum *inum, unsigned int *type);
6663 +int gfs_dir_add(struct gfs_inode *dip, struct qstr *filename,
6664 + struct gfs_inum *inum, unsigned int type);
6665 +int gfs_dir_del(struct gfs_inode *dip, struct qstr *filename);
6666 +int gfs_dir_read(struct gfs_inode *dip, uint64_t * offset, void *opaque,
6667 + gfs_filldir_t filldir);
6668 +int gfs_dir_mvino(struct gfs_inode *dip, struct qstr *filename,
6669 + struct gfs_inum *new_inum, unsigned int new_type);
6671 +int gfs_dir_exhash_free(struct gfs_inode *dip);
6673 +int gfs_diradd_alloc_required(struct gfs_inode *dip, struct qstr *filename,
6674 + int *alloc_required);
6676 +int gfs_get_dir_meta(struct gfs_inode *ip, struct gfs_user_buffer *ub);
6678 +#endif /* __DIR_DOT_H__ */
6679 diff -urN linux-orig/fs/gfs/eattr.c linux-patched/fs/gfs/eattr.c
6680 --- linux-orig/fs/gfs/eattr.c 1969-12-31 18:00:00.000000000 -0600
6681 +++ linux-patched/fs/gfs/eattr.c 2004-06-30 13:27:49.337712522 -0500
6683 +/******************************************************************************
6684 +*******************************************************************************
6686 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
6687 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
6689 +** This copyrighted material is made available to anyone wishing to use,
6690 +** modify, copy, or redistribute it subject to the terms and conditions
6691 +** of the GNU General Public License v.2.
6693 +*******************************************************************************
6694 +******************************************************************************/
6696 +#include <linux/sched.h>
6697 +#include <linux/slab.h>
6698 +#include <linux/smp_lock.h>
6699 +#include <linux/spinlock.h>
6700 +#include <asm/semaphore.h>
6701 +#include <linux/completion.h>
6702 +#include <linux/buffer_head.h>
6703 +#include <asm/uaccess.h>
6704 +#include <linux/xattr_acl.h>
6717 +#define GFS_EA_REC_LEN(x) gfs32_to_cpu((x)->ea_rec_len)
6718 +#define GFS_EA_NAME(x) ((char *)(x) + sizeof(struct gfs_ea_header))
6719 +#define GFS_EA_DATA_PTRS(x) ((uint64_t *)((char *)(x) + sizeof(struct gfs_ea_header) + (((x)->ea_name_len + 7) & ~7)))
6721 +#define GFS_EA_NEXT(x) (struct gfs_ea_header *)((char *)(x) + GFS_EA_REC_LEN(x))
6722 +#define GFS_EA_FREESPACE(x) (struct gfs_ea_header *)((char *)(x) + GFS_EA_SIZE(x))
6724 +#define GFS_EAREQ_IS_STUFFED(x, y) (((sizeof(struct gfs_ea_header) + (x)->es_data_len + (x)->es_name_len + 7) & ~7) <= y)
6726 +#define GFS_EADATA_NUM_PTRS(x, y) (((x) + (y) - 1) / (y))
6728 +#define GFS_EA_SIZE(x) ((sizeof(struct gfs_ea_header) + (x)->ea_name_len + (GFS_EA_IS_UNSTUFFED(x)? (8 * (x)->ea_num_ptrs) : GFS_EA_DATA_LEN(x)) + 7) & ~ 7)
6730 +#define GFS_EACMD_VALID(x) ((x) <= GFS_EACMD_REMOVE)
6732 +#define GFS_EA_IS_LAST(x) ((x)->ea_flags & GFS_EAFLAG_LAST)
6734 +#define GFS_EA_STRLEN(x) ((x)->ea_name_len + 1 + (((x)->ea_type == GFS_EATYPE_USR)? 5 : 7))
6736 +#define GFS_FIRST_EA(x) ((struct gfs_ea_header *) ((x)->b_data + sizeof(struct gfs_meta_header)))
6739 +#define EA_DEALLOC 2
6741 +static struct buffer_head *alloc_eattr_blk(struct gfs_sbd *sdp,
6742 + struct gfs_inode *alloc_ip,
6743 + struct gfs_inode *ip,
6744 + uint64_t * block);
6747 + * can_replace - returns true if ea is large enough to hold the data in
6751 +static __inline__ int
6752 +can_replace(struct gfs_ea_header *ea, struct gfs_easet_io *req,
6753 + uint32_t avail_size)
6756 + GFS_EA_REC_LEN(ea) - sizeof (struct gfs_ea_header) -
6759 + if (GFS_EAREQ_IS_STUFFED(req, avail_size) && !GFS_EA_IS_UNSTUFFED(ea))
6760 + return (req->es_data_len <= data_space);
6762 + return (GFS_EADATA_NUM_PTRS(req->es_data_len, avail_size) <=
6767 + * get_req_size - returns the actual number of bytes the request will take up
6768 + * (not counting any unstuffed data blocks)
6771 +static __inline__ uint32_t
6772 +get_req_size(struct gfs_easet_io *req, uint32_t avail_size)
6775 + ((sizeof (struct gfs_ea_header) + req->es_data_len +
6776 + req->es_name_len + 7) & ~7);
6778 + if (size <= avail_size)
6781 + return ((sizeof (struct gfs_ea_header) + req->es_name_len + 7) & ~7) +
6782 + (8 * GFS_EADATA_NUM_PTRS(req->es_data_len, avail_size));
6786 + * gfs_ea_write_permission - decides if the user has permission to write to
6788 + * @req: the write request
6789 + * @ip: inode of file with the ea
6791 + * Returns: 0 on success, -EXXX on error
6795 +gfs_ea_write_permission(struct gfs_easet_io *req, struct gfs_inode *ip)
6797 + struct inode *inode = gfs_iget(ip, NO_CREATE);
6800 + GFS_ASSERT_INODE(inode, ip,);
6802 + if (req->es_type == GFS_EATYPE_USR) {
6803 + if (!S_ISREG(inode->i_mode) &&
6804 + (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
6807 + error = permission(inode, MAY_WRITE, NULL);
6808 + if (error == -EACCES)
6811 + } else if (req->es_type == GFS_EATYPE_SYS) {
6812 + if (IS_ACCESS_ACL(req->es_name, req->es_name_len))
6813 + error = gfs_validate_acl(ip, req->es_data,
6814 + req->es_data_len, 1);
6815 + else if (IS_DEFAULT_ACL(req->es_name, req->es_name_len))
6816 + error = gfs_validate_acl(ip, req->es_data,
6817 + req->es_data_len, 0);
6819 + if (!capable(CAP_SYS_ADMIN))
6823 + error = -EOPNOTSUPP;
6831 + * gfs_ea_read_permission - decides if the user has permission to read from
6833 + * @req: the read request
6834 + * @ip: inode of file with the ea
6836 + * Returns: 0 on success, -EXXX on error
6840 +gfs_ea_read_permission(struct gfs_eaget_io *req, struct gfs_inode *ip)
6842 + struct inode *inode = gfs_iget(ip, NO_CREATE);
6845 + GFS_ASSERT_INODE(inode, ip,);
6847 + if (req->eg_type == GFS_EATYPE_USR){
6848 + error = permission(inode, MAY_READ, NULL);
6849 + if (error == -EACCES)
6852 + else if (req->eg_type == GFS_EATYPE_SYS) {
6853 + if (IS_ACCESS_ACL(req->eg_name, req->eg_name_len) ||
6854 + IS_DEFAULT_ACL(req->eg_name, req->eg_name_len))
6857 + if (!capable(CAP_SYS_ADMIN))
6861 + error = -EOPNOTSUPP;
6869 + * gfs_es_memcpy - gfs memcpy wrapper with a return value
6874 +gfs_ea_memcpy(void *dest, void *src, unsigned long size)
6876 + memcpy(dest, src, size);
6881 + * gfs_ea_copy_to_user - copy_to_user wrapper
6885 +gfs_ea_copy_to_user(void *dest, void *src, unsigned long size)
6888 + error = (copy_to_user(dest, src, size)) ? -EFAULT : 0;
6893 + * Returns: 1 if find_direct_eattr should stop checking (if the eattr was found
6894 + * location will be set)
6895 + * 0 if find_eattr should keep on checking
6899 +find_direct_eattr(struct gfs_inode *ip, uint64_t blkno, char *name,
6900 + int name_len, int type, struct gfs_ea_location *location)
6903 + struct buffer_head *bh;
6904 + struct gfs_sbd *sdp = ip->i_sbd;
6905 + struct gfs_ea_header *curr, *prev = NULL;
6907 + err = gfs_dread(sdp, blkno, ip->i_gl, DIO_START | DIO_WAIT, &bh);
6910 + gfs_metatype_check(sdp, bh, GFS_METATYPE_EA);
6912 + (struct gfs_ea_header *) ((bh)->b_data +
6913 + sizeof (struct gfs_meta_header));
6914 + if (curr->ea_type == GFS_EATYPE_UNUSED) {
6915 + if (GFS_EA_IS_LAST(curr))
6917 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
6919 + curr = GFS_EA_NEXT(curr);
6921 + if (type != curr->ea_type && ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) {
6922 + if (type == GFS_EATYPE_SYS)
6927 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
6929 + if (type == curr->ea_type && name_len == curr->ea_name_len &&
6930 + !memcmp(name, GFS_EA_NAME(curr), name_len)) {
6931 + location->bh = bh;
6932 + location->ea = curr;
6933 + location->prev = prev;
6937 + if (GFS_EA_IS_LAST(curr))
6940 + curr = GFS_EA_NEXT(curr);
6951 + * find_eattr - find a matching eattr
6953 + * Returns: 1 if ea found, 0 if no ea found, -EXXX on error
6956 +find_eattr(struct gfs_inode *ip, char *name, int name_len, int type,
6957 + struct gfs_ea_location *location)
6960 + struct buffer_head *bh;
6961 + struct gfs_sbd *sdp = ip->i_sbd;
6962 + uint64_t *eablk, *end;
6964 + memset(location, 0, sizeof (struct gfs_ea_location));
6966 + if (ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) {
6968 + gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl,
6969 + DIO_START | DIO_WAIT, &bh);
6972 + gfs_metatype_check(sdp, bh, GFS_METATYPE_IN);
6974 + (uint64_t *) ((bh)->b_data + sizeof (struct gfs_indirect));
6977 + ((sdp->sd_sb.sb_bsize - sizeof (struct gfs_indirect)) / 8);
6978 + while (eablk < end && *eablk) {
6980 + find_direct_eattr(ip, gfs64_to_cpu(*eablk), name,
6981 + name_len, type, location);
6982 + if (err || location->ea)
6991 + find_direct_eattr(ip, ip->i_di.di_eattr, name, name_len,
6997 + return (location->ea != NULL);
7004 +make_space(struct gfs_inode *ip, struct buffer_head *bh, uint32_t size,
7005 + uint64_t blkno, struct gfs_ea_location *avail)
7007 + struct gfs_sbd *sdp = ip->i_sbd;
7008 + uint32_t free_size, avail_size;
7009 + struct gfs_ea_header *ea, *new_ea;
7013 + avail_size = sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header);
7014 + ea = GFS_FIRST_EA(bh);
7015 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(ea), ip,);
7016 + if (ea->ea_type == GFS_EATYPE_UNUSED) {
7017 + free_size = GFS_EA_REC_LEN(ea);
7018 + ea = GFS_EA_NEXT(ea);
7020 + while (free_size < size) {
7021 + free_size += (GFS_EA_REC_LEN(ea) - GFS_EA_SIZE(ea));
7022 + if (GFS_EA_IS_LAST(ea))
7024 + ea = GFS_EA_NEXT(ea);
7026 + if (free_size < size)
7028 + buf = gmalloc(avail_size);
7030 + free_size = avail_size;
7031 + ea = GFS_FIRST_EA(bh);
7032 + if (ea->ea_type == GFS_EATYPE_UNUSED)
7033 + ea = GFS_EA_NEXT(ea);
7034 + new_ea = (struct gfs_ea_header *) buf;
7035 + new_ea->ea_flags = 0;
7036 + new_ea->ea_rec_len = cpu_to_gfs32(size);
7037 + new_ea->ea_num_ptrs = 0;
7038 + new_ea->ea_type = GFS_EATYPE_UNUSED;
7039 + free_size -= size;
7040 + new_ea = GFS_EA_NEXT(new_ea);
7042 + memcpy(new_ea, ea, GFS_EA_SIZE(ea));
7043 + if (GFS_EA_IS_LAST(ea))
7045 + new_ea->ea_rec_len = cpu_to_gfs32(GFS_EA_SIZE(ea));
7046 + free_size -= GFS_EA_SIZE(ea);
7047 + ea = GFS_EA_NEXT(ea);
7048 + new_ea = GFS_EA_NEXT(new_ea);
7050 + new_ea->ea_rec_len = cpu_to_gfs32(free_size);
7051 + memcpy(GFS_FIRST_EA(bh), buf, avail_size);
7053 + avail->ea = GFS_FIRST_EA(bh);
7054 + avail->prev = NULL;
7062 +expand_to_indirect(struct gfs_inode *alloc_ip, struct gfs_inode *ip,
7063 + struct buffer_head **bh)
7066 + struct gfs_sbd *sdp = ip->i_sbd;
7067 + struct buffer_head *bh1 = NULL, *bh2 = NULL, *indbh = NULL;
7068 + uint64_t blkno, *blkptr;
7069 + uint32_t free_size, avail_size;
7070 + struct gfs_ea_header *prev, *curr, *new_ea = NULL;
7072 + avail_size = sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header);
7073 + free_size = avail_size;
7074 + ip->i_di.di_flags |= GFS_DIF_EA_INDIRECT;
7075 + blkno = ip->i_di.di_eattr;
7076 + err = gfs_metaalloc(alloc_ip, &ip->i_di.di_eattr);
7079 + ip->i_di.di_blocks++;
7080 + err = gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl, DIO_NEW | DIO_START |
7081 + DIO_WAIT, &indbh);
7086 + gfs_trans_add_bh(ip->i_gl, indbh);
7087 + gfs_metatype_set(sdp, indbh, GFS_METATYPE_IN, GFS_FORMAT_IN);
7088 + memset((indbh)->b_data + sizeof (struct gfs_meta_header), 0,
7089 + sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header));
7090 + blkptr = (uint64_t *) ((indbh)->b_data + sizeof (struct gfs_indirect));
7091 + *blkptr++ = cpu_to_gfs64(blkno);
7093 + curr = GFS_FIRST_EA(bh1);
7094 + while (curr->ea_type != GFS_EATYPE_USR) {
7095 + if (GFS_EA_IS_LAST(curr))
7097 + free_size -= GFS_EA_REC_LEN(curr);
7099 + curr = GFS_EA_NEXT(curr);
7101 + if (!prev || prev->ea_type == GFS_EATYPE_UNUSED)
7103 + gfs_trans_add_bh(ip->i_gl, bh1);
7104 + prev->ea_rec_len = cpu_to_gfs32(GFS_EA_REC_LEN(prev) + free_size);
7105 + prev->ea_flags |= GFS_EAFLAG_LAST;
7106 + bh2 = alloc_eattr_blk(sdp, alloc_ip, ip, &blkno);
7111 + free_size = avail_size;
7112 + new_ea = GFS_FIRST_EA(bh2);
7114 + memcpy(new_ea, curr, GFS_EA_SIZE(curr));
7115 + if (GFS_EA_IS_LAST(curr))
7117 + new_ea->ea_rec_len = cpu_to_gfs32(GFS_EA_SIZE(curr));
7118 + free_size -= GFS_EA_SIZE(curr);
7119 + curr = GFS_EA_NEXT(curr);
7120 + new_ea = GFS_EA_NEXT(new_ea);
7122 + new_ea->ea_rec_len = cpu_to_gfs32(free_size);
7123 + *blkptr = cpu_to_gfs64(blkno);
7134 +find_direct_sys_space(struct gfs_inode *ip, int size, struct buffer_head *bh,
7135 + struct gfs_ea_location *avail)
7137 + struct gfs_ea_header *curr, *prev = NULL;
7139 + curr = GFS_FIRST_EA(bh);
7140 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
7141 + if (curr->ea_type == GFS_EATYPE_UNUSED) {
7142 + if (GFS_EA_REC_LEN(curr) >= size) {
7144 + avail->prev = NULL;
7149 + curr = GFS_EA_NEXT(curr);
7151 + while (curr->ea_type == GFS_EATYPE_SYS) {
7152 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
7153 + if (GFS_EA_REC_LEN(curr) >= GFS_EA_SIZE(curr) + size) {
7155 + avail->prev = prev;
7159 + if (GFS_EA_IS_LAST(curr))
7162 + curr = GFS_EA_NEXT(curr);
7164 + make_space(ip, bh, size, ip->i_di.di_eattr, avail);
7171 + * int find_indirect_space
7174 + * @blktype: returns the type of block GFS_EATYPE_...
7176 + * returns 0 on success, -EXXX on failure
7179 +find_indirect_space(struct gfs_inode *ip, uint64_t blkno, int type,
7180 + int size, struct gfs_ea_location *avail, int *blktype)
7183 + struct buffer_head *bh;
7184 + struct gfs_sbd *sdp = ip->i_sbd;
7185 + struct gfs_ea_header *curr, *prev = NULL;
7187 + err = gfs_dread(sdp, blkno, ip->i_gl, DIO_START | DIO_WAIT, &bh);
7190 + gfs_metatype_check(sdp, bh, GFS_METATYPE_EA);
7191 + curr = GFS_FIRST_EA(bh);
7192 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
7193 + if (curr->ea_type == GFS_EATYPE_UNUSED) {
7194 + if (GFS_EA_IS_LAST(curr)) {
7196 + avail->prev = NULL;
7198 + *blktype = GFS_EATYPE_UNUSED;
7202 + curr = GFS_EA_NEXT(curr);
7204 + if (type != curr->ea_type) {
7205 + *blktype = curr->ea_type;
7209 + if (prev && GFS_EA_REC_LEN(prev) >= size) {
7211 + avail->prev = NULL;
7216 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
7217 + if (GFS_EA_REC_LEN(curr) >= GFS_EA_SIZE(curr) + size) {
7219 + avail->prev = prev;
7223 + if (GFS_EA_IS_LAST(curr))
7226 + curr = GFS_EA_NEXT(curr);
7237 +find_indirect_sys_space(struct gfs_inode *alloc_ip, struct gfs_inode *ip,
7238 + int size, struct buffer_head *bh,
7239 + struct gfs_ea_location *avail)
7242 + struct gfs_sbd *sdp = ip->i_sbd;
7243 + uint64_t *eablk, *end, *first_usr_blk = NULL;
7247 + eablk = (uint64_t *) ((bh)->b_data + sizeof (struct gfs_indirect));
7249 + eablk + ((sdp->sd_sb.sb_bsize - sizeof (struct gfs_indirect)) / 8);
7251 + while (eablk < end && *eablk) {
7253 + find_indirect_space(ip, gfs64_to_cpu(*eablk),
7254 + GFS_EATYPE_SYS, size, avail, &blktype);
7257 + if (blktype == GFS_EATYPE_USR && !first_usr_blk)
7258 + first_usr_blk = eablk;
7260 + if (!first_usr_blk)
7262 + gfs_trans_add_bh(ip->i_gl, bh);
7264 + *eablk = *first_usr_blk;
7265 + *first_usr_blk = blkno;
7270 + if (eablk >= end) {
7274 + avail->bh = alloc_eattr_blk(sdp, alloc_ip, ip, &blkno);
7279 + avail->ea = GFS_FIRST_EA(avail->bh);
7280 + avail->prev = NULL;
7281 + gfs_trans_add_bh(ip->i_gl, bh);
7282 + if (first_usr_blk) {
7283 + *eablk = *first_usr_blk;
7284 + *first_usr_blk = cpu_to_gfs64(blkno);
7286 + *eablk = cpu_to_gfs64(blkno);
7293 +find_sys_space(struct gfs_inode *alloc_ip, struct gfs_inode *ip, int size,
7294 + struct gfs_ea_location *avail)
7297 + struct buffer_head *bh;
7298 + struct gfs_sbd *sdp = ip->i_sbd;
7301 + gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl, DIO_START | DIO_WAIT,
7306 + if (ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) {
7307 + gfs_metatype_check(sdp, bh, GFS_METATYPE_IN);
7308 + err = find_indirect_sys_space(alloc_ip, ip, size, bh, avail);
7310 + gfs_metatype_check(sdp, bh, GFS_METATYPE_EA);
7311 + find_direct_sys_space(ip, size, bh, avail);
7313 + err = expand_to_indirect(alloc_ip, ip, &bh);
7317 + find_indirect_sys_space(alloc_ip, ip, size, bh,
7323 + if (avail->bh != bh)
7331 +get_blk_type(struct gfs_inode *ip, uint64_t blkno, int *blktype)
7334 + struct gfs_sbd *sdp = ip->i_sbd;
7335 + struct buffer_head *bh;
7336 + struct gfs_ea_header *ea;
7338 + err = gfs_dread(sdp, blkno, ip->i_gl, DIO_START | DIO_WAIT, &bh);
7341 + gfs_metatype_check(sdp, bh, GFS_METATYPE_EA);
7342 + ea = GFS_FIRST_EA(bh);
7343 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(ea), ip,);
7344 + if (ea->ea_type == GFS_EATYPE_UNUSED) {
7345 + if (GFS_EA_IS_LAST(ea)) {
7346 + *blktype = GFS_EATYPE_UNUSED;
7349 + ea = GFS_EA_NEXT(ea);
7350 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(ea), ip,);
7352 + *blktype = ea->ea_type;
7362 +find_direct_usr_space(struct gfs_inode *ip, int size, struct buffer_head *bh,
7363 + struct gfs_ea_location *avail)
7365 + struct gfs_ea_header *curr, *prev = NULL;
7367 + curr = GFS_FIRST_EA(bh);
7368 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
7369 + if (curr->ea_type == GFS_EATYPE_UNUSED) {
7370 + if (GFS_EA_IS_LAST(curr)) {
7372 + avail->prev = NULL;
7377 + curr = GFS_EA_NEXT(curr);
7378 + if (curr->ea_type == GFS_EATYPE_USR
7379 + && GFS_EA_REC_LEN(prev) >= size) {
7381 + avail->prev = NULL;
7386 + while (curr->ea_type != GFS_EATYPE_USR) {
7387 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
7388 + if (GFS_EA_IS_LAST(curr))
7391 + curr = GFS_EA_NEXT(curr);
7394 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
7395 + if (GFS_EA_REC_LEN(curr) >= GFS_EA_SIZE(curr) + size) {
7397 + avail->prev = prev;
7401 + if (GFS_EA_IS_LAST(curr))
7404 + curr = GFS_EA_NEXT(curr);
7412 +find_indirect_usr_space(struct gfs_inode *ip, int size, struct buffer_head *bh,
7413 + struct gfs_ea_location *avail)
7416 + struct gfs_sbd *sdp = ip->i_sbd;
7417 + uint64_t *eablk, *end, *last_sys_blk = NULL, *first_usr_blk = NULL;
7421 + eablk = (uint64_t *) ((bh)->b_data + sizeof (struct gfs_indirect));
7423 + eablk + ((sdp->sd_sb.sb_bsize - sizeof (struct gfs_indirect)) / 8);
7425 + while (eablk < end && *eablk) {
7427 + find_indirect_space(ip, gfs64_to_cpu(*eablk),
7428 + GFS_EATYPE_USR, size, avail, &blktype);
7431 + if (blktype == GFS_EATYPE_SYS)
7432 + last_sys_blk = eablk;
7433 + if (blktype == GFS_EATYPE_USR && !first_usr_blk)
7434 + first_usr_blk = eablk;
7436 + if (first_usr_blk)
7438 + first_usr_blk = eablk + 1;
7439 + while (first_usr_blk < end && *first_usr_blk) {
7442 + gfs64_to_cpu(*first_usr_blk),
7444 + if (blktype == GFS_EATYPE_SYS)
7445 + last_sys_blk = first_usr_blk;
7446 + if (blktype == GFS_EATYPE_USR)
7450 + if (last_sys_blk > eablk) {
7451 + gfs_trans_add_bh(ip->i_gl, bh);
7453 + *eablk = *last_sys_blk;
7454 + *last_sys_blk = blkno;
7461 + if (eablk >= end) {
7465 + avail->bh = alloc_eattr_blk(sdp, ip, ip, &blkno);
7470 + avail->ea = GFS_FIRST_EA(avail->bh);
7471 + avail->prev = NULL;
7472 + gfs_trans_add_bh(ip->i_gl, bh);
7473 + *eablk = cpu_to_gfs64(blkno);
7480 +find_usr_space(struct gfs_inode *ip, int size, struct gfs_ea_location *avail)
7483 + struct buffer_head *bh;
7484 + struct gfs_sbd *sdp = ip->i_sbd;
7487 + gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl, DIO_START | DIO_WAIT,
7492 + if (ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) {
7493 + gfs_metatype_check(sdp, bh, GFS_METATYPE_IN);
7494 + err = find_indirect_usr_space(ip, size, bh, avail);
7496 + gfs_metatype_check(sdp, bh, GFS_METATYPE_EA);
7497 + find_direct_usr_space(ip, size, bh, avail);
7499 + err = expand_to_indirect(ip, ip, &bh);
7502 + err = find_indirect_usr_space(ip, size, bh, avail);
7507 + if (avail->bh != bh)
7515 +find_space(struct gfs_inode *ip, int size, int type,
7516 + struct gfs_ea_location *avail)
7520 + memset(avail, 0, sizeof (struct gfs_ea_location));
7522 + if (type == GFS_EATYPE_SYS)
7523 + err = find_sys_space(ip, ip, size, avail);
7525 + err = find_usr_space(ip, size, avail);
7531 +can_replace_in_block(struct gfs_inode *ip, int size,
7532 + struct gfs_ea_location found, struct gfs_ea_header **space)
7534 + struct gfs_ea_header *curr, *prev = NULL;
7537 + curr = GFS_FIRST_EA(found.bh);
7538 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
7539 + if (curr->ea_type == GFS_EATYPE_UNUSED) {
7540 + if (GFS_EA_REC_LEN(curr) >= size) {
7545 + curr = GFS_EA_NEXT(curr);
7548 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
7549 + if (curr == found.ea) {
7551 + * See if there will be enough space after the old version of the eattr
7555 + if (prev->ea_type == GFS_EATYPE_UNUSED) {
7556 + if (GFS_EA_REC_LEN(prev) +
7557 + GFS_EA_REC_LEN(curr) >= size) {
7561 + } else if (GFS_EA_REC_LEN(prev) +
7562 + GFS_EA_REC_LEN(curr) >=
7563 + GFS_EA_SIZE(prev) + size) {
7567 + } else if (GFS_EA_REC_LEN(curr) >= size) {
7571 + } else if (GFS_EA_REC_LEN(curr) >= GFS_EA_SIZE(curr) + size) {
7575 + if (GFS_EA_IS_LAST(curr))
7578 + curr = GFS_EA_NEXT(curr);
7582 + return (*space != NULL);
7586 + * read_unstuffed - actually copies the unstuffed data into the
7591 +read_unstuffed(void *dest, struct gfs_inode *ip, struct gfs_sbd *sdp,
7592 + struct gfs_ea_header *ea, uint32_t avail_size,
7593 + gfs_ea_copy_fn_t copy_fn)
7595 + struct buffer_head *bh[66]; /* This is the maximum number of data ptrs possible */
7597 + int max = GFS_EADATA_NUM_PTRS(GFS_EA_DATA_LEN(ea), avail_size);
7598 + int i, j, left = GFS_EA_DATA_LEN(ea);
7599 + char *outptr, *buf;
7600 + uint64_t *indptr = GFS_EA_DATA_PTRS(ea);
7602 + for (i = 0; i < max; i++) {
7604 + gfs_dread(sdp, gfs64_to_cpu(*indptr), ip->i_gl, DIO_START,
7608 + for (j = 0; j < i; j++)
7616 + for (i = 0; i < max; i++) {
7617 + err = gfs_dreread(sdp, bh[i], DIO_WAIT);
7619 + for (j = i; j < max; j++)
7623 + gfs_metatype_check(sdp, bh[i], GFS_METATYPE_EA);
7624 + buf = (bh[i])->b_data + sizeof (struct gfs_meta_header);
7626 + copy_fn(outptr, buf,
7627 + (avail_size > left) ? left : avail_size);
7629 + for (j = i; j < max; j++)
7633 + left -= avail_size;
7634 + outptr += avail_size;
7644 + * functionname - summary
7645 + * @param1: description
7646 + * @param2: description
7647 + * @param3: description
7649 + * Function description
7651 + * Returns: what is returned
7654 +get_ea(struct gfs_sbd *sdp, struct gfs_inode *ip, struct gfs_eaget_io *req,
7655 + gfs_ea_copy_fn_t copy_fn)
7658 + struct gfs_ea_location location;
7659 + uint32_t avail_size;
7661 + avail_size = sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header);
7663 + err = find_eattr(ip, req->eg_name, req->eg_name_len, req->eg_type,
7671 + if (req->eg_data_len) {
7672 + if (req->eg_data_len < GFS_EA_DATA_LEN(location.ea))
7674 + else if (GFS_EA_IS_UNSTUFFED(location.ea))
7676 + read_unstuffed(req->eg_data, ip, sdp, location.ea,
7677 + avail_size, copy_fn);
7679 + err = copy_fn(req->eg_data, GFS_EA_DATA(location.ea),
7680 + GFS_EA_DATA_LEN(location.ea));
7682 + err = GFS_EA_DATA_LEN(location.ea);
7684 + err = GFS_EA_DATA_LEN(location.ea);
7686 + brelse(location.bh);
7693 + * functionname - summary
7694 + * @param1: description
7695 + * @param2: description
7696 + * @param3: description
7698 + * Function description
7700 + * Returns: what is returned
7703 +struct gfs_ea_header *
7704 +prep_ea(struct gfs_ea_header *ea)
7706 + struct gfs_ea_header *new = ea;
7708 + if (ea->ea_type == GFS_EATYPE_UNUSED) {
7709 + if (GFS_EA_IS_LAST(ea))
7710 + ea->ea_flags = GFS_EAFLAG_LAST;
7714 + new = GFS_EA_FREESPACE(ea);
7716 + cpu_to_gfs32(GFS_EA_REC_LEN(ea) - GFS_EA_SIZE(ea));
7717 + ea->ea_rec_len = cpu_to_gfs32(GFS_EA_SIZE(ea));
7718 + if (GFS_EA_IS_LAST(ea)) {
7719 + ea->ea_flags &= ~GFS_EAFLAG_LAST;
7720 + new->ea_flags = GFS_EAFLAG_LAST;
7722 + new->ea_flags = 0;
7729 + * replace_ea - replaces the existing data with the request data
7732 +replace_ea(struct gfs_sbd *sdp, struct gfs_inode *ip, struct gfs_ea_header *ea,
7733 + struct gfs_easet_io *req)
7737 + uint32_t copy_size, data_left = req->es_data_len;
7738 + struct buffer_head *bh;
7739 + uint64_t *datablk = GFS_EA_DATA_PTRS(ea);
7740 + const char *dataptr = req->es_data;
7741 + uint32_t avail_size =
7742 + sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header);
7744 + ea->ea_data_len = cpu_to_gfs32(req->es_data_len);
7745 + if (!GFS_EA_IS_UNSTUFFED(ea))
7746 + memcpy(GFS_EA_DATA(ea), req->es_data, req->es_data_len);
7748 + for (i = 0; i < ea->ea_num_ptrs && data_left > 0; i++) {
7749 + err = gfs_dread(sdp, gfs64_to_cpu(*datablk), ip->i_gl,
7750 + DIO_START | DIO_WAIT, &bh);
7753 + gfs_trans_add_bh(ip->i_gl, bh);
7754 + gfs_metatype_check(sdp, bh, GFS_METATYPE_EA);
7756 + (data_left > avail_size) ? avail_size : data_left;
7757 + memcpy((bh)->b_data + sizeof (struct gfs_meta_header),
7758 + dataptr, copy_size);
7759 + dataptr += copy_size;
7760 + data_left -= copy_size;
7764 + GFS_ASSERT_INODE(data_left == 0, ip,
7766 + ("req->es_data_len = %u, ea->ea_num_ptrs = %d\n",
7767 + req->es_data_len, ea->ea_num_ptrs);
7776 + * write_ea - writes the request info to an ea, creating new blocks if
7779 + * @sdp: superblock pointer
7780 + * @alloc_ip: inode that has the blocks reserved for allocation
7781 + * @ip: inode that is being modified
7782 + * @ea: the location of the new ea in a block
7783 + * @req: the write request
7785 + * Note: does not update ea_rec_len or the GFS_EAFLAG_LAST bin of ea_flags
7787 + * returns : 0 on success, -EXXX on error
7791 +write_ea(struct gfs_sbd *sdp, struct gfs_inode *alloc_ip, struct gfs_inode *ip,
7792 + struct gfs_ea_header *ea, struct gfs_easet_io *req)
7797 + const char *dataptr;
7798 + uint32_t data_left, copy;
7799 + uint32_t avail_size =
7800 + sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header);
7802 + struct buffer_head *bh = NULL;
7804 + ea->ea_data_len = cpu_to_gfs32(req->es_data_len);
7805 + ea->ea_name_len = req->es_name_len;
7806 + ea->ea_type = req->es_type;
7809 + memcpy(GFS_EA_NAME(ea), req->es_name, req->es_name_len);
7811 + if (GFS_EAREQ_IS_STUFFED(req, avail_size)) {
7812 + ea->ea_num_ptrs = 0;
7813 + memcpy(GFS_EA_DATA(ea), req->es_data, req->es_data_len);
7815 + blkptr = GFS_EA_DATA_PTRS(ea);
7816 + dataptr = req->es_data;
7817 + data_left = req->es_data_len;
7819 + GFS_EADATA_NUM_PTRS(req->es_data_len, avail_size);
7821 + for (i = 0; i < ea->ea_num_ptrs; i++) {
7823 + alloc_eattr_blk(sdp, alloc_ip, ip,
7824 + &temp)) == NULL) {
7829 + (data_left > avail_size) ? avail_size : data_left;
7830 + memcpy((bh)->b_data + sizeof (struct gfs_meta_header),
7832 + *blkptr = cpu_to_gfs64(temp);
7834 + data_left -= copy;
7839 + GFS_ASSERT_INODE(!data_left, ip,);
7848 + * erase_ea_data_ptrs - deallocate all the unstuffed data blocks pointed to
7849 + * ea records in this block
7850 + * @sdp: the superblock
7852 + * @blk: the block to check for data pointers
7855 + * Returns: 0 on success, -EXXX on failure
7859 +erase_ea_data_ptrs(struct gfs_sbd *sdp, struct gfs_inode *ip,
7860 + struct buffer_head *dibh, uint64_t blk)
7862 + struct gfs_holder rgd_gh;
7864 + uint64_t *datablk;
7865 + struct buffer_head *eabh;
7867 + struct gfs_ea_header *ea;
7868 + struct gfs_rgrpd *rgd = NULL;
7870 + err = gfs_dread(sdp, blk, ip->i_gl, DIO_WAIT | DIO_START, &eabh);
7874 + gfs_metatype_check(sdp, eabh, GFS_METATYPE_EA);
7875 + buf = (eabh)->b_data + sizeof (struct gfs_meta_header);
7876 + ea = (struct gfs_ea_header *) buf;
7879 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(ea), ip,);
7880 + if (GFS_EA_IS_UNSTUFFED(ea)) {
7881 + datablk = GFS_EA_DATA_PTRS(ea);
7882 + rgd = gfs_blk2rgrpd(sdp, gfs64_to_cpu(*datablk));
7883 + GFS_ASSERT_INODE(rgd, ip,
7884 + printk("block = %" PRIu64 "\n",
7885 + gfs64_to_cpu(*datablk)););
7887 + gfs_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
7891 + /* Trans may require:
7892 + One block for the RG header. One block for each ea data block. One
7893 + One block for the dinode. One block for the current ea block.
7894 + One block for a quote change.
7897 + gfs_trans_begin(sdp,
7898 + 3 + ea->ea_num_ptrs, 1);
7900 + goto fail_glock_rg;
7901 + gfs_trans_add_bh(ip->i_gl, dibh);
7902 + for (i = 0; i < ea->ea_num_ptrs; i++, datablk++) {
7903 + gfs_metafree(ip, gfs64_to_cpu(*datablk), 1);
7904 + ip->i_di.di_blocks--;
7906 + ea->ea_num_ptrs = 0;
7907 + gfs_trans_add_bh(ip->i_gl, eabh);
7908 + gfs_dinode_out(&ip->i_di, (dibh)->b_data);
7909 + gfs_trans_end(sdp);
7910 + gfs_glock_dq_uninit(&rgd_gh);
7912 + if (GFS_EA_IS_LAST(ea))
7914 + ea = GFS_EA_NEXT(ea);
7922 + gfs_glock_dq_uninit(&rgd_gh);
7932 + * gfs_ea_dealloc - deallocate the extended attribute fork
7935 + * Returns: 0 on success, -EXXX on failure
7939 +gfs_ea_dealloc(struct gfs_inode *ip)
7941 + struct gfs_holder ri_gh, rgd_gh;
7943 + struct gfs_sbd *sdp = ip->i_sbd;
7944 + struct buffer_head *dibh, *indbh = NULL;
7945 + uint64_t *startblk, *eablk, *end, *next;
7948 + struct gfs_rgrpd *rgd = NULL;
7950 + if (!ip->i_di.di_eattr)
7953 + gfs_alloc_get(ip);
7955 + err = gfs_quota_hold_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
7959 + err = gfs_rindex_hold(sdp, &ri_gh);
7961 + goto out_unhold_q;
7963 + err = gfs_get_inode_buffer(ip, &dibh);
7965 + goto out_rindex_release;
7967 + if (ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) {
7969 + gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl,
7970 + DIO_WAIT | DIO_START, &indbh);
7974 + gfs_metatype_check(sdp, indbh, GFS_METATYPE_IN);
7977 + (uint64_t *) ((indbh)->b_data +
7978 + sizeof (struct gfs_indirect));
7981 + ((sdp->sd_sb.sb_bsize - sizeof (struct gfs_indirect)) / 8);
7983 + while (*eablk && eablk < end) {
7985 + erase_ea_data_ptrs(sdp, ip, dibh,
7986 + gfs64_to_cpu(*eablk));
7992 + startblk = eablk - 1;
7994 + (uint64_t *) ((indbh)->b_data +
7995 + sizeof (struct gfs_indirect));
7997 + while (startblk >= end) {
7998 + rgd = gfs_blk2rgrpd(sdp, gfs64_to_cpu(*startblk));
7999 + GFS_ASSERT_INODE(rgd, ip,);
8002 + next = eablk = startblk - 1;
8004 + while (eablk >= end) {
8006 + gfs_blk2rgrpd(sdp, gfs64_to_cpu(*eablk))) {
8007 + if (eablk != next) {
8019 + gfs_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
8022 + goto out_rindex_release;
8024 + /* Trans may require:
8025 + One block for the RG header. One block for each block from this
8026 + resource group. One block for the indirect ea block,
8027 + One block for the quote change */
8030 + gfs_trans_begin(sdp, 3 + num_blks,
8033 + goto out_gunlock_rg;
8035 + gfs_trans_add_bh(ip->i_gl, dibh);
8037 + while (startblk > next) {
8038 + gfs_metafree(ip, gfs64_to_cpu(*startblk), 1);
8039 + ip->i_di.di_blocks--;
8044 + gfs_trans_add_bh(ip->i_gl, indbh);
8045 + gfs_dinode_out(&ip->i_di, (dibh)->b_data);
8047 + gfs_trans_end(sdp);
8049 + gfs_glock_dq_uninit(&rgd_gh);
8055 + err = erase_ea_data_ptrs(sdp, ip, dibh, ip->i_di.di_eattr);
8057 + goto out_rindex_release;
8060 + rgd = gfs_blk2rgrpd(sdp, ip->i_di.di_eattr);
8061 + GFS_ASSERT_INODE(rgd, ip,
8062 + printk("block = %" PRIu64 "\n", ip->i_di.di_eattr);
8065 + err = gfs_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rgd_gh);
8067 + goto out_rindex_release;
8069 + err = gfs_trans_begin(sdp, 3, 1);
8071 + goto out_gunlock_rg;
8073 + gfs_metafree(ip, ip->i_di.di_eattr, 1);
8075 + ip->i_di.di_blocks--;
8076 + ip->i_di.di_eattr = 0;
8078 + gfs_trans_add_bh(ip->i_gl, dibh);
8079 + gfs_dinode_out(&ip->i_di, (dibh)->b_data);
8081 + gfs_trans_end(sdp);
8084 + gfs_glock_dq_uninit(&rgd_gh);
8093 + out_rindex_release:
8094 + gfs_glock_dq_uninit(&ri_gh);
8097 + gfs_quota_unhold_m(ip);
8100 + gfs_alloc_put(ip);
8108 + * functionname - summary
8109 + * @param1: description
8110 + * @param2: description
8111 + * @param3: description
8113 + * Function description
8115 + * Returns: what is returned
8119 +remove_ea(struct gfs_inode *ip, struct gfs_ea_header *ea,
8120 + struct gfs_ea_header *prev)
8122 + uint64_t *datablk;
8125 + if (GFS_EA_IS_UNSTUFFED(ea)) {
8126 + datablk = GFS_EA_DATA_PTRS(ea);
8127 + for (i = 0; i < ea->ea_num_ptrs; i++, datablk++) {
8128 + gfs_metafree(ip, gfs64_to_cpu(*datablk), 1);
8129 + ip->i_di.di_blocks--;
8133 + ea->ea_type = GFS_EATYPE_UNUSED;
8134 + ea->ea_num_ptrs = 0;
8136 + if (prev && prev != ea) {
8137 + prev->ea_rec_len =
8138 + cpu_to_gfs32(GFS_EA_REC_LEN(prev) + GFS_EA_REC_LEN(ea));
8139 + if (GFS_EA_IS_LAST(ea))
8140 + prev->ea_flags |= GFS_EAFLAG_LAST;
8145 +init_new_inode_eattr(struct gfs_inode *dip, struct gfs_inode *ip,
8146 + struct gfs_easet_io *req)
8149 + struct buffer_head *bh;
8150 + struct gfs_sbd *sdp = ip->i_sbd;
8151 + struct gfs_ea_header *ea;
8153 + err = gfs_metaalloc(dip, &ip->i_di.di_eattr);
8157 + err = gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl,
8158 + DIO_NEW | DIO_START | DIO_WAIT, &bh);
8162 + gfs_metatype_set(sdp, bh, GFS_METATYPE_EA, GFS_FORMAT_EA);
8164 + ip->i_di.di_blocks++;
8166 + ea = GFS_FIRST_EA(bh);
8167 + ea->ea_flags = GFS_EAFLAG_LAST;
8169 + cpu_to_gfs32(sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header));
8170 + ea->ea_num_ptrs = 0;
8171 + ea->ea_type = GFS_EATYPE_UNUSED;
8172 + err = write_ea(sdp, dip, ip, ea, req);
8176 + gfs_trans_add_bh(ip->i_gl, bh);
8186 +do_init_eattr(struct gfs_sbd *sdp, struct gfs_inode *ip,
8187 + struct gfs_easet_io *req)
8190 + struct buffer_head *bh;
8191 + struct gfs_ea_header *ea;
8193 + bh = alloc_eattr_blk(sdp, ip, ip, &ip->i_di.di_eattr);
8195 + ea = GFS_FIRST_EA(bh);
8196 + err = write_ea(sdp, ip, ip, ea, req);
8205 + * init_eattr - initializes a new eattr block
8209 +init_eattr(struct gfs_sbd *sdp, struct gfs_inode *ip, struct gfs_easet_io *req)
8212 + struct gfs_alloc *al;
8213 + uint32_t ea_metablks;
8214 + struct buffer_head *dibh;
8215 + struct posix_acl *acl = NULL;
8216 + uint32_t avail_size =
8217 + sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header);
8220 + GFS_EAREQ_IS_STUFFED(req,
8221 + avail_size) ? 1 : (1 +
8222 + GFS_EADATA_NUM_PTRS(req->
8226 + if (IS_ACCESS_ACL(req->es_name, req->es_name_len)){
8227 + acl = posix_acl_from_xattr(req->es_data, req->es_data_len);
8228 + if (IS_ERR(acl)) {
8229 + err = PTR_ERR(acl);
8234 + al = gfs_alloc_get(ip);
8236 + err = gfs_quota_lock_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
8240 + al->al_requested_meta = ea_metablks;
8242 + err = gfs_inplace_reserve(ip);
8244 + goto out_gunlock_q;
8246 + err = gfs_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
8250 + err = gfs_get_inode_buffer(ip, &dibh);
8254 + /* Trans may require:
8255 + A modified dinode, multiple EA metadata blocks, and all blocks for a RG
8259 + gfs_trans_begin(sdp,
8260 + 1 + ea_metablks + al->al_rgd->rd_ri.ri_length, 1);
8264 + err = do_init_eattr(sdp, ip, req);
8266 + goto out_end_trans;
8269 + gfs_acl_set_mode(ip, acl);
8271 + gfs_trans_add_bh(ip->i_gl, dibh);
8272 + gfs_dinode_out(&ip->i_di, (dibh)->b_data);
8275 + gfs_trans_end(sdp);
8281 + gfs_inplace_release(ip);
8284 + gfs_quota_unlock_m(ip);
8287 + gfs_alloc_put(ip);
8288 + posix_acl_release(acl);
8295 + * alloc_eattr_blk - allocates a new block for extended attributes.
8296 + * @sdp: A pointer to the superblock
8297 + * @alloc_ip: A pointer to the inode that has reserved the blocks for
8299 + * @ip: A pointer to the inode that's getting extended attributes
8300 + * @block: the block allocated
8302 + * Returns: the buffer head on success, NULL on failure
8305 +static struct buffer_head *
8306 +alloc_eattr_blk(struct gfs_sbd *sdp, struct gfs_inode *alloc_ip,
8307 + struct gfs_inode *ip, uint64_t * block)
8310 + struct buffer_head *bh = NULL;
8311 + struct gfs_ea_header *ea;
8313 + err = gfs_metaalloc(alloc_ip, block);
8318 + gfs_dread(sdp, *block, ip->i_gl, DIO_NEW | DIO_START | DIO_WAIT, &bh);
8322 + gfs_metatype_set(sdp, bh, GFS_METATYPE_EA, GFS_FORMAT_EA);
8324 + ip->i_di.di_blocks++;
8326 + ea = GFS_FIRST_EA(bh);
8327 + ea->ea_flags = GFS_EAFLAG_LAST;
8329 + cpu_to_gfs32(sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header));
8330 + ea->ea_num_ptrs = 0;
8331 + ea->ea_type = GFS_EATYPE_UNUSED;
8333 + gfs_trans_add_bh(ip->i_gl, bh);
8341 + * functionname - summary
8342 + * @param1: description
8343 + * @param2: description
8344 + * @param3: description
8346 + * Function description
8348 + * Returns: what is returned
8352 +list_direct_ea(struct gfs_sbd *sdp, struct gfs_inode *ip,
8353 + struct buffer_head *bh, struct gfs_eaget_io *req,
8354 + gfs_ea_copy_fn_t copy_fn, uint32_t * size)
8357 + struct gfs_ea_header *ea;
8361 + gfs_metatype_check(sdp, bh, GFS_METATYPE_EA);
8363 + ea = (struct gfs_ea_header *) ((bh)->b_data +
8364 + sizeof (struct gfs_meta_header));
8365 + if (ea->ea_type == GFS_EATYPE_UNUSED) {
8366 + if (GFS_EA_IS_LAST(ea))
8369 + ea = GFS_EA_NEXT(ea);
8373 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(ea), ip,);
8375 + if (req->eg_data_len) {
8376 + if (*size > req->eg_data_len) {
8382 + GFS_ASSERT_INODE(GFS_EATYPE_VALID(ea->ea_type), ip,);
8383 + if (ea->ea_type == GFS_EATYPE_USR) {
8384 + memcpy(ptr, "user.", 5);
8387 + memcpy(ptr, "system.", 7);
8390 + memcpy(ptr, GFS_EA_NAME(ea), ea->ea_name_len);
8391 + ptr += ea->ea_name_len;
8394 + copy_fn(req->eg_data + *size, buf,
8395 + GFS_EA_STRLEN(ea));
8400 + *size = *size + GFS_EA_STRLEN(ea);
8402 + if (GFS_EA_IS_LAST(ea))
8404 + ea = GFS_EA_NEXT(ea);
8413 + * functionname - summary
8414 + * @param1: description
8415 + * @param2: description
8416 + * @param3: description
8418 + * Function description
8420 + * Returns: what is returned
8424 +list_ea(struct gfs_sbd *sdp, struct gfs_inode *ip, struct gfs_eaget_io *req,
8425 + gfs_ea_copy_fn_t copy_fn)
8428 + struct buffer_head *bh, *eabh;
8429 + uint64_t *eablk, *end;
8430 + uint32_t size = 0;
8433 + gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl, DIO_START | DIO_WAIT,
8438 + if (ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) {
8439 + gfs_metatype_check(sdp, bh, GFS_METATYPE_IN);
8441 + (uint64_t *) ((bh)->b_data + sizeof (struct gfs_indirect));
8444 + ((sdp->sd_sb.sb_bsize - sizeof (struct gfs_indirect)) / 8);
8446 + while (*eablk && eablk < end) {
8448 + gfs_dread(sdp, gfs64_to_cpu(*eablk), ip->i_gl,
8449 + DIO_START | DIO_WAIT, &eabh);
8452 + err = list_direct_ea(sdp, ip, eabh, req, copy_fn, &size);
8459 + err = list_direct_ea(sdp, ip, bh, req, copy_fn, &size);
8476 + * gfs_get_eattr - read an extended attribute, or a list of ea names
8477 + * @sdp: pointer to the superblock
8478 + * @ip: pointer to the inode for the target file
8479 + * @req: the request information
8480 + * @copy_fn: the function to use to do the actual copying
8482 + * Returns: actual size of data on success, -EXXX on error
8485 +gfs_get_eattr(struct gfs_sbd *sdp, struct gfs_inode *ip,
8486 + struct gfs_eaget_io *req, gfs_ea_copy_fn_t copy_fn)
8488 + struct gfs_holder i_gh;
8491 + if (req->eg_name) {
8492 + err = gfs_ea_read_permission(req, ip);
8497 + /* This seems to be a read. Are we sure we don't want to acquire the lock in LM_ST_SHARED? */
8499 + err = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
8503 + if (ip->i_di.di_eattr == 0) {
8504 + if (!req->eg_name) {
8505 + if (!req->eg_data_len && req->eg_len) {
8506 + uint32_t no_data = 0;
8509 + copy_fn(req->eg_len, &no_data,
8510 + sizeof (uint32_t));
8519 + err = get_ea(sdp, ip, req, copy_fn);
8521 + err = list_ea(sdp, ip, req, copy_fn);
8524 + gfs_glock_dq_uninit(&i_gh);
8532 +do_set_ea(struct gfs_sbd *sdp, struct gfs_inode *ip, struct gfs_easet_io *req,
8533 + struct gfs_ea_location location)
8537 + uint32_t avail_size =
8538 + sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header);
8539 + struct gfs_ea_location space;
8541 + req_size = get_req_size(req, avail_size);
8543 + if (location.ea) {
8544 + struct gfs_ea_header *new_space;
8545 + if (req->es_cmd == GFS_EACMD_REMOVE) {
8546 + remove_ea(ip, location.ea, location.prev);
8547 + gfs_trans_add_bh(ip->i_gl, location.bh);
8550 + if (can_replace(location.ea, req, avail_size)) {
8551 + err = replace_ea(sdp, ip, location.ea, req);
8553 + gfs_trans_add_bh(ip->i_gl, location.bh);
8557 + * This part is kind of confusing. If the inode has direct EAs
8558 + * Then adding another EA can't run it out of space, so it is safe to
8559 + * delete the EA before looking for space. If the inode has indirect
8560 + * EAs, there may not be enough space left, so first you check for space
8561 + * and they you delete the EA.
8563 + if ((ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) == 0) {
8564 + remove_ea(ip, location.ea, location.prev);
8565 + err = find_space(ip, req_size, req->es_type, &space);
8568 + new_space = prep_ea(space.ea);
8569 + err = write_ea(sdp, ip, ip, new_space, req);
8571 + gfs_trans_add_bh(ip->i_gl, location.bh);
8572 + gfs_trans_add_bh(ip->i_gl, space.bh);
8577 + if (can_replace_in_block(ip, req_size, location, &new_space)) {
8578 + remove_ea(ip, location.ea, location.prev);
8579 + new_space = prep_ea(new_space);
8580 + err = write_ea(sdp, ip, ip, new_space, req);
8582 + gfs_trans_add_bh(ip->i_gl, location.bh);
8585 + err = find_space(ip, req_size, req->es_type, &space);
8587 + /* You can return a non IO error here. If there is no space left,
8588 + * you can return -ENOSPC. So you must not have added a buffer to
8589 + * the transaction yet.
8592 + remove_ea(ip, location.ea, location.prev);
8593 + new_space = prep_ea(space.ea);
8594 + err = write_ea(sdp, ip, ip, new_space, req);
8596 + gfs_trans_add_bh(ip->i_gl, location.bh);
8597 + gfs_trans_add_bh(ip->i_gl, space.bh);
8602 + err = find_space(ip, req_size, req->es_type, &space);
8604 + /* you can also get -ENOSPC here */
8606 + space.ea = prep_ea(space.ea);
8607 + err = write_ea(sdp, ip, ip, space.ea, req);
8609 + gfs_trans_add_bh(ip->i_gl, space.bh);
8617 +set_ea(struct gfs_sbd *sdp, struct gfs_inode *ip, struct gfs_easet_io *req,
8618 + struct gfs_ea_location location)
8621 + struct gfs_alloc *al;
8622 + struct gfs_rgrpd *rgd = NULL;
8623 + struct buffer_head *dibh;
8624 + uint32_t avail_size =
8625 + sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header);
8626 + int unstuffed_ea_blks = 0;
8627 + struct gfs_holder ri_gh, rgd_gh;
8628 + struct posix_acl *acl = NULL;
8630 + if (IS_ACCESS_ACL(req->es_name, req->es_name_len) && req->es_data){
8631 + acl = posix_acl_from_xattr(req->es_data, req->es_data_len);
8632 + if (IS_ERR(acl)) {
8633 + err = PTR_ERR(acl);
8638 + err = gfs_get_inode_buffer(ip, &dibh);
8641 + al = gfs_alloc_get(ip);
8643 + err = gfs_quota_lock_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
8648 + * worst case, you need to switch from direct to indirect, which can
8649 + * take up to 3 new blocks, and you need to create enough unstuffed data
8650 + * blocks to hold all the data
8652 + al->al_requested_meta = 3 + GFS_EADATA_NUM_PTRS(req->es_data_len, avail_size);
8654 + err = gfs_inplace_reserve(ip);
8656 + goto out_lock_quota;
8658 + err = gfs_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
8662 + if (location.ea && GFS_EA_IS_UNSTUFFED(location.ea)) {
8664 + * If there is an EA, we might need to delete it.
8665 + * Since all unstuffed data blocks are added at the same time,
8666 + * they are all from the same resource group.
8668 + err = gfs_rindex_hold(sdp, &ri_gh);
8672 + gfs_blk2rgrpd(sdp,
8673 + gfs64_to_cpu(*GFS_EA_DATA_PTRS(location.ea)));
8674 + GFS_ASSERT_INODE(rgd, ip,
8675 + printk("block = %" PRIu64 "\n",
8676 + gfs64_to_cpu(*GFS_EA_DATA_PTRS
8680 + gfs_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rgd_gh);
8683 + unstuffed_ea_blks = location.ea->ea_num_ptrs;
8687 + * The transaction may require:
8688 + * Modifying the dinode block, Modifying the indirect ea block,
8689 + * modifying an ea block, all the allocation blocks, all the blocks for
8690 + * a RG bitmap, the RG header block, a RG block for each unstuffed data
8691 + * block you might be deleting.
8693 + err = gfs_trans_begin(sdp, 4 + al->al_requested_meta +
8694 + al->al_rgd->rd_ri.ri_length + unstuffed_ea_blks,
8699 + err = do_set_ea(sdp, ip, req, location);
8703 + gfs_acl_set_mode(ip, acl);
8704 + gfs_trans_add_bh(ip->i_gl, dibh);
8705 + gfs_dinode_out(&ip->i_di, (dibh)->b_data);
8708 + gfs_trans_end(sdp);
8712 + gfs_glock_dq_uninit(&rgd_gh);
8716 + gfs_glock_dq_uninit(&ri_gh);
8719 + gfs_inplace_release(ip);
8722 + gfs_quota_unlock_m(ip);
8725 + gfs_alloc_put(ip);
8729 + posix_acl_release(acl);
8736 + * gfs_set_eattr - sets (or creates or replaces) an extended attribute
8737 + * @sdp: pointer to the superblock
8738 + * @ip: pointer to the inode of the target file
8739 + * @req: request information
8741 + * Returns: 0 on success -EXXX on error
8744 +gfs_set_eattr(struct gfs_sbd *sdp, struct gfs_inode *ip,
8745 + struct gfs_easet_io *req)
8747 + struct gfs_holder i_gh;
8749 + uint32_t req_size;
8750 + uint32_t avail_size =
8751 + sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header);
8752 + struct gfs_ea_location location;
8754 + if (!GFS_EACMD_VALID(req->es_cmd)) {
8755 + err = -EOPNOTSUPP;
8759 + if (strlen(req->es_name) == 0) {
8764 + err = gfs_ea_write_permission(req, ip);
8768 + if ((req_size = get_req_size(req, avail_size)) > avail_size) {
8769 + /* This can only happen with 512 byte blocks */
8773 + err = gfs_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
8777 + if (ip->i_di.di_eattr == 0) {
8778 + if (req->es_cmd == GFS_EACMD_REPLACE
8779 + || req->es_cmd == GFS_EACMD_REMOVE) {
8783 + err = init_eattr(sdp, ip, req);
8787 + err = find_eattr(ip, req->es_name, req->es_name_len, req->es_type,
8791 + if (err == 0 && (req->es_cmd == GFS_EACMD_REPLACE ||
8792 + req->es_cmd == GFS_EACMD_REMOVE)) {
8796 + err = set_ea(sdp, ip, req, location);
8800 + brelse(location.bh);
8803 + gfs_glock_dq_uninit(&i_gh);
8810 + * gfs_set_eattr_ioctl - creates, modifies, or removes an extended attribute.
8811 + * @sdp: pointer to the superblock
8812 + * @ip: a pointer to the gfs inode for the file
8813 + * @arg: a pointer to gfs_set_eattr_io_t struct with the request
8815 + * Notes: ioctl wrapper for gfs_set_eattr
8816 + * Returns: 0 on success, -EXXX on error
8820 +gfs_set_eattr_ioctl(struct gfs_sbd *sdp, struct gfs_inode *ip, void *arg)
8822 + struct gfs_easet_io req;
8824 + char *name = NULL;
8825 + char *data = NULL;
8827 + if (copy_from_user(&req, arg, sizeof (struct gfs_easet_io))) {
8832 + name = gmalloc(req.es_name_len);
8834 + if (req.es_data) {
8835 + data = gmalloc(req.es_data_len);
8837 + if (copy_from_user(data, req.es_data, req.es_data_len)) {
8842 + if (copy_from_user(name, req.es_name, req.es_name_len)) {
8846 + req.es_data = data;
8847 + req.es_name = name;
8848 + err = gfs_set_eattr(sdp, ip, &req);
8860 + * gfs_get_eattr_ioctl - gets the value for the requested attribute name,
8861 + * or a list of all the extended attribute names.
8862 + * @sdp: pointer to the superblock
8863 + * @ip: a pointer to the inode for the file
8864 + * @arg: a pointer to the struct gfs_eaget_io struct holding the request
8866 + * Notes: ioctl wrapper for the gfs_get_eattr function
8867 + * Returns: 0 on success, -EXXX on error.
8871 +gfs_get_eattr_ioctl(struct gfs_sbd *sdp, struct gfs_inode *ip, void *arg)
8873 + struct gfs_eaget_io req;
8875 + char *name = NULL;
8878 + if (copy_from_user(&req, arg, sizeof (struct gfs_eaget_io))) {
8883 + if (req.eg_name) {
8884 + name = gmalloc(req.eg_name_len);
8886 + if (copy_from_user(name, req.eg_name, req.eg_name_len)) {
8890 + req.eg_name = name;
8892 + result = gfs_get_eattr(sdp, ip, &req, gfs_ea_copy_to_user);
8898 + if (result >= 0) {
8901 + gfs_ea_copy_to_user(req.eg_len, &size, sizeof(uint32_t));
8910 + * gfs_get_direct_eattr_meta - add a direct eattr block and its data blocks to a user buffer
8911 + * @ip: the inode whose extended attribute metadata is being collected
8912 + * @ub: the user buffer structure the buffers are copied into
8913 + * @blk: the disk block number of the direct eattr block to read
8915 + * Reads the block at @blk, adds it to @ub, then walks each eattr header and adds any unstuffed data blocks it references
8917 + * Returns: 0 on success, -EXXX on error
8921 +gfs_get_direct_eattr_meta(struct gfs_inode *ip, struct gfs_user_buffer *ub,
8924 + struct gfs_sbd *sdp = ip->i_sbd;
8925 + struct buffer_head *databh, *bh;
8926 + struct gfs_ea_header *ea;
8927 + uint64_t *datablk;
8931 + error = gfs_dread(sdp, blk, ip->i_gl, DIO_START | DIO_WAIT, &bh);
8935 + error = gfs_add_bh_to_ub(ub, bh);
8937 + ea = (struct gfs_ea_header *) ((bh)->b_data +
8938 + sizeof (struct gfs_meta_header));
8940 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(ea), ip,);
8942 + datablk = GFS_EA_DATA_PTRS(ea);
8944 + for (i = 0; i < ea->ea_num_ptrs; i++) {
8946 + gfs_dread(sdp, gfs64_to_cpu(*datablk), ip->i_gl,
8947 + DIO_START | DIO_WAIT, &databh);
8951 + error = gfs_add_bh_to_ub(ub, databh);
8961 + if (GFS_EA_IS_LAST(ea))
8963 + ea = GFS_EA_NEXT(ea);
8975 + * gfs_get_eattr_meta - return all the eattr blocks of a file
8976 + * @ip: the inode of the file
8977 + * @ub: the structure representing the user buffer to copy to
8979 + * Returns: 0 on success, -EXXX on failure
8983 +gfs_get_eattr_meta(struct gfs_inode *ip, struct gfs_user_buffer *ub)
8985 + struct gfs_sbd *sdp = ip->i_sbd;
8986 + struct buffer_head *bh;
8988 + uint64_t *eablk, *end;
8990 + if (ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) {
8992 + gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl,
8993 + DIO_WAIT | DIO_START, &bh);
8997 + error = gfs_add_bh_to_ub(ub, bh);
9000 + (uint64_t *) ((bh)->b_data + sizeof (struct gfs_indirect));
9003 + ((sdp->sd_sb.sb_bsize - sizeof (struct gfs_indirect)) / 8);
9005 + while (*eablk && eablk < end) {
9007 + gfs_get_direct_eattr_meta(ip, ub,
9008 + gfs64_to_cpu(*eablk));
9017 + error = gfs_get_direct_eattr_meta(ip, ub, ip->i_di.di_eattr);
9023 diff -urN linux-orig/fs/gfs/eattr.h linux-patched/fs/gfs/eattr.h
9024 --- linux-orig/fs/gfs/eattr.h 1969-12-31 18:00:00.000000000 -0600
9025 +++ linux-patched/fs/gfs/eattr.h 2004-06-30 13:27:49.338712290 -0500
9027 +/******************************************************************************
9028 +*******************************************************************************
9030 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
9031 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
9033 +** This copyrighted material is made available to anyone wishing to use,
9034 +** modify, copy, or redistribute it subject to the terms and conditions
9035 +** of the GNU General Public License v.2.
9037 +*******************************************************************************
9038 +******************************************************************************/
9040 +#ifndef __EATTR_DOT_H__
9041 +#define __EATTR_DOT_H__
9043 +#define GFS_EA_MAY_WRITE 1
9044 +#define GFS_EA_MAY_READ 2
9046 +#define GFS_EA_DATA_LEN(x) gfs32_to_cpu((x)->ea_data_len)
9047 +#define GFS_EA_IS_UNSTUFFED(x) ((x)->ea_num_ptrs)
9048 +#define GFS_EA_DATA(x) ((char *)(x) + sizeof(struct gfs_ea_header) + (x)->ea_name_len)
9050 +struct gfs_ea_location {
9051 + struct buffer_head *bh;
9052 + struct gfs_ea_header *ea;
9053 + struct gfs_ea_header *prev;
9056 +#define GFS_POSIX_ACL_ACCESS "posix_acl_access"
9057 +#define GFS_POSIX_ACL_ACCESS_LEN 16
9058 +#define GFS_POSIX_ACL_DEFAULT "posix_acl_default"
9059 +#define GFS_POSIX_ACL_DEFAULT_LEN 17
9061 +#define IS_ACCESS_ACL(name, len) \
9062 + ((len) == GFS_POSIX_ACL_ACCESS_LEN && \
9063 + !memcmp(GFS_POSIX_ACL_ACCESS, (name), (len)))
9065 +#define IS_DEFAULT_ACL(name, len) \
9066 + ((len) == GFS_POSIX_ACL_DEFAULT_LEN && \
9067 + !memcmp(GFS_POSIX_ACL_DEFAULT, (name), (len)))
9069 +#define GFS_MAX_EA_ACL_BLKS 66 /* 65 for unstuffed data blocks, 1 for the ea
9072 +typedef int (*gfs_ea_copy_fn_t) (void *dest, void *src, unsigned long size);
9074 +int gfs_ea_memcpy(void *dest, void *src, unsigned long size);
9075 +int gfs_ea_copy_to_user(void *dest, void *src, unsigned long size);
9077 +int find_sys_space(struct gfs_inode *alloc_ip, struct gfs_inode *ip, int size,
9078 + struct gfs_ea_location *avail);
9080 +struct gfs_ea_header *prep_ea(struct gfs_ea_header *ea);
9082 +int write_ea(struct gfs_sbd *sdp, struct gfs_inode *alloc_ip,
9083 + struct gfs_inode *ip, struct gfs_ea_header *ea,
9084 + struct gfs_easet_io *req);
9086 +int gfs_get_eattr(struct gfs_sbd *sdp, struct gfs_inode *ip,
9087 + struct gfs_eaget_io *req, gfs_ea_copy_fn_t copy_fn);
9088 +int gfs_set_eattr(struct gfs_sbd *sdp, struct gfs_inode *ip,
9089 + struct gfs_easet_io *req);
9091 +int gfs_set_eattr_ioctl(struct gfs_sbd *sdp, struct gfs_inode *ip, void *arg);
9092 +int gfs_get_eattr_ioctl(struct gfs_sbd *sdp, struct gfs_inode *ip, void *arg);
9094 +int gfs_ea_dealloc(struct gfs_inode *ip);
9096 +int gfs_get_eattr_meta(struct gfs_inode *ip, struct gfs_user_buffer *ub);
9098 +int replace_ea(struct gfs_sbd *sdp, struct gfs_inode *ip,
9099 + struct gfs_ea_header *ea, struct gfs_easet_io *req);
9101 +int find_eattr(struct gfs_inode *ip, char *name, int name_len, int type,
9102 + struct gfs_ea_location *location);
9104 +int read_unstuffed(void *dest, struct gfs_inode *ip, struct gfs_sbd *sdp,
9105 + struct gfs_ea_header *ea, uint32_t avail_size,
9106 + gfs_ea_copy_fn_t copy_fn);
9108 +int get_ea(struct gfs_sbd *sdp, struct gfs_inode *ip, struct gfs_eaget_io *req,
9109 + gfs_ea_copy_fn_t copy_fn);
9111 +int init_new_inode_eattr(struct gfs_inode *dip, struct gfs_inode *ip,
9112 + struct gfs_easet_io *req);
9114 +int gfs_ea_read_permission(struct gfs_eaget_io *req, struct gfs_inode *ip);
9116 +#endif /* __EATTR_DOT_H__ */
9117 diff -urN linux-orig/fs/gfs/file.c linux-patched/fs/gfs/file.c
9118 --- linux-orig/fs/gfs/file.c 1969-12-31 18:00:00.000000000 -0600
9119 +++ linux-patched/fs/gfs/file.c 2004-06-30 13:27:49.339712058 -0500
9121 +/******************************************************************************
9122 +*******************************************************************************
9124 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
9125 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
9127 +** This copyrighted material is made available to anyone wishing to use,
9128 +** modify, copy, or redistribute it subject to the terms and conditions
9129 +** of the GNU General Public License v.2.
9131 +*******************************************************************************
9132 +******************************************************************************/
9134 +#include <linux/sched.h>
9135 +#include <linux/slab.h>
9136 +#include <linux/smp_lock.h>
9137 +#include <linux/spinlock.h>
9138 +#include <asm/semaphore.h>
9139 +#include <linux/completion.h>
9140 +#include <linux/buffer_head.h>
9141 +#include <asm/uaccess.h>
9151 + * gfs_copy2mem - Trivial copy function for gfs_readi()
9152 + * @bh: The buffer to copy from, or NULL meaning zero the buffer
9153 + * @buf: The buffer to copy/zero
9154 + * @offset: The offset in the buffer to copy from
9155 + * @size: The amount of data to copy/zero
9157 + * Returns: 0 on success, -EXXX on failure
9161 +gfs_copy2mem(struct buffer_head *bh, void **buf, unsigned int offset,
9162 + unsigned int size)
9164 + char **p = (char **)buf;
9167 + memcpy(*p, bh->b_data + offset, size);
9169 + memset(*p, 0, size);
9177 + * gfs_copy2user - Copy data to user space
9179 + * @buf: The destination of the data
9180 + * @offset: The offset into the buffer
9181 + * @size: The amount of data to copy
9183 + * Returns: 0 on success, -EXXX on failure
9187 +gfs_copy2user(struct buffer_head *bh, void **buf,
9188 + unsigned int offset, unsigned int size)
9190 + char **p = (char **)buf;
9194 + error = copy_to_user(*p, bh->b_data + offset, size);
9196 + error = clear_user(*p, size);
9207 + * gfs_readi - Read a file
9208 + * @ip: The GFS Inode
9209 + * @buf: The buffer to place result into
9210 + * @offset: File offset to begin reading from
9211 + * @size: Amount of data to transfer
9212 + * @copy_fn: Function to actually perform the copy
9214 + * The @copy_fn only copies a maximum of a single block at once so
9215 + * we are safe calling it with int arguments. It is done so that
9216 + * we don't needlessly put 64bit arguments on the stack and it
9217 + * also makes the code in the @copy_fn nicer too.
9219 + * Returns: The amount of data actually copied or the error
9223 +gfs_readi(struct gfs_inode *ip, void *buf,
9224 + uint64_t offset, unsigned int size,
9225 + read_copy_fn_t copy_fn)
9227 + struct gfs_sbd *sdp = ip->i_sbd;
9228 + struct buffer_head *bh;
9229 + uint64_t lblock, dblock;
9231 + uint32_t extlen = 0;
9232 + unsigned int amount;
9234 + int journaled = gfs_is_jdata(ip);
9238 + if (offset >= ip->i_di.di_size)
9241 + if ((offset + size) > ip->i_di.di_size)
9242 + size = ip->i_di.di_size - offset;
9249 + o = do_div(lblock, sdp->sd_jbsize);
9251 + lblock = offset >> sdp->sd_sb.sb_bsize_shift;
9252 + o = offset & (sdp->sd_sb.sb_bsize - 1);
9255 + if (gfs_is_stuffed(ip))
9256 + o += sizeof(struct gfs_dinode);
9257 + else if (journaled)
9258 + o += sizeof(struct gfs_meta_header);
9260 + while (copied < size) {
9261 + amount = size - copied;
9262 + if (amount > sdp->sd_sb.sb_bsize - o)
9263 + amount = sdp->sd_sb.sb_bsize - o;
9266 + error = gfs_block_map(ip, lblock, ¬_new,
9267 + &dblock, &extlen);
9273 + gfs_start_ra(ip->i_gl, dblock, extlen);
9276 + error = gfs_get_data_buffer(ip, dblock, not_new, &bh);
9285 + error = copy_fn(bh, &buf, o, amount);
9294 + o = (journaled) ? sizeof(struct gfs_meta_header) : 0;
9300 + return (copied) ? copied : error;
9304 + * gfs_copy_from_mem - Trivial copy function for gfs_writei()
9305 + * @ip: The file to write to
9306 + * @bh: The buffer to copy to or clear
9307 + * @buf: The buffer to copy from
9308 + * @offset: The offset in the buffer to write to
9309 + * @size: The amount of data to write
9310 + * @new: Flag indicating that remaining space in the buffer should be zeroed
9312 + * Returns: 0 on success, -EXXX on failure
9316 +gfs_copy_from_mem(struct gfs_inode *ip, struct buffer_head *bh, void **buf,
9317 + unsigned int offset, unsigned int size, int new)
9319 + char **p = (char **)buf;
9322 + if (bh->b_blocknr == ip->i_num.no_addr) {
9323 + GFS_ASSERT_INODE(!new, ip,);
9324 + gfs_trans_add_bh(ip->i_gl, bh);
9325 + memcpy(bh->b_data + offset, *p, size);
9326 + } else if (gfs_is_jdata(ip)) {
9327 + gfs_trans_add_bh(ip->i_gl, bh);
9328 + memcpy(bh->b_data + offset, *p, size);
9330 + gfs_buffer_clear_ends(bh, offset, size, TRUE);
9332 + memcpy(bh->b_data + offset, *p, size);
9334 + gfs_buffer_clear_ends(bh, offset, size, FALSE);
9335 + error = gfs_dwrite(ip->i_sbd, bh, DIO_DIRTY);
9345 + * gfs_copy_from_user - Copy bytes from user space for gfs_writei()
9346 + * @ip: The file to write to
9347 + * @bh: The buffer to copy to or clear
9348 + * @buf: The buffer to copy from
9349 + * @offset: The offset in the buffer to write to
9350 + * @size: The amount of data to write
9351 + * @new: Flag indicating that remaining space in the buffer should be zeroed
9353 + * Returns: 0 on success, -EXXX on failure
9357 +gfs_copy_from_user(struct gfs_inode *ip, struct buffer_head *bh, void **buf,
9358 + unsigned int offset, unsigned int size, int new)
9360 + char **p = (char **)buf;
9363 + if (bh->b_blocknr == ip->i_num.no_addr) {
9364 + GFS_ASSERT_INODE(!new, ip,);
9365 + gfs_trans_add_bh(ip->i_gl, bh);
9366 + if (copy_from_user(bh->b_data + offset, *p, size))
9368 + } else if (gfs_is_jdata(ip)) {
9369 + gfs_trans_add_bh(ip->i_gl, bh);
9370 + if (copy_from_user(bh->b_data + offset, *p, size))
9373 + gfs_buffer_clear_ends(bh, offset, size, TRUE);
9375 + memset(bh->b_data + offset, 0, size);
9378 + if (copy_from_user(bh->b_data + offset, *p, size))
9382 + gfs_buffer_clear(bh);
9383 + gfs_dwrite(ip->i_sbd, bh, DIO_DIRTY);
9386 + gfs_buffer_clear_ends(bh, offset, size, FALSE);
9387 + error = gfs_dwrite(ip->i_sbd, bh, DIO_DIRTY);
9398 + * gfs_writei - Write bytes to a file
9399 + * @ip: The GFS inode
9400 + * @buf: The buffer containing information to be written
9401 + * @offset: The file offset to start writing at
9402 + * @size: The amount of data to write
9403 + * @copy_fn: Function to do the actual copying
9405 + * Returns: The number of bytes correctly written or error code
9409 +gfs_writei(struct gfs_inode *ip, void *buf,
9410 + uint64_t offset, unsigned int size,
9411 + write_copy_fn_t copy_fn)
9413 + struct gfs_sbd *sdp = ip->i_sbd;
9414 + struct buffer_head *dibh, *bh;
9415 + uint64_t lblock, dblock;
9417 + uint32_t extlen = 0;
9418 + unsigned int amount;
9420 + int journaled = gfs_is_jdata(ip);
9421 + const uint64_t start = offset;
9428 + if (gfs_is_stuffed(ip) &&
9429 + ((start + size) > (sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode)))) {
9430 + error = gfs_unstuff_dinode(ip, gfs_unstuffer_async, NULL);
9437 + o = do_div(lblock, sdp->sd_jbsize);
9439 + lblock = offset >> sdp->sd_sb.sb_bsize_shift;
9440 + o = offset & (sdp->sd_sb.sb_bsize - 1);
9443 + if (gfs_is_stuffed(ip))
9444 + o += sizeof(struct gfs_dinode);
9445 + else if (journaled)
9446 + o += sizeof(struct gfs_meta_header);
9448 + while (copied < size) {
9449 + amount = size - copied;
9450 + if (amount > sdp->sd_sb.sb_bsize - o)
9451 + amount = sdp->sd_sb.sb_bsize - o;
9455 + error = gfs_block_map(ip, lblock, &new, &dblock, &extlen);
9458 + GFS_ASSERT_INODE(dblock, ip,);
9461 + if (journaled && extlen > 1)
9462 + gfs_start_ra(ip->i_gl, dblock, extlen);
9464 + error = gfs_get_data_buffer(ip, dblock,
9465 + (amount == sdp->sd_sb.sb_bsize) ? TRUE : new,
9470 + error = copy_fn(ip, bh, &buf, o, amount, new);
9480 + o = (journaled) ? sizeof(struct gfs_meta_header) : 0;
9484 + error = gfs_get_inode_buffer(ip, &dibh);
9488 + if (ip->i_di.di_size < start + copied)
9489 + ip->i_di.di_size = start + copied;
9490 + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
9492 + gfs_trans_add_bh(ip->i_gl, dibh);
9493 + gfs_dinode_out(&ip->i_di, dibh->b_data);
9503 diff -urN linux-orig/fs/gfs/file.h linux-patched/fs/gfs/file.h
9504 --- linux-orig/fs/gfs/file.h 1969-12-31 18:00:00.000000000 -0600
9505 +++ linux-patched/fs/gfs/file.h 2004-06-30 13:27:49.339712058 -0500
9507 +/******************************************************************************
9508 +*******************************************************************************
9510 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
9511 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
9513 +** This copyrighted material is made available to anyone wishing to use,
9514 +** modify, copy, or redistribute it subject to the terms and conditions
9515 +** of the GNU General Public License v.2.
9517 +*******************************************************************************
9518 +******************************************************************************/
9520 +#ifndef __FILE_DOT_H__
9521 +#define __FILE_DOT_H__
9523 +typedef int (*read_copy_fn_t) (struct buffer_head * bh, void **buf,
9524 + unsigned int offset, unsigned int size);
9525 +typedef int (*write_copy_fn_t) (struct gfs_inode * ip, struct buffer_head * bh,
9526 + void **buf, unsigned int offset,
9527 + unsigned int size, int new);
9529 +int gfs_copy2mem(struct buffer_head *bh, void **buf,
9530 + unsigned int offset, unsigned int size);
9531 +int gfs_copy2user(struct buffer_head *bh, void **buf,
9532 + unsigned int offset, unsigned int size);
9533 +int gfs_readi(struct gfs_inode *ip, void *buf, uint64_t offset,
9534 + unsigned int size, read_copy_fn_t copy_fn);
9536 +int gfs_copy_from_mem(struct gfs_inode *ip, struct buffer_head *bh, void **buf,
9537 + unsigned int offset, unsigned int size, int new);
9538 +int gfs_copy_from_user(struct gfs_inode *ip, struct buffer_head *bh, void **buf,
9539 + unsigned int offset, unsigned int size, int new);
9540 +int gfs_writei(struct gfs_inode *ip, void *buf, uint64_t offset,
9541 + unsigned int size, write_copy_fn_t copy_fn);
9543 +static __inline__ int
9544 +gfs_internal_read(struct gfs_inode *ip, char *buf, uint64_t offset,
9545 + unsigned int size)
9547 + return gfs_readi(ip, buf, offset, size, gfs_copy2mem);
9550 +static __inline__ int
9551 +gfs_internal_write(struct gfs_inode *ip, char *buf, uint64_t offset,
9552 + unsigned int size)
9554 + return gfs_writei(ip, buf, offset, size, gfs_copy_from_mem);
9557 +#endif /* __FILE_DOT_H__ */
9558 diff -urN linux-orig/fs/gfs/fixed_div64.h linux-patched/fs/gfs/fixed_div64.h
9559 --- linux-orig/fs/gfs/fixed_div64.h 1969-12-31 18:00:00.000000000 -0600
9560 +++ linux-patched/fs/gfs/fixed_div64.h 2004-06-30 13:27:49.339712058 -0500
9563 + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
9565 + * This program is free software; you can redistribute it and/or modify it
9566 + * under the terms of version 2 of the GNU General Public License as
9567 + * published by the Free Software Foundation.
9569 + * This program is distributed in the hope that it would be useful, but
9570 + * WITHOUT ANY WARRANTY; without even the implied warranty of
9571 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
9573 + * Further, this software is distributed without any warranty that it is
9574 + * free of the rightful claim of any third person regarding infringement
9575 + * or the like. Any license provided herein, whether implied or
9576 + * otherwise, applies only to this software file. Patent licenses, if
9577 + * any, provided herein do not apply to combinations of this program with
9578 + * other software, or any other product whatsoever.
9580 + * You should have received a copy of the GNU General Public License along
9581 + * with this program; if not, write the Free Software Foundation, Inc., 59
9582 + * Temple Place - Suite 330, Boston MA 02111-1307, USA.
9584 + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
9585 + * Mountain View, CA 94043, or:
9587 + * http://www.sgi.com
9589 + * For further information regarding this notice, see:
9591 + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
9593 + * Additional munging:
9594 + * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
9597 +#ifndef __FIXED_DIV64_DOT_H__
9598 +#define __FIXED_DIV64_DOT_H__
9600 +#include <asm/div64.h>
9602 +#if defined __i386__
9603 +/* For ia32 we need to pull some tricks to get past various versions
9604 + * of the compiler which do not like us using do_div in the middle
9605 + * of large functions.
9607 +static inline __u32 fixed_div64_do_div(void *a, __u32 b, int n)
9613 + mod = *(__u32 *)a % b;
9614 + *(__u32 *)a = *(__u32 *)a / b;
9618 + unsigned long __upper, __low, __high, __mod;
9619 + __u64 c = *(__u64 *)a;
9620 + __upper = __high = c >> 32;
9623 + __upper = __high % (b);
9624 + __high = __high / (b);
9626 + asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
9627 + asm("":"=A" (c):"a" (__low),"d" (__high));
9637 +/* Side effect free 64 bit mod operation */
9638 +static inline __u32 fixed_div64_do_mod(void *a, __u32 b, int n)
9642 + return *(__u32 *)a % b;
9645 + unsigned long __upper, __low, __high, __mod;
9646 + __u64 c = *(__u64 *)a;
9647 + __upper = __high = c >> 32;
9650 + __upper = __high % (b);
9651 + __high = __high / (b);
9653 + asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
9654 + asm("":"=A" (c):"a" (__low),"d" (__high));
9663 +static inline __u32 fixed_div64_do_div(void *a, __u32 b, int n)
9669 + mod = *(__u32 *)a % b;
9670 + *(__u32 *)a = *(__u32 *)a / b;
9673 + mod = do_div(*(__u64 *)a, b);
9681 +/* Side effect free 64 bit mod operation */
9682 +static inline __u32 fixed_div64_do_mod(void *a, __u32 b, int n)
9686 + return *(__u32 *)a % b;
9689 + __u64 c = *(__u64 *)a;
9690 + return do_div(c, b);
9700 +#define do_div(a, b) fixed_div64_do_div(&(a), (b), sizeof(a))
9701 +#define do_mod(a, b) fixed_div64_do_mod(&(a), (b), sizeof(a))
9703 +#endif /* __FIXED_DIV64_DOT_H__ */
9704 diff -urN linux-orig/fs/gfs/flock.c linux-patched/fs/gfs/flock.c
9705 --- linux-orig/fs/gfs/flock.c 1969-12-31 18:00:00.000000000 -0600
9706 +++ linux-patched/fs/gfs/flock.c 2004-06-30 13:27:49.339712058 -0500
9708 +/******************************************************************************
9709 +*******************************************************************************
9711 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
9712 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
9714 +** This copyrighted material is made available to anyone wishing to use,
9715 +** modify, copy, or redistribute it subject to the terms and conditions
9716 +** of the GNU General Public License v.2.
9718 +*******************************************************************************
9719 +******************************************************************************/
9721 +#include <linux/sched.h>
9722 +#include <linux/slab.h>
9723 +#include <linux/smp_lock.h>
9724 +#include <linux/spinlock.h>
9725 +#include <asm/semaphore.h>
9726 +#include <linux/completion.h>
9727 +#include <linux/buffer_head.h>
9735 + * gfs_flock - Acquire a flock on a file
9737 + * @ex: exclusive lock
9738 + * @wait: wait for lock
9740 + * Returns: 0 on success, -EXXX on failure
9744 +gfs_flock(struct gfs_file *fp, int ex, int wait)
9746 + struct gfs_holder *fl_gh = &fp->f_fl_gh;
9747 + struct gfs_inode *ip = fp->f_inode;
9748 + struct gfs_sbd *sdp = ip->i_sbd;
9749 + struct gfs_glock *gl;
9752 + down(&fp->f_fl_lock);
9754 + if (fl_gh->gh_gl) {
9755 + gfs_glock_dq_uninit(fl_gh);
9760 + error = gfs_glock_get(sdp,
9761 + ip->i_num.no_formal_ino, &gfs_flock_glops,
9766 + gfs_holder_init(gl, (ex) ? LM_ST_EXCLUSIVE : LM_ST_SHARED,
9767 + ((wait) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE,
9769 + fl_gh->gh_owner = NULL;
9771 + gfs_glock_put(gl);
9773 + error = gfs_glock_nq(fl_gh);
9775 + gfs_holder_uninit(fl_gh);
9776 + if (error == GLR_TRYFAILED) {
9777 + GFS_ASSERT_INODE(!wait, ip,);
9783 + up(&fp->f_fl_lock);
9789 + * gfs_funlock - Release a flock on a file
9795 +gfs_funlock(struct gfs_file *fp)
9797 + struct gfs_holder *fl_gh = &fp->f_fl_gh;
9799 + down(&fp->f_fl_lock);
9801 + gfs_glock_dq_uninit(fl_gh);
9802 + up(&fp->f_fl_lock);
9806 diff -urN linux-orig/fs/gfs/flock.h linux-patched/fs/gfs/flock.h
9807 --- linux-orig/fs/gfs/flock.h 1969-12-31 18:00:00.000000000 -0600
9808 +++ linux-patched/fs/gfs/flock.h 2004-06-30 13:27:49.339712058 -0500
9810 +/******************************************************************************
9811 +*******************************************************************************
9813 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
9814 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
9816 +** This copyrighted material is made available to anyone wishing to use,
9817 +** modify, copy, or redistribute it subject to the terms and conditions
9818 +** of the GNU General Public License v.2.
9820 +*******************************************************************************
9821 +******************************************************************************/
9823 +#ifndef __FLOCK_DOT_H__
9824 +#define __FLOCK_DOT_H__
9826 +int gfs_flock(struct gfs_file *fp, int ex, int wait);
9827 +int gfs_funlock(struct gfs_file *fp);
9829 +#endif /* __FLOCK_DOT_H__ */
9830 diff -urN linux-orig/fs/gfs/format.h linux-patched/fs/gfs/format.h
9831 --- linux-orig/fs/gfs/format.h 1969-12-31 18:00:00.000000000 -0600
9832 +++ linux-patched/fs/gfs/format.h 2004-06-30 13:27:49.340711826 -0500
9834 +/******************************************************************************
9835 +*******************************************************************************
9837 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
9838 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
9840 +** This copyrighted material is made available to anyone wishing to use,
9841 +** modify, copy, or redistribute it subject to the terms and conditions
9842 +** of the GNU General Public License v.2.
9844 +*******************************************************************************
9845 +******************************************************************************/
9847 +#ifndef __FORMAT_DOT_H__
9848 +#define __FORMAT_DOT_H__
9850 +static const uint32_t gfs_old_fs_formats[] = {
9858 +static const uint32_t gfs_old_multihost_formats[] = {
9863 +#endif /* __FORMAT_DOT_H__ */
9864 diff -urN linux-orig/fs/gfs/gfs.h linux-patched/fs/gfs/gfs.h
9865 --- linux-orig/fs/gfs/gfs.h 1969-12-31 18:00:00.000000000 -0600
9866 +++ linux-patched/fs/gfs/gfs.h 2004-06-30 13:27:49.340711826 -0500
9868 +/******************************************************************************
9869 +*******************************************************************************
9871 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
9872 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
9874 +** This copyrighted material is made available to anyone wishing to use,
9875 +** modify, copy, or redistribute it subject to the terms and conditions
9876 +** of the GNU General Public License v.2.
9878 +*******************************************************************************
9879 +******************************************************************************/
9881 +#ifndef __GFS_DOT_H__
9882 +#define __GFS_DOT_H__
9884 +#define GFS_RELEASE_NAME "<CVS>"
9886 +#include <linux/lm_interface.h>
9887 +#include <linux/gfs_ondisk.h>
9888 +#include <linux/gfs_ioctl.h>
9890 +#include "fixed_div64.h"
9892 +#include "incore.h"
9903 +#define NO_CREATE (0)
9906 +#if (BITS_PER_LONG == 64)
9907 +#define PRIu64 "lu"
9908 +#define PRId64 "ld"
9909 +#define PRIo64 "lo"
9910 +#define PRIx64 "lx"
9911 +#define PRIX64 "lX"
9912 +#define SCNu64 "lu"
9913 +#define SCNd64 "ld"
9914 +#define SCNo64 "lo"
9915 +#define SCNx64 "lx"
9916 +#define SCNX64 "lX"
9918 +#define PRIu64 "Lu"
9919 +#define PRId64 "Ld"
9920 +#define PRIo64 "Lo"
9921 +#define PRIx64 "Lx"
9922 +#define PRIX64 "LX"
9923 +#define SCNu64 "Lu"
9924 +#define SCNd64 "Ld"
9925 +#define SCNo64 "Lo"
9926 +#define SCNx64 "Lx"
9927 +#define SCNX64 "LX"
9930 +/* Divide x by y. Round up if there is a remainder. */
9931 +#define DIV_RU(x, y) (((x) + (y) - 1) / (y))
9933 +#define GFS_FAST_NAME_SIZE (8)
9935 +#define vfs2sdp(sb) ((struct gfs_sbd *)(sb)->s_fs_info)
9936 +#define vn2ip(inode) ((struct gfs_inode *)(inode)->u.generic_ip)
9937 +#define vf2fp(file) ((struct gfs_file *)(file)->private_data)
9938 +#define bh2bd(bh) ((struct gfs_bufdata *)(bh)->b_private)
9939 +#define current_transaction ((struct gfs_trans *)(current->journal_info))
9941 +#define gl2ip(gl) ((struct gfs_inode *)(gl)->gl_object)
9942 +#define gl2rgd(gl) ((struct gfs_rgrpd *)(gl)->gl_object)
9943 +#define gl2gl(gl) ((struct gfs_glock *)(gl)->gl_object)
9945 +#define gfs_meta_check(sdp, bh) \
9948 + uint32_t meta_check_magic = ((struct gfs_meta_header *)(bh)->b_data)->mh_magic; \
9949 + meta_check_magic = gfs32_to_cpu(meta_check_magic); \
9950 + GFS_ASSERT_SBD(meta_check_magic == GFS_MAGIC, (sdp), \
9951 + struct gfs_meta_header meta_check_mh; \
9952 + printk("Bad metadata at %"PRIu64"\n", (uint64_t)(bh)->b_blocknr); \
9953 + gfs_meta_header_in(&meta_check_mh, (bh)->b_data); \
9954 + gfs_meta_header_print(&meta_check_mh);); \
9958 +#define gfs_metatype_check(sdp, bh, type) \
9961 + uint32_t metatype_check_magic = ((struct gfs_meta_header *)(bh)->b_data)->mh_magic; \
9962 + uint32_t metatype_check_type = ((struct gfs_meta_header *)(bh)->b_data)->mh_type; \
9963 + metatype_check_magic = gfs32_to_cpu(metatype_check_magic); \
9964 + metatype_check_type = gfs32_to_cpu(metatype_check_type); \
9965 + GFS_ASSERT_SBD(metatype_check_magic == GFS_MAGIC && \
9966 + metatype_check_type == (type), (sdp), \
9967 + struct gfs_meta_header metatype_check_mh; \
9968 + printk("Bad metadata at %"PRIu64", should be %u\n", (uint64_t)(bh)->b_blocknr, (type)); \
9969 + gfs_meta_header_in(&metatype_check_mh, (bh)->b_data); \
9970 + gfs_meta_header_print(&metatype_check_mh);); \
9974 +#define gfs_metatype_set(sdp, bh, type, format) \
9977 + gfs_meta_check((sdp), (bh)); \
9978 + ((struct gfs_meta_header *)(bh)->b_data)->mh_type = cpu_to_gfs32((type)); \
9979 + ((struct gfs_meta_header *)(bh)->b_data)->mh_format = cpu_to_gfs32((format)); \
9983 +#define gfs_sprintf(fmt, args...) \
9986 + if (*count + 256 > size) { \
9987 + error = -ENOMEM; \
9990 + *count += snprintf(buf + *count, 256, fmt, ##args); \
9993 + printk(fmt, ##args); \
9997 +#endif /* __GFS_DOT_H__ */
9998 diff -urN linux-orig/fs/gfs/glock.c linux-patched/fs/gfs/glock.c
9999 --- linux-orig/fs/gfs/glock.c 1969-12-31 18:00:00.000000000 -0600
10000 +++ linux-patched/fs/gfs/glock.c 2004-06-30 13:27:49.341711594 -0500
10002 +/******************************************************************************
10003 +*******************************************************************************
10005 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
10006 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
10008 +** This copyrighted material is made available to anyone wishing to use,
10009 +** modify, copy, or redistribute it subject to the terms and conditions
10010 +** of the GNU General Public License v.2.
10012 +*******************************************************************************
10013 +******************************************************************************/
10015 +#include <linux/sched.h>
10016 +#include <linux/slab.h>
10017 +#include <linux/smp_lock.h>
10018 +#include <linux/spinlock.h>
10019 +#include <asm/semaphore.h>
10020 +#include <linux/completion.h>
10021 +#include <linux/buffer_head.h>
10022 +#include <asm/uaccess.h>
10026 +#include "glock.h"
10027 +#include "glops.h"
10028 +#include "inode.h"
10030 +#include "quota.h"
10031 +#include "recovery.h"
10033 +/* Must be kept in sync with the beginning of struct gfs_glock */
10034 +struct glock_plug {
10035 + struct list_head gl_list;
10036 + unsigned long gl_flags;
10039 +typedef void (*glock_examiner) (struct gfs_glock * gl);
10042 + * relaxed_state_ok - is a requested lock compatible with the current lock mode?
10043 + * @actual: the current state of the lock
10044 + * @requested: the lock state that was requested by the caller
10045 + * @flags: the modifier flags passed in by the caller
10047 + * Returns: TRUE if the locks are compatible, FALSE otherwise
10050 +static __inline__ int
10051 +relaxed_state_ok(unsigned int actual, unsigned requested, int flags)
10053 + if (actual == requested)
10056 + if (flags & GL_EXACT)
10059 + if (actual == LM_ST_EXCLUSIVE && requested == LM_ST_SHARED)
10062 + if (actual != LM_ST_UNLOCKED && (flags & LM_FLAG_ANY))
10069 + * gl_hash() - Turn glock number into hash bucket number
10070 + * @lock: The glock number
10072 + * Returns: The number of the corresponding hash bucket
10075 +static unsigned int
10076 +gl_hash(struct lm_lockname *name)
10080 + h = gfs_hash(&name->ln_number, sizeof(uint64_t));
10081 + h = gfs_hash_more(&name->ln_type, sizeof(unsigned int), h);
10082 + h &= GFS_GL_HASH_MASK;
10088 + * glock_hold() - increment reference count on glock
10089 + * @gl: The glock to hold
10093 +static __inline__ void
10094 +glock_hold(struct gfs_glock *gl)
10096 + atomic_inc(&gl->gl_count);
10100 + * glock_put() - Decrement reference count on glock
10101 + * @gl: The glock to put
10105 +static __inline__ void
10106 +glock_put(struct gfs_glock *gl)
10108 + if (atomic_read(&gl->gl_count) == 1)
10109 + gfs_glock_schedule_for_reclaim(gl);
10110 + atomic_dec(&gl->gl_count);
10111 + GFS_ASSERT_GLOCK(atomic_read(&gl->gl_count) >= 0, gl,);
10115 + * queue_empty - check to see if a glock's queue is empty
10117 + * @head: the head of the queue to check
10119 + * Returns: TRUE if the queue is empty
10122 +static __inline__ int
10123 +queue_empty(struct gfs_glock *gl, struct list_head *head)
10126 + spin_lock(&gl->gl_spin);
10127 + empty = list_empty(head);
10128 + spin_unlock(&gl->gl_spin);
10133 + * search_bucket() - Find struct gfs_glock by lock number
10134 + * @bucket: the bucket to search
10135 + * @name: The lock name
10137 + * Returns: NULL, or the struct gfs_glock with the requested number
10140 +static struct gfs_glock *
10141 +search_bucket(struct gfs_gl_hash_bucket *bucket, struct lm_lockname *name)
10143 + struct list_head *tmp, *head;
10144 + struct gfs_glock *gl;
10146 + for (head = &bucket->hb_list, tmp = head->next;
10148 + tmp = tmp->next) {
10149 + gl = list_entry(tmp, struct gfs_glock, gl_list);
10151 + if (test_bit(GLF_PLUG, &gl->gl_flags))
10153 + if (!lm_name_equal(&gl->gl_name, name))
10165 + * gfs_glock_find() - Find glock by lock number
10166 + * @sdp: The GFS superblock
10167 + * @name: The lock name
10169 + * Figure out what bucket the lock is in, acquire the read lock on
10170 + * it and call search_bucket().
10172 + * Returns: NULL, or the struct gfs_glock with the requested number
10175 +struct gfs_glock *
10176 +gfs_glock_find(struct gfs_sbd *sdp, struct lm_lockname *name)
10178 + struct gfs_gl_hash_bucket *bucket = &sdp->sd_gl_hash[gl_hash(name)];
10179 + struct gfs_glock *gl;
10181 + read_lock(&bucket->hb_lock);
10182 + gl = search_bucket(bucket, name);
10183 + read_unlock(&bucket->hb_lock);
10189 + * glock_free() - Perform a few checks and then release struct gfs_glock
10190 + * @gl: The glock to release
10195 +glock_free(struct gfs_glock *gl)
10197 + struct gfs_sbd *sdp = gl->gl_sbd;
10198 + struct inode *aspace = gl->gl_aspace;
10200 + GFS_ASSERT_GLOCK(list_empty(&gl->gl_list), gl,);
10201 + GFS_ASSERT_GLOCK(atomic_read(&gl->gl_count) == 1, gl,);
10202 + GFS_ASSERT_GLOCK(list_empty(&gl->gl_holders), gl,);
10203 + GFS_ASSERT_GLOCK(list_empty(&gl->gl_waiters1), gl,);
10204 + GFS_ASSERT_GLOCK(list_empty(&gl->gl_waiters2), gl,);
10205 + GFS_ASSERT_GLOCK(gl->gl_state == LM_ST_UNLOCKED, gl,);
10206 + GFS_ASSERT_GLOCK(!gl->gl_object, gl,);
10207 + GFS_ASSERT_GLOCK(!gl->gl_lvb, gl,);
10208 + GFS_ASSERT_GLOCK(list_empty(&gl->gl_reclaim), gl,);
10210 + sdp->sd_lockstruct.ls_ops->lm_put_lock(gl->gl_lock);
10213 + gfs_aspace_put(aspace);
10215 + kmem_cache_free(gfs_glock_cachep, gl);
10217 + atomic_dec(&sdp->sd_glock_count);
10221 + * gfs_glock_get() - Get a glock, or create one if one doesn't exist
10222 + * @sdp: The GFS superblock
10223 + * @number: the lock number
10224 + * @glops: The glock_operations to use
10225 + * @create: If FALSE, don't create the glock if it doesn't exist
10226 + * @glp: the glock is returned here
10228 + * Returns: 0 on success, -EXXX on failure
10232 +gfs_glock_get(struct gfs_sbd *sdp,
10233 + uint64_t number, struct gfs_glock_operations *glops,
10234 + int create, struct gfs_glock **glp)
10236 + struct lm_lockname name;
10237 + struct gfs_glock *gl, *tmp;
10238 + struct gfs_gl_hash_bucket *bucket;
10241 + name.ln_number = number;
10242 + name.ln_type = glops->go_type;
10243 + bucket = &sdp->sd_gl_hash[gl_hash(&name)];
10245 + read_lock(&bucket->hb_lock);
10246 + gl = search_bucket(bucket, &name);
10247 + read_unlock(&bucket->hb_lock);
10249 + if (gl || !create) {
10254 + gl = kmem_cache_alloc(gfs_glock_cachep, GFP_KERNEL);
10258 + memset(gl, 0, sizeof(struct gfs_glock));
10260 + INIT_LIST_HEAD(&gl->gl_list);
10261 + gl->gl_name = name;
10262 + atomic_set(&gl->gl_count, 1);
10264 + spin_lock_init(&gl->gl_spin);
10266 + gl->gl_state = LM_ST_UNLOCKED;
10267 + INIT_LIST_HEAD(&gl->gl_holders);
10268 + INIT_LIST_HEAD(&gl->gl_waiters1);
10269 + INIT_LIST_HEAD(&gl->gl_waiters2);
10271 + gl->gl_ops = glops;
10273 + INIT_LE(&gl->gl_new_le, &gfs_glock_lops);
10274 + INIT_LE(&gl->gl_incore_le, &gfs_glock_lops);
10276 + gl->gl_bucket = bucket;
10277 + INIT_LIST_HEAD(&gl->gl_reclaim);
10279 + gl->gl_sbd = sdp;
10281 + INIT_LIST_HEAD(&gl->gl_dirty_buffers);
10282 + INIT_LIST_HEAD(&gl->gl_ail_bufs);
10284 + if (glops == &gfs_inode_glops ||
10285 + glops == &gfs_rgrp_glops ||
10286 + glops == &gfs_meta_glops) {
10287 + gl->gl_aspace = gfs_aspace_get(sdp);
10288 + if (!gl->gl_aspace) {
10294 + error = sdp->sd_lockstruct.ls_ops->lm_get_lock(sdp->sd_lockstruct.ls_lockspace,
10298 + goto fail_aspace;
10300 + atomic_inc(&sdp->sd_glock_count);
10302 + write_lock(&bucket->hb_lock);
10303 + tmp = search_bucket(bucket, &name);
10305 + write_unlock(&bucket->hb_lock);
10309 + list_add_tail(&gl->gl_list, &bucket->hb_list);
10310 + write_unlock(&bucket->hb_lock);
10318 + if (gl->gl_aspace)
10319 + gfs_aspace_put(gl->gl_aspace);
10322 + kmem_cache_free(gfs_glock_cachep, gl);
10328 + * gfs_glock_hold() - As glock_hold(), but suitable for exporting
10329 + * @gl: The glock to hold
10334 +gfs_glock_hold(struct gfs_glock *gl)
10336 + GFS_ASSERT_GLOCK(atomic_read(&gl->gl_count) > 0, gl,);
10341 + * gfs_glock_put() - As glock_put(), but suitable for exporting
10342 + * @gl: The glock to put
10347 +gfs_glock_put(struct gfs_glock *gl)
10353 + * gfs_holder_init - initialize a struct gfs_holder in the default way
10355 + * @state: the state we're requesting
10356 + * @flags: the modifier flags
10357 + * @gh: the holder structure
10362 +gfs_holder_init(struct gfs_glock *gl, unsigned int state, int flags,
10363 + struct gfs_holder *gh)
10365 + memset(gh, 0, sizeof(struct gfs_holder));
10367 + INIT_LIST_HEAD(&gh->gh_list);
10369 + gh->gh_owner = current;
10370 + gh->gh_state = state;
10371 + gh->gh_flags = flags;
10373 + if (gh->gh_state == LM_ST_EXCLUSIVE)
10374 + gh->gh_flags |= GL_LOCAL_EXCL;
10376 + init_completion(&gh->gh_wait);
10382 + * gfs_holder_reinit - reinitialize a struct gfs_holder so we can requeue it
10383 + * @state: the state we're requesting
10384 + * @flags: the modifier flags
10385 + * @gh: the holder structure
10387 + * Don't mess with the glock.
10392 +gfs_holder_reinit(unsigned int state, int flags, struct gfs_holder *gh)
10396 + GFS_ASSERT_GLOCK(list_empty(&gh->gh_list), gh->gh_gl,);
10398 + gh->gh_state = state;
10399 + gh->gh_flags = flags;
10401 + if (gh->gh_state == LM_ST_EXCLUSIVE)
10402 + gh->gh_flags |= GL_LOCAL_EXCL;
10404 + alloced = test_bit(HIF_ALLOCED, &gh->gh_iflags);
10405 + memset(&gh->gh_iflags, 0, sizeof(unsigned long));
10407 + set_bit(HIF_ALLOCED, &gh->gh_iflags);
10411 + * gfs_holder_uninit - uninitialize a holder structure (drop reference on glock)
10412 + * @gh: the holder structure
10417 +gfs_holder_uninit(struct gfs_holder *gh)
10419 + struct gfs_glock *gl = gh->gh_gl;
10421 + GFS_ASSERT_GLOCK(list_empty(&gh->gh_list), gl,);
10422 + gh->gh_gl = NULL;
10428 + * gfs_holder_get - get a struct gfs_holder structure
10430 + * @state: the state we're requesting
10431 + * @flags: the modifier flags
10433 + * Figure out how big an impact this function has. Either:
10434 + * 1) Replace it with a cache of structures hanging off the struct gfs_sbd
10435 + * 2) Get rid of it and call gmalloc() directly
10436 + * 3) Leave it like it is
10438 + * Returns: the holder structure
10441 +struct gfs_holder *
10442 +gfs_holder_get(struct gfs_glock *gl, unsigned int state, int flags)
10444 + struct gfs_holder *gh;
10446 + gh = gmalloc(sizeof(struct gfs_holder));
10447 + gfs_holder_init(gl, state, flags, gh);
10448 + set_bit(HIF_ALLOCED, &gh->gh_iflags);
10454 + * gfs_holder_put - get rid of a struct gfs_holder structure
10455 + * @gh: the holder structure
10460 +gfs_holder_put(struct gfs_holder *gh)
10462 + GFS_ASSERT_GLOCK(test_bit(HIF_ALLOCED, &gh->gh_iflags), gh->gh_gl,);
10463 + gfs_holder_uninit(gh);
10468 + * handle_recurse - put other holder structures (marked recursive) into the holders list
10469 + * @gh: the holder structure
10474 +handle_recurse(struct gfs_holder *gh)
10476 + struct gfs_glock *gl = gh->gh_gl;
10477 + struct list_head *tmp, *head, *next;
10478 + struct gfs_holder *tmp_gh;
10479 + int found = FALSE;
10481 + GFS_ASSERT_GLOCK(gh->gh_owner, gl,);
10483 + for (head = &gl->gl_waiters2, tmp = head->next, next = tmp->next;
10485 + tmp = next, next = tmp->next) {
10486 + tmp_gh = list_entry(tmp, struct gfs_holder, gh_list);
10487 + if (tmp_gh->gh_owner != gh->gh_owner)
10490 + GFS_ASSERT_GLOCK(test_bit(HIF_RECURSE, &tmp_gh->gh_iflags),
10493 + list_move_tail(&tmp_gh->gh_list, &gl->gl_holders);
10494 + tmp_gh->gh_error = 0;
10495 + set_bit(HIF_HOLDER, &tmp_gh->gh_iflags);
10497 + complete(&tmp_gh->gh_wait);
10502 + GFS_ASSERT_GLOCK(found, gl,);
10506 + * do_unrecurse - a recursive holder was just dropped off the waiters2 list
10507 + * @gh: the holder
10509 + * If there is only one other recursive holder, clear its HIF_RECURSE bit.
10510 + * If there is more than one, leave them alone.
10515 +do_unrecurse(struct gfs_holder *gh)
10517 + struct gfs_glock *gl = gh->gh_gl;
10518 + struct list_head *tmp, *head;
10519 + struct gfs_holder *tmp_gh, *last_gh = NULL;
10520 + int found = FALSE;
10522 + GFS_ASSERT_GLOCK(gh->gh_owner, gl,);
10524 + for (head = &gl->gl_waiters2, tmp = head->next;
10526 + tmp = tmp->next) {
10527 + tmp_gh = list_entry(tmp, struct gfs_holder, gh_list);
10528 + if (tmp_gh->gh_owner != gh->gh_owner)
10531 + GFS_ASSERT_GLOCK(test_bit(HIF_RECURSE, &tmp_gh->gh_iflags),
10538 + last_gh = tmp_gh;
10541 + GFS_ASSERT_GLOCK(found, gl,);
10542 + clear_bit(HIF_RECURSE, &last_gh->gh_iflags);
10546 + * rq_mutex - process a mutex request in the queue
10547 + * @gh: the glock holder
10549 + * Returns: TRUE if the queue is blocked,
10553 +rq_mutex(struct gfs_holder *gh)
10555 + struct gfs_glock *gl = gh->gh_gl;
10557 + list_del_init(&gh->gh_list);
10558 + /* gh->gh_error never examined. */
10559 + set_bit(GLF_LOCK, &gl->gl_flags);
10560 + complete(&gh->gh_wait);
10566 + * rq_promote - process a promote request in the queue
10567 + * @gh: the glock holder
10568 + * @promote_ok: It's ok to ask the LM to do promotes on a sync lock module
10570 + * Returns: TRUE if the queue is blocked,
10574 +rq_promote(struct gfs_holder *gh, int promote_ok)
10576 + struct gfs_glock *gl = gh->gh_gl;
10577 + struct gfs_sbd *sdp = gl->gl_sbd;
10578 + struct gfs_glock_operations *glops = gl->gl_ops;
10581 + if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
10582 + if (list_empty(&gl->gl_holders)) {
10583 + if (promote_ok || GFS_ASYNC_LM(sdp)) {
10584 + gl->gl_req_gh = gh;
10585 + set_bit(GLF_LOCK, &gl->gl_flags);
10586 + spin_unlock(&gl->gl_spin);
10588 + if (atomic_read(&sdp->sd_reclaim_count) >
10589 + sdp->sd_tune.gt_reclaim_limit &&
10590 + !(gh->gh_flags & LM_FLAG_PRIORITY)) {
10591 + gfs_reclaim_glock(sdp);
10592 + gfs_reclaim_glock(sdp);
10595 + glops->go_xmote_th(gl, gh->gh_state,
10598 + spin_lock(&gl->gl_spin);
10600 + if (!test_and_set_bit(HIF_WAKEUP, &gh->gh_iflags))
10601 + complete(&gh->gh_wait);
10606 + if (list_empty(&gl->gl_holders)) {
10607 + set_bit(HIF_FIRST, &gh->gh_iflags);
10608 + set_bit(GLF_LOCK, &gl->gl_flags);
10611 + struct gfs_holder *next_gh;
10612 + if (gh->gh_flags & GL_LOCAL_EXCL)
10614 + next_gh = list_entry(gl->gl_holders.next, struct gfs_holder, gh_list);
10615 + if (next_gh->gh_flags & GL_LOCAL_EXCL)
10617 + recurse = test_bit(HIF_RECURSE, &gh->gh_iflags);
10620 + list_move_tail(&gh->gh_list, &gl->gl_holders);
10621 + gh->gh_error = 0;
10622 + set_bit(HIF_HOLDER, &gh->gh_iflags);
10625 + handle_recurse(gh);
10627 + complete(&gh->gh_wait);
10633 + * rq_demote - process a demote request in the queue
10634 + * @gh: the glock holder
10636 + * Returns: TRUE if the queue is blocked,
10640 +rq_demote(struct gfs_holder *gh)
10642 + struct gfs_glock *gl = gh->gh_gl;
10643 + struct gfs_glock_operations *glops = gl->gl_ops;
10645 + if (!list_empty(&gl->gl_holders))
10648 + if (gl->gl_state == gh->gh_state || gl->gl_state == LM_ST_UNLOCKED) {
10649 + list_del_init(&gh->gh_list);
10650 + gh->gh_error = 0;
10651 + spin_unlock(&gl->gl_spin);
10652 + if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
10653 + gfs_holder_put(gh);
10655 + complete(&gh->gh_wait);
10656 + spin_lock(&gl->gl_spin);
10658 + gl->gl_req_gh = gh;
10659 + set_bit(GLF_LOCK, &gl->gl_flags);
10660 + spin_unlock(&gl->gl_spin);
10662 + if (gh->gh_state == LM_ST_UNLOCKED ||
10663 + gl->gl_state != LM_ST_EXCLUSIVE)
10664 + glops->go_drop_th(gl);
10666 + glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags);
10668 + spin_lock(&gl->gl_spin);
10675 + * run_queue - process holder structures on a glock
10677 + * @promote_ok: It's ok to ask the LM to do promotes on a sync lock module
10682 +run_queue(struct gfs_glock *gl, int promote_ok)
10684 + struct gfs_holder *gh;
10688 + if (test_bit(GLF_LOCK, &gl->gl_flags))
10691 + if (!list_empty(&gl->gl_waiters1)) {
10692 + gh = list_entry(gl->gl_waiters1.next,
10693 + struct gfs_holder, gh_list);
10695 + if (test_bit(HIF_MUTEX, &gh->gh_iflags))
10696 + blocked = rq_mutex(gh);
10698 + GFS_ASSERT_GLOCK(FALSE, gl,);
10700 + } else if (!list_empty(&gl->gl_waiters2)) {
10701 + gh = list_entry(gl->gl_waiters2.next,
10702 + struct gfs_holder, gh_list);
10704 + if (test_bit(HIF_PROMOTE, &gh->gh_iflags))
10705 + blocked = rq_promote(gh, promote_ok);
10706 + else if (test_bit(HIF_DEMOTE, &gh->gh_iflags))
10707 + blocked = rq_demote(gh);
10709 + GFS_ASSERT_GLOCK(FALSE, gl,);
10720 + * lock_on_glock - acquire a local lock on a glock
10726 +lock_on_glock(struct gfs_glock *gl)
10728 + struct gfs_holder gh;
10730 + gfs_holder_init(gl, 0, 0, &gh);
10731 + set_bit(HIF_MUTEX, &gh.gh_iflags);
10733 + spin_lock(&gl->gl_spin);
10734 + if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
10735 + list_add_tail(&gh.gh_list, &gl->gl_waiters1);
10737 + complete(&gh.gh_wait);
10738 + spin_unlock(&gl->gl_spin);
10740 + wait_for_completion(&gh.gh_wait);
10741 + gfs_holder_uninit(&gh);
10745 + * trylock_on_glock - try to acquire a local lock on a glock
10748 + * Returns: TRUE if the glock is acquired
10752 +trylock_on_glock(struct gfs_glock *gl)
10754 + int acquired = TRUE;
10756 + spin_lock(&gl->gl_spin);
10757 + if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
10758 + acquired = FALSE;
10759 + spin_unlock(&gl->gl_spin);
10765 + * unlock_on_glock - release a local lock on a glock
10771 +unlock_on_glock(struct gfs_glock *gl)
10773 + spin_lock(&gl->gl_spin);
10774 + clear_bit(GLF_LOCK, &gl->gl_flags);
10775 + run_queue(gl, FALSE);
10776 + spin_unlock(&gl->gl_spin);
10780 + * handle_callback - add a demote request to a lock's queue
10782 + * @state: the state the callback is asking us to change to
10787 +handle_callback(struct gfs_glock *gl, unsigned int state)
10789 + struct list_head *tmp, *head;
10790 + struct gfs_holder *gh, *new_gh = NULL;
10792 + GFS_ASSERT_GLOCK(state != LM_ST_EXCLUSIVE, gl,);
10795 + spin_lock(&gl->gl_spin);
10797 + for (head = &gl->gl_waiters2, tmp = head->next;
10799 + tmp = tmp->next) {
10800 + gh = list_entry(tmp, struct gfs_holder, gh_list);
10801 + if (test_bit(HIF_DEMOTE, &gh->gh_iflags) &&
10802 + gl->gl_req_gh != gh) {
10803 + if (gh->gh_state != state)
10804 + gh->gh_state = LM_ST_UNLOCKED;
10810 + list_add(&new_gh->gh_list, &gl->gl_waiters2);
10813 + spin_unlock(&gl->gl_spin);
10815 + new_gh = gfs_holder_get(gl, state, LM_FLAG_TRY);
10816 + set_bit(HIF_DEMOTE, &new_gh->gh_iflags);
10817 + set_bit(HIF_DEALLOC, &new_gh->gh_iflags);
10818 + new_gh->gh_owner = NULL;
10824 + spin_unlock(&gl->gl_spin);
10827 + gfs_holder_put(new_gh);
10831 + * state_change - record that the glock is now in a different state
10833 + * @new_state: the new state
10838 +state_change(struct gfs_glock *gl, unsigned int new_state)
10840 + struct gfs_sbd *sdp = gl->gl_sbd;
10841 + int held1, held2;
10843 + held1 = (gl->gl_state != LM_ST_UNLOCKED);
10844 + held2 = (new_state != LM_ST_UNLOCKED);
10846 + if (held1 != held2) {
10848 + atomic_inc(&sdp->sd_glock_held_count);
10851 + atomic_dec(&sdp->sd_glock_held_count);
10856 + gl->gl_state = new_state;
10860 + * xmote_bh - Called after the lock module is done acquiring a lock
10861 + * @gl: The glock in question
10862 + * @ret: the int returned from the lock module
10867 +xmote_bh(struct gfs_glock *gl, unsigned int ret)
10869 + struct gfs_glock_operations *glops = gl->gl_ops;
10870 + struct gfs_holder *gh = gl->gl_req_gh;
10871 + int prev_state = gl->gl_state;
10872 + int op_done = TRUE;
10874 + GFS_ASSERT_GLOCK(test_bit(GLF_LOCK, &gl->gl_flags), gl,);
10875 + GFS_ASSERT_GLOCK(queue_empty(gl, &gl->gl_holders), gl,);
10876 + GFS_ASSERT_GLOCK(!(ret & LM_OUT_ASYNC), gl,);
10878 + state_change(gl, ret & LM_OUT_ST_MASK);
10880 + if (ret & LM_OUT_NEED_E)
10881 + handle_callback(gl, LM_ST_UNLOCKED);
10882 + else if (ret & LM_OUT_NEED_D)
10883 + handle_callback(gl, LM_ST_DEFERRED);
10884 + else if (ret & LM_OUT_NEED_S)
10885 + handle_callback(gl, LM_ST_SHARED);
10887 + if (ret & LM_OUT_LVB_INVALID)
10888 + set_bit(GLF_LVB_INVALID, &gl->gl_flags);
10890 + if (prev_state != LM_ST_UNLOCKED && !(ret & LM_OUT_CACHEABLE)) {
10891 + if (glops->go_inval)
10892 + glops->go_inval(gl, DIO_METADATA | DIO_DATA);
10893 + } else if (gl->gl_state == LM_ST_DEFERRED) {
10894 + /* We might not want to do this here.
10895 + Look at moving to the inode glops. */
10896 + if (glops->go_inval)
10897 + glops->go_inval(gl, DIO_DATA);
10900 + /* Deal with each possible exit condition */
10903 + gl->gl_stamp = jiffies;
10905 + else if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) {
10906 + spin_lock(&gl->gl_spin);
10907 + list_del_init(&gh->gh_list);
10908 + if (gl->gl_state == gh->gh_state ||
10909 + gl->gl_state == LM_ST_UNLOCKED)
10910 + gh->gh_error = 0;
10912 + gh->gh_error = GLR_TRYFAILED;
10913 + spin_unlock(&gl->gl_spin);
10915 + if (ret & LM_OUT_CANCELED)
10916 + handle_callback(gl, LM_ST_UNLOCKED); /* Lame */
10918 + } else if (ret & LM_OUT_CANCELED) {
10919 + spin_lock(&gl->gl_spin);
10920 + list_del_init(&gh->gh_list);
10921 + gh->gh_error = GLR_CANCELED;
10922 + if (test_bit(HIF_RECURSE, &gh->gh_iflags))
10923 + do_unrecurse(gh);
10924 + spin_unlock(&gl->gl_spin);
10926 + } else if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
10927 + spin_lock(&gl->gl_spin);
10928 + list_move_tail(&gh->gh_list, &gl->gl_holders);
10929 + gh->gh_error = 0;
10930 + set_bit(HIF_HOLDER, &gh->gh_iflags);
10931 + spin_unlock(&gl->gl_spin);
10933 + set_bit(HIF_FIRST, &gh->gh_iflags);
10937 + } else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
10938 + spin_lock(&gl->gl_spin);
10939 + list_del_init(&gh->gh_list);
10940 + gh->gh_error = GLR_TRYFAILED;
10941 + if (test_bit(HIF_RECURSE, &gh->gh_iflags))
10942 + do_unrecurse(gh);
10943 + spin_unlock(&gl->gl_spin);
10946 + GFS_ASSERT_GLOCK(FALSE, gl,);
10948 + if (glops->go_xmote_bh)
10949 + glops->go_xmote_bh(gl);
10952 + spin_lock(&gl->gl_spin);
10953 + gl->gl_req_gh = NULL;
10954 + gl->gl_req_bh = NULL;
10955 + clear_bit(GLF_LOCK, &gl->gl_flags);
10956 + run_queue(gl, FALSE);
10957 + spin_unlock(&gl->gl_spin);
10963 + if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
10964 + gfs_holder_put(gh);
10966 + complete(&gh->gh_wait);
10971 + * gfs_glock_xmote_th - Call into the lock module to acquire a glock
10972 + * @gl: The glock in question
10973 + * @state: the requested state
10974 + * @flags: modifier flags to the lock call
10979 +gfs_glock_xmote_th(struct gfs_glock *gl, unsigned int state, int flags)
10981 + struct gfs_sbd *sdp = gl->gl_sbd;
10982 + struct gfs_glock_operations *glops = gl->gl_ops;
10983 + int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB |
10984 + LM_FLAG_NOEXP | LM_FLAG_ANY |
10985 + LM_FLAG_PRIORITY);
10986 + unsigned int lck_ret;
10988 + GFS_ASSERT_GLOCK(test_bit(GLF_LOCK, &gl->gl_flags), gl,);
10989 + GFS_ASSERT_GLOCK(queue_empty(gl, &gl->gl_holders), gl,);
10990 + GFS_ASSERT_GLOCK(state != LM_ST_UNLOCKED, gl,);
10991 + GFS_ASSERT_GLOCK(state != gl->gl_state, gl,);
10993 + if (gl->gl_state == LM_ST_EXCLUSIVE) {
10994 + if (glops->go_sync)
10995 + glops->go_sync(gl, DIO_METADATA | DIO_DATA);
10999 + gl->gl_req_bh = xmote_bh;
11001 + atomic_inc(&sdp->sd_lm_lock_calls);
11003 + lck_ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl->gl_lock,
11005 + state, lck_flags);
11007 + if (lck_ret & LM_OUT_ASYNC)
11008 + GFS_ASSERT_GLOCK(lck_ret == LM_OUT_ASYNC, gl,);
11010 + xmote_bh(gl, lck_ret);
11014 + * drop_bh - Called after a lock module unlock completes
11016 + * @ret: the return status
11018 + * Doesn't wake up the process waiting on the struct gfs_holder (if any)
11019 + * Doesn't drop the reference on the glock the top half took out
11024 +drop_bh(struct gfs_glock *gl, unsigned int ret)
11026 + struct gfs_glock_operations *glops = gl->gl_ops;
11027 + struct gfs_holder *gh = gl->gl_req_gh;
11029 + clear_bit(GLF_PREFETCH, &gl->gl_flags);
11031 + GFS_ASSERT_GLOCK(test_bit(GLF_LOCK, &gl->gl_flags), gl,);
11032 + GFS_ASSERT_GLOCK(queue_empty(gl, &gl->gl_holders), gl,);
11033 + GFS_ASSERT_GLOCK(!ret, gl,);
11035 + state_change(gl, LM_ST_UNLOCKED);
11037 + if (glops->go_inval)
11038 + glops->go_inval(gl, DIO_METADATA | DIO_DATA);
11041 + spin_lock(&gl->gl_spin);
11042 + list_del_init(&gh->gh_list);
11043 + gh->gh_error = 0;
11044 + spin_unlock(&gl->gl_spin);
11047 + if (glops->go_drop_bh)
11048 + glops->go_drop_bh(gl);
11050 + spin_lock(&gl->gl_spin);
11051 + gl->gl_req_gh = NULL;
11052 + gl->gl_req_bh = NULL;
11053 + clear_bit(GLF_LOCK, &gl->gl_flags);
11054 + run_queue(gl, FALSE);
11055 + spin_unlock(&gl->gl_spin);
11060 + if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
11061 + gfs_holder_put(gh);
11063 + complete(&gh->gh_wait);
11068 + * gfs_glock_drop_th - call into the lock module to unlock a lock
11074 +gfs_glock_drop_th(struct gfs_glock *gl)
11076 + struct gfs_sbd *sdp = gl->gl_sbd;
11077 + struct gfs_glock_operations *glops = gl->gl_ops;
11078 + unsigned int ret;
11080 + GFS_ASSERT_GLOCK(test_bit(GLF_LOCK, &gl->gl_flags), gl,);
11081 + GFS_ASSERT_GLOCK(queue_empty(gl, &gl->gl_holders), gl,);
11082 + GFS_ASSERT_GLOCK(gl->gl_state != LM_ST_UNLOCKED, gl,);
11084 + if (gl->gl_state == LM_ST_EXCLUSIVE) {
11085 + if (glops->go_sync)
11086 + glops->go_sync(gl, DIO_METADATA | DIO_DATA);
11090 + gl->gl_req_bh = drop_bh;
11092 + atomic_inc(&sdp->sd_lm_unlock_calls);
11094 + ret = sdp->sd_lockstruct.ls_ops->lm_unlock(gl->gl_lock, gl->gl_state);
11097 + drop_bh(gl, ret);
11099 + GFS_ASSERT_GLOCK(ret == LM_OUT_ASYNC, gl,);
11103 + * handle_cancels - cancel requests for locks stuck waiting on an expire flag
11104 + * @gh: the LM_FLAG_NOEXP holder waiting to acquire the lock
11109 +handle_cancels(struct gfs_holder *gh)
11111 + struct gfs_glock *gl = gh->gh_gl;
11113 + spin_lock(&gl->gl_spin);
11115 + while (gl->gl_req_gh != gh &&
11116 + !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
11117 + !test_bit(HIF_WAKEUP, &gh->gh_iflags) &&
11118 + !list_empty(&gh->gh_list)) {
11119 + if (gl->gl_req_bh) {
11120 + spin_unlock(&gl->gl_spin);
11121 + gl->gl_sbd->sd_lockstruct.ls_ops->lm_cancel(gl->gl_lock);
11123 + spin_lock(&gl->gl_spin);
11125 + spin_unlock(&gl->gl_spin);
11127 + spin_lock(&gl->gl_spin);
11131 + spin_unlock(&gl->gl_spin);
11135 + * glock_wait_internal - wait on a glock acquisition
11136 + * @gh: the glock holder
11138 + * Returns: 0 on success
11142 +glock_wait_internal(struct gfs_holder *gh)
11144 + struct gfs_glock *gl = gh->gh_gl;
11145 + struct gfs_glock_operations *glops = gl->gl_ops;
11148 + if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
11149 + spin_lock(&gl->gl_spin);
11150 + if (gl->gl_req_gh != gh &&
11151 + !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
11152 + !test_bit(HIF_WAKEUP, &gh->gh_iflags) &&
11153 + !list_empty(&gh->gh_list)) {
11154 + list_del_init(&gh->gh_list);
11155 + gh->gh_error = GLR_TRYFAILED;
11156 + if (test_bit(HIF_RECURSE, &gh->gh_iflags))
11157 + do_unrecurse(gh);
11158 + run_queue(gl, FALSE);
11159 + spin_unlock(&gl->gl_spin);
11160 + return GLR_TRYFAILED;
11162 + spin_unlock(&gl->gl_spin);
11165 + if (gh->gh_flags & LM_FLAG_NOEXP)
11166 + handle_cancels(gh);
11169 + wait_for_completion(&gh->gh_wait);
11171 + spin_lock(&gl->gl_spin);
11172 + if (test_and_clear_bit(HIF_WAKEUP, &gh->gh_iflags)) {
11173 + run_queue(gl, TRUE);
11174 + spin_unlock(&gl->gl_spin);
11176 + spin_unlock(&gl->gl_spin);
11181 + if (gh->gh_error)
11182 + return gh->gh_error;
11184 + GFS_ASSERT_GLOCK(test_bit(HIF_HOLDER, &gh->gh_iflags), gl,);
11185 + GFS_ASSERT_GLOCK(relaxed_state_ok(gl->gl_state, gh->gh_state,
11186 + gh->gh_flags), gl,);
11188 + if (test_bit(HIF_FIRST, &gh->gh_iflags)) {
11189 + GFS_ASSERT_GLOCK(test_bit(GLF_LOCK, &gl->gl_flags), gl,);
11191 + if (glops->go_lock) {
11192 + error = glops->go_lock(gl, gh->gh_flags);
11194 + spin_lock(&gl->gl_spin);
11195 + list_del_init(&gh->gh_list);
11196 + gh->gh_error = error;
11197 + if (test_and_clear_bit(HIF_RECURSE, &gh->gh_iflags))
11198 + do_unrecurse(gh);
11199 + spin_unlock(&gl->gl_spin);
11203 + spin_lock(&gl->gl_spin);
11204 + gl->gl_req_gh = NULL;
11205 + gl->gl_req_bh = NULL;
11206 + clear_bit(GLF_LOCK, &gl->gl_flags);
11207 + if (test_bit(HIF_RECURSE, &gh->gh_iflags))
11208 + handle_recurse(gh);
11209 + run_queue(gl, FALSE);
11210 + spin_unlock(&gl->gl_spin);
11217 + * add_to_queue - Add a holder to the wait queue (but look for recursion)
11218 + * @gh: the holder structure
11223 +add_to_queue(struct gfs_holder *gh)
11225 + struct gfs_glock *gl = gh->gh_gl;
11226 + struct list_head *tmp, *head;
11227 + struct gfs_holder *tmp_gh;
11229 + if (gh->gh_owner) {
11230 + for (head = &gl->gl_holders, tmp = head->next;
11232 + tmp = tmp->next) {
11233 + tmp_gh = list_entry(tmp, struct gfs_holder, gh_list);
11234 + if (tmp_gh->gh_owner == gh->gh_owner) {
11235 + GFS_ASSERT_GLOCK((gh->gh_flags & LM_FLAG_ANY) ||
11236 + !(tmp_gh->gh_flags & LM_FLAG_ANY),
11238 + GFS_ASSERT_GLOCK((tmp_gh->gh_flags & GL_LOCAL_EXCL) ||
11239 + !(gh->gh_flags & GL_LOCAL_EXCL),
11241 + GFS_ASSERT_GLOCK(relaxed_state_ok(gl->gl_state,
11246 + list_add_tail(&gh->gh_list, &gl->gl_holders);
11247 + set_bit(HIF_HOLDER, &gh->gh_iflags);
11249 + gh->gh_error = 0;
11250 + complete(&gh->gh_wait);
11256 + for (head = &gl->gl_waiters2, tmp = head->next;
11258 + tmp = tmp->next) {
11259 + tmp_gh = list_entry(tmp, struct gfs_holder, gh_list);
11260 + if (tmp_gh->gh_owner == gh->gh_owner) {
11261 + GFS_ASSERT_GLOCK(test_bit(HIF_PROMOTE,
11262 + &tmp_gh->gh_iflags),
11264 + GFS_ASSERT_GLOCK((gh->gh_flags & LM_FLAG_ANY) ||
11265 + !(tmp_gh->gh_flags & LM_FLAG_ANY),
11267 + GFS_ASSERT_GLOCK((tmp_gh->gh_flags & GL_LOCAL_EXCL) ||
11268 + !(gh->gh_flags & GL_LOCAL_EXCL),
11270 + GFS_ASSERT_GLOCK(relaxed_state_ok(tmp_gh->gh_state,
11275 + set_bit(HIF_RECURSE, &gh->gh_iflags);
11276 + set_bit(HIF_RECURSE, &tmp_gh->gh_iflags);
11278 + list_add_tail(&gh->gh_list, &gl->gl_waiters2);
11285 + if (gh->gh_flags & LM_FLAG_PRIORITY)
11286 + list_add(&gh->gh_list, &gl->gl_waiters2);
11288 + list_add_tail(&gh->gh_list, &gl->gl_waiters2);
11292 + * gfs_glock_nq - enqueue a struct gfs_holder onto a glock (acquire a glock)
11293 + * @gh: the holder structure
11295 + * if (gh->gh_flags & GL_ASYNC), this never returns an error
11297 + * Returns: 0, GLR_TRYFAILED, or -EXXX on failure
11301 +gfs_glock_nq(struct gfs_holder *gh)
11303 + struct gfs_glock *gl = gh->gh_gl;
11304 + struct gfs_sbd *sdp = gl->gl_sbd;
11307 + GFS_ASSERT_GLOCK(list_empty(&gh->gh_list), gl,);
11308 + GFS_ASSERT_GLOCK(gh->gh_state != LM_ST_UNLOCKED, gl,);
11309 + GFS_ASSERT_GLOCK((gh->gh_flags & (LM_FLAG_ANY | GL_EXACT)) !=
11310 + (LM_FLAG_ANY | GL_EXACT), gl,);
11311 + GFS_ASSERT_GLOCK(GFS_ASYNC_LM(sdp) ||
11312 + !(gh->gh_flags & GL_ASYNC), gl,);
11314 + atomic_inc(&sdp->sd_glock_nq_calls);
11317 + set_bit(HIF_PROMOTE, &gh->gh_iflags);
11319 + spin_lock(&gl->gl_spin);
11320 + add_to_queue(gh);
11321 + run_queue(gl, TRUE);
11322 + spin_unlock(&gl->gl_spin);
11324 + if (!(gh->gh_flags & GL_ASYNC)) {
11325 + error = glock_wait_internal(gh);
11326 + if (error == GLR_CANCELED) {
11327 + current->state = TASK_UNINTERRUPTIBLE;
11328 + schedule_timeout(HZ);
11333 + clear_bit(GLF_PREFETCH, &gl->gl_flags);
11339 + * gfs_glock_poll - poll to see if an async request has been completed
11340 + * @gh: the holder
11342 + * Returns: TRUE if the request is ready to be gfs_glock_wait()ed on
11346 +gfs_glock_poll(struct gfs_holder *gh)
11348 + struct gfs_glock *gl = gh->gh_gl;
11349 + int ready = FALSE;
11351 + GFS_ASSERT_GLOCK(gh->gh_flags & GL_ASYNC, gl,);
11352 + GFS_ASSERT_GLOCK(!test_bit(HIF_WAKEUP, &gh->gh_iflags), gl,);
11354 + spin_lock(&gl->gl_spin);
11356 + if (test_bit(HIF_HOLDER, &gh->gh_iflags))
11358 + else if (list_empty(&gh->gh_list)) {
11359 + if (gh->gh_error == GLR_CANCELED) {
11360 + spin_unlock(&gl->gl_spin);
11361 + current->state = TASK_UNINTERRUPTIBLE;
11362 + schedule_timeout(HZ);
11363 + gfs_glock_nq(gh);
11369 + spin_unlock(&gl->gl_spin);
11375 + * gfs_glock_wait - wait for a lock acquisition that ended in a GLR_ASYNC
11376 + * @gh: the holder structure
11378 + * Returns: 0, GLR_TRYFAILED, or -EXXX on failure
11382 +gfs_glock_wait(struct gfs_holder *gh)
11384 + struct gfs_glock *gl = gh->gh_gl;
11387 + GFS_ASSERT_GLOCK(gh->gh_flags & GL_ASYNC, gl,);
11388 + GFS_ASSERT_GLOCK(!test_bit(HIF_WAKEUP, &gh->gh_iflags), gl,);
11390 + error = glock_wait_internal(gh);
11391 + if (error == GLR_CANCELED) {
11392 + current->state = TASK_UNINTERRUPTIBLE;
11393 + schedule_timeout(HZ);
11394 + gh->gh_flags &= ~GL_ASYNC;
11395 + error = gfs_glock_nq(gh);
11402 + * gfs_glock_dq - dequeue a struct gfs_holder from a glock (release a glock)
11403 + * @gh: the glock holder
11408 +gfs_glock_dq(struct gfs_holder *gh)
11410 + struct gfs_glock *gl = gh->gh_gl;
11411 + struct gfs_glock_operations *glops = gl->gl_ops;
11413 + GFS_ASSERT_GLOCK(!queue_empty(gl, &gh->gh_list), gl,);
11414 + GFS_ASSERT_GLOCK(test_bit(HIF_HOLDER, &gh->gh_iflags), gl,);
11416 + atomic_inc(&gl->gl_sbd->sd_glock_dq_calls);
11418 + if (gh->gh_flags & GL_SYNC)
11419 + set_bit(GLF_SYNC, &gl->gl_flags);
11420 + if (gh->gh_flags & GL_NOCACHE)
11421 + handle_callback(gl, LM_ST_UNLOCKED);
11423 + lock_on_glock(gl);
11425 + spin_lock(&gl->gl_spin);
11426 + list_del_init(&gh->gh_list);
11427 + if (list_empty(&gl->gl_holders)) {
11428 + spin_unlock(&gl->gl_spin);
11430 + if (glops->go_unlock)
11431 + glops->go_unlock(gl, gh->gh_flags);
11433 + if (test_bit(GLF_SYNC, &gl->gl_flags)) {
11434 + if (glops->go_sync)
11435 + glops->go_sync(gl,
11441 + gl->gl_stamp = jiffies;
11443 + spin_lock(&gl->gl_spin);
11446 + clear_bit(GLF_LOCK, &gl->gl_flags);
11447 + run_queue(gl, FALSE);
11448 + spin_unlock(&gl->gl_spin);
11452 + * gfs_glock_prefetch - Try to prefetch a glock
11454 + * @state: the state to prefetch in
11455 + * @flags: flags passed to go_xmote_th()
11460 +gfs_glock_prefetch(struct gfs_glock *gl, unsigned int state, int flags)
11462 + struct gfs_glock_operations *glops = gl->gl_ops;
11464 + GFS_ASSERT_GLOCK(atomic_read(&gl->gl_count) > 0, gl,);
11465 + GFS_ASSERT_GLOCK(state != LM_ST_UNLOCKED, gl,);
11466 + GFS_ASSERT_GLOCK((flags & (LM_FLAG_ANY | GL_EXACT)) !=
11467 + (LM_FLAG_ANY | GL_EXACT), gl,);
11469 + spin_lock(&gl->gl_spin);
11471 + if (test_bit(GLF_LOCK, &gl->gl_flags) ||
11472 + !list_empty(&gl->gl_holders) ||
11473 + !list_empty(&gl->gl_waiters1) ||
11474 + !list_empty(&gl->gl_waiters2) ||
11475 + relaxed_state_ok(gl->gl_state, state, flags)) {
11476 + spin_unlock(&gl->gl_spin);
11480 + set_bit(GLF_PREFETCH, &gl->gl_flags);
11482 + GFS_ASSERT_GLOCK(!gl->gl_req_gh, gl,);
11483 + set_bit(GLF_LOCK, &gl->gl_flags);
11484 + spin_unlock(&gl->gl_spin);
11486 + glops->go_xmote_th(gl, state, flags);
11488 + atomic_inc(&gl->gl_sbd->sd_glock_prefetch_calls);
11492 + * gfs_glock_force_drop - Force a glock to be uncached
11498 +gfs_glock_force_drop(struct gfs_glock *gl)
11500 + struct gfs_holder gh;
11502 + gfs_holder_init(gl, LM_ST_UNLOCKED, 0, &gh);
11503 + set_bit(HIF_DEMOTE, &gh.gh_iflags);
11504 + gh.gh_owner = NULL;
11506 + spin_lock(&gl->gl_spin);
11507 + list_add(&gh.gh_list, &gl->gl_waiters2);
11508 + run_queue(gl, FALSE);
11509 + spin_unlock(&gl->gl_spin);
11511 + wait_for_completion(&gh.gh_wait);
11512 + gfs_holder_uninit(&gh);
11516 + * gfs_glock_nq_init - initialize a holder and enqueue it on a glock
11518 + * @state: the state we're requesting
11519 + * @flags: the modifier flags
11520 + * @gh: the holder structure
11522 + * Returns: 0, GLR_*, or -EXXX
11526 +gfs_glock_nq_init(struct gfs_glock *gl, unsigned int state, int flags,
11527 + struct gfs_holder *gh)
11531 + gfs_holder_init(gl, state, flags, gh);
11533 + error = gfs_glock_nq(gh);
11535 + gfs_holder_uninit(gh);
11541 + * gfs_glock_dq_uninit - dequeue a holder from a glock and initialize it
11542 + * @gh: the holder structure
11547 +gfs_glock_dq_uninit(struct gfs_holder *gh)
11549 + gfs_glock_dq(gh);
11550 + gfs_holder_uninit(gh);
11554 + * gfs_glock_nq_num - acquire a glock based on lock number
11555 + * @sdp: the filesystem
11556 + * @number: the lock number
11557 + * @glops: the glock operations for the type of glock
11558 + * @state: the state to acquire the glock in
11559 + * @flags: modifier flags for the acquisition
11560 + * @gh: the struct gfs_holder
11562 + * Returns: 0 on success, -EXXX on failure
11566 +gfs_glock_nq_num(struct gfs_sbd *sdp,
11567 + uint64_t number, struct gfs_glock_operations *glops,
11568 + unsigned int state, int flags, struct gfs_holder *gh)
11570 + struct gfs_glock *gl;
11573 + error = gfs_glock_get(sdp, number, glops, CREATE, &gl);
11575 + error = gfs_glock_nq_init(gl, state, flags, gh);
11583 + * glock_compare - Compare two struct gfs_holder structures for sorting
11584 + * @arg_a: the first structure
11585 + * @arg_b: the second structure
11590 +glock_compare(const void *arg_a, const void *arg_b)
11592 + struct gfs_holder *gh_a = *(struct gfs_holder **)arg_a;
11593 + struct gfs_holder *gh_b = *(struct gfs_holder **)arg_b;
11594 + struct lm_lockname *a = &gh_a->gh_gl->gl_name;
11595 + struct lm_lockname *b = &gh_b->gh_gl->gl_name;
11598 + if (a->ln_number > b->ln_number)
11600 + else if (a->ln_number < b->ln_number)
11603 + if (gh_a->gh_state == LM_ST_SHARED &&
11604 + gh_b->gh_state == LM_ST_EXCLUSIVE)
11606 + else if (!(gh_a->gh_flags & GL_LOCAL_EXCL) &&
11607 + (gh_b->gh_flags & GL_LOCAL_EXCL))
11615 + * nq_m_sync - synchronously acquire more than one glock in deadlock free order
11616 + * @num_gh: the number of structures
11617 + * @ghs: an array of struct gfs_holder structures
11619 + * Returns: 0 on success (all glocks acquired), -EXXX on failure (no glocks acquired)
11623 +nq_m_sync(unsigned int num_gh, struct gfs_holder *ghs)
11625 + struct gfs_holder *p[num_gh];
11629 + for (x = 0; x < num_gh; x++)
11632 + gfs_sort(p, num_gh, sizeof(struct gfs_holder *), glock_compare);
11634 + for (x = 0; x < num_gh; x++) {
11635 + p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
11637 + error = gfs_glock_nq(p[x]);
11640 + gfs_glock_dq(p[x]);
11649 + * gfs_glock_nq_m - acquire multiple glocks
11650 + * @num_gh: the number of structures
11651 + * @ghs: an array of struct gfs_holder structures
11653 + * Figure out how big an impact this function has. Either:
11654 + * 1) Replace this code with code that calls gfs_glock_prefetch()
11655 + * 2) Forget async stuff and just call nq_m_sync()
11656 + * 3) Leave it like it is
11658 + * Returns: 0 on success (all glocks acquired), -EXXX on failure (no glocks acquired)
11662 +gfs_glock_nq_m(unsigned int num_gh, struct gfs_holder *ghs)
11666 + int borked = FALSE, serious = 0;
11669 + GFS_ASSERT(num_gh,);
11671 + if (num_gh == 1) {
11672 + ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
11673 + error = gfs_glock_nq(ghs);
11677 + if (!GFS_ASYNC_LM(ghs->gh_gl->gl_sbd)) {
11678 + error = nq_m_sync(num_gh, ghs);
11682 + for (x = 0; x < num_gh; x++) {
11683 + ghs[x].gh_flags |= LM_FLAG_TRY | GL_ASYNC;
11684 + gfs_glock_nq(&ghs[x]);
11687 + for (x = 0; x < num_gh; x++) {
11688 + error = e[x] = glock_wait_internal(&ghs[x]);
11691 + if (error != GLR_TRYFAILED && error != GLR_CANCELED)
11699 + for (x = 0; x < num_gh; x++)
11701 + gfs_glock_dq(&ghs[x]);
11706 + for (x = 0; x < num_gh; x++)
11707 + gfs_holder_reinit(ghs[x].gh_state, ghs[x].gh_flags,
11709 + error = nq_m_sync(num_gh, ghs);
11716 + * gfs_glock_dq_m - release multiple glocks
11717 + * @num_gh: the number of structures
11718 + * @ghs: an array of struct gfs_holder structures
11723 +gfs_glock_dq_m(unsigned int num_gh, struct gfs_holder *ghs)
11727 + for (x = 0; x < num_gh; x++)
11728 + gfs_glock_dq(&ghs[x]);
11732 + * gfs_glock_prefetch_num - prefetch a glock based on lock number
11733 + * @sdp: the filesystem
11734 + * @number: the lock number
11735 + * @glops: the glock operations for the type of glock
11736 + * @state: the state to acquire the glock in
11737 + * @flags: modifier flags for the acquisition
11739 + * Returns: 0 on success, -EXXX on failure
11743 +gfs_glock_prefetch_num(struct gfs_sbd *sdp,
11744 + uint64_t number, struct gfs_glock_operations *glops,
11745 + unsigned int state, int flags)
11747 + struct gfs_glock *gl;
11750 + if (atomic_read(&sdp->sd_reclaim_count) < sdp->sd_tune.gt_reclaim_limit) {
11751 + error = gfs_glock_get(sdp, number, glops, CREATE, &gl);
11753 + gfs_glock_prefetch(gl, state, flags);
11760 + * gfs_lvb_hold - attach a LVB to a glock
11761 + * @gl: The glock in question
11766 +gfs_lvb_hold(struct gfs_glock *gl)
11770 + GFS_ASSERT_GLOCK(atomic_read(&gl->gl_count) > 0, gl,);
11772 + lock_on_glock(gl);
11774 + atomic_inc(&gl->gl_lvb_count);
11775 + if (atomic_read(&gl->gl_lvb_count) == 1) {
11777 + GFS_ASSERT_GLOCK(!gl->gl_lvb, gl,);
11778 + error = gl->gl_sbd->sd_lockstruct.ls_ops->lm_hold_lvb(gl->gl_lock,
11782 + atomic_dec(&gl->gl_lvb_count);
11786 + unlock_on_glock(gl);
11792 + * gfs_lvb_unhold - detach a LVB from a glock
11793 + * @gl: The glock in question
11798 +gfs_lvb_unhold(struct gfs_glock *gl)
11802 + lock_on_glock(gl);
11804 + GFS_ASSERT_GLOCK(atomic_read(&gl->gl_lvb_count), gl,);
11805 + if (atomic_dec_and_test(&gl->gl_lvb_count)) {
11806 + GFS_ASSERT_GLOCK(gl->gl_lvb, gl,);
11807 + gl->gl_sbd->sd_lockstruct.ls_ops->lm_unhold_lvb(gl->gl_lock,
11809 + gl->gl_lvb = NULL;
11813 + unlock_on_glock(gl);
11819 + * gfs_lvb_sync - sync a LVB
11820 + * @gl: The glock in question
11825 +gfs_lvb_sync(struct gfs_glock *gl)
11827 + GFS_ASSERT_GLOCK(atomic_read(&gl->gl_lvb_count), gl,);
11829 + lock_on_glock(gl);
11831 + GFS_ASSERT_GLOCK(gfs_glock_is_held_excl(gl), gl,);
11832 + gl->gl_sbd->sd_lockstruct.ls_ops->lm_sync_lvb(gl->gl_lock, gl->gl_lvb);
11834 + unlock_on_glock(gl);
11838 + * gfs_glock_cb - Callback used by locking module
11839 + * @fsdata: Pointer to the superblock
11840 + * @type: Type of callback
11841 + * @data: Type dependent data pointer
11843 + * Called by the locking module when it wants to tell us something.
11844 + * Either we need to drop a lock or another client expired.
11848 +gfs_glock_cb(lm_fsdata_t * fsdata, unsigned int type, void *data)
11850 + struct gfs_sbd *sdp = (struct gfs_sbd *)fsdata;
11851 + struct gfs_glock *gl;
11852 + struct lm_lockname *name = NULL;
11853 + unsigned int state = 0;
11854 + struct lm_async_cb *async;
11855 + unsigned int journal;
11857 + atomic_inc(&sdp->sd_lm_callbacks);
11860 + case LM_CB_NEED_E:
11861 + name = (struct lm_lockname *)data;
11862 + state = LM_ST_UNLOCKED;
11865 + case LM_CB_NEED_D:
11866 + name = (struct lm_lockname *)data;
11867 + state = LM_ST_DEFERRED;
11870 + case LM_CB_NEED_S:
11871 + name = (struct lm_lockname *)data;
11872 + state = LM_ST_SHARED;
11875 + case LM_CB_ASYNC:
11876 + async = (struct lm_async_cb *)data;
11878 + gl = gfs_glock_find(sdp, &async->lc_name);
11879 + GFS_ASSERT_SBD(gl, sdp,);
11880 + GFS_ASSERT_GLOCK(gl->gl_req_bh, gl,);
11881 + gl->gl_req_bh(gl, async->lc_ret);
11886 + case LM_CB_NEED_RECOVERY:
11887 + journal = *(unsigned int *)data;
11889 + gfs_add_dirty_j(sdp, journal);
11891 + if (test_bit(SDF_RECOVERD_RUN, &sdp->sd_flags))
11892 + wake_up_process(sdp->sd_recoverd_process);
11896 + case LM_CB_DROPLOCKS:
11897 + gfs_gl_hash_clear(sdp, FALSE);
11898 + gfs_quota_scan(sdp);
11902 + GFS_ASSERT_SBD(FALSE, sdp,
11903 + printk("type = %u\n", type););
11908 + gl = gfs_glock_find(sdp, name);
11910 + if (gl->gl_ops->go_callback)
11911 + gl->gl_ops->go_callback(gl, state);
11912 + handle_callback(gl, state);
11913 + spin_lock(&gl->gl_spin);
11914 + run_queue(gl, FALSE);
11915 + spin_unlock(&gl->gl_spin);
11922 + * gfs_try_toss_inode - try to remove a particular inode from GFS' cache
11923 + * @sdp: the filesystem
11924 + * @inum: the inode number
11929 +gfs_try_toss_inode(struct gfs_sbd *sdp, struct gfs_inum *inum)
11931 + struct gfs_glock *gl;
11932 + struct gfs_inode *ip;
11935 + error = gfs_glock_get(sdp,
11936 + inum->no_formal_ino, &gfs_inode_glops,
11938 + if (error || !gl)
11941 + if (!trylock_on_glock(gl))
11944 + if (!queue_empty(gl, &gl->gl_holders))
11951 + if (atomic_read(&ip->i_count))
11954 + gfs_inode_destroy(ip);
11957 + unlock_on_glock(gl);
11964 + * gfs_iopen_go_callback - Try to kick the inode/vnode associated with an iopen glock from memory
11965 + * @io_gl: the iopen glock
11966 + * @state: the state into which the glock should be put
11971 +gfs_iopen_go_callback(struct gfs_glock *io_gl, unsigned int state)
11973 + struct gfs_glock *i_gl;
11974 + struct gfs_inode *ip;
11976 + if (state != LM_ST_UNLOCKED)
11979 + spin_lock(&io_gl->gl_spin);
11980 + i_gl = gl2gl(io_gl);
11982 + glock_hold(i_gl);
11983 + spin_unlock(&io_gl->gl_spin);
11985 + spin_unlock(&io_gl->gl_spin);
11989 + if (trylock_on_glock(i_gl)) {
11990 + if (queue_empty(i_gl, &i_gl->gl_holders)) {
11991 + ip = gl2ip(i_gl);
11993 + gfs_try_toss_vnode(ip);
11994 + unlock_on_glock(i_gl);
11995 + gfs_glock_schedule_for_reclaim(i_gl);
11999 + unlock_on_glock(i_gl);
12007 + * demote_ok - check to see if it's ok to unlock a glock
12010 + * Returns: TRUE if it's ok
12014 +demote_ok(struct gfs_glock *gl)
12016 + struct gfs_sbd *sdp = gl->gl_sbd;
12017 + struct gfs_glock_operations *glops = gl->gl_ops;
12018 + int demote = TRUE;
12020 + if (test_bit(GLF_STICKY, &gl->gl_flags))
12022 + else if (test_bit(GLF_PREFETCH, &gl->gl_flags))
12023 + demote = time_after_eq(jiffies,
12025 + sdp->sd_tune.gt_prefetch_secs * HZ);
12026 + else if (glops->go_demote_ok)
12027 + demote = glops->go_demote_ok(gl);
12033 + * gfs_glock_schedule_for_reclaim - Add a glock to the reclaim list
12039 +gfs_glock_schedule_for_reclaim(struct gfs_glock *gl)
12041 + struct gfs_sbd *sdp = gl->gl_sbd;
12043 + spin_lock(&sdp->sd_reclaim_lock);
12044 + if (list_empty(&gl->gl_reclaim)) {
12046 + list_add(&gl->gl_reclaim, &sdp->sd_reclaim_list);
12047 + atomic_inc(&sdp->sd_reclaim_count);
12049 + spin_unlock(&sdp->sd_reclaim_lock);
12051 + wake_up(&sdp->sd_reclaim_wchan);
12055 + * gfs_reclaim_glock - process a glock on the reclaim list
12056 + * @sdp: the filesystem
12061 +gfs_reclaim_glock(struct gfs_sbd *sdp)
12063 + struct gfs_glock *gl;
12064 + struct gfs_gl_hash_bucket *bucket;
12066 + spin_lock(&sdp->sd_reclaim_lock);
12068 + if (list_empty(&sdp->sd_reclaim_list)) {
12069 + spin_unlock(&sdp->sd_reclaim_lock);
12073 + gl = list_entry(sdp->sd_reclaim_list.next,
12074 + struct gfs_glock, gl_reclaim);
12075 + list_del_init(&gl->gl_reclaim);
12077 + spin_unlock(&sdp->sd_reclaim_lock);
12079 + atomic_dec(&sdp->sd_reclaim_count);
12080 + atomic_inc(&sdp->sd_reclaimed);
12082 + if (trylock_on_glock(gl)) {
12083 + if (queue_empty(gl, &gl->gl_holders)) {
12084 + if (gl->gl_ops == &gfs_inode_glops) {
12085 + struct gfs_inode *ip = gl2ip(gl);
12086 + if (ip && !atomic_read(&ip->i_count))
12087 + gfs_inode_destroy(ip);
12089 + if (gl->gl_state != LM_ST_UNLOCKED &&
12091 + handle_callback(gl, LM_ST_UNLOCKED);
12093 + unlock_on_glock(gl);
12096 + bucket = gl->gl_bucket;
12098 + write_lock(&bucket->hb_lock);
12099 + if (atomic_read(&gl->gl_count) == 1) {
12100 + list_del_init(&gl->gl_list);
12101 + write_unlock(&bucket->hb_lock);
12104 + write_unlock(&bucket->hb_lock);
12110 + * examine_bucket - Call a function for each glock in a hash bucket
12111 + * @examiner: the function
12112 + * @sdp: the filesystem
12113 + * @bucket: the bucket
12115 + * Returns: TRUE if the bucket has entries
12119 +examine_bucket(glock_examiner examiner,
12120 + struct gfs_sbd *sdp, struct gfs_gl_hash_bucket *bucket)
12122 + struct glock_plug plug;
12123 + struct list_head *tmp;
12124 + struct gfs_glock *gl;
12127 + memset(&plug.gl_flags, 0, sizeof(unsigned long));
12128 + set_bit(GLF_PLUG, &plug.gl_flags);
12130 + write_lock(&bucket->hb_lock);
12131 + list_add(&plug.gl_list, &bucket->hb_list);
12132 + write_unlock(&bucket->hb_lock);
12135 + write_lock(&bucket->hb_lock);
12138 + tmp = plug.gl_list.next;
12139 + if (tmp == &bucket->hb_list) {
12140 + list_del(&plug.gl_list);
12141 + entries = !list_empty(&bucket->hb_list);
12142 + write_unlock(&bucket->hb_lock);
12145 + gl = list_entry(tmp, struct gfs_glock, gl_list);
12147 + list_move(&plug.gl_list, &gl->gl_list);
12149 + if (test_bit(GLF_PLUG, &gl->gl_flags))
12157 + write_unlock(&bucket->hb_lock);
12164 + * scan_glock - look at a glock and see if we can do stuff to it
12165 + * @gl: the glock to look at
12170 +scan_glock(struct gfs_glock *gl)
12172 + if (trylock_on_glock(gl)) {
12173 + if (queue_empty(gl, &gl->gl_holders)) {
12174 + if (gl->gl_ops == &gfs_inode_glops) {
12175 + struct gfs_inode *ip = gl2ip(gl);
12176 + if (ip && !atomic_read(&ip->i_count)) {
12177 + unlock_on_glock(gl);
12178 + gfs_glock_schedule_for_reclaim(gl);
12182 + if (gl->gl_state != LM_ST_UNLOCKED &&
12184 + unlock_on_glock(gl);
12185 + gfs_glock_schedule_for_reclaim(gl);
12190 + unlock_on_glock(gl);
12198 + * gfs_scand_internal - Look for glocks and inodes to toss from memory
12199 + * @sdp: the filesystem
12204 +gfs_scand_internal(struct gfs_sbd *sdp)
12208 + for (x = 0; x < GFS_GL_HASH_SIZE; x++) {
12209 + examine_bucket(scan_glock, sdp, &sdp->sd_gl_hash[x]);
12215 + * clear_glock - look at a glock and see if we can do stuff to it
12216 + * @gl: the glock to look at
12217 + * @timeout: demote locks left unused for longer than this many seconds
12222 +clear_glock(struct gfs_glock *gl)
12224 + struct gfs_sbd *sdp = gl->gl_sbd;
12225 + struct gfs_gl_hash_bucket *bucket = gl->gl_bucket;
12227 + spin_lock(&sdp->sd_reclaim_lock);
12228 + if (!list_empty(&gl->gl_reclaim)) {
12229 + list_del_init(&gl->gl_reclaim);
12230 + atomic_dec(&sdp->sd_reclaim_count);
12233 + spin_unlock(&sdp->sd_reclaim_lock);
12235 + if (trylock_on_glock(gl)) {
12236 + if (queue_empty(gl, &gl->gl_holders)) {
12237 + if (gl->gl_ops == &gfs_inode_glops) {
12238 + struct gfs_inode *ip = gl2ip(gl);
12239 + if (ip && !atomic_read(&ip->i_count))
12240 + gfs_inode_destroy(ip);
12242 + if (gl->gl_state != LM_ST_UNLOCKED)
12243 + handle_callback(gl, LM_ST_UNLOCKED);
12246 + unlock_on_glock(gl);
12249 + write_lock(&bucket->hb_lock);
12250 + if (atomic_read(&gl->gl_count) == 1) {
12251 + list_del_init(&gl->gl_list);
12252 + write_unlock(&bucket->hb_lock);
12255 + write_unlock(&bucket->hb_lock);
12261 + * gfs_gl_hash_clear - Empty out the glock hash table
12262 + * @sdp: the filesystem
12263 + * @wait: wait until it's all gone
12268 +gfs_gl_hash_clear(struct gfs_sbd *sdp, int wait)
12279 + for (x = 0; x < GFS_GL_HASH_SIZE; x++)
12280 + if (examine_bucket(clear_glock, sdp, &sdp->sd_gl_hash[x]))
12283 + if (!wait || !cont)
12286 + if (time_after_eq(jiffies, t + sdp->sd_tune.gt_stall_secs * HZ)) {
12287 + printk("GFS: fsid=%s: Unmount seems to be stalled. Dumping lock state...\n",
12289 + gfs_dump_lockstate(sdp, NULL);
12293 + invalidate_inodes(sdp->sd_vfs);
12299 + * Diagnostic routines to help debug distributed deadlock
12303 + * dump_holder - print information about a glock holder
12304 + * @str: a string naming the type of holder
12305 + * @gh: the glock holder
12306 + * @buf: the buffer
12307 + * @size: the size of the buffer
12308 + * @count: where we are in the buffer
12310 + * Returns: 0 on success, -ENOBUFS when we run out of space
12314 +dump_holder(char *str, struct gfs_holder *gh,
12315 + char *buf, unsigned int size, unsigned int *count)
12320 + gfs_sprintf(" %s\n", str);
12321 + gfs_sprintf(" owner = %ld\n",
12322 + (gh->gh_owner) ? (long)gh->gh_owner->pid : -1);
12323 + gfs_sprintf(" gh_state = %u\n", gh->gh_state);
12324 + gfs_sprintf(" gh_flags =");
12325 + for (x = 0; x < 32; x++)
12326 + if (gh->gh_flags & (1 << x))
12327 + gfs_sprintf(" %u", x);
12328 + gfs_sprintf(" \n");
12329 + gfs_sprintf(" error = %d\n", gh->gh_error);
12330 + gfs_sprintf(" gh_iflags =");
12331 + for (x = 0; x < 32; x++)
12332 + if (test_bit(x, &gh->gh_iflags))
12333 + gfs_sprintf(" %u", x);
12334 + gfs_sprintf(" \n");
12341 + * dump_inode - print information about an inode
12343 + * @buf: the buffer
12344 + * @size: the size of the buffer
12345 + * @count: where we are in the buffer
12347 + * Returns: 0 on success, -ENOBUFS when we run out of space
12351 +dump_inode(struct gfs_inode *ip,
12352 + char *buf, unsigned int size, unsigned int *count)
12357 + gfs_sprintf(" Inode:\n");
12358 + gfs_sprintf(" num = %" PRIu64 "/%" PRIu64 "\n",
12359 + ip->i_num.no_formal_ino, ip->i_num.no_addr);
12360 + gfs_sprintf(" type = %u\n", ip->i_di.di_type);
12361 + gfs_sprintf(" i_count = %d\n", atomic_read(&ip->i_count));
12362 + gfs_sprintf(" i_flags =");
12363 + for (x = 0; x < 32; x++)
12364 + if (test_bit(x, &ip->i_flags))
12365 + gfs_sprintf(" %u", x);
12366 + gfs_sprintf(" \n");
12367 + gfs_sprintf(" vnode = %s\n", (ip->i_vnode) ? "yes" : "no");
12374 + * dump_glock - print information about a glock
12376 + * @buf: the buffer
12377 + * @size: the size of the buffer
12378 + * @count: where we are in the buffer
12380 + * Returns: 0 on success, -ENOBUFS when we run out of space
12384 +dump_glock(struct gfs_glock *gl,
12385 + char *buf, unsigned int size, unsigned int *count)
12387 + struct list_head *head, *tmp;
12388 + struct gfs_holder *gh;
12392 + spin_lock(&gl->gl_spin);
12394 + gfs_sprintf("Glock (%u, %" PRIu64 ")\n",
12395 + gl->gl_name.ln_type,
12396 + gl->gl_name.ln_number);
12397 + gfs_sprintf(" gl_flags =");
12398 + for (x = 0; x < 32; x++)
12399 + if (test_bit(x, &gl->gl_flags))
12400 + gfs_sprintf(" %u", x);
12401 + gfs_sprintf(" \n");
12402 + gfs_sprintf(" gl_count = %d\n", atomic_read(&gl->gl_count));
12403 + gfs_sprintf(" gl_state = %u\n", gl->gl_state);
12404 + gfs_sprintf(" lvb_count = %d\n", atomic_read(&gl->gl_lvb_count));
12405 + gfs_sprintf(" object = %s\n", (gl->gl_object) ? "yes" : "no");
12406 + if (gl->gl_aspace)
12407 + gfs_sprintf(" aspace = %lu\n",
12408 + gl->gl_aspace->i_mapping->nrpages);
12410 + gfs_sprintf(" aspace = no\n");
12411 + gfs_sprintf(" reclaim = %s\n",
12412 + (list_empty(&gl->gl_reclaim)) ? "no" : "yes");
12413 + if (gl->gl_req_gh) {
12414 + error = dump_holder("Request", gl->gl_req_gh, buf, size, count);
12418 + for (head = &gl->gl_holders, tmp = head->next;
12420 + tmp = tmp->next) {
12421 + gh = list_entry(tmp, struct gfs_holder, gh_list);
12422 + error = dump_holder("Holder", gh, buf, size, count);
12426 + for (head = &gl->gl_waiters1, tmp = head->next;
12428 + tmp = tmp->next) {
12429 + gh = list_entry(tmp, struct gfs_holder, gh_list);
12430 + error = dump_holder("Waiter1", gh, buf, size, count);
12434 + for (head = &gl->gl_waiters2, tmp = head->next;
12436 + tmp = tmp->next) {
12437 + gh = list_entry(tmp, struct gfs_holder, gh_list);
12438 + error = dump_holder("Waiter2", gh, buf, size, count);
12442 + if (gl->gl_ops == &gfs_inode_glops && gl2ip(gl)) {
12443 + if (!test_bit(GLF_LOCK, &gl->gl_flags) &&
12444 + list_empty(&gl->gl_holders)) {
12445 + error = dump_inode(gl2ip(gl), buf, size, count);
12449 + gfs_sprintf(" Inode: busy\n");
12453 + spin_unlock(&gl->gl_spin);
12459 + * gfs_dump_lockstate - print out the current lockstate
12460 + * @sdp: the filesystem
12461 + * @ub: the buffer to copy the information into
12463 + * If @ub is NULL, dump the lockstate to the console.
12468 +gfs_dump_lockstate(struct gfs_sbd *sdp, struct gfs_user_buffer *ub)
12470 + struct gfs_gl_hash_bucket *bucket;
12471 + struct list_head *tmp, *head;
12472 + struct gfs_glock *gl;
12473 + char *buf = NULL;
12474 + unsigned int size = sdp->sd_tune.gt_lockdump_size;
12475 + unsigned int x, count;
12479 + buf = kmalloc(size, GFP_KERNEL);
12484 + for (x = 0; x < GFS_GL_HASH_SIZE; x++) {
12485 + bucket = &sdp->sd_gl_hash[x];
12488 + read_lock(&bucket->hb_lock);
12490 + for (head = &bucket->hb_list, tmp = head->next;
12492 + tmp = tmp->next) {
12493 + gl = list_entry(tmp, struct gfs_glock, gl_list);
12495 + if (test_bit(GLF_PLUG, &gl->gl_flags))
12498 + error = dump_glock(gl, buf, size, &count);
12503 + read_unlock(&bucket->hb_lock);
12509 + if (ub->ub_count + count > ub->ub_size) {
12513 + if (copy_to_user(ub->ub_data + ub->ub_count, buf, count)) {
12517 + ub->ub_count += count;
12526 diff -urN linux-orig/fs/gfs/glock.h linux-patched/fs/gfs/glock.h
12527 --- linux-orig/fs/gfs/glock.h 1969-12-31 18:00:00.000000000 -0600
12528 +++ linux-patched/fs/gfs/glock.h 2004-06-30 13:27:49.342711362 -0500
12530 +/******************************************************************************
12531 +*******************************************************************************
12533 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
12534 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
12536 +** This copyrighted material is made available to anyone wishing to use,
12537 +** modify, copy, or redistribute it subject to the terms and conditions
12538 +** of the GNU General Public License v.2.
12540 +*******************************************************************************
12541 +******************************************************************************/
12543 +#ifndef __GFS_GLOCK_DOT_H__
12544 +#define __GFS_GLOCK_DOT_H__
12547 +#define LM_FLAG_TRY (0x00000001)
12548 +#define LM_FLAG_TRY_1CB (0x00000002)
12549 +#define LM_FLAG_NOEXP (0x00000004)
12550 +#define LM_FLAG_ANY (0x00000008)
12551 +#define LM_FLAG_PRIORITY (0x00000010)
12553 +#define GL_LOCAL_EXCL (0x00000020)
12554 +#define GL_ASYNC (0x00000040)
12555 +#define GL_EXACT (0x00000080)
12556 +#define GL_SKIP (0x00000100)
12557 +#define GL_ATIME (0x00000200)
12558 +#define GL_NOCACHE (0x00000400)
12559 +#define GL_SYNC (0x00000800)
12561 +#define GLR_TRYFAILED (13)
12562 +#define GLR_CANCELED (14)
12564 +static __inline__ int
12565 +gfs_glock_is_locked_by_me(struct gfs_glock *gl)
12567 + struct list_head *tmp, *head;
12568 + struct gfs_holder *gh;
12569 + int locked = FALSE;
12571 + spin_lock(&gl->gl_spin);
12572 + for (head = &gl->gl_holders, tmp = head->next;
12574 + tmp = tmp->next) {
12575 + gh = list_entry(tmp, struct gfs_holder, gh_list);
12576 + if (gh->gh_owner == current) {
12581 + spin_unlock(&gl->gl_spin);
12585 +static __inline__ int
12586 +gfs_glock_is_held_excl(struct gfs_glock *gl)
12588 + return (gl->gl_state == LM_ST_EXCLUSIVE);
12590 +static __inline__ int
12591 +gfs_glock_is_held_dfrd(struct gfs_glock *gl)
12593 + return (gl->gl_state == LM_ST_DEFERRED);
12595 +static __inline__ int
12596 +gfs_glock_is_held_shrd(struct gfs_glock *gl)
12598 + return (gl->gl_state == LM_ST_SHARED);
12601 +#define GFS_ASYNC_LM(sdp) ((sdp)->sd_lockstruct.ls_flags & LM_LSFLAG_ASYNC)
12603 +struct gfs_glock *gfs_glock_find(struct gfs_sbd *sdp,
12604 + struct lm_lockname *name);
12605 +int gfs_glock_get(struct gfs_sbd *sdp,
12606 + uint64_t number, struct gfs_glock_operations *glops,
12607 + int create, struct gfs_glock **glp);
12608 +void gfs_glock_hold(struct gfs_glock *gl);
12609 +void gfs_glock_put(struct gfs_glock *gl);
12611 +void gfs_holder_init(struct gfs_glock *gl, unsigned int state, int flags,
12612 + struct gfs_holder *gh);
12613 +void gfs_holder_reinit(unsigned int state, int flags, struct gfs_holder *gh);
12614 +void gfs_holder_uninit(struct gfs_holder *gh);
12615 +struct gfs_holder *gfs_holder_get(struct gfs_glock *gl, unsigned int state,
12617 +void gfs_holder_put(struct gfs_holder *gh);
12619 +void gfs_glock_xmote_th(struct gfs_glock *gl, unsigned int state, int flags);
12620 +void gfs_glock_drop_th(struct gfs_glock *gl);
12622 +int gfs_glock_nq(struct gfs_holder *gh);
12623 +int gfs_glock_poll(struct gfs_holder *gh);
12624 +int gfs_glock_wait(struct gfs_holder *gh);
12625 +void gfs_glock_dq(struct gfs_holder *gh);
12627 +void gfs_glock_prefetch(struct gfs_glock *gl, unsigned int state, int flags);
12628 +void gfs_glock_force_drop(struct gfs_glock *gl);
12630 +int gfs_glock_nq_init(struct gfs_glock *gl, unsigned int state, int flags,
12631 + struct gfs_holder *gh);
12632 +void gfs_glock_dq_uninit(struct gfs_holder *gh);
12633 +int gfs_glock_nq_num(struct gfs_sbd *sdp,
12634 + uint64_t number, struct gfs_glock_operations *glops,
12635 + unsigned int state, int flags, struct gfs_holder *gh);
12637 +int gfs_glock_nq_m(unsigned int num_gh, struct gfs_holder *ghs);
12638 +void gfs_glock_dq_m(unsigned int num_gh, struct gfs_holder *ghs);
12640 +void gfs_glock_prefetch_num(struct gfs_sbd *sdp,
12641 + uint64_t number, struct gfs_glock_operations *glops,
12642 + unsigned int state, int flags);
12644 +/* Lock Value Block functions */
12646 +int gfs_lvb_hold(struct gfs_glock *gl);
12647 +void gfs_lvb_unhold(struct gfs_glock *gl);
12648 +void gfs_lvb_sync(struct gfs_glock *gl);
12650 +void gfs_glock_cb(lm_fsdata_t * fsdata, unsigned int type, void *data);
12652 +void gfs_try_toss_inode(struct gfs_sbd *sdp, struct gfs_inum *inum);
12653 +void gfs_iopen_go_callback(struct gfs_glock *gl, unsigned int state);
12655 +void gfs_glock_schedule_for_reclaim(struct gfs_glock *gl);
12656 +void gfs_reclaim_glock(struct gfs_sbd *sdp);
12658 +void gfs_scand_internal(struct gfs_sbd *sdp);
12659 +void gfs_gl_hash_clear(struct gfs_sbd *sdp, int wait);
12661 +int gfs_dump_lockstate(struct gfs_sbd *sdp, struct gfs_user_buffer *ub);
12663 +#endif /* __GFS_GLOCK_DOT_H__ */
12664 diff -urN linux-orig/fs/gfs/glops.c linux-patched/fs/gfs/glops.c
12665 --- linux-orig/fs/gfs/glops.c 1969-12-31 18:00:00.000000000 -0600
12666 +++ linux-patched/fs/gfs/glops.c 2004-06-30 13:27:49.342711362 -0500
12668 +/******************************************************************************
12669 +*******************************************************************************
12671 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
12672 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
12674 +** This copyrighted material is made available to anyone wishing to use,
12675 +** modify, copy, or redistribute it subject to the terms and conditions
12676 +** of the GNU General Public License v.2.
12678 +*******************************************************************************
12679 +******************************************************************************/
12681 +#include <linux/sched.h>
12682 +#include <linux/slab.h>
12683 +#include <linux/smp_lock.h>
12684 +#include <linux/spinlock.h>
12685 +#include <asm/semaphore.h>
12686 +#include <linux/completion.h>
12687 +#include <linux/buffer_head.h>
12691 +#include "glock.h"
12692 +#include "glops.h"
12693 +#include "inode.h"
12696 +#include "recovery.h"
12700 + * meta_go_sync - sync out the metadata for this glock
12707 +meta_go_sync(struct gfs_glock *gl, int flags)
12709 + if (!(flags & DIO_METADATA))
12712 + if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
12713 + gfs_log_flush_glock(gl);
12714 + gfs_sync_buf(gl, flags | DIO_START | DIO_WAIT | DIO_CHECK);
12717 + clear_bit(GLF_DIRTY, &gl->gl_flags);
12718 + clear_bit(GLF_SYNC, &gl->gl_flags);
12722 + * meta_go_inval - invalidate the metadata for this glock
12729 +meta_go_inval(struct gfs_glock *gl, int flags)
12731 + if (!(flags & DIO_METADATA))
12734 + gfs_inval_buf(gl);
12739 + * meta_go_demote_ok - check to see if it's ok to unlock a glock
12742 + * Returns: TRUE if it's ok
12746 +meta_go_demote_ok(struct gfs_glock *gl)
12748 + return (gl->gl_aspace->i_mapping->nrpages) ? FALSE : TRUE;
12752 + * inode_go_xmote_th - promote/demote a glock
12754 + * @state: the requested state
12755 + * @flags: the flags passed into gfs_glock()
12760 +inode_go_xmote_th(struct gfs_glock *gl, unsigned int state, int flags)
12762 + if (gl->gl_state != LM_ST_UNLOCKED)
12763 + gfs_inval_pte(gl);
12764 + gfs_glock_xmote_th(gl, state, flags);
12768 + * inode_go_xmote_bh - promote/demote a glock
12771 + * This will be really broken when (no_formal_ino != no_addr)
12776 +inode_go_xmote_bh(struct gfs_glock *gl)
12778 + struct gfs_sbd *sdp = gl->gl_sbd;
12779 + struct gfs_holder *gh = gl->gl_req_gh;
12780 + struct buffer_head *bh;
12783 + if (gl->gl_state != LM_ST_UNLOCKED &&
12784 + (!gh || !(gh->gh_flags & GL_SKIP))) {
12785 + error = gfs_dread(sdp, gl->gl_name.ln_number, gl, DIO_START, &bh);
12792 + * inode_go_drop_th - unlock a glock
12798 +inode_go_drop_th(struct gfs_glock *gl)
12800 + gfs_inval_pte(gl);
12801 + gfs_glock_drop_th(gl);
12805 + * inode_go_sync - Sync the dirty data for an inode glock
12812 +inode_go_sync(struct gfs_glock *gl, int flags)
12814 + int meta = (flags & DIO_METADATA);
12815 + int data = (flags & DIO_DATA);
12817 + if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
12818 + if (meta && data) {
12819 + gfs_sync_page(gl, flags | DIO_START);
12820 + gfs_log_flush_glock(gl);
12821 + gfs_sync_buf(gl, flags | DIO_START | DIO_WAIT | DIO_CHECK);
12822 + gfs_sync_page(gl, flags | DIO_WAIT | DIO_CHECK);
12823 + } else if (meta) {
12824 + gfs_log_flush_glock(gl);
12825 + gfs_sync_buf(gl, flags | DIO_START | DIO_WAIT | DIO_CHECK);
12827 + gfs_sync_page(gl, flags | DIO_START | DIO_WAIT | DIO_CHECK);
12830 + if (meta && data) {
12831 + if (!(flags & DIO_INVISIBLE))
12832 + clear_bit(GLF_DIRTY, &gl->gl_flags);
12833 + clear_bit(GLF_SYNC, &gl->gl_flags);
12838 + * inode_go_inval - prepare an inode glock to be released
12845 +inode_go_inval(struct gfs_glock *gl, int flags)
12847 + int meta = (flags & DIO_METADATA);
12848 + int data = (flags & DIO_DATA);
12851 + gfs_inval_buf(gl);
12855 + gfs_inval_page(gl);
12859 + * inode_go_demote_ok - check to see if it's ok to unlock a glock
12862 + * Returns: TRUE if it's ok
12866 +inode_go_demote_ok(struct gfs_glock *gl)
12868 + struct gfs_sbd *sdp = gl->gl_sbd;
12869 + int demote = FALSE;
12871 + if (!gl2ip(gl) && !gl->gl_aspace->i_mapping->nrpages)
12873 + else if (!sdp->sd_args.ar_localcaching &&
12874 + time_after_eq(jiffies, gl->gl_stamp + sdp->sd_tune.gt_demote_secs * HZ))
12881 + * inode_go_lock - operation done after an inode lock is locked by a process
12883 + * @flags: the flags passed into gfs_glock()
12885 + * Returns: 0 on success, -EXXX on failure
12889 +inode_go_lock(struct gfs_glock *gl, int flags)
12891 + struct gfs_inode *ip = gl2ip(gl);
12894 + if (ip && ip->i_vn != gl->gl_vn) {
12895 + error = gfs_copyin_dinode(ip);
12897 + gfs_inode_attr_in(ip);
12904 + * inode_go_unlock - operation done before an inode lock is unlocked by a process
12906 + * @flags: the flags passed into gfs_gunlock()
12911 +inode_go_unlock(struct gfs_glock *gl, int flags)
12913 + struct gfs_inode *ip = gl2ip(gl);
12915 + if (ip && test_bit(GLF_DIRTY, &gl->gl_flags))
12916 + gfs_inode_attr_in(ip);
12919 + gfs_flush_meta_cache(ip);
12923 + * rgrp_go_xmote_th - promote/demote a glock
12925 + * @state: the requested state
12926 + * @flags: the flags passed into gfs_glock()
12931 +rgrp_go_xmote_th(struct gfs_glock *gl, unsigned int state, int flags)
12933 + struct gfs_rgrpd *rgd = gl2rgd(gl);
12935 + GFS_ASSERT_GLOCK(rgd && gl->gl_lvb, gl,);
12937 + gfs_mhc_zap(rgd);
12938 + gfs_depend_sync(rgd);
12939 + gfs_glock_xmote_th(gl, state, flags);
12943 + * rgrp_go_drop_th - unlock a glock
12949 +rgrp_go_drop_th(struct gfs_glock *gl)
12951 + struct gfs_rgrpd *rgd = gl2rgd(gl);
12953 + GFS_ASSERT_GLOCK(rgd && gl->gl_lvb, gl,);
12955 + gfs_mhc_zap(rgd);
12956 + gfs_depend_sync(rgd);
12957 + gfs_glock_drop_th(gl);
12961 + * rgrp_go_demote_ok - check to see if it's ok to unlock a glock
12964 + * Returns: TRUE if it's ok
12968 +rgrp_go_demote_ok(struct gfs_glock *gl)
12970 + struct gfs_rgrpd *rgd = gl2rgd(gl);
12971 + int demote = TRUE;
12973 + if (gl->gl_aspace->i_mapping->nrpages)
12975 + else if (rgd && !list_empty(&rgd->rd_mhc)) /* Don't bother with lock here */
12982 + * rgrp_go_lock - operation done after an rgrp lock is locked by a process
12984 + * @flags: the flags passed into gfs_glock()
12986 + * Returns: 0 on success, -EXXX on failure
12990 +rgrp_go_lock(struct gfs_glock *gl, int flags)
12992 + struct gfs_rgrpd *rgd = gl2rgd(gl);
12995 + GFS_ASSERT_GLOCK(rgd && gl->gl_lvb, gl,);
12997 + if (!(flags & GL_SKIP))
12998 + error = gfs_rgrp_read(rgd);
13004 + * rgrp_go_unlock - operation done before an rgrp lock is unlocked by a process
13006 + * @flags: the flags passed into gfs_gunlock()
13011 +rgrp_go_unlock(struct gfs_glock *gl, int flags)
13013 + struct gfs_rgrpd *rgd = gl2rgd(gl);
13015 + GFS_ASSERT_GLOCK(rgd && gl->gl_lvb, gl,);
13017 + if (!(flags & GL_SKIP)) {
13018 + gfs_rgrp_relse(rgd);
13019 + if (test_bit(GLF_DIRTY, &gl->gl_flags))
13020 + gfs_rgrp_lvb_fill(rgd);
13025 + * trans_go_xmote_th - promote/demote a metadata glock
13027 + * @state: the requested state
13028 + * @flags: the flags passed into gfs_glock()
13033 +trans_go_xmote_th(struct gfs_glock *gl, unsigned int state, int flags)
13035 + struct gfs_sbd *sdp = gl->gl_sbd;
13038 + if (gl->gl_state != LM_ST_UNLOCKED &&
13039 + test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
13040 + gfs_sync_meta(sdp);
13042 + error = gfs_log_shutdown(sdp);
13044 + gfs_io_error(sdp);
13047 + gfs_glock_xmote_th(gl, state, flags);
13051 + * trans_go_xmote_bh - promote/demote a metadata glock
13057 +trans_go_xmote_bh(struct gfs_glock *gl)
13059 + struct gfs_sbd *sdp = gl->gl_sbd;
13060 + struct gfs_glock *j_gl = sdp->sd_journal_gh.gh_gl;
13061 + struct gfs_log_header head;
13064 + if (gl->gl_state != LM_ST_UNLOCKED &&
13065 + test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
13066 + j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA);
13068 + error = gfs_find_jhead(sdp, &sdp->sd_jdesc, j_gl, &head);
13069 + GFS_ASSERT_SBD(!error, sdp,); /* FixMe!!! */
13070 + GFS_ASSERT_SBD(head.lh_flags & GFS_LOG_HEAD_UNMOUNT, sdp,);
13072 + /* Initialize some head of the log stuff */
13073 + sdp->sd_sequence = head.lh_sequence;
13074 + sdp->sd_log_head = head.lh_first + 1;
13079 + * trans_go_drop_th - prepare the transaction glock to be released
13082 + * We want to sync the device even with localcaching. Remember
13083 + * that localcaching journal replay only marks buffers dirty.
13087 +trans_go_drop_th(struct gfs_glock *gl)
13089 + struct gfs_sbd *sdp = gl->gl_sbd;
13092 + if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
13093 + gfs_sync_meta(sdp);
13095 + error = gfs_log_shutdown(sdp);
13097 + gfs_io_error(sdp);
13100 + gfs_glock_drop_th(gl);
13104 + * nondisk_go_demote_ok - check to see if it's ok to unlock a glock
13107 + * Returns: TRUE if it's ok
13111 +nondisk_go_demote_ok(struct gfs_glock *gl)
13117 + * quota_go_demote_ok - check to see if it's ok to unlock a glock
13120 + * Returns: TRUE if it's ok
13124 +quota_go_demote_ok(struct gfs_glock *gl)
13126 + return !atomic_read(&gl->gl_lvb_count);
13129 +struct gfs_glock_operations gfs_meta_glops = {
13130 + .go_xmote_th = gfs_glock_xmote_th,
13131 + .go_drop_th = gfs_glock_drop_th,
13132 + .go_sync = meta_go_sync,
13133 + .go_inval = meta_go_inval,
13134 + .go_demote_ok = meta_go_demote_ok,
13135 + .go_type = LM_TYPE_META
13138 +struct gfs_glock_operations gfs_inode_glops = {
13139 + .go_xmote_th = inode_go_xmote_th,
13140 + .go_xmote_bh = inode_go_xmote_bh,
13141 + .go_drop_th = inode_go_drop_th,
13142 + .go_sync = inode_go_sync,
13143 + .go_inval = inode_go_inval,
13144 + .go_demote_ok = inode_go_demote_ok,
13145 + .go_lock = inode_go_lock,
13146 + .go_unlock = inode_go_unlock,
13147 + .go_type = LM_TYPE_INODE
13150 +struct gfs_glock_operations gfs_rgrp_glops = {
13151 + .go_xmote_th = rgrp_go_xmote_th,
13152 + .go_drop_th = rgrp_go_drop_th,
13153 + .go_sync = meta_go_sync,
13154 + .go_inval = meta_go_inval,
13155 + .go_demote_ok = rgrp_go_demote_ok,
13156 + .go_lock = rgrp_go_lock,
13157 + .go_unlock = rgrp_go_unlock,
13158 + .go_type = LM_TYPE_RGRP
13161 +struct gfs_glock_operations gfs_trans_glops = {
13162 + .go_xmote_th = trans_go_xmote_th,
13163 + .go_xmote_bh = trans_go_xmote_bh,
13164 + .go_drop_th = trans_go_drop_th,
13165 + .go_type = LM_TYPE_NONDISK
13168 +struct gfs_glock_operations gfs_iopen_glops = {
13169 + .go_xmote_th = gfs_glock_xmote_th,
13170 + .go_drop_th = gfs_glock_drop_th,
13171 + .go_callback = gfs_iopen_go_callback,
13172 + .go_type = LM_TYPE_IOPEN
13175 +struct gfs_glock_operations gfs_flock_glops = {
13176 + .go_xmote_th = gfs_glock_xmote_th,
13177 + .go_drop_th = gfs_glock_drop_th,
13178 + .go_type = LM_TYPE_FLOCK
13181 +struct gfs_glock_operations gfs_nondisk_glops = {
13182 + .go_xmote_th = gfs_glock_xmote_th,
13183 + .go_drop_th = gfs_glock_drop_th,
13184 + .go_demote_ok = nondisk_go_demote_ok,
13185 + .go_type = LM_TYPE_NONDISK
13188 +struct gfs_glock_operations gfs_quota_glops = {
13189 + .go_xmote_th = gfs_glock_xmote_th,
13190 + .go_drop_th = gfs_glock_drop_th,
13191 + .go_demote_ok = quota_go_demote_ok,
13192 + .go_type = LM_TYPE_QUOTA
13194 diff -urN linux-orig/fs/gfs/glops.h linux-patched/fs/gfs/glops.h
13195 --- linux-orig/fs/gfs/glops.h 1969-12-31 18:00:00.000000000 -0600
13196 +++ linux-patched/fs/gfs/glops.h 2004-06-30 13:27:49.343711130 -0500
13198 +/******************************************************************************
13199 +*******************************************************************************
13201 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
13202 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
13204 +** This copyrighted material is made available to anyone wishing to use,
13205 +** modify, copy, or redistribute it subject to the terms and conditions
13206 +** of the GNU General Public License v.2.
13208 +*******************************************************************************
13209 +******************************************************************************/
13211 +#ifndef __GLOPS_DOT_H__
13212 +#define __GLOPS_DOT_H__
13214 +extern struct gfs_glock_operations gfs_meta_glops;
13215 +extern struct gfs_glock_operations gfs_inode_glops;
13216 +extern struct gfs_glock_operations gfs_rgrp_glops;
13217 +extern struct gfs_glock_operations gfs_trans_glops;
13218 +extern struct gfs_glock_operations gfs_iopen_glops;
13219 +extern struct gfs_glock_operations gfs_flock_glops;
13220 +extern struct gfs_glock_operations gfs_nondisk_glops;
13221 +extern struct gfs_glock_operations gfs_quota_glops;
13223 +#endif /* __GLOPS_DOT_H__ */
13224 diff -urN linux-orig/fs/gfs/incore.h linux-patched/fs/gfs/incore.h
13225 --- linux-orig/fs/gfs/incore.h 1969-12-31 18:00:00.000000000 -0600
13226 +++ linux-patched/fs/gfs/incore.h 2004-06-30 13:27:49.343711130 -0500
13228 +/******************************************************************************
13229 +*******************************************************************************
13231 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
13232 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
13234 +** This copyrighted material is made available to anyone wishing to use,
13235 +** modify, copy, or redistribute it subject to the terms and conditions
13236 +** of the GNU General Public License v.2.
13238 +*******************************************************************************
13239 +******************************************************************************/
13241 +#ifndef __INCORE_DOT_H__
13242 +#define __INCORE_DOT_H__
13244 +#define DIO_NEW (0x00000001)
13245 +#define DIO_FORCE (0x00000002)
13246 +#define DIO_CLEAN (0x00000004)
13247 +#define DIO_DIRTY (0x00000008)
13248 +#define DIO_START (0x00000010)
13249 +#define DIO_WAIT (0x00000020)
13250 +#define DIO_METADATA (0x00000040)
13251 +#define DIO_DATA (0x00000080)
13252 +#define DIO_INVISIBLE (0x00000100)
13253 +#define DIO_CHECK (0x00000200)
13254 +#define DIO_ALL (0x00000400)
13256 +/* Structure prototypes */
13258 +struct gfs_log_operations;
13259 +struct gfs_log_element;
13260 +struct gfs_meta_header_cache;
13261 +struct gfs_depend;
13262 +struct gfs_bitmap;
13264 +struct gfs_bufdata;
13265 +struct gfs_glock_operations;
13266 +struct gfs_holder;
13271 +struct gfs_unlinked;
13272 +struct gfs_quota_le;
13273 +struct gfs_quota_data;
13274 +struct gfs_log_buf;
13276 +struct gfs_gl_hash_bucket;
13279 +typedef void (*gfs_glop_bh_t) (struct gfs_glock * gl, unsigned int ret);
13282 + * Structure of operations that are associated with each
13283 + * type of element in the log.
13286 +struct gfs_log_operations {
13287 + /* Operations specific to a given log element */
13289 + void (*lo_add) (struct gfs_sbd * sdp, struct gfs_log_element * le);
13290 + void (*lo_trans_end) (struct gfs_sbd * sdp,
13291 + struct gfs_log_element * le);
13292 + void (*lo_print) (struct gfs_sbd * sdp, struct gfs_log_element * le,
13293 + unsigned int where);
13294 + struct gfs_trans *(*lo_overlap_trans) (struct gfs_sbd * sdp,
13295 + struct gfs_log_element * le);
13296 + void (*lo_incore_commit) (struct gfs_sbd * sdp, struct gfs_trans * tr,
13297 + struct gfs_log_element * le);
13298 + void (*lo_add_to_ail) (struct gfs_sbd * sdp,
13299 + struct gfs_log_element * le);
13300 + void (*lo_clean_dump) (struct gfs_sbd * sdp,
13301 + struct gfs_log_element * le);
13303 + /* Operations specific to a class of log elements */
13305 + void (*lo_trans_size) (struct gfs_sbd * sdp, struct gfs_trans * tr,
13306 + unsigned int *mblks, unsigned int *eblks,
13307 + unsigned int *blocks, unsigned int *bmem);
13308 + void (*lo_trans_combine) (struct gfs_sbd * sdp, struct gfs_trans * tr,
13309 + struct gfs_trans * new_tr);
13310 + void (*lo_build_bhlist) (struct gfs_sbd * sdp, struct gfs_trans * tr);
13311 + void (*lo_dump_size) (struct gfs_sbd * sdp, unsigned int *elements,
13312 + unsigned int *blocks, unsigned int *bmem);
13313 + void (*lo_build_dump) (struct gfs_sbd * sdp, struct gfs_trans * tr);
13315 + /* Operations that happen at recovery time */
13317 + void (*lo_before_scan) (struct gfs_sbd * sdp, unsigned int jid,
13318 + struct gfs_log_header * head,
13319 + unsigned int pass);
13320 + int (*lo_scan_elements) (struct gfs_sbd * sdp,
13321 + struct gfs_jindex * jdesc,
13322 + struct gfs_glock * gl, uint64_t start,
13323 + struct gfs_log_descriptor * desc,
13324 + unsigned int pass);
13325 + void (*lo_after_scan) (struct gfs_sbd * sdp, unsigned int jid,
13326 + unsigned int pass);
13332 + * Structure that gets added to struct gfs_trans->tr_elements. They
13333 + * make up the "stuff" in each transaction.
13336 +struct gfs_log_element {
13337 + struct gfs_log_operations *le_ops;
13339 + struct gfs_trans *le_trans;
13340 + struct list_head le_list;
13343 +struct gfs_meta_header_cache {
13344 + struct list_head mc_list_hash;
13345 + struct list_head mc_list_single;
13346 + struct list_head mc_list_rgd;
13348 + uint64_t mc_block;
13349 + struct gfs_meta_header mc_mh;
13352 +struct gfs_depend {
13353 + struct list_head gd_list_hash;
13354 + struct list_head gd_list_rgd;
13356 + struct gfs_rgrpd *gd_rgd;
13357 + uint64_t gd_formal_ino;
13358 + unsigned long gd_time;
13362 + * Structure containing information about the allocation bitmaps.
13363 + * There are one of these for each fs block that the bitmap for
13364 + * the resource group header covers.
13367 +struct gfs_bitmap {
13368 + uint32_t bi_offset; /* The offset in the buffer of the first byte */
13369 + uint32_t bi_start; /* The position of the first byte in this block */
13370 + uint32_t bi_len; /* The number of bytes in this block */
13374 + * Structure containing information about Resource Groups
13377 +struct gfs_rgrpd {
13378 + struct list_head rd_list; /* Link with superblock */
13379 + struct list_head rd_list_mru;
13380 + struct list_head rd_recent; /* Recently used rgrps */
13382 + struct gfs_glock *rd_gl; /* Glock for rgrp */
13384 + unsigned long rd_flags;
13386 + struct gfs_rindex rd_ri; /* Resource Index structure */
13387 + struct gfs_rgrp rd_rg; /* Resource Group structure */
13388 + uint64_t rd_rg_vn;
13390 + struct gfs_bitmap *rd_bits;
13391 + struct buffer_head **rd_bh;
13393 + uint32_t rd_last_alloc_data;
13394 + uint32_t rd_last_alloc_meta;
13396 + struct list_head rd_mhc;
13397 + struct list_head rd_depend;
13399 + struct gfs_sbd *rd_sbd;
13403 + * Per-buffer data
13406 +struct gfs_bufdata {
13407 + struct buffer_head *bd_bh; /* struct buffer_head which this struct belongs to */
13408 + struct gfs_glock *bd_gl; /* Pointer to Glock struct for this bh */
13410 + struct gfs_log_element bd_new_le;
13411 + struct gfs_log_element bd_incore_le;
13414 + struct semaphore bd_lock;
13416 + unsigned int bd_pinned; /* Pin count */
13417 + struct list_head bd_ail_tr_list; /* List of buffers hanging off tr_ail_bufs */
13418 + struct list_head bd_ail_gl_list; /* List of buffers hanging off gl_ail_bufs */
13422 + * Glock operations
13425 +struct gfs_glock_operations {
13426 + void (*go_xmote_th) (struct gfs_glock * gl, unsigned int state,
13428 + void (*go_xmote_bh) (struct gfs_glock * gl);
13429 + void (*go_drop_th) (struct gfs_glock * gl);
13430 + void (*go_drop_bh) (struct gfs_glock * gl);
13431 + void (*go_sync) (struct gfs_glock * gl, int flags);
13432 + void (*go_inval) (struct gfs_glock * gl, int flags);
13433 + int (*go_demote_ok) (struct gfs_glock * gl);
13434 + int (*go_lock) (struct gfs_glock * gl, int flags);
13435 + void (*go_unlock) (struct gfs_glock * gl, int flags);
13436 + void (*go_callback) (struct gfs_glock * gl, unsigned int state);
13441 +#define HIF_MUTEX (0)
13442 +#define HIF_PROMOTE (1)
13443 +#define HIF_DEMOTE (2)
13446 +#define HIF_ALLOCED (3)
13447 +#define HIF_DEALLOC (4)
13448 +#define HIF_HOLDER (5)
13449 +#define HIF_FIRST (6)
13450 +#define HIF_WAKEUP (7)
13451 +#define HIF_RECURSE (8)
13453 +struct gfs_holder {
13454 + struct list_head gh_list;
13456 + struct gfs_glock *gh_gl;
13457 + struct task_struct *gh_owner;
13458 + unsigned int gh_state;
13462 + unsigned long gh_iflags;
13463 + struct completion gh_wait;
13467 + * Glock Structure
13470 +#define GLF_PLUG (0)
13471 +#define GLF_LOCK (1)
13472 +#define GLF_STICKY (2)
13473 +#define GLF_PREFETCH (3)
13474 +#define GLF_SYNC (4)
13475 +#define GLF_DIRTY (5)
13476 +#define GLF_LVB_INVALID (6)
13478 +struct gfs_glock {
13479 + struct list_head gl_list;
13480 + unsigned long gl_flags;
13481 + struct lm_lockname gl_name;
13482 + atomic_t gl_count;
13484 + spinlock_t gl_spin;
13486 + unsigned int gl_state;
13487 + struct list_head gl_holders;
13488 + struct list_head gl_waiters1; /* HIF_MUTEX */
13489 + struct list_head gl_waiters2; /* HIF_DEMOTE, HIF_PROMOTE */
13491 + struct gfs_glock_operations *gl_ops;
13493 + struct gfs_holder *gl_req_gh;
13494 + gfs_glop_bh_t gl_req_bh;
13496 + lm_lock_t *gl_lock;
13498 + atomic_t gl_lvb_count;
13501 + unsigned long gl_stamp;
13504 + struct gfs_log_element gl_new_le;
13505 + struct gfs_log_element gl_incore_le;
13507 + struct gfs_gl_hash_bucket *gl_bucket;
13508 + struct list_head gl_reclaim;
13510 + struct gfs_sbd *gl_sbd;
13512 + struct inode *gl_aspace;
13513 + struct list_head gl_dirty_buffers;
13514 + struct list_head gl_ail_bufs;
13518 + * In-Place Reservation structure
13521 +struct gfs_alloc {
13522 + /* Quota stuff */
13524 + unsigned int al_qd_num;
13525 + struct gfs_quota_data *al_qd[4];
13526 + struct gfs_holder al_qd_ghs[4];
13528 + /* Filled in by the caller to gfs_inplace_reserve() */
13530 + uint32_t al_requested_di;
13531 + uint32_t al_requested_meta;
13532 + uint32_t al_requested_data;
13534 + /* Filled in by gfs_inplace_reserve() */
13537 + unsigned int al_line;
13538 + struct gfs_holder al_ri_gh;
13539 + struct gfs_holder al_rgd_gh;
13540 + struct gfs_rgrpd *al_rgd;
13541 + uint32_t al_reserved_meta;
13542 + uint32_t al_reserved_data;
13544 + /* Filled in by gfs_blkalloc() */
13546 + uint32_t al_alloced_di;
13547 + uint32_t al_alloced_meta;
13548 + uint32_t al_alloced_data;
13550 + /* Dinode allocation crap */
13552 + struct gfs_unlinked *al_ul;
13556 + * Incore inode structure
13559 +#define GIF_QD_LOCKED (0)
13560 +#define GIF_PAGED (1)
13561 +#define GIF_SW_PAGED (2)
13563 +struct gfs_inode {
13564 + struct gfs_inum i_num;
13566 + atomic_t i_count;
13567 + unsigned long i_flags;
13570 + struct gfs_dinode i_di;
13572 + struct gfs_glock *i_gl;
13573 + struct gfs_sbd *i_sbd;
13574 + struct inode *i_vnode;
13576 + struct gfs_holder i_iopen_gh;
13578 + struct gfs_alloc *i_alloc;
13579 + uint64_t i_last_rg_alloc;
13581 + struct task_struct *i_creat_task;
13582 + pid_t i_creat_pid;
13584 + spinlock_t i_lock;
13585 + struct buffer_head *i_cache[GFS_MAX_META_HEIGHT];
13589 + * GFS per-fd structure
13592 +#define GFF_DID_DIRECT_ALLOC (0)
13595 + unsigned long f_flags;
13597 + struct semaphore f_fl_lock;
13598 + struct gfs_holder f_fl_gh;
13600 + struct gfs_inode *f_inode;
13601 + struct file *f_vfile;
13605 + * Unlinked inode log entry
13608 +#define ULF_NEW_UL (0)
13609 +#define ULF_INCORE_UL (1)
13610 +#define ULF_IC_LIST (2)
13611 +#define ULF_OD_LIST (3)
13612 +#define ULF_LOCK (4)
13614 +struct gfs_unlinked {
13615 + struct list_head ul_list;
13616 + unsigned int ul_count;
13618 + struct gfs_inum ul_inum;
13619 + unsigned long ul_flags;
13621 + struct gfs_log_element ul_new_le;
13622 + struct gfs_log_element ul_incore_le;
13623 + struct gfs_log_element ul_ondisk_le;
13627 + * Quota log element
13630 +struct gfs_quota_le {
13631 + struct gfs_log_element ql_le;
13633 + struct gfs_quota_data *ql_data;
13634 + struct list_head ql_data_list;
13636 + int64_t ql_change;
13639 +#define QDF_USER (0)
13640 +#define QDF_OD_LIST (1)
13641 +#define QDF_LOCK (2)
13643 +struct gfs_quota_data {
13644 + struct list_head qd_list;
13645 + unsigned int qd_count;
13648 + unsigned long qd_flags;
13650 + struct list_head qd_le_list;
13652 + int64_t qd_change_new;
13653 + int64_t qd_change_ic;
13654 + int64_t qd_change_od;
13655 + int64_t qd_change_sync;
13657 + struct gfs_quota_le qd_ondisk_ql;
13658 + uint64_t qd_sync_gen;
13660 + struct gfs_glock *qd_gl;
13661 + struct gfs_quota_lvb qd_qb;
13663 + unsigned long qd_last_warn;
13666 +struct gfs_log_buf {
13667 + struct list_head lb_list;
13669 + struct buffer_head lb_bh;
13670 + struct buffer_head *lb_unlock;
13674 + * Transaction structures
13677 +#define TRF_LOG_DUMP (0x00000001)
13679 +struct gfs_trans {
13680 + struct list_head tr_list;
13682 + /* Initial creation stuff */
13685 + unsigned int tr_line;
13687 + unsigned int tr_mblks_asked; /* Number of log blocks asked to be reserved */
13688 + unsigned int tr_eblks_asked;
13689 + unsigned int tr_seg_reserved; /* Number of segments reserved */
13691 + struct gfs_holder *tr_t_gh;
13693 + /* Stuff filled in during creation */
13695 + unsigned int tr_flags;
13696 + struct list_head tr_elements;
13698 + /* Stuff modified during the commit */
13700 + unsigned int tr_num_free_bufs;
13701 + struct list_head tr_free_bufs;
13702 + unsigned int tr_num_free_bmem;
13703 + struct list_head tr_free_bmem;
13705 + uint64_t tr_log_head; /* The current log head */
13706 + uint64_t tr_first_head; /* First header block */
13708 + struct list_head tr_bufs; /* List of buffers going to the log */
13710 + /* Stuff that's part of the AIL */
13712 + struct list_head tr_ail_bufs;
13714 + /* Private data for different log element types */
13716 + unsigned int tr_num_gl;
13717 + unsigned int tr_num_buf;
13718 + unsigned int tr_num_iul;
13719 + unsigned int tr_num_ida;
13720 + unsigned int tr_num_q;
13724 + * One bucket of the glock hash table.
13727 +struct gfs_gl_hash_bucket {
13728 + rwlock_t hb_lock;
13729 + struct list_head hb_list;
13730 +} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
13733 + * Super Block Data Structure (One per filesystem)
13736 +#define SDF_JOURNAL_LIVE (0)
13737 +#define SDF_SCAND_RUN (1)
13738 +#define SDF_GLOCKD_RUN (2)
13739 +#define SDF_RECOVERD_RUN (3)
13740 +#define SDF_LOGD_RUN (4)
13741 +#define SDF_QUOTAD_RUN (5)
13742 +#define SDF_INODED_RUN (6)
13743 +#define SDF_NOATIME (7)
13744 +#define SDF_ROFS (8)
13745 +#define SDF_NEED_LOG_DUMP (9)
13746 +#define SDF_FOUND_UL_DUMP (10)
13747 +#define SDF_FOUND_Q_DUMP (11)
13748 +#define SDF_IN_LOG_DUMP (12)
13750 +#define GFS_GL_HASH_SHIFT (13)
13751 +#define GFS_GL_HASH_SIZE (1 << GFS_GL_HASH_SHIFT)
13752 +#define GFS_GL_HASH_MASK (GFS_GL_HASH_SIZE - 1)
13754 +#define GFS_MHC_HASH_SHIFT (10)
13755 +#define GFS_MHC_HASH_SIZE (1 << GFS_MHC_HASH_SHIFT)
13756 +#define GFS_MHC_HASH_MASK (GFS_MHC_HASH_SIZE - 1)
13758 +#define GFS_DEPEND_HASH_SHIFT (10)
13759 +#define GFS_DEPEND_HASH_SIZE (1 << GFS_DEPEND_HASH_SHIFT)
13760 +#define GFS_DEPEND_HASH_MASK (GFS_DEPEND_HASH_SIZE - 1)
13763 + struct gfs_sb sd_sb; /* Super Block */
13765 + struct super_block *sd_vfs; /* FS's device independent sb */
13767 + struct gfs_args sd_args;
13768 + unsigned long sd_flags;
13770 + struct gfs_tune sd_tune; /* FS tuning structure */
13772 + /* Resource group stuff */
13774 + struct gfs_inode *sd_riinode; /* rindex inode */
13775 + uint64_t sd_riinode_vn; /* Version number of the resource index inode */
13777 + struct list_head sd_rglist; /* List of resource groups */
13778 + struct semaphore sd_rindex_lock;
13780 + struct list_head sd_rg_mru_list; /* List of resource groups in MRU order */
13781 + spinlock_t sd_rg_mru_lock; /* Lock for MRU list */
13782 + struct list_head sd_rg_recent; /* Recently used rgrps */
13783 + spinlock_t sd_rg_recent_lock;
13784 + struct gfs_rgrpd *sd_rg_forward; /* Next new rgrp to try for allocation */
13785 + spinlock_t sd_rg_forward_lock;
13787 + unsigned int sd_rgcount; /* Count of resource groups */
13789 + /* Constants computed on mount */
13791 + uint32_t sd_fsb2bb;
13792 + uint32_t sd_fsb2bb_shift; /* Shift FS Block numbers to the left by
13793 + this to get buffer cache blocks */
13794 + uint32_t sd_diptrs; /* Number of pointers in a dinode */
13795 + uint32_t sd_inptrs; /* Number of pointers in an indirect block */
13796 + uint32_t sd_jbsize; /* Size of a journaled data block */
13797 + uint32_t sd_hash_bsize; /* sizeof(exhash block) */
13798 + uint32_t sd_hash_bsize_shift;
13799 + uint32_t sd_hash_ptrs; /* Number of pointers in a hash block */
13800 + uint32_t sd_max_dirres; /* Maximum space needed to add a directory entry */
13801 + uint32_t sd_max_height; /* Maximum height of a file's metadata tree */
13802 + uint64_t sd_heightsize[GFS_MAX_META_HEIGHT];
13803 + uint32_t sd_max_jheight; /* Maximum height of a journaled file's metadata tree */
13804 + uint64_t sd_jheightsize[GFS_MAX_META_HEIGHT];
13808 + struct gfs_gl_hash_bucket sd_gl_hash[GFS_GL_HASH_SIZE];
13810 + struct list_head sd_reclaim_list;
13811 + spinlock_t sd_reclaim_lock;
13812 + wait_queue_head_t sd_reclaim_wchan;
13813 + atomic_t sd_reclaim_count;
13815 + struct lm_lockstruct sd_lockstruct;
13817 + struct list_head sd_mhc[GFS_MHC_HASH_SIZE];
13818 + struct list_head sd_mhc_single;
13819 + spinlock_t sd_mhc_lock;
13820 + atomic_t sd_mhc_count;
13822 + struct list_head sd_depend[GFS_DEPEND_HASH_SIZE];
13823 + spinlock_t sd_depend_lock;
13824 + atomic_t sd_depend_count;
13826 + struct gfs_holder sd_live_gh;
13828 + struct gfs_holder sd_freeze_gh;
13829 + struct semaphore sd_freeze_lock;
13830 + unsigned int sd_freeze_count;
13832 + /* Inode Stuff */
13834 + struct gfs_inode *sd_rooti; /* FS's root inode */
13836 + struct gfs_glock *sd_rename_gl; /* rename glock */
13838 + /* Daemon stuff */
13840 + struct task_struct *sd_scand_process;
13841 + unsigned int sd_glockd_num;
13842 + struct task_struct *sd_recoverd_process;
13843 + struct task_struct *sd_logd_process;
13844 + struct task_struct *sd_quotad_process;
13845 + struct task_struct *sd_inoded_process;
13847 + struct semaphore sd_thread_lock;
13848 + struct completion sd_thread_completion;
13852 + struct gfs_glock *sd_trans_gl; /* transaction glock */
13854 + struct gfs_inode *sd_jiinode; /* jindex inode */
13855 + uint64_t sd_jiinode_vn; /* Version number of the journal index inode */
13857 + unsigned int sd_journals; /* Number of journals in the FS */
13858 + struct gfs_jindex *sd_jindex; /* Array of Jindex structures describing this FS's journals */
13859 + struct semaphore sd_jindex_lock;
13860 + unsigned long sd_jindex_refresh_time;
13862 + struct gfs_jindex sd_jdesc; /* Jindex structure describing this machine's journal */
13863 + struct gfs_holder sd_journal_gh; /* the glock for this machine's journal */
13865 + uint64_t sd_sequence; /* Assigned to xactions in order they commit */
13866 + uint64_t sd_log_head; /* Block number of next journal write */
13867 + uint64_t sd_log_wrap;
13869 + spinlock_t sd_log_seg_lock;
13870 + unsigned int sd_log_seg_free; /* Free segments in the log */
13871 + struct list_head sd_log_seg_list;
13872 + wait_queue_head_t sd_log_seg_wait;
13874 + struct list_head sd_log_ail; /* struct gfs_trans structures that form the Active Items List
13875 + "next" is the head, "prev" is the tail */
13877 + struct list_head sd_log_incore; /* transactions that have been committed incore (but not ondisk)
13878 + "next" is the newest, "prev" is the oldest */
13879 + unsigned int sd_log_buffers; /* Number of buffers in the incore log */
13881 + struct semaphore sd_log_lock; /* Lock for access to log values */
13883 + uint64_t sd_log_dump_last;
13884 + uint64_t sd_log_dump_last_wrap;
13886 + /* unlinked crap */
13888 + struct list_head sd_unlinked_list;
13889 + spinlock_t sd_unlinked_lock;
13891 + atomic_t sd_unlinked_ic_count;
13892 + atomic_t sd_unlinked_od_count;
13896 + struct list_head sd_quota_list;
13897 + spinlock_t sd_quota_lock;
13899 + atomic_t sd_quota_count;
13900 + atomic_t sd_quota_od_count;
13902 + struct gfs_inode *sd_qinode;
13904 + uint64_t sd_quota_sync_gen;
13905 + unsigned long sd_quota_sync_time;
13907 + /* license crap */
13909 + struct gfs_inode *sd_linode;
13911 + /* Recovery stuff */
13913 + struct list_head sd_dirty_j;
13914 + spinlock_t sd_dirty_j_lock;
13916 + unsigned int sd_recovery_replays;
13917 + unsigned int sd_recovery_skips;
13918 + unsigned int sd_recovery_sames;
13922 + atomic_t sd_glock_count;
13923 + atomic_t sd_glock_held_count;
13924 + atomic_t sd_inode_count;
13925 + atomic_t sd_bufdata_count;
13926 + atomic_t sd_fh2dentry_misses;
13927 + atomic_t sd_reclaimed;
13928 + atomic_t sd_glock_nq_calls;
13929 + atomic_t sd_glock_dq_calls;
13930 + atomic_t sd_glock_prefetch_calls;
13931 + atomic_t sd_lm_lock_calls;
13932 + atomic_t sd_lm_unlock_calls;
13933 + atomic_t sd_lm_callbacks;
13934 + atomic_t sd_ops_address;
13935 + atomic_t sd_ops_dentry;
13936 + atomic_t sd_ops_export;
13937 + atomic_t sd_ops_file;
13938 + atomic_t sd_ops_inode;
13939 + atomic_t sd_ops_super;
13940 + atomic_t sd_ops_vm;
13942 + char sd_fsname[256];
13944 + /* Debugging crud */
13946 + unsigned long sd_last_readdirplus;
13947 + unsigned long sd_last_unlocked_aop;
13949 + spinlock_t sd_ail_lock;
13950 + struct list_head sd_recovery_bufs;
13953 +#endif /* __INCORE_DOT_H__ */
13954 diff -urN linux-orig/fs/gfs/inode.c linux-patched/fs/gfs/inode.c
13955 --- linux-orig/fs/gfs/inode.c 1969-12-31 18:00:00.000000000 -0600
13956 +++ linux-patched/fs/gfs/inode.c 2004-06-30 13:27:49.343711130 -0500
13958 +/******************************************************************************
13959 +*******************************************************************************
13961 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
13962 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
13964 +** This copyrighted material is made available to anyone wishing to use,
13965 +** modify, copy, or redistribute it subject to the terms and conditions
13966 +** of the GNU General Public License v.2.
13968 +*******************************************************************************
13969 +******************************************************************************/
13971 +#include <linux/sched.h>
13972 +#include <linux/slab.h>
13973 +#include <linux/smp_lock.h>
13974 +#include <linux/spinlock.h>
13975 +#include <asm/semaphore.h>
13976 +#include <linux/completion.h>
13977 +#include <linux/buffer_head.h>
13978 +#include <linux/xattr_acl.h>
13985 +#include "eattr.h"
13986 +#include "glock.h"
13987 +#include "glops.h"
13988 +#include "inode.h"
13990 +#include "ops_address.h"
13991 +#include "ops_file.h"
13992 +#include "ops_inode.h"
13993 +#include "quota.h"
13995 +#include "trans.h"
13996 +#include "unlinked.h"
13999 + * inode_attr_in - Copy attributes from the dinode into the VFS inode
14000 + * @ip: The GFS inode
14005 +inode_attr_in(struct gfs_inode *ip, struct inode *ino)
14007 + unsigned int mode;
14009 + ino->i_ino = ip->i_num.no_formal_ino;
14011 + switch (ip->i_di.di_type) {
14012 + case GFS_FILE_REG:
14016 + case GFS_FILE_DIR:
14020 + case GFS_FILE_LNK:
14024 + case GFS_FILE_BLK:
14026 + ino->i_rdev = MKDEV(ip->i_di.di_major, ip->i_di.di_minor);
14028 + case GFS_FILE_CHR:
14030 + ino->i_rdev = MKDEV(ip->i_di.di_major, ip->i_di.di_minor);
14032 + case GFS_FILE_FIFO:
14036 + case GFS_FILE_SOCK:
14041 + GFS_ASSERT_INODE(FALSE, ip,
14042 + printk("type = %u\n", ip->i_di.di_type););
14046 + ino->i_mode = mode | (ip->i_di.di_mode & S_IALLUGO);
14047 + ino->i_nlink = ip->i_di.di_nlink;
14048 + ino->i_uid = ip->i_di.di_uid;
14049 + ino->i_gid = ip->i_di.di_gid;
14050 + i_size_write(ino, ip->i_di.di_size);
14051 + ino->i_atime.tv_sec = ip->i_di.di_atime;
14052 + ino->i_mtime.tv_sec = ip->i_di.di_mtime;
14053 + ino->i_ctime.tv_sec = ip->i_di.di_ctime;
14054 + ino->i_atime.tv_nsec = ino->i_mtime.tv_nsec = ino->i_ctime.tv_nsec = 0;
14055 + ino->i_blksize = PAGE_SIZE;
14056 + ino->i_blocks = ip->i_di.di_blocks <<
14057 + (ip->i_sbd->sd_sb.sb_bsize_shift - GFS_BASIC_BLOCK_SHIFT);
14058 + ino->i_generation = ip->i_di.di_header.mh_incarn;
14062 + * gfs_inode_attr_in - Copy attributes from the dinode into the VFS inode
14063 + * @ip: The GFS inode
14068 +gfs_inode_attr_in(struct gfs_inode *ip)
14070 + struct inode *inode;
14072 + inode = gfs_iget(ip, NO_CREATE);
14074 + inode_attr_in(ip, inode);
14081 + * gfs_inode_attr_out - Copy attributes from VFS inode into the dinode
14082 + * @ip: The GFS inode
14084 + * Only copy out the attributes that we want the VFS layer
14085 + * to be able to modify.
14089 +gfs_inode_attr_out(struct gfs_inode *ip)
14091 + struct inode *inode;
14093 + inode = gfs_iget(ip, NO_CREATE);
14095 + ip->i_di.di_mode = inode->i_mode & S_IALLUGO;
14096 + ip->i_di.di_uid = inode->i_uid;
14097 + ip->i_di.di_gid = inode->i_gid;
14098 + ip->i_di.di_atime = inode->i_atime.tv_sec;
14099 + ip->i_di.di_mtime = inode->i_mtime.tv_sec;
14100 + ip->i_di.di_ctime = inode->i_ctime.tv_sec;
14106 + * gfs_iget - Get/Create a struct inode for a struct gfs_inode
14107 + * @ip: the struct gfs_inode to get the struct inode for
14109 + * Returns: An inode
14113 +gfs_iget(struct gfs_inode *ip, int create)
14115 + struct inode *inode = NULL, *tmp;
14117 + spin_lock(&ip->i_lock);
14119 + inode = igrab(ip->i_vnode);
14120 + spin_unlock(&ip->i_lock);
14122 + if (inode || !create)
14125 + tmp = new_inode(ip->i_sbd->sd_vfs);
14129 + inode_attr_in(ip, tmp);
14131 + if (ip->i_di.di_type == GFS_FILE_REG) {
14132 + tmp->i_op = &gfs_file_iops;
14133 + tmp->i_fop = &gfs_file_fops;
14134 + tmp->i_mapping->a_ops = &gfs_file_aops;
14135 + } else if (ip->i_di.di_type == GFS_FILE_DIR) {
14136 + tmp->i_op = &gfs_dir_iops;
14137 + tmp->i_fop = &gfs_dir_fops;
14138 + } else if (ip->i_di.di_type == GFS_FILE_LNK) {
14139 + tmp->i_op = &gfs_symlink_iops;
14141 + tmp->i_op = &gfs_dev_iops;
14142 + init_special_inode(tmp, tmp->i_mode, tmp->i_rdev);
14145 + vn2ip(tmp) = NULL;
14148 + spin_lock(&ip->i_lock);
14149 + if (!ip->i_vnode)
14151 + inode = igrab(ip->i_vnode);
14152 + spin_unlock(&ip->i_lock);
14163 + gfs_inode_hold(ip);
14164 + ip->i_vnode = inode;
14165 + vn2ip(inode) = ip;
14167 + spin_unlock(&ip->i_lock);
14169 + insert_inode_hash(inode);
14175 + * gfs_copyin_dinode - Refresh the incore copy of the dinode
14176 + * @ip: The GFS inode
14178 + * Returns: 0 on success, -EXXX on failure
14182 +gfs_copyin_dinode(struct gfs_inode *ip)
14184 + struct buffer_head *dibh;
14187 + error = gfs_get_inode_buffer(ip, &dibh);
14191 + gfs_metatype_check(ip->i_sbd, dibh, GFS_METATYPE_DI);
14192 + gfs_dinode_in(&ip->i_di, dibh->b_data);
14196 + GFS_ASSERT_INODE(ip->i_num.no_formal_ino ==
14197 + ip->i_di.di_num.no_formal_ino, ip,
14198 + gfs_dinode_print(&ip->i_di););
14200 + /* Handle a moved inode */
14202 + if (ip->i_num.no_addr != ip->i_di.di_num.no_addr) {
14203 + /* Not implemented yet */
14204 + GFS_ASSERT_INODE(FALSE, ip,);
14207 + ip->i_vn = ip->i_gl->gl_vn;
14213 + * inode_create - create a struct gfs_inode
14214 + * @i_gl: The glock covering the inode
14215 + * @inum: The inode number
14216 + * @io_gl: the iopen glock, or NULL
14217 + * @io_state: the state the iopen glock should be acquire in
14218 + * @ipp: pointer to put the returned inode in
14220 + * Returns: 0 on success, -EXXX on failure
14224 +inode_create(struct gfs_glock *i_gl, struct gfs_inum *inum,
14225 + struct gfs_glock *io_gl, unsigned int io_state,
14226 + struct gfs_inode **ipp)
14228 + struct gfs_sbd *sdp = i_gl->gl_sbd;
14229 + struct gfs_inode *ip;
14232 + RETRY_MALLOC(ip = kmem_cache_alloc(gfs_inode_cachep, GFP_KERNEL), ip);
14233 + memset(ip, 0, sizeof(struct gfs_inode));
14235 + ip->i_num = *inum;
14237 + atomic_set(&ip->i_count, 1);
14242 + spin_lock_init(&ip->i_lock);
14244 + error = gfs_glock_nq_init(io_gl,
14245 + io_state, GL_LOCAL_EXCL | GL_EXACT,
14246 + &ip->i_iopen_gh);
14250 + ip->i_iopen_gh.gh_owner = NULL;
14252 + spin_lock(&io_gl->gl_spin);
14253 + gfs_glock_hold(i_gl);
14254 + gl2gl(io_gl) = i_gl;
14255 + spin_unlock(&io_gl->gl_spin);
14257 + error = gfs_copyin_dinode(ip);
14261 + gfs_glock_hold(i_gl);
14262 + gl2ip(i_gl) = ip;
14264 + atomic_inc(&sdp->sd_inode_count);
14271 + spin_lock(&io_gl->gl_spin);
14272 + gl2gl(io_gl) = NULL;
14273 + gfs_glock_put(i_gl);
14274 + spin_unlock(&io_gl->gl_spin);
14276 + gfs_glock_dq_uninit(&ip->i_iopen_gh);
14279 + gfs_flush_meta_cache(ip);
14280 + kmem_cache_free(gfs_inode_cachep, ip);
14287 + * gfs_inode_get - Get an inode given its number
14288 + * @i_gl: The glock covering the inode
14289 + * @inum: The inode number
14290 + * @create: Flag to say if we are allowed to create a new struct gfs_inode
14291 + * @ipp: pointer to put the returned inode in
14293 + * Returns: 0 on success, -EXXX on failure
14297 +gfs_inode_get(struct gfs_glock *i_gl, struct gfs_inum *inum, int create,
14298 + struct gfs_inode **ipp)
14300 + struct gfs_glock *io_gl;
14303 + *ipp = gl2ip(i_gl);
14305 + atomic_inc(&(*ipp)->i_count);
14306 + GFS_ASSERT_INODE((*ipp)->i_num.no_formal_ino ==
14307 + inum->no_formal_ino,
14309 + } else if (create) {
14310 + error = gfs_glock_get(i_gl->gl_sbd,
14311 + inum->no_addr, &gfs_iopen_glops,
14314 + error = inode_create(i_gl, inum, io_gl,
14315 + LM_ST_SHARED, ipp);
14316 + gfs_glock_put(io_gl);
14324 + * gfs_inode_hold - hold a struct gfs_inode structure
14325 + * @ip: The GFS inode
14330 +gfs_inode_hold(struct gfs_inode *ip)
14332 + GFS_ASSERT_INODE(atomic_read(&ip->i_count), ip,);
14333 + atomic_inc(&ip->i_count);
14337 + * gfs_inode_put - put a struct gfs_inode structure
14338 + * @ip: The GFS inode
14343 +gfs_inode_put(struct gfs_inode *ip)
14345 + atomic_dec(&ip->i_count);
14346 + GFS_ASSERT_INODE(atomic_read(&ip->i_count) >= 0, ip,);
14350 + * gfs_inode_destroy - Destroy an inode structure with no references on it
14351 + * @ip: The GFS inode
14353 + * This function must be called with a glock held on the inode.
14358 +gfs_inode_destroy(struct gfs_inode *ip)
14360 + struct gfs_sbd *sdp = ip->i_sbd;
14361 + struct gfs_glock *io_gl = ip->i_iopen_gh.gh_gl;
14362 + struct gfs_glock *i_gl = ip->i_gl;
14364 + GFS_ASSERT_INODE(!atomic_read(&ip->i_count), ip,);
14365 + GFS_ASSERT_INODE(gl2gl(io_gl) == i_gl, ip,);
14367 + spin_lock(&io_gl->gl_spin);
14368 + gl2gl(io_gl) = NULL;
14369 + gfs_glock_put(i_gl);
14370 + spin_unlock(&io_gl->gl_spin);
14372 + gfs_glock_dq_uninit(&ip->i_iopen_gh);
14374 + gfs_flush_meta_cache(ip);
14375 + kmem_cache_free(gfs_inode_cachep, ip);
14377 + gl2ip(i_gl) = NULL;
14378 + gfs_glock_put(i_gl);
14380 + atomic_dec(&sdp->sd_inode_count);
14384 + * dinode_mark_unused -
14391 +dinode_mark_unused(struct gfs_inode *ip)
14393 + struct buffer_head *dibh;
14394 + struct gfs_dinode *di;
14400 + error = gfs_get_inode_buffer(ip, &dibh);
14404 + di = (struct gfs_dinode *)dibh->b_data;
14406 + gfs_trans_add_bh(ip->i_gl, dibh);
14408 + incarn = gfs32_to_cpu(di->di_header.mh_incarn) + 1;
14409 + di->di_header.mh_incarn = cpu_to_gfs32(incarn);
14411 + ctime = get_seconds();
14412 + di->di_ctime = cpu_to_gfs64(ctime);
14414 + flags = (gfs32_to_cpu(di->di_flags)) | GFS_DIF_UNUSED;
14415 + di->di_flags = cpu_to_gfs32(flags);
14423 + * dinode_dealloc - Deallocate a dinode
14424 + * @ip: The GFS inode
14426 + * Returns: 0 on success, -EXXX on failure
14430 +dinode_dealloc(struct gfs_inode *ip)
14432 + struct gfs_sbd *sdp = ip->i_sbd;
14433 + struct gfs_rgrpd *rgd;
14434 + struct gfs_holder ri_gh, rgd_gh;
14437 + gfs_alloc_get(ip);
14439 + error = gfs_quota_hold_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
14443 + error = gfs_rindex_hold(sdp, &ri_gh);
14447 + rgd = gfs_blk2rgrpd(sdp, ip->i_num.no_addr);
14448 + GFS_ASSERT_INODE(rgd, ip,);
14450 + error = gfs_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rgd_gh);
14452 + goto fail_rindex_relse;
14454 + GFS_ASSERT_INODE(ip->i_di.di_blocks == 1, ip,
14455 + gfs_dinode_print(&ip->i_di););
14457 + /* Trans may require:
14458 + One block for the RG header.
14459 + One block for the dinode bit.
14460 + One block for the dinode.
14461 + We also need a block for the unlinked change.
14462 + One block for the quota change. */
14464 + error = gfs_trans_begin(sdp, 3, 2);
14466 + goto fail_rg_gunlock;
14468 + error = dinode_mark_unused(ip);
14470 + goto fail_end_trans;
14472 + gfs_difree(rgd, ip);
14474 + gfs_trans_add_unlinked(sdp, GFS_LOG_DESC_IDA, &ip->i_num);
14475 + clear_bit(GLF_STICKY, &ip->i_gl->gl_flags);
14477 + gfs_trans_end(sdp);
14479 + gfs_glock_dq_uninit(&rgd_gh);
14480 + gfs_glock_dq_uninit(&ri_gh);
14482 + gfs_quota_unhold_m(ip);
14483 + gfs_alloc_put(ip);
14488 + gfs_trans_end(sdp);
14491 + gfs_glock_dq_uninit(&rgd_gh);
14493 + fail_rindex_relse:
14494 + gfs_glock_dq_uninit(&ri_gh);
14497 + gfs_quota_unhold_m(ip);
14500 + gfs_alloc_put(ip);
14506 + * inode_dealloc - Deallocate an inode
14507 + * @sdp: the filesystem
14508 + * @inum: the inode number to deallocate
14509 + * @io_gh: a holder for the iopen glock for this inode
14511 + * Returns: 0 on success, -EXXX on failure
14515 +inode_dealloc(struct gfs_sbd *sdp, struct gfs_inum *inum,
14516 + struct gfs_holder *io_gh)
14518 + struct gfs_inode *ip;
14519 + struct gfs_holder i_gh;
14522 + error = gfs_glock_nq_num(sdp,
14523 + inum->no_formal_ino, &gfs_inode_glops,
14524 + LM_ST_EXCLUSIVE, 0, &i_gh);
14528 + /* We reacquire the iopen lock here to avoid a race with the NFS server
14529 + calling gfs_read_inode() with the inode number of an inode we're in the
14530 + process of deallocating. And we can't keep our hold on the lock
14531 + from try_dealloc_inode() for deadlock reasons. */
14533 + gfs_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY, io_gh);
14534 + error = gfs_glock_nq(io_gh);
14538 + case GLR_TRYFAILED:
14542 + GFS_ASSERT_SBD(error < 0, sdp,);
14546 + GFS_ASSERT_GLOCK(!gl2ip(i_gh.gh_gl), i_gh.gh_gl,);
14547 + error = inode_create(i_gh.gh_gl, inum, io_gh->gh_gl, LM_ST_EXCLUSIVE,
14550 + gfs_glock_dq(io_gh);
14555 + GFS_ASSERT_INODE(!ip->i_di.di_nlink, ip,
14556 + gfs_dinode_print(&ip->i_di););
14557 + GFS_ASSERT_INODE(atomic_read(&ip->i_count) == 1, ip,);
14558 + GFS_ASSERT_INODE(!ip->i_vnode, ip,);
14560 + if (ip->i_di.di_type == GFS_FILE_DIR &&
14561 + (ip->i_di.di_flags & GFS_DIF_EXHASH)) {
14562 + error = gfs_dir_exhash_free(ip);
14567 + if (ip->i_di.di_eattr) {
14568 + error = gfs_ea_dealloc(ip);
14573 + error = gfs_shrink(ip, 0, NULL);
14577 + error = dinode_dealloc(ip);
14581 + gfs_inode_put(ip);
14582 + gfs_inode_destroy(ip);
14584 + gfs_glock_dq_uninit(&i_gh);
14589 + gfs_inode_put(ip);
14590 + gfs_inode_destroy(ip);
14593 + gfs_glock_dq_uninit(&i_gh);
14599 + * inode_dealloc_init - Try to deallocate an inode and all its blocks
14600 + * @sdp: the filesystem
14602 + * Returns: 0 on success, -errno on error, 1 on busy
14606 +inode_dealloc_init(struct gfs_sbd *sdp, struct gfs_inum *inum)
14608 + struct gfs_holder io_gh;
14611 + gfs_try_toss_inode(sdp, inum);
14613 + error = gfs_glock_nq_num(sdp,
14614 + inum->no_addr, &gfs_iopen_glops,
14615 + LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB, &io_gh);
14619 + case GLR_TRYFAILED:
14622 + GFS_ASSERT_SBD(error < 0, sdp,);
14626 + gfs_glock_dq(&io_gh);
14627 + error = inode_dealloc(sdp, inum, &io_gh);
14628 + gfs_holder_uninit(&io_gh);
14634 + * inode_dealloc_uninit - dealloc an uninitialized inode
14635 + * @sdp: the filesystem
14637 + * Returns: 0 on success, -errno on error, 1 on busy
14641 +inode_dealloc_uninit(struct gfs_sbd *sdp, struct gfs_inum *inum)
14643 + struct gfs_rgrpd *rgd;
14644 + struct gfs_holder ri_gh, rgd_gh;
14647 + error = gfs_rindex_hold(sdp, &ri_gh);
14651 + rgd = gfs_blk2rgrpd(sdp, inum->no_addr);
14652 + GFS_ASSERT_SBD(rgd, sdp,);
14654 + error = gfs_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rgd_gh);
14658 + /* Trans may require:
14659 + One block for the RG header.
14660 + One block for the dinode bit.
14661 + We also need a block for the unlinked change. */
14663 + error = gfs_trans_begin(sdp, 2, 1);
14665 + goto fail_gunlock;
14667 + gfs_difree_uninit(rgd, inum->no_addr);
14668 + gfs_trans_add_unlinked(sdp, GFS_LOG_DESC_IDA, inum);
14670 + gfs_trans_end(sdp);
14672 + gfs_glock_dq_uninit(&rgd_gh);
14673 + gfs_glock_dq_uninit(&ri_gh);
14678 + gfs_glock_dq_uninit(&rgd_gh);
14681 + gfs_glock_dq_uninit(&ri_gh);
14687 + * gfs_inode_dealloc - Grab an unlinked inode off the list and try to free it.
14688 + * @sdp: the filesystem
14690 + * Returns: 0 on success, -errno on error, 1 on busy
14694 +gfs_inode_dealloc(struct gfs_sbd *sdp, struct gfs_inum *inum)
14696 + if (inum->no_formal_ino)
14697 + return inode_dealloc_init(sdp, inum);
14699 + return inode_dealloc_uninit(sdp, inum);
14703 + * gfs_change_nlink - Change nlink count on inode
14704 + * @ip: The GFS inode
14705 + * @diff: The change in the nlink count required
14707 + * Returns: 0 on success, -EXXXX on failure.
14711 +gfs_change_nlink(struct gfs_inode *ip, int diff)
14713 + struct buffer_head *dibh;
14717 + nlink = ip->i_di.di_nlink + diff;
14720 + GFS_ASSERT_INODE(nlink < ip->i_di.di_nlink, ip,
14721 + gfs_dinode_print(&ip->i_di););
14723 + error = gfs_get_inode_buffer(ip, &dibh);
14727 + ip->i_di.di_nlink = nlink;
14728 + ip->i_di.di_ctime = get_seconds();
14730 + gfs_trans_add_bh(ip->i_gl, dibh);
14731 + gfs_dinode_out(&ip->i_di, dibh->b_data);
14738 + * gfs_lookupi - Look up a filename in a directory and return its inode
14739 + * @d_gh: An initialized holder for the directory glock
14740 + * @name: The name of the inode to look for
14741 + * @is_root: If TRUE, ignore the caller's permissions
14742 + * @i_gh: An uninitialized holder for the new inode glock
14744 + * Returns: 0 on success, -EXXXX on failure
14748 +gfs_lookupi(struct gfs_holder *d_gh, struct qstr *name,
14749 + int is_root, struct gfs_holder *i_gh)
14751 + struct gfs_inode *dip = gl2ip(d_gh->gh_gl);
14752 + struct gfs_sbd *sdp = dip->i_sbd;
14753 + struct gfs_glock *gl;
14754 + struct gfs_inode *ip;
14755 + struct gfs_inum inum, inum2;
14756 + unsigned int type;
14759 + i_gh->gh_gl = NULL;
14761 + if (!name->len || name->len > GFS_FNAMESIZE)
14762 + return -ENAMETOOLONG;
14764 + if (gfs_filecmp(name, ".", 1)) {
14765 + gfs_holder_reinit(LM_ST_SHARED, 0, d_gh);
14766 + error = gfs_glock_nq(d_gh);
14768 + error = gfs_glock_nq_init(dip->i_gl,
14771 + GFS_ASSERT_INODE(!error, ip,);
14772 + gfs_inode_hold(dip);
14778 + if (gfs_glock_is_locked_by_me(d_gh->gh_gl))
14779 + bitch_about(sdp, &sdp->sd_last_readdirplus,
14780 + "readdirplus-type behavior");
14782 + gfs_holder_reinit(LM_ST_SHARED, 0, d_gh);
14783 + error = gfs_glock_nq(d_gh);
14788 + struct inode *dir = gfs_iget(dip, NO_CREATE);
14790 + error = permission(dir, MAY_EXEC, NULL);
14793 + gfs_glock_dq(d_gh);
14799 + error = gfs_dir_search(dip, name, &inum, &type);
14801 + gfs_glock_dq(d_gh);
14802 + if (error == -ENOENT)
14808 + error = gfs_glock_get(sdp, inum.no_formal_ino, &gfs_inode_glops,
14811 + gfs_glock_dq(d_gh);
14815 + /* Acquire the second lock */
14817 + if (gl->gl_name.ln_number < dip->i_gl->gl_name.ln_number) {
14818 + gfs_glock_dq(d_gh);
14820 + error = gfs_glock_nq_init(gl, LM_ST_SHARED,
14821 + LM_FLAG_ANY | GL_LOCAL_EXCL,
14826 + gfs_holder_reinit(LM_ST_SHARED, 0, d_gh);
14827 + error = gfs_glock_nq(d_gh);
14829 + gfs_glock_dq_uninit(i_gh);
14834 + struct inode *dir = gfs_iget(dip, NO_CREATE);
14836 + error = permission(dir, MAY_EXEC, NULL);
14839 + gfs_glock_dq(d_gh);
14840 + gfs_glock_dq_uninit(i_gh);
14846 + error = gfs_dir_search(dip, name, &inum2, &type);
14848 + gfs_glock_dq(d_gh);
14849 + gfs_glock_dq_uninit(i_gh);
14850 + if (error == -ENOENT)
14855 + if (!gfs_inum_equal(&inum, &inum2)) {
14856 + gfs_glock_dq_uninit(i_gh);
14857 + gfs_glock_put(gl);
14862 + error = gfs_glock_nq_init(gl, LM_ST_SHARED,
14863 + LM_FLAG_ANY | GL_LOCAL_EXCL,
14866 + gfs_glock_dq(d_gh);
14871 + error = gfs_inode_get(gl, &inum, CREATE, &ip);
14873 + gfs_glock_dq(d_gh);
14874 + gfs_glock_dq_uninit(i_gh);
14876 + GFS_ASSERT_INODE(ip->i_di.di_type == type, ip,);
14879 + gfs_glock_put(gl);
14894 +create_ok(struct gfs_inode *dip, struct qstr *name, unsigned int type)
14899 + struct inode *dir = gfs_iget(dip, NO_CREATE);
14901 + error = permission(dir, MAY_WRITE | MAY_EXEC, NULL);
14908 + /* Don't create entries in an unlinked directory */
14910 + if (!dip->i_di.di_nlink)
14913 + error = gfs_dir_search(dip, name, NULL, NULL);
14924 + if (dip->i_di.di_entries == (uint32_t)-1)
14926 + if (type == GFS_FILE_DIR && dip->i_di.di_nlink == (uint32_t)-1)
14941 +dinode_alloc(struct gfs_inode *dip, struct gfs_unlinked **ul)
14943 + struct gfs_sbd *sdp = dip->i_sbd;
14944 + struct gfs_alloc *al;
14945 + struct gfs_inum inum;
14948 + al = gfs_alloc_get(dip);
14950 + al->al_requested_di = 1;
14952 + error = gfs_inplace_reserve(dip);
14956 + error = gfs_trans_begin(sdp, al->al_rgd->rd_ri.ri_length, 1);
14958 + goto out_inplace;
14960 + inum.no_formal_ino = 0;
14961 + error = gfs_dialloc(dip, &inum.no_addr);
14963 + goto out_end_trans;
14965 + *ul = gfs_trans_add_unlinked(sdp, GFS_LOG_DESC_IUL, &inum);
14966 + gfs_unlinked_lock(sdp, *ul);
14968 + gfs_trans_add_gl(dip->i_gl);
14971 + gfs_trans_end(sdp);
14974 + gfs_inplace_release(dip);
14977 + gfs_alloc_put(dip);
14983 + * pick_formal_ino - Pick a formal inode number for a given inode
14984 + * @sdp: the filesystem
14985 + * @inum: the inode number structure
14990 +pick_formal_ino(struct gfs_sbd *sdp, struct gfs_inum *inum)
14992 + /* This won't always be true */
14993 + inum->no_formal_ino = inum->no_addr;
14997 + * make_dinode - Fill in a new dinode structure
14998 + * @dip: the directory this inode is being created in
14999 + * @gl: The glock covering the new inode
15000 + * @inum: the inode number
15001 + * @type: the file type
15002 + * @mode: the file permissions
15009 +make_dinode(struct gfs_inode *dip,
15010 + struct gfs_glock *gl, struct gfs_inum *inum,
15011 + unsigned int type, unsigned int mode,
15012 + unsigned int uid, unsigned int gid)
15014 + struct gfs_sbd *sdp = dip->i_sbd;
15015 + struct gfs_dinode di;
15016 + struct buffer_head *dibh;
15017 + struct gfs_rgrpd *rgd;
15020 + error = gfs_dread(sdp, inum->no_addr, gl,
15021 + DIO_NEW | DIO_START | DIO_WAIT,
15026 + gfs_trans_add_bh(gl, dibh);
15027 + gfs_metatype_set(sdp, dibh, GFS_METATYPE_DI, GFS_FORMAT_DI);
15028 + gfs_buffer_clear_tail(dibh, sizeof(struct gfs_dinode));
15030 + memset(&di, 0, sizeof(struct gfs_dinode));
15032 + gfs_meta_header_in(&di.di_header, dibh->b_data);
15034 + di.di_num = *inum;
15036 + di.di_mode = mode & S_IALLUGO;
15040 + di.di_blocks = 1;
15041 + di.di_atime = di.di_mtime = di.di_ctime = get_seconds();
15043 + rgd = gfs_blk2rgrpd(sdp, inum->no_addr);
15044 + GFS_ASSERT_SBD(rgd, sdp,
15045 + printk("block = %"PRIu64"\n", inum->no_addr););
15047 + di.di_rgrp = rgd->rd_ri.ri_addr;
15048 + di.di_goal_rgrp = di.di_rgrp;
15049 + di.di_goal_dblk = di.di_goal_mblk = inum->no_addr - rgd->rd_ri.ri_data1;
15051 + if (type == GFS_FILE_REG) {
15052 + if ((dip->i_di.di_flags & GFS_DIF_INHERIT_JDATA) ||
15053 + sdp->sd_tune.gt_new_files_jdata)
15054 + di.di_flags |= GFS_DIF_JDATA;
15055 + if ((dip->i_di.di_flags & GFS_DIF_INHERIT_DIRECTIO) ||
15056 + sdp->sd_tune.gt_new_files_directio)
15057 + di.di_flags |= GFS_DIF_DIRECTIO;
15058 + } else if (type == GFS_FILE_DIR) {
15059 + di.di_flags |= (dip->i_di.di_flags & GFS_DIF_INHERIT_DIRECTIO);
15060 + di.di_flags |= (dip->i_di.di_flags & GFS_DIF_INHERIT_JDATA);
15063 + di.di_type = type;
15065 + gfs_dinode_out(&di, dibh->b_data);
15072 + * inode_init_and_link -
15084 +inode_init_and_link(struct gfs_inode *dip, struct qstr *name,
15085 + struct gfs_inum *inum, struct gfs_glock *gl,
15086 + unsigned int type, unsigned int mode)
15088 + struct gfs_sbd *sdp = dip->i_sbd;
15089 + struct posix_acl *acl = NULL;
15090 + struct gfs_alloc *al;
15091 + struct gfs_inode *ip;
15092 + unsigned int gid;
15093 + int alloc_required;
15096 + error = gfs_setup_new_acl(dip, type, &mode, &acl);
15100 + if (dip->i_di.di_mode & S_ISGID) {
15101 + if (type == GFS_FILE_DIR)
15103 + gid = dip->i_di.di_gid;
15106 + gid = current->fsgid;
15108 + al = gfs_alloc_get(dip);
15110 + error = gfs_quota_lock_m(dip,
15116 + error = gfs_quota_check(dip, current->fsuid, gid);
15118 + goto fail_gunlock_q;
15121 + alloc_required = TRUE;
15123 + error = gfs_diradd_alloc_required(dip, name, &alloc_required);
15125 + goto fail_gunlock_q;
15128 + if (alloc_required) {
15129 + error = gfs_quota_check(dip, dip->i_di.di_uid, dip->i_di.di_gid);
15131 + goto fail_gunlock_q;
15133 + al->al_requested_meta = sdp->sd_max_dirres + GFS_MAX_EA_ACL_BLKS;
15135 + error = gfs_inplace_reserve(dip);
15137 + goto fail_gunlock_q;
15139 + /* Trans may require:
15140 + blocks for two dinodes, the directory blocks necessary for
15141 + a new entry, RG bitmap blocks for an allocation,
15142 + and one block for a quota change and
15143 + one block for an unlinked tag. */
15145 + error = gfs_trans_begin(sdp,
15146 + 2 + sdp->sd_max_dirres +
15147 + al->al_rgd->rd_ri.ri_length +
15148 + GFS_MAX_EA_ACL_BLKS, 2);
15150 + goto fail_inplace;
15152 + /* Trans may require:
15153 + blocks for two dinodes, a leaf block,
15154 + and one block for a quota change and
15155 + one block for an unlinked tag. */
15157 + error = gfs_trans_begin(sdp, 3, 2);
15159 + goto fail_gunlock_q;
15162 + error = gfs_dir_add(dip, name, inum, type);
15164 + goto fail_end_trans;
15166 + error = make_dinode(dip, gl, inum, type, mode, current->fsuid, gid);
15168 + goto fail_end_trans;
15170 + al->al_ul = gfs_trans_add_unlinked(sdp, GFS_LOG_DESC_IDA,
15171 + &(struct gfs_inum){0, inum->no_addr});
15172 + gfs_trans_add_quota(sdp, +1, current->fsuid, gid);
15174 + /* Gfs_inode_get() can't fail here. But then again, it shouldn't be
15175 + here (it should be in gfs_createi()). Gfs_init_acl() has no
15176 + business needing a memory-resident inode. */
15178 + gfs_inode_get(gl, inum, CREATE, &ip);
15181 + error = gfs_init_acl(dip, ip, type, acl);
15182 + GFS_ASSERT(!error, ); /* Sigh. */
15188 + gfs_trans_end(sdp);
15191 + if (alloc_required)
15192 + gfs_inplace_release(dip);
15195 + gfs_quota_unlock_m(dip);
15198 + gfs_alloc_put(dip);
15200 + posix_acl_release(acl);
15206 + * gfs_createi - Create a new inode
15207 + * @d_gh: An initialized holder for the directory glock
15208 + * @name: The name of the new file
15209 + * @type: The type of dinode (GFS_FILE_REG, GFS_FILE_DIR, GFS_FILE_LNK, ...)
15210 + * @mode: the permissions on the new inode
15211 + * @i_gh: An uninitialized holder for the new inode glock
15213 + * If the return value is 0, the glocks on both the directory and the new
15214 + * file are held. A transaction has been started and an inplace reservation
15215 + * is held, as well.
15217 + * Returns: 0 on success, -EXXXX on failure
15221 +gfs_createi(struct gfs_holder *d_gh, struct qstr *name,
15222 + unsigned int type, unsigned int mode,
15223 + struct gfs_holder *i_gh)
15225 + struct gfs_inode *dip = gl2ip(d_gh->gh_gl);
15226 + struct gfs_sbd *sdp = dip->i_sbd;
15227 + struct gfs_unlinked *ul;
15228 + struct gfs_inum inum;
15229 + struct gfs_holder io_gh;
15232 + if (!name->len || name->len > GFS_FNAMESIZE)
15233 + return -ENAMETOOLONG;
15235 + gfs_holder_reinit(LM_ST_EXCLUSIVE, 0, d_gh);
15236 + error = gfs_glock_nq(d_gh);
15240 + error = create_ok(dip, name, type);
15244 + error = dinode_alloc(dip, &ul);
15248 + inum.no_addr = ul->ul_inum.no_addr;
15249 + pick_formal_ino(sdp, &inum);
15251 + if (inum.no_formal_ino < dip->i_num.no_formal_ino) {
15252 + gfs_glock_dq(d_gh);
15254 + error = gfs_glock_nq_num(sdp,
15255 + inum.no_formal_ino, &gfs_inode_glops,
15256 + LM_ST_EXCLUSIVE, GL_SKIP, i_gh);
15258 + gfs_unlinked_unlock(sdp, ul);
15262 + gfs_holder_reinit(LM_ST_EXCLUSIVE, 0, d_gh);
15263 + error = gfs_glock_nq(d_gh);
15265 + gfs_glock_dq_uninit(i_gh);
15266 + gfs_unlinked_unlock(sdp, ul);
15270 + error = create_ok(dip, name, type);
15272 + goto fail_gunlock_i;
15274 + error = gfs_glock_nq_num(sdp,
15275 + inum.no_formal_ino, &gfs_inode_glops,
15276 + LM_ST_EXCLUSIVE, GL_SKIP, i_gh);
15281 + error = gfs_glock_nq_num(sdp,
15282 + inum.no_addr, &gfs_iopen_glops,
15283 + LM_ST_SHARED, GL_LOCAL_EXCL | GL_EXACT,
15286 + goto fail_gunlock_i;
15288 + error = inode_init_and_link(dip, name, &inum, i_gh->gh_gl, type, mode);
15290 + goto fail_gunlock_io;
15292 + gfs_glock_dq_uninit(&io_gh);
15297 + gfs_glock_dq_uninit(&io_gh);
15300 + gfs_glock_dq_uninit(i_gh);
15303 + gfs_unlinked_unlock(sdp, ul);
15306 + gfs_glock_dq(d_gh);
15312 + * gfs_unlinki - Unlink a file
15313 + * @dip: The inode of the directory
15314 + * @name: The name of the file to be unlinked
15315 + * @ip: The inode of the file to be removed
15317 + * Assumes Glocks on both dip and ip are held.
15319 + * Returns: 0 on success, -EXXXX on failure
15323 +gfs_unlinki(struct gfs_inode *dip, struct qstr *name, struct gfs_inode *ip)
15325 + struct gfs_sbd *sdp = dip->i_sbd;
15328 + error = gfs_dir_del(dip, name);
15332 + error = gfs_change_nlink(ip, -1);
15336 + /* If this inode is being unlinked from the directory structure,
15337 + we need to mark that in the log so that it isn't lost during
15340 + if (!ip->i_di.di_nlink) {
15341 + gfs_trans_add_unlinked(sdp, GFS_LOG_DESC_IUL, &ip->i_num);
15342 + set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
15349 + * gfs_rmdiri - Remove a directory
15350 + * @dip: The parent directory of the directory to be removed
15351 + * @name: The name of the directory to be removed
15352 + * @ip: The GFS inode of the directory to be removed
15354 + * Assumes Glocks on dip and ip are held
15356 + * Returns: 0 on success, -EXXXX on failure
15360 +gfs_rmdiri(struct gfs_inode *dip, struct qstr *name, struct gfs_inode *ip)
15362 + struct gfs_sbd *sdp = dip->i_sbd;
15363 + struct qstr dotname;
15366 + GFS_ASSERT_INODE(ip->i_di.di_entries == 2, ip,
15367 + gfs_dinode_print(&ip->i_di););
15369 + error = gfs_dir_del(dip, name);
15373 + error = gfs_change_nlink(dip, -1);
15378 + dotname.name = ".";
15379 + error = gfs_dir_del(ip, &dotname);
15384 + dotname.name = "..";
15385 + error = gfs_dir_del(ip, &dotname);
15389 + error = gfs_change_nlink(ip, -2);
15393 + /* This inode is being unlinked from the directory structure and
15394 + we need to mark that in the log so that it isn't lost during
15397 + gfs_trans_add_unlinked(sdp, GFS_LOG_DESC_IUL, &ip->i_num);
15398 + set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
15404 + * gfs_revalidate - check to see that an inode is still in a directory
15405 + * @dip: the directory
15406 + * @name: the name of the file
15409 + * Assumes that the lock on (at least) @dip is held.
15411 + * Returns: 0 if the parent/child relationship is correct, -ENOENT if it isn't
15415 +gfs_revalidate(struct gfs_inode *dip, struct qstr *name, struct gfs_inode *ip)
15417 + struct gfs_inum inum;
15418 + unsigned int type;
15421 + error = gfs_dir_search(dip, name, &inum, &type);
15423 + if (inum.no_formal_ino == ip->i_num.no_formal_ino)
15424 + GFS_ASSERT_INODE(ip->i_di.di_type == type, ip,);
15433 + * gfs_ok_to_move - check if it's ok to move a directory to another directory
15434 + * @this: move this
15437 + * Follow @to back to the root and make sure we don't encounter @this
15438 + * Assumes we already hold the rename lock.
15440 + * Returns: 0 if it's ok to move, -EXXX if it isn't
15444 +gfs_ok_to_move(struct gfs_inode *this, struct gfs_inode *to)
15446 + struct gfs_sbd *sdp = this->i_sbd;
15447 + struct gfs_inode *tmp;
15448 + struct gfs_holder to_gh, tmp_gh;
15449 + struct qstr dotdot;
15452 + memset(&dotdot, 0, sizeof (struct qstr));
15453 + dotdot.name = "..";
15456 + gfs_inode_hold(to);
15459 + if (to == this) {
15463 + if (to == sdp->sd_rooti) {
15468 + gfs_holder_init(to->i_gl, 0, 0, &to_gh);
15470 + error = gfs_lookupi(&to_gh, &dotdot, TRUE, &tmp_gh);
15472 + gfs_holder_uninit(&to_gh);
15475 + if (!tmp_gh.gh_gl) {
15476 + gfs_holder_uninit(&to_gh);
15481 + tmp = gl2ip(tmp_gh.gh_gl);
15483 + gfs_glock_dq_uninit(&to_gh);
15484 + gfs_glock_dq_uninit(&tmp_gh);
15486 + gfs_inode_put(to);
15490 + gfs_inode_put(to);
15496 + * gfs_readlinki - return the contents of a symlink
15497 + * @ip: the symlink's inode
15498 + * @buf: a pointer to the buffer to be filled
15499 + * @len: a pointer to the length of @buf
15501 + * If @buf is too small, a piece of memory is gmalloc()ed and needs
15502 + * to be freed by the caller.
15504 + * Returns: 0 on success, -EXXX on failure
15508 +gfs_readlinki(struct gfs_inode *ip, char **buf, unsigned int *len)
15510 + struct gfs_holder i_gh;
15511 + struct buffer_head *dibh;
15515 + gfs_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
15516 + error = gfs_glock_nq_atime(&i_gh);
15518 + gfs_holder_uninit(&i_gh);
15522 + GFS_ASSERT_INODE(ip->i_di.di_size, ip,);
15524 + error = gfs_get_inode_buffer(ip, &dibh);
15528 + x = ip->i_di.di_size + 1;
15530 + *buf = gmalloc(x);
15532 + memcpy(*buf, dibh->b_data + sizeof(struct gfs_dinode), x);
15538 + gfs_glock_dq_uninit(&i_gh);
15544 + * gfs_glock_nq_atime - Acquire the glock and conditionally update the atime on an inode
15545 + * @gh: the holder to acquire
15547 + * Tests atime for gfs_read, gfs_readdir and gfs_test_mmap
15548 + * Update if the difference between the current time and the current atime
15549 + * is greater than an interval specified at mount.
15551 + * Returns: 0 on success, -EXXX on error
15555 +gfs_glock_nq_atime(struct gfs_holder *gh)
15557 + struct gfs_glock *gl = gh->gh_gl;
15558 + struct gfs_sbd *sdp = gl->gl_sbd;
15559 + struct gfs_inode *ip;
15560 + int64_t curtime, quantum = sdp->sd_tune.gt_atime_quantum;
15561 + unsigned int state;
15565 + GFS_ASSERT_GLOCK(gh->gh_flags & GL_ATIME, gl,);
15566 + GFS_ASSERT_GLOCK(!(gh->gh_flags & GL_ASYNC), gl,);
15567 + GFS_ASSERT_GLOCK(gl->gl_ops == &gfs_inode_glops, gl,);
15570 + GFS_ASSERT_GLOCK(ip, gl,);
15572 + state = gh->gh_state;
15573 + flags = gh->gh_flags;
15575 + error = gfs_glock_nq(gh);
15579 + if (test_bit(SDF_NOATIME, &sdp->sd_flags) ||
15580 + test_bit(SDF_ROFS, &sdp->sd_flags))
15583 + curtime = get_seconds();
15584 + if (curtime - ip->i_di.di_atime >= quantum) {
15585 + int was_exclusive = (gl->gl_state == LM_ST_EXCLUSIVE);
15587 + gfs_glock_dq(gh);
15588 + gfs_holder_reinit(LM_ST_EXCLUSIVE,
15589 + gh->gh_flags & ~LM_FLAG_ANY,
15591 + error = gfs_glock_nq(gh);
15595 + /* Verify this hasn't been updated while we were
15596 + trying to get exclusive lock. */
15598 + curtime = get_seconds();
15599 + if (curtime - ip->i_di.di_atime >= quantum) {
15600 + struct buffer_head *dibh;
15602 + error = gfs_trans_begin(sdp, 1, 0);
15603 + if (error == -EROFS)
15608 + error = gfs_get_inode_buffer(ip, &dibh);
15610 + goto fail_end_trans;
15612 + ip->i_di.di_atime = curtime;
15614 + gfs_trans_add_bh(ip->i_gl, dibh);
15615 + gfs_dinode_out(&ip->i_di, dibh->b_data);
15618 + gfs_trans_end(sdp);
15621 + if (!was_exclusive) {
15622 + gfs_glock_dq(gh);
15623 + flags &= ~LM_FLAG_ANY;
15624 + flags |= GL_EXACT;
15625 + gfs_holder_reinit(state, flags, gh);
15626 + error = gfs_glock_nq(gh);
15634 + gfs_trans_end(sdp);
15637 + gfs_glock_dq(gh);
15643 + * glock_compare_atime - Compare two struct gfs_glock structures for sorting
15644 + * @arg_a: the first structure
15645 + * @arg_b: the second structure
15650 +glock_compare_atime(const void *arg_a, const void *arg_b)
15652 + struct gfs_holder *gh_a = *(struct gfs_holder **)arg_a;
15653 + struct gfs_holder *gh_b = *(struct gfs_holder **)arg_b;
15654 + struct lm_lockname *a = &gh_a->gh_gl->gl_name;
15655 + struct lm_lockname *b = &gh_b->gh_gl->gl_name;
15658 + if (a->ln_number > b->ln_number)
15660 + else if (a->ln_number < b->ln_number)
15663 + if (gh_a->gh_state == LM_ST_SHARED &&
15664 + gh_b->gh_state == LM_ST_EXCLUSIVE)
15666 + else if (gh_a->gh_state == LM_ST_SHARED &&
15667 + (gh_b->gh_flags & GL_ATIME))
15675 + * gfs_glock_nq_m_atime - acquire multiple glocks where one may need an atime update
15676 + * @num_gh: the number of structures
15677 + * @ghs: an array of struct gfs_holder structures
15679 + * Returns: 0 on success (all glocks acquired), -EXXX on failure (no glocks acquired)
15683 +gfs_glock_nq_m_atime(unsigned int num_gh, struct gfs_holder *ghs)
15685 + struct gfs_holder *p[num_gh];
15689 + GFS_ASSERT(num_gh,);
15691 + if (num_gh == 1) {
15692 + ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
15693 + if (ghs->gh_flags & GL_ATIME)
15694 + error = gfs_glock_nq_atime(ghs);
15696 + error = gfs_glock_nq(ghs);
15700 + for (x = 0; x < num_gh; x++)
15703 + gfs_sort(p, num_gh, sizeof(struct gfs_holder *), glock_compare_atime);
15705 + for (x = 0; x < num_gh; x++) {
15706 + p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
15708 + if (p[x]->gh_flags & GL_ATIME)
15709 + error = gfs_glock_nq_atime(p[x]);
15711 + error = gfs_glock_nq(p[x]);
15715 + gfs_glock_dq(p[x]);
15724 + * gfs_try_toss_vnode - See if we can toss a vnode from memory
15727 + * Returns: TRUE if the vnode was tossed
15731 +gfs_try_toss_vnode(struct gfs_inode *ip)
15733 + struct inode *inode;
15735 + inode = gfs_iget(ip, NO_CREATE);
15739 + d_prune_aliases(inode);
15741 + if (ip->i_di.di_type == GFS_FILE_DIR) {
15742 + struct list_head *head = &inode->i_dentry;
15743 + struct dentry *d = NULL;
15745 + spin_lock(&dcache_lock);
15746 + if (list_empty(head))
15747 + spin_unlock(&dcache_lock);
15749 + d = list_entry(head->next, struct dentry, d_alias);
15751 + spin_unlock(&dcache_lock);
15753 + if (have_submounts(d))
15756 + shrink_dcache_parent(d);
15758 + d_prune_aliases(inode);
15763 + inode->i_nlink = 0;
15768 + * iah_make_jdata - set the GFS_DIF_JDATA flag in an on-disk dinode
15775 +iah_make_jdata(struct gfs_glock *gl, struct gfs_inum *inum)
15777 + struct buffer_head *bh;
15778 + struct gfs_dinode *di;
15782 + error = gfs_dread(gl->gl_sbd, inum->no_addr, gl, DIO_START | DIO_WAIT, &bh);
15783 + GFS_ASSERT_GLOCK(!error, gl,); /* Already pinned */
15785 + di = (struct gfs_dinode *)bh->b_data;
15787 + flags = di->di_flags;
15788 + flags = gfs32_to_cpu(flags) | GFS_DIF_JDATA;
15789 + di->di_flags = cpu_to_gfs32(flags);
15795 + * iah_super_update - write the in-core superblock back to the on-disk SB block
15802 +iah_super_update(struct gfs_sbd *sdp)
15804 + struct gfs_glock *gl;
15805 + struct buffer_head *bh;
15808 + error = gfs_glock_get(sdp,
15809 + GFS_SB_LOCK, &gfs_meta_glops,
15811 + GFS_ASSERT_SBD(!error && gl, sdp,); /* This should already be held. */
15813 + error = gfs_dread(sdp,
15814 + GFS_SB_ADDR >> sdp->sd_fsb2bb_shift, gl,
15815 + DIO_START | DIO_WAIT, &bh);
15817 + gfs_trans_add_bh(gl, bh);
15818 + gfs_sb_out(&sdp->sd_sb, bh->b_data);
15822 + gfs_glock_put(gl);
15828 + * inode_alloc_hidden - allocate a hidden (system) jdata inode, e.g. quota/license
15836 +inode_alloc_hidden(struct gfs_sbd *sdp, struct gfs_inum *inum)
15838 + struct gfs_inode *dip = sdp->sd_rooti;
15839 + struct gfs_holder d_gh, i_gh;
15840 + struct gfs_unlinked *ul;
15843 + error = gfs_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, &d_gh);
15847 + error = dinode_alloc(dip, &ul);
15851 + inum->no_addr = ul->ul_inum.no_addr;
15852 + pick_formal_ino(sdp, inum);
15854 + /* Don't worry about deadlock ordering here. We're the first
15855 + mounter and still under the mount lock (i.e. there is no
15858 + error = gfs_glock_nq_num(sdp,
15859 + inum->no_formal_ino, &gfs_inode_glops,
15860 + LM_ST_EXCLUSIVE, GL_SKIP, &i_gh);
15864 + gfs_alloc_get(dip);
15866 + error = gfs_quota_hold_m(dip, 0, 0);
15870 + /* Trans may require:
15871 + The new inode, the superblock,
15872 + and one block for a quota change and
15873 + one block for an unlinked tag. */
15875 + error = gfs_trans_begin(sdp, 2, 2);
15877 + goto fail_unhold;
15879 + error = make_dinode(dip, i_gh.gh_gl, inum, GFS_FILE_REG, 0600, 0, 0);
15881 + goto fail_end_trans;
15883 + iah_make_jdata(i_gh.gh_gl, inum);
15885 + error = iah_super_update(sdp);
15887 + goto fail_end_trans;
15889 + gfs_trans_add_unlinked(sdp, GFS_LOG_DESC_IDA,
15890 + &(struct gfs_inum){0, inum->no_addr});
15891 + gfs_trans_add_quota(sdp, +1, 0, 0);
15892 + gfs_trans_add_gl(dip->i_gl);
15894 + gfs_trans_end(sdp);
15895 + gfs_quota_unhold_m(dip);
15896 + gfs_alloc_put(dip);
15898 + gfs_glock_dq_uninit(&i_gh);
15899 + gfs_glock_dq_uninit(&d_gh);
15901 + gfs_unlinked_unlock(sdp, ul);
15903 + gfs_log_flush(sdp);
15908 + gfs_trans_end(sdp);
15911 + gfs_quota_unhold_m(dip);
15914 + gfs_alloc_put(dip);
15915 + gfs_glock_dq_uninit(&i_gh);
15918 + gfs_unlinked_unlock(sdp, ul);
15921 + gfs_glock_dq_uninit(&d_gh);
15927 + * gfs_alloc_qinode - allocate a quota inode
15928 + * @sdp: The GFS superblock
15930 + * Returns: 0 on success, error code otherwise
15934 +gfs_alloc_qinode(struct gfs_sbd *sdp)
15936 + return inode_alloc_hidden(sdp, &sdp->sd_sb.sb_quota_di);
15940 + * gfs_alloc_linode - allocate a license inode
15941 + * @sdp: The GFS superblock
15943 + * Returns: 0 on success, error code otherwise
15947 +gfs_alloc_linode(struct gfs_sbd *sdp)
15949 + return inode_alloc_hidden(sdp, &sdp->sd_sb.sb_license_di);
15951 diff -urN linux-orig/fs/gfs/inode.h linux-patched/fs/gfs/inode.h
15952 --- linux-orig/fs/gfs/inode.h 1969-12-31 18:00:00.000000000 -0600
15953 +++ linux-patched/fs/gfs/inode.h 2004-06-30 13:27:49.344710898 -0500
15955 +/******************************************************************************
15956 +*******************************************************************************
15958 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
15959 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
15961 +** This copyrighted material is made available to anyone wishing to use,
15962 +** modify, copy, or redistribute it subject to the terms and conditions
15963 +** of the GNU General Public License v.2.
15965 +*******************************************************************************
15966 +******************************************************************************/
15968 +#ifndef __INODE_DOT_H__
15969 +#define __INODE_DOT_H__
15971 +void gfs_inode_attr_in(struct gfs_inode *ip);
15972 +void gfs_inode_attr_out(struct gfs_inode *ip);
15973 +struct inode *gfs_iget(struct gfs_inode *ip, int create);
15975 +int gfs_copyin_dinode(struct gfs_inode *ip);
15977 +int gfs_inode_get(struct gfs_glock *i_gl, struct gfs_inum *inum, int create,
15978 + struct gfs_inode **ipp);
15979 +void gfs_inode_hold(struct gfs_inode *ip);
15980 +void gfs_inode_put(struct gfs_inode *ip);
15981 +void gfs_inode_destroy(struct gfs_inode *ip);
15983 +int gfs_inode_dealloc(struct gfs_sbd *sdp, struct gfs_inum *inum);
15985 +int gfs_change_nlink(struct gfs_inode *ip, int diff);
15986 +int gfs_lookupi(struct gfs_holder *d_gh, struct qstr *name,
15987 + int is_root, struct gfs_holder *i_gh);
15988 +int gfs_createi(struct gfs_holder *d_gh, struct qstr *name,
15989 + unsigned int type, unsigned int mode,
15990 + struct gfs_holder *i_gh);
15991 +int gfs_unlinki(struct gfs_inode *dip, struct qstr *name, struct gfs_inode *ip);
15992 +int gfs_rmdiri(struct gfs_inode *dip, struct qstr *name, struct gfs_inode *ip);
15993 +int gfs_revalidate(struct gfs_inode *dip, struct qstr *name,
15994 + struct gfs_inode *ip);
15995 +int gfs_ok_to_move(struct gfs_inode *this, struct gfs_inode *to);
15996 +int gfs_readlinki(struct gfs_inode *ip, char **buf, unsigned int *len);
15998 +int gfs_glock_nq_atime(struct gfs_holder *gh);
15999 +int gfs_glock_nq_m_atime(unsigned int num_gh, struct gfs_holder *ghs);
16001 +void gfs_try_toss_vnode(struct gfs_inode *ip);
16003 +/* Backwards compatibility functions */
16005 +int gfs_alloc_qinode(struct gfs_sbd *sdp);
16006 +int gfs_alloc_linode(struct gfs_sbd *sdp);
16010 +static __inline__ int
16011 +gfs_is_stuffed(struct gfs_inode *ip)
16013 + return !ip->i_di.di_height;
16016 +static __inline__ int
16017 +gfs_is_jdata(struct gfs_inode *ip)
16019 + return ip->i_di.di_flags & GFS_DIF_JDATA;
16022 +#endif /* __INODE_DOT_H__ */
16023 diff -urN linux-orig/fs/gfs/ioctl.c linux-patched/fs/gfs/ioctl.c
16024 --- linux-orig/fs/gfs/ioctl.c 1969-12-31 18:00:00.000000000 -0600
16025 +++ linux-patched/fs/gfs/ioctl.c 2004-06-30 13:27:49.345710666 -0500
16027 +/******************************************************************************
16028 +*******************************************************************************
16030 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
16031 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
16033 +** This copyrighted material is made available to anyone wishing to use,
16034 +** modify, copy, or redistribute it subject to the terms and conditions
16035 +** of the GNU General Public License v.2.
16037 +*******************************************************************************
16038 +******************************************************************************/
16040 +#include <linux/sched.h>
16041 +#include <linux/slab.h>
16042 +#include <linux/smp_lock.h>
16043 +#include <linux/spinlock.h>
16044 +#include <asm/semaphore.h>
16045 +#include <linux/completion.h>
16046 +#include <linux/buffer_head.h>
16047 +#include <asm/uaccess.h>
16053 +#include "eattr.h"
16055 +#include "glock.h"
16056 +#include "glops.h"
16057 +#include "inode.h"
16058 +#include "ioctl.h"
16059 +#include "quota.h"
16061 +#include "super.h"
16062 +#include "trans.h"
16065 + * gfs_add_bh_to_ub - copy a buffer up to user space
16066 + * @ub: the structure representing where to copy
16067 + * @bh: the buffer
16069 + * Returns: 0 on success, -EXXX on failure
16073 +gfs_add_bh_to_ub(struct gfs_user_buffer *ub, struct buffer_head *bh)
16075 + uint64_t blkno = bh->b_blocknr;
16077 + if (ub->ub_count + sizeof(uint64_t) + bh->b_size > ub->ub_size)
16080 + if (copy_to_user(ub->ub_data + ub->ub_count,
16082 + sizeof(uint64_t)))
16084 + ub->ub_count += sizeof(uint64_t);
16086 + if (copy_to_user(ub->ub_data + ub->ub_count,
16090 + ub->ub_count += bh->b_size;
16096 + * get_meta - Read out all the metadata for a file
16099 + * Returns: 0 on success, -EXXX on failure
16103 +get_meta(struct gfs_inode *ip, void *arg)
16105 + struct gfs_holder i_gh;
16106 + struct gfs_user_buffer ub;
16109 + if (copy_from_user(&ub, arg, sizeof(struct gfs_user_buffer)))
16113 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
16117 + error = gfs_get_file_meta(ip, &ub);
16121 + if (ip->i_di.di_type == GFS_FILE_DIR &&
16122 + (ip->i_di.di_flags & GFS_DIF_EXHASH)) {
16123 + error = gfs_get_dir_meta(ip, &ub);
16128 + if (ip->i_di.di_eattr) {
16129 + error = gfs_get_eattr_meta(ip, &ub);
16134 + if (copy_to_user(arg, &ub, sizeof(struct gfs_user_buffer)))
16138 + gfs_glock_dq_uninit(&i_gh);
16144 + * file_stat - return the struct gfs_dinode of a file to user space
16146 + * @arg: where to copy to
16148 + * Returns: 0 on success, -EXXX on failure
16152 +file_stat(struct gfs_inode *ip, void *arg)
16154 + struct gfs_holder i_gh;
16155 + struct gfs_dinode di;
16158 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
16162 + memcpy(&di, &ip->i_di, sizeof(struct gfs_dinode));
16164 + gfs_glock_dq_uninit(&i_gh);
16166 + if (copy_to_user(arg, &di, sizeof(struct gfs_dinode)))
16173 + * do_get_super - Dump the superblock into a buffer
16174 + * @sb: The superblock
16175 + * @ptr: The buffer pointer
16177 + * Returns: 0 or error code
16181 +do_get_super(struct gfs_sbd *sdp, void *arg)
16183 + struct gfs_sb *sb;
16184 + struct gfs_holder sb_gh;
16185 + struct buffer_head *bh;
16188 + sb = gmalloc(sizeof(struct gfs_sb));
16190 + error = gfs_glock_nq_num(sdp,
16191 + GFS_SB_LOCK, &gfs_meta_glops,
16192 + LM_ST_SHARED, 0, &sb_gh);
16196 + error = gfs_dread(sdp, GFS_SB_ADDR >> sdp->sd_fsb2bb_shift, sb_gh.gh_gl,
16197 + DIO_START | DIO_WAIT, &bh);
16199 + gfs_glock_dq_uninit(&sb_gh);
16203 + gfs_sb_in(sb, bh->b_data);
16206 + gfs_glock_dq_uninit(&sb_gh);
16208 + if (copy_to_user(arg, sb, sizeof(struct gfs_sb)))
16218 + * jt2ip - convert the file type in a jio struct to the right hidden ip
16219 + * @sdp: the filesystem
16220 + * @jt: the gfs_jio_structure
16222 + * Returns: The inode structure for the correct hidden file
16225 +static struct gfs_inode *
16226 +jt2ip(struct gfs_sbd *sdp, struct gfs_jio *jt)
16228 + struct gfs_inode *ip = NULL;
16230 + switch (jt->jio_file) {
16231 + case GFS_HIDDEN_JINDEX:
16232 + ip = sdp->sd_jiinode;
16235 + case GFS_HIDDEN_RINDEX:
16236 + ip = sdp->sd_riinode;
16239 + case GFS_HIDDEN_QUOTA:
16240 + ip = sdp->sd_qinode;
16243 + case GFS_HIDDEN_LICENSE:
16244 + ip = sdp->sd_linode;
16252 + * jread_ioctl - Read from a journaled data file via ioctl
16253 + * @sdp: the filesystem
16254 + * @arg: The argument from ioctl
16256 + * Returns: Amount of data copied or error
16260 +jread_ioctl(struct gfs_sbd *sdp, void *arg)
16262 + struct gfs_jio jt;
16263 + struct gfs_inode *ip;
16264 + struct gfs_holder i_gh;
16267 + if (copy_from_user(&jt, arg, sizeof(struct gfs_jio)))
16270 + ip = jt2ip(sdp, &jt);
16274 + GFS_ASSERT_INODE(gfs_is_jdata(ip), ip,);
16276 + if (!access_ok(VERIFY_WRITE, jt.jio_data, jt.jio_size))
16279 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
16283 + error = gfs_readi(ip, jt.jio_data, jt.jio_offset, jt.jio_size,
16286 + gfs_glock_dq_uninit(&i_gh);
16290 + jt.jio_count = error;
16292 + if (copy_to_user(arg, &jt, sizeof(struct gfs_jio)))
16299 + * jwrite_ioctl - Write to a journaled file via ioctl
16300 + * @sdp: the filesystem
16301 + * @arg: The argument from ioctl
16303 + * Returns: Amount of data copied or error
16307 +jwrite_ioctl(struct gfs_sbd *sdp, void *arg)
16309 + struct gfs_jio jt;
16310 + struct gfs_inode *ip;
16311 + struct gfs_alloc *al = NULL;
16312 + struct gfs_holder i_gh;
16313 + unsigned int data_blocks, ind_blocks;
16314 + int alloc_required;
16317 + if (copy_from_user(&jt, arg, sizeof(struct gfs_jio)))
16320 + ip = jt2ip(sdp, &jt);
16324 + GFS_ASSERT_INODE(gfs_is_jdata(ip), ip,);
16326 + if (!access_ok(VERIFY_READ, jt.jio_data, jt.jio_size))
16329 + gfs_write_calc_reserv(ip, jt.jio_size, &data_blocks, &ind_blocks);
16331 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE,
16332 + LM_FLAG_PRIORITY | GL_SYNC, &i_gh);
16336 + error = gfs_write_alloc_required(ip, jt.jio_offset, jt.jio_size,
16337 + &alloc_required);
16341 + if (alloc_required) {
16342 + al = gfs_alloc_get(ip);
16344 + error = gfs_quota_hold_m(ip, NO_QUOTA_CHANGE,
16345 + NO_QUOTA_CHANGE);
16349 + al->al_requested_meta = ind_blocks + data_blocks;
16351 + error = gfs_inplace_reserve(ip);
16355 + /* Trans may require:
16356 + All blocks for a RG bitmap, all the "data" blocks, whatever
16357 + indirect blocks we need, a modified dinode, and a quota change */
16359 + error = gfs_trans_begin(sdp,
16360 + 1 + al->al_rgd->rd_ri.ri_length +
16361 + ind_blocks + data_blocks, 1);
16365 + /* Trans may require:
16366 + All the "data" blocks and a modified dinode. */
16368 + error = gfs_trans_begin(sdp, 1 + data_blocks, 0);
16373 + error = gfs_writei(ip, jt.jio_data, jt.jio_offset, jt.jio_size,
16374 + gfs_copy_from_user);
16375 + if (error >= 0) {
16376 + jt.jio_count = error;
16380 + gfs_trans_end(sdp);
16383 + if (alloc_required) {
16384 + GFS_ASSERT_INODE(error || al->al_alloced_meta, ip,);
16385 + gfs_inplace_release(ip);
16389 + if (alloc_required)
16390 + gfs_quota_unhold_m(ip);
16393 + if (alloc_required)
16394 + gfs_alloc_put(ip);
16397 + ip->i_gl->gl_vn++;
16398 + gfs_glock_dq_uninit(&i_gh);
16400 + if (!error && copy_to_user(arg, &jt, sizeof(struct gfs_jio)))
16407 + * jstat_ioctl - Stat a journaled file via ioctl
16408 + * @sdp: the filesystem
16409 + * @arg: The argument from ioctl
16411 + * Returns: 0 on success, -EXXX on failure
16415 +jstat_ioctl(struct gfs_sbd *sdp, void *arg)
16417 + struct gfs_jio jt;
16418 + struct gfs_inode *ip;
16419 + struct gfs_holder i_gh;
16422 + if (copy_from_user(&jt, arg, sizeof(struct gfs_jio)))
16425 + ip = jt2ip(sdp, &jt);
16429 + if (jt.jio_size < sizeof(struct gfs_dinode))
16432 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
16436 + error = copy_to_user(jt.jio_data, &ip->i_di, sizeof(struct gfs_dinode));
16438 + gfs_glock_dq_uninit(&i_gh);
16447 + * jtrunc_ioctl - Truncate a journaled file via ioctl
16448 + * @sdp: the filesystem
16449 + * @arg: The argument from ioctl
16451 + * Returns: 0 on success, -EXXX on failure
16455 +jtrunc_ioctl(struct gfs_sbd *sdp, void *arg)
16457 + struct gfs_jio jt;
16458 + struct gfs_inode *ip;
16459 + struct gfs_holder i_gh;
16462 + if (copy_from_user(&jt, arg, sizeof(struct gfs_jio)))
16465 + ip = jt2ip(sdp, &jt);
16469 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SYNC, &i_gh);
16473 + error = gfs_truncatei(ip, jt.jio_offset, NULL);
16475 + ip->i_gl->gl_vn++;
16476 + gfs_glock_dq_uninit(&i_gh);
16482 + * lock_dump - copy out info about the GFS' lock space
16483 + * @sdp: the filesystem
16484 + * @arg: a pointer to a struct gfs_user_buffer in user space
16486 + * Returns: 0 on success, -EXXX on failure
16490 +lock_dump(struct gfs_sbd *sdp, void *arg)
16492 + struct gfs_user_buffer ub;
16495 + if (copy_from_user(&ub, arg, sizeof(struct gfs_user_buffer)))
16499 + error = gfs_dump_lockstate(sdp, &ub);
16503 + if (copy_to_user(arg, &ub, sizeof(struct gfs_user_buffer)))
16510 + * stat_gfs_ioctl - Do a GFS specific statfs
16511 + * @sdp: the filesystem
16512 + * @arg: the struct gfs_usage structure
16514 + * Returns: 0 on success, -EXXX on failure
16518 +stat_gfs_ioctl(struct gfs_sbd *sdp, void *arg)
16520 + struct gfs_usage *u;
16523 + u = gmalloc(sizeof(struct gfs_usage));
16525 + error = gfs_stat_gfs(sdp, u, TRUE);
16526 + if (!error && copy_to_user(arg, u, sizeof(struct gfs_usage)))
16535 + * reclaim_ioctl - ioctl called to perform metadata reclamation
16536 + * @sdp: the filesystem
16537 + * @arg: a pointer to a struct gfs_reclaim_stats in user space
16539 + * Returns: 0 on success, -EXXX on failure
16543 +reclaim_ioctl(struct gfs_sbd *sdp, void *arg)
16545 + struct gfs_reclaim_stats stats;
16548 + memset(&stats, 0, sizeof(struct gfs_reclaim_stats));
16550 + error = gfs_reclaim_metadata(sdp, &stats);
16554 + if (copy_to_user(arg, &stats, sizeof(struct gfs_reclaim_stats)))
16561 + * get_tune - pass the current tuneable parameters up to user space
16562 + * @sdp: the filesystem
16563 + * @arg: a pointer to a struct gfs_tune in user space
16565 + * Returns: 0 on success, -EXXX on failure
16569 +get_tune(struct gfs_sbd *sdp, void *arg)
16571 + if (copy_to_user(arg, &sdp->sd_tune, sizeof(struct gfs_tune)))
16578 + * set_tune - replace the current tuneable parameters with a set from user space
16579 + * @sdp: the filesystem
16580 + * @arg: a pointer to a struct gfs_tune in user space
16582 + * Returns: 0 on success, -EXXX on failure
16586 +set_tune(struct gfs_sbd *sdp, void *arg)
16588 + struct gfs_tune *gt;
16591 + gt = gmalloc(sizeof(struct gfs_tune));
16593 + if (copy_from_user(gt, arg, sizeof(struct gfs_tune)))
16596 + if (gt->gt_tune_version != GFS_TUNE_VERSION) {
16597 + printk("GFS: fsid=%s: invalid version of tuneable parameters\n",
16601 + memcpy(&sdp->sd_tune, gt, sizeof(struct gfs_tune));
16610 + * gfs_set_flag - set/clear a flag on an inode
16612 + * @cmd: GFS_SET_FLAG or GFS_CLEAR_FLAG
16613 + * @arg: the flag to change (in user space)
16615 + * Returns: 0 on success, -EXXX on failure
16619 +gfs_set_flag(struct gfs_inode *ip, unsigned int cmd, void *arg)
16621 + struct gfs_sbd *sdp = ip->i_sbd;
16622 + struct gfs_holder i_gh;
16623 + struct buffer_head *dibh;
16627 + if (copy_from_user(&flag, arg, sizeof(uint32_t)))
16630 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
16635 + if (ip->i_di.di_uid != current->fsuid && !capable(CAP_FOWNER))
16641 + case GFS_DIF_EXHASH:
16642 + case GFS_DIF_UNUSED:
16643 + case GFS_DIF_EA_INDIRECT:
16646 + case GFS_DIF_JDATA:
16647 + if (ip->i_di.di_type != GFS_FILE_REG || ip->i_di.di_size)
16651 + case GFS_DIF_DIRECTIO:
16652 + if (ip->i_di.di_type != GFS_FILE_REG)
16656 + case GFS_DIF_IMMUTABLE:
16657 + case GFS_DIF_APPENDONLY:
16658 + case GFS_DIF_NOATIME:
16659 + case GFS_DIF_SYNC:
16664 + case GFS_DIF_INHERIT_DIRECTIO:
16665 + case GFS_DIF_INHERIT_JDATA:
16666 + if (ip->i_di.di_type != GFS_FILE_DIR)
16674 + error = gfs_trans_begin(sdp, 1, 0);
16678 + error = gfs_get_inode_buffer(ip, &dibh);
16680 + goto out_trans_end;
16682 + if (cmd == GFS_SET_FLAG)
16683 + ip->i_di.di_flags |= flag;
16685 + ip->i_di.di_flags &= ~flag;
16687 + gfs_trans_add_bh(ip->i_gl, dibh);
16688 + gfs_dinode_out(&ip->i_di, dibh->b_data);
16693 + gfs_trans_end(sdp);
16696 + gfs_glock_dq_uninit(&i_gh);
16702 + * handle_roll - Read an atomic_t as an unsigned int
16705 + * if @a is negative, reset it to zero
16707 + * Returns: the value of the counter
16710 +static unsigned int
16711 +handle_roll(atomic_t *a)
16713 + int x = atomic_read(a);
16715 + atomic_set(a, 0);
16718 + return (unsigned int)x;
16722 + * fill_counters - Write a FS' counters into a buffer
16723 + * @sdp: the filesystem
16724 + * @buf: the buffer
16725 + * @size: the size of the buffer
16726 + * @count: where we are in the buffer
16732 +fill_counters(struct gfs_sbd *sdp,
16733 + char *buf, unsigned int size, unsigned int *count)
16737 + gfs_sprintf("sd_glock_count:locks::%d\n",
16738 + atomic_read(&sdp->sd_glock_count));
16739 + gfs_sprintf("sd_glock_held_count:locks held::%d\n",
16740 + atomic_read(&sdp->sd_glock_held_count));
16741 + gfs_sprintf("sd_inode_count:incore inodes::%d\n",
16742 + atomic_read(&sdp->sd_inode_count));
16743 + gfs_sprintf("sd_bufdata_count:metadata buffers::%d\n",
16744 + atomic_read(&sdp->sd_bufdata_count));
16745 + gfs_sprintf("sd_unlinked_ic_count:unlinked inodes::%d\n",
16746 + atomic_read(&sdp->sd_unlinked_ic_count));
16747 + gfs_sprintf("sd_quota_count:quota IDs::%d\n",
16748 + atomic_read(&sdp->sd_quota_count));
16749 + gfs_sprintf("sd_log_buffers:incore log buffers::%u\n",
16750 + sdp->sd_log_buffers);
16751 + gfs_sprintf("sd_log_seg_free:log segments free::%u\n",
16752 + sdp->sd_log_seg_free);
16753 + gfs_sprintf("ji_nsegment:log segments total::%u\n",
16754 + sdp->sd_jdesc.ji_nsegment);
16755 + gfs_sprintf("sd_mhc_count:meta header cache entries::%d\n",
16756 + atomic_read(&sdp->sd_mhc_count));
16757 + gfs_sprintf("sd_depend_count:glock dependencies::%d\n",
16758 + atomic_read(&sdp->sd_depend_count));
16759 + gfs_sprintf("sd_reclaim_count:glocks on reclaim list::%d\n",
16760 + atomic_read(&sdp->sd_reclaim_count));
16761 + gfs_sprintf("sd_log_wrap:log wraps::%"PRIu64"\n",
16762 + sdp->sd_log_wrap);
16763 + gfs_sprintf("sd_fh2dentry_misses:fh2dentry misses:diff:%u\n",
16764 + handle_roll(&sdp->sd_fh2dentry_misses));
16765 + gfs_sprintf("sd_reclaimed:glocks reclaimed:diff:%u\n",
16766 + handle_roll(&sdp->sd_reclaimed));
16767 + gfs_sprintf("sd_glock_nq_calls:glock nq calls:diff:%u\n",
16768 + handle_roll(&sdp->sd_glock_nq_calls));
16769 + gfs_sprintf("sd_glock_dq_calls:glock dq calls:diff:%u\n",
16770 + handle_roll(&sdp->sd_glock_dq_calls));
16771 + gfs_sprintf("sd_glock_prefetch_calls:glock prefetch calls:diff:%u\n",
16772 + handle_roll(&sdp->sd_glock_prefetch_calls));
16773 + gfs_sprintf("sd_lm_lock_calls:lm_lock calls:diff:%u\n",
16774 + handle_roll(&sdp->sd_lm_lock_calls));
16775 + gfs_sprintf("sd_lm_unlock_calls:lm_unlock calls:diff:%u\n",
16776 + handle_roll(&sdp->sd_lm_unlock_calls));
16777 + gfs_sprintf("sd_lm_callbacks:lm callbacks:diff:%u\n",
16778 + handle_roll(&sdp->sd_lm_callbacks));
16779 + gfs_sprintf("sd_ops_address:address operations:diff:%u\n",
16780 + handle_roll(&sdp->sd_ops_address));
16781 + gfs_sprintf("sd_ops_dentry:dentry operations:diff:%u\n",
16782 + handle_roll(&sdp->sd_ops_dentry));
16783 + gfs_sprintf("sd_ops_export:export operations:diff:%u\n",
16784 + handle_roll(&sdp->sd_ops_export));
16785 + gfs_sprintf("sd_ops_file:file operations:diff:%u\n",
16786 + handle_roll(&sdp->sd_ops_file));
16787 + gfs_sprintf("sd_ops_inode:inode operations:diff:%u\n",
16788 + handle_roll(&sdp->sd_ops_inode));
16789 + gfs_sprintf("sd_ops_super:super operations:diff:%u\n",
16790 + handle_roll(&sdp->sd_ops_super));
16791 + gfs_sprintf("sd_ops_vm:vm operations:diff:%u\n",
16792 + handle_roll(&sdp->sd_ops_vm));
16799 + * get_counters - return usage counters to user space
16800 + * @sdp: the filesystem
16801 + * @arg: the counter structure to fill
16803 + * Returns: 0 on success, -EXXX on failure
16807 +get_counters(struct gfs_sbd *sdp, void *arg)
16809 + struct gfs_user_buffer ub;
16810 + unsigned int size = sdp->sd_tune.gt_lockdump_size;
16814 + if (copy_from_user(&ub, arg, sizeof(struct gfs_user_buffer)))
16818 + if (size > ub.ub_size)
16819 + size = ub.ub_size;
16821 + buf = kmalloc(size, GFP_KERNEL);
16825 + error = fill_counters(sdp, buf, size, &ub.ub_count);
16827 + if (copy_to_user(ub.ub_data, buf, ub.ub_count) ||
16828 + copy_to_user(arg, &ub, sizeof(struct gfs_user_buffer)))
16838 + * gfs_ioctli - filesystem independent ioctl function
16839 + * @ip: the inode the ioctl was on
16840 + * @cmd: the ioctl number
16841 + * @arg: the argument (still in user space)
16843 + * Returns: 0 on success, -EXXX on failure
16847 +gfs_ioctli(struct gfs_inode *ip, unsigned int cmd, void *arg)
16849 + struct gfs_sbd *sdp = ip->i_sbd;
16853 + case GFS_GET_META:
16854 + error = get_meta(ip, arg);
16857 + case GFS_FILE_STAT:
16858 + error = file_stat(ip, arg);
16862 + if (capable(CAP_SYS_ADMIN))
16863 + gfs_gl_hash_clear(sdp, FALSE);
16868 + case GFS_GET_ARGS:
16869 + if (copy_to_user(arg, &sdp->sd_args,
16870 + sizeof(struct gfs_args)))
16874 + case GFS_GET_LOCKSTRUCT:
16875 + if (copy_to_user(arg, &sdp->sd_lockstruct,
16876 + sizeof(struct lm_lockstruct)))
16880 + case GFS_GET_SUPER:
16881 + error = do_get_super(sdp, arg);
16885 + if (capable(CAP_SYS_ADMIN))
16886 + error = jread_ioctl(sdp, arg);
16892 + if (capable(CAP_SYS_ADMIN))
16893 + error = jwrite_ioctl(sdp, arg);
16899 + error = jstat_ioctl(sdp, arg);
16903 + if (capable(CAP_SYS_ADMIN))
16904 + error = jtrunc_ioctl(sdp, arg);
16909 + case GFS_LOCK_DUMP:
16910 + if (capable(CAP_SYS_ADMIN))
16911 + error = lock_dump(sdp, arg);
16916 + case GFS_STATGFS:
16917 + error = stat_gfs_ioctl(sdp, arg);
16921 + if (capable(CAP_SYS_ADMIN))
16922 + error = gfs_freeze_fs(sdp);
16927 + case GFS_UNFREEZE:
16928 + if (capable(CAP_SYS_ADMIN))
16929 + gfs_unfreeze_fs(sdp);
16934 + case GFS_RECLAIM_METADATA:
16935 + if (capable(CAP_SYS_ADMIN))
16936 + error = reclaim_ioctl(sdp, arg);
16941 + case GFS_QUOTA_SYNC:
16942 + if (capable(CAP_SYS_ADMIN))
16943 + error = gfs_quota_sync(sdp);
16948 + case GFS_QUOTA_REFRESH:
16949 + if (capable(CAP_SYS_ADMIN))
16950 + error = gfs_quota_refresh(sdp, arg);
16955 + case GFS_QUOTA_READ:
16956 + /* Permissions handled later */
16957 + error = gfs_quota_read(sdp, arg);
16960 + case GFS_GET_TUNE:
16961 + error = get_tune(sdp, arg);
16964 + case GFS_SET_TUNE:
16965 + if (capable(CAP_SYS_ADMIN))
16966 + error = set_tune(sdp, arg);
16971 + case GFS_EATTR_GET:
16972 + /* Permissions handled later */
16973 + error = gfs_get_eattr_ioctl(sdp, ip, arg);
16976 + case GFS_EATTR_SET:
16977 + /* Permissions handled later */
16978 + error = gfs_set_eattr_ioctl(sdp, ip, arg);
16981 + case GFS_WHERE_ARE_YOU:
16983 + unsigned int x = GFS_MAGIC;
16984 + if (copy_to_user(arg, &x, sizeof(unsigned int)))
16989 + case GFS_SET_FLAG:
16990 + case GFS_CLEAR_FLAG:
16991 + /* Permissions handled later */
16992 + error = gfs_set_flag(ip, cmd, arg);
16995 + case GFS_GET_COUNTERS:
16996 + error = get_counters(sdp, arg);
16999 + case GFS_FILE_FLUSH:
17000 + gfs_glock_force_drop(ip->i_gl);
17010 diff -urN linux-orig/fs/gfs/ioctl.h linux-patched/fs/gfs/ioctl.h
17011 --- linux-orig/fs/gfs/ioctl.h 1969-12-31 18:00:00.000000000 -0600
17012 +++ linux-patched/fs/gfs/ioctl.h 2004-06-30 13:27:49.345710666 -0500
17014 +/******************************************************************************
17015 +*******************************************************************************
17017 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
17018 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
17020 +** This copyrighted material is made available to anyone wishing to use,
17021 +** modify, copy, or redistribute it subject to the terms and conditions
17022 +** of the GNU General Public License v.2.
17024 +*******************************************************************************
17025 +******************************************************************************/
17027 +#ifndef __IOCTL_DOT_H__
17028 +#define __IOCTL_DOT_H__
17030 +int gfs_add_bh_to_ub(struct gfs_user_buffer *ub, struct buffer_head *bh);
17032 +int gfs_ioctli(struct gfs_inode *ip, unsigned int cmd, void *arg);
17034 +#endif /* __IOCTL_DOT_H__ */
17035 diff -urN linux-orig/fs/gfs/locking.c linux-patched/fs/gfs/locking.c
17036 --- linux-orig/fs/gfs/locking.c 1969-12-31 18:00:00.000000000 -0600
17037 +++ linux-patched/fs/gfs/locking.c 2004-06-30 13:27:49.345710666 -0500
17039 +/******************************************************************************
17040 +*******************************************************************************
17042 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
17043 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
17045 +** This copyrighted material is made available to anyone wishing to use,
17046 +** modify, copy, or redistribute it subject to the terms and conditions
17047 +** of the GNU General Public License v.2.
17049 +*******************************************************************************
17050 +******************************************************************************/
17052 +#include <linux/sched.h>
17053 +#include <linux/slab.h>
17054 +#include <linux/smp_lock.h>
17055 +#include <linux/spinlock.h>
17056 +#include <asm/semaphore.h>
17057 +#include <linux/completion.h>
17058 +#include <linux/buffer_head.h>
17062 +#include "glock.h"
17063 +#include "locking.h"
17064 +#include "super.h"
17067 + * gfs_mount_lockproto - mount a locking protocol
17068 + * @sdp: the filesystem
17069 + * @args: mount arguments
17070 + * @silent: if TRUE, don't complain if the FS isn't a GFS fs
17072 + * Returns: 0 on success, -EXXX on failure
17076 +gfs_mount_lockproto(struct gfs_sbd *sdp, int silent)
17078 + struct gfs_sb *sb = NULL;
17079 + struct buffer_head *bh;
17080 + char *proto, *table, *p = NULL;
17083 + proto = sdp->sd_args.ar_lockproto;
17084 + table = sdp->sd_args.ar_locktable;
17086 + /* Try to autodetect */
17088 + if (!proto[0] || !table[0]) {
17089 + error = gfs_dread(sdp, GFS_SB_ADDR >> sdp->sd_fsb2bb_shift, NULL,
17090 + DIO_FORCE | DIO_START | DIO_WAIT, &bh);
17094 + sb = gmalloc(sizeof(struct gfs_sb));
17095 + gfs_sb_in(sb, bh->b_data);
17098 + error = gfs_check_sb(sdp, sb, silent);
17103 + proto = sb->sb_lockproto;
17106 + table = sb->sb_locktable;
17109 + error = lm_mount(proto, table, sdp->sd_args.ar_hostdata,
17110 + gfs_glock_cb, sdp,
17111 + GFS_MIN_LVB_SIZE, &sdp->sd_lockstruct);
17113 + printk("GFS: can't mount proto = %s, table = %s, hostdata = %s\n",
17114 + proto, table, sdp->sd_args.ar_hostdata);
17118 + GFS_ASSERT_SBD(sdp->sd_lockstruct.ls_lockspace, sdp,);
17119 + GFS_ASSERT_SBD(sdp->sd_lockstruct.ls_ops, sdp,);
17120 + GFS_ASSERT_SBD(sdp->sd_lockstruct.ls_lvb_size >= GFS_MIN_LVB_SIZE,
17124 + table = p = gmalloc(sizeof(sdp->sd_vfs->s_id) + 1);
17125 + strncpy(table, sdp->sd_vfs->s_id, sizeof(sdp->sd_vfs->s_id));
17126 + table[sizeof(sdp->sd_vfs->s_id)] = 0;
17129 + snprintf(sdp->sd_fsname, 256, "%s.%u", table,
17130 + sdp->sd_lockstruct.ls_jid);
17143 + * gfs_unmount_lockproto - Unmount lock protocol
17144 + * @sdp: The GFS superblock
17149 +gfs_unmount_lockproto(struct gfs_sbd *sdp)
17151 + lm_unmount(&sdp->sd_lockstruct);
17153 diff -urN linux-orig/fs/gfs/locking.h linux-patched/fs/gfs/locking.h
17154 --- linux-orig/fs/gfs/locking.h 1969-12-31 18:00:00.000000000 -0600
17155 +++ linux-patched/fs/gfs/locking.h 2004-06-30 13:27:49.345710666 -0500
17157 +/******************************************************************************
17158 +*******************************************************************************
17160 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
17161 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
17163 +** This copyrighted material is made available to anyone wishing to use,
17164 +** modify, copy, or redistribute it subject to the terms and conditions
17165 +** of the GNU General Public License v.2.
17167 +*******************************************************************************
17168 +******************************************************************************/
17170 +#ifndef __LOCKING_DOT_H__
17171 +#define __LOCKING_DOT_H__
17173 +int gfs_mount_lockproto(struct gfs_sbd *sdp, int silent);
17174 +void gfs_unmount_lockproto(struct gfs_sbd *sdp);
17176 +#endif /* __LOCKING_DOT_H__ */
17177 diff -urN linux-orig/fs/gfs/log.c linux-patched/fs/gfs/log.c
17178 --- linux-orig/fs/gfs/log.c 1969-12-31 18:00:00.000000000 -0600
17179 +++ linux-patched/fs/gfs/log.c 2004-06-30 13:27:49.346710434 -0500
17181 +/******************************************************************************
17182 +*******************************************************************************
17184 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
17185 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
17187 +** This copyrighted material is made available to anyone wishing to use,
17188 +** modify, copy, or redistribute it subject to the terms and conditions
17189 +** of the GNU General Public License v.2.
17191 +*******************************************************************************
17192 +******************************************************************************/
17195 + What rolls down stairs
17196 + Alone or in pairs
17197 + Rolls over your neighbor's dog.
17198 + What's great for a snack
17199 + And fits on your back
17200 + It's log, log, log!
17201 + It's lo-og, lo-og,
17202 + It's big, it's heavy, it's wood.
17203 + It's lo-og, lo-og,
17204 + It's better than bad, it's good.
17205 + Everyone wants a log,
17206 + You're gonna love it, log
17207 + Come on and get your log,
17208 + Everyone needs a log...
17209 + LOG... FROM BLAMMO!
17211 + -- The Ren and Stimpy Show
17214 +#include <linux/sched.h>
17215 +#include <linux/slab.h>
17216 +#include <linux/smp_lock.h>
17217 +#include <linux/spinlock.h>
17218 +#include <asm/semaphore.h>
17219 +#include <linux/completion.h>
17220 +#include <linux/buffer_head.h>
17228 + * gfs_struct2blk - compute the number of log descriptor blocks needed
17229 + * @sdp: the filesystem
17230 + * @nstruct: the number of structures
17231 + * @ssize: the size of the structures
17233 + * Compute the number of log descriptor blocks needed to hold a certain number
17234 + * of structures of a certain size.
17236 + * Returns: the number of blocks needed
17240 +gfs_struct2blk(struct gfs_sbd *sdp, unsigned int nstruct, unsigned int ssize)
17242 + unsigned int blks;
17243 + unsigned int first, second;
17246 + first = (sdp->sd_sb.sb_bsize - sizeof(struct gfs_log_descriptor)) / ssize;
17248 + if (nstruct > first) {
17249 + second = sdp->sd_sb.sb_bsize / ssize;
17250 + blks += DIV_RU(nstruct - first, second);
17257 + * gfs_blk2seg - Convert number of blocks into number of segments
17258 + * @sdp: The GFS superblock
17259 + * @blocks: The number of blocks
17261 + * Returns: The number of journal segments
17265 +gfs_blk2seg(struct gfs_sbd *sdp, unsigned int blocks)
17267 + return DIV_RU(blocks, sdp->sd_sb.sb_seg_size - 1);
17271 + * log_distance - Compute distance between two journal blocks
17272 + * @sdp: The GFS superblock
17273 + * @newer: The most recent journal block of the pair
17274 + * @older: The older journal block of the pair
17276 + * Compute the distance (in the journal direction) between two
17277 + * blocks in the journal
17279 + * Returns: the distance in blocks
17282 +static __inline__ unsigned int
17283 +log_distance(struct gfs_sbd *sdp, uint64_t newer, uint64_t older)
17287 + dist = newer - older;
17289 + dist += sdp->sd_jdesc.ji_nsegment * sdp->sd_sb.sb_seg_size;
17295 + * log_incr_head - Increment journal head
17296 + * @sdp: The GFS superblock
17297 + * @head: the variable holding the head of the journal
17299 + * Increment journal head by one.
17300 + * At the end of the journal, wrap head back to the start.
17304 +static __inline__ void
17305 +log_incr_head(struct gfs_sbd *sdp, uint64_t * head)
17307 + struct gfs_jindex *jdesc = &sdp->sd_jdesc;
17310 + jdesc->ji_addr + jdesc->ji_nsegment * sdp->sd_sb.sb_seg_size)
17311 + *head = jdesc->ji_addr;
17315 + * gfs_ail_start - Start I/O on the AIL
17316 + * @sdp: the filesystem
17322 +gfs_ail_start(struct gfs_sbd *sdp, int flags)
17324 + struct list_head *head = &sdp->sd_log_ail;
17325 + struct list_head *first, *tmp;
17326 + struct gfs_trans *first_tr, *tr;
17328 + gfs_log_lock(sdp);
17330 + if (list_empty(head)) {
17331 + gfs_log_unlock(sdp);
17335 + first = head->prev;
17336 + first_tr = list_entry(first, struct gfs_trans, tr_list);
17337 + gfs_ail_start_trans(sdp, first_tr);
17339 + if (flags & DIO_ALL)
17342 + for (tmp = first->prev; tmp != head; tmp = tmp->prev) {
17343 + if (first_tr && gfs_ail_empty_trans(sdp, first_tr))
17346 + tr = list_entry(tmp, struct gfs_trans, tr_list);
17347 + gfs_ail_start_trans(sdp, tr);
17350 + gfs_log_unlock(sdp);
17354 + * current_tail - Find block number of current log tail
17355 + * @sdp: The GFS superblock
17357 + * Find the block number of the current tail of the log.
17358 + * Assumes that the log lock is held.
17360 + * Returns: The tail's block number
17364 +current_tail(struct gfs_sbd *sdp)
17366 + struct gfs_trans *tr;
17369 + if (list_empty(&sdp->sd_log_ail)) {
17370 + tail = sdp->sd_log_head;
17372 + if (!gfs_log_is_header(sdp, tail)) {
17374 + GFS_ASSERT_SBD(gfs_log_is_header(sdp, tail), sdp,);
17377 + tr = list_entry(sdp->sd_log_ail.prev,
17378 + struct gfs_trans, tr_list);
17379 + tail = tr->tr_first_head;
17386 + * gfs_ail_empty - move the tail of the log forward (if possible)
17387 + * @sdp: the filesystem
17389 + * Returns: TRUE if the AIL is empty
17393 +gfs_ail_empty(struct gfs_sbd *sdp)
17395 + struct list_head *head, *tmp, *prev;
17396 + struct gfs_trans *tr;
17397 + uint64_t oldtail, newtail;
17398 + unsigned int dist;
17399 + unsigned int segments;
17402 + gfs_log_lock(sdp);
17404 + oldtail = current_tail(sdp);
17406 + for (head = &sdp->sd_log_ail, tmp = head->prev, prev = tmp->prev;
17408 + tmp = prev, prev = tmp->prev) {
17409 + tr = list_entry(tmp, struct gfs_trans, tr_list);
17411 + if (gfs_ail_empty_trans(sdp, tr)) {
17412 + list_del(&tr->tr_list);
17417 + newtail = current_tail(sdp);
17419 + if (oldtail != newtail) {
17420 + dist = log_distance(sdp, newtail, oldtail);
17422 + segments = dist / sdp->sd_sb.sb_seg_size;
17423 + GFS_ASSERT_SBD(segments * sdp->sd_sb.sb_seg_size == dist, sdp,);
17425 + spin_lock(&sdp->sd_log_seg_lock);
17426 + sdp->sd_log_seg_free += segments;
17427 + GFS_ASSERT_SBD(sdp->sd_log_seg_free < sdp->sd_jdesc.ji_nsegment,
17429 + spin_unlock(&sdp->sd_log_seg_lock);
17432 + ret = list_empty(head);
17434 + gfs_log_unlock(sdp);
17440 + * gfs_log_reserve - Make a log reservation
17441 + * @sdp: The GFS superblock
17442 + * @segments: The number of segments to reserve
17443 + * @jump_queue: if TRUE, don't care about fairness ordering
17445 + * Returns: 0 on success, -EXXX on failure
17449 +gfs_log_reserve(struct gfs_sbd *sdp, unsigned int segments, int jump_queue)
17451 + unsigned long start;
17452 + struct list_head list;
17453 + unsigned int try = 0;
17455 + GFS_ASSERT_SBD(segments, sdp,);
17457 + if (segments >= sdp->sd_jdesc.ji_nsegment) {
17458 + printk("GFS: fsid=%s: error reserving log space (%u, %u)\n",
17459 + sdp->sd_fsname, segments, sdp->sd_jdesc.ji_nsegment);
17463 + INIT_LIST_HEAD(&list);
17467 + spin_lock(&sdp->sd_log_seg_lock);
17469 + if (list_empty(&list)) {
17471 + list_add(&list, &sdp->sd_log_seg_list);
17473 + list_add_tail(&list, &sdp->sd_log_seg_list);
17474 + while (sdp->sd_log_seg_list.next != &list) {
17475 + DECLARE_WAITQUEUE(__wait_chan, current);
17476 + current->state = TASK_UNINTERRUPTIBLE;
17477 + add_wait_queue(&sdp->sd_log_seg_wait,
17479 + spin_unlock(&sdp->sd_log_seg_lock);
17481 + spin_lock(&sdp->sd_log_seg_lock);
17482 + remove_wait_queue(&sdp->sd_log_seg_wait,
17484 + current->state = TASK_RUNNING;
17489 + if (sdp->sd_log_seg_free >= segments) {
17490 + sdp->sd_log_seg_free -= segments;
17492 + spin_unlock(&sdp->sd_log_seg_lock);
17493 + wake_up(&sdp->sd_log_seg_wait);
17497 + spin_unlock(&sdp->sd_log_seg_lock);
17500 + gfs_log_flush(sdp);
17501 + gfs_ail_start(sdp, 0);
17504 + gfs_ail_empty(sdp);
17507 + if (time_after_eq(jiffies, start + 60 * HZ))
17508 + printk("GFS: fsid=%s: pid %d can't make log reservation (asking for %u segments)\n",
17509 + sdp->sd_fsname, current->pid, segments);
17517 + * gfs_log_release - Release a given number of log segments
17518 + * @sdp: The GFS superblock
17519 + * @segments: The number of segments
17524 +gfs_log_release(struct gfs_sbd *sdp, unsigned int segments)
17526 + spin_lock(&sdp->sd_log_seg_lock);
17527 + sdp->sd_log_seg_free += segments;
17528 + GFS_ASSERT_SBD(sdp->sd_log_seg_free < sdp->sd_jdesc.ji_nsegment, sdp,);
17529 + spin_unlock(&sdp->sd_log_seg_lock);
17533 + * log_get_header - Get the journal header buffer
17534 + * @sdp: The GFS superblock
17535 + * @tr: The transaction
17536 + * @next: TRUE if this is not a continuation of an existing transaction
17538 + * Returns: the log buffer
17541 +static struct gfs_log_buf *
17542 +log_get_header(struct gfs_sbd *sdp, struct gfs_trans *tr, int next)
17544 + struct gfs_log_buf *lb;
17545 + struct list_head *bmem;
17546 + struct gfs_log_header header;
17548 + GFS_ASSERT_SBD(gfs_log_is_header(sdp, tr->tr_log_head), sdp,);
17550 + GFS_ASSERT_SBD(tr->tr_num_free_bufs &&
17551 + !list_empty(&tr->tr_free_bufs), sdp,);
17552 + lb = list_entry(tr->tr_free_bufs.next, struct gfs_log_buf, lb_list);
17553 + list_del(&lb->lb_list);
17554 + tr->tr_num_free_bufs--;
17556 + GFS_ASSERT_SBD(tr->tr_num_free_bmem &&
17557 + !list_empty(&tr->tr_free_bmem), sdp,);
17558 + bmem = tr->tr_free_bmem.next;
17560 + tr->tr_num_free_bmem--;
17562 + gfs_logbh_init(sdp, &lb->lb_bh, tr->tr_log_head, (char *)bmem);
17563 + memset(bmem, 0, sdp->sd_sb.sb_bsize);
17565 + memset(&header, 0, sizeof (header));
17568 + header.lh_header.mh_magic = GFS_MAGIC;
17569 + header.lh_header.mh_type = GFS_METATYPE_LH;
17570 + header.lh_header.mh_format = GFS_FORMAT_LH;
17571 + header.lh_first = tr->tr_log_head;
17572 + header.lh_sequence = sdp->sd_sequence + 1;
17573 + header.lh_tail = current_tail(sdp);
17574 + header.lh_last_dump = sdp->sd_log_dump_last;
17576 + header.lh_header.mh_magic = GFS_MAGIC;
17577 + header.lh_header.mh_type = GFS_METATYPE_LH;
17578 + header.lh_header.mh_format = GFS_FORMAT_LH;
17579 + header.lh_first = tr->tr_first_head;
17580 + header.lh_sequence = sdp->sd_sequence;
17581 + header.lh_tail = current_tail(sdp);
17582 + header.lh_last_dump = sdp->sd_log_dump_last;
17584 + list_add(&lb->lb_list, &tr->tr_bufs);
17587 + gfs_log_header_out(&header, lb->lb_bh.b_data);
17588 + gfs_log_header_out(&header,
17589 + lb->lb_bh.b_data + GFS_BASIC_BLOCK -
17590 + sizeof(struct gfs_log_header));
17592 + log_incr_head(sdp, &tr->tr_log_head);
17598 + * gfs_log_get_buf - Get a buffer to use for control data
17599 + * @sdp: The GFS superblock
17600 + * @tr: The GFS transaction
17602 + * Generate a regular buffer for use in the journal as control data.
17604 + * Returns: the buffer
17607 +struct gfs_log_buf *
17608 +gfs_log_get_buf(struct gfs_sbd *sdp, struct gfs_trans *tr)
17610 + struct gfs_log_buf *lb;
17611 + struct list_head *bmem;
17613 + if (gfs_log_is_header(sdp, tr->tr_log_head))
17614 + log_get_header(sdp, tr, FALSE);
17616 + GFS_ASSERT_SBD(tr->tr_num_free_bufs &&
17617 + !list_empty(&tr->tr_free_bufs), sdp,);
17618 + lb = list_entry(tr->tr_free_bufs.next, struct gfs_log_buf, lb_list);
17619 + list_del(&lb->lb_list);
17620 + tr->tr_num_free_bufs--;
17622 + GFS_ASSERT_SBD(tr->tr_num_free_bmem
17623 + && !list_empty(&tr->tr_free_bmem), sdp,);
17624 + bmem = tr->tr_free_bmem.next;
17626 + tr->tr_num_free_bmem--;
17628 + gfs_logbh_init(sdp, &lb->lb_bh, tr->tr_log_head, (char *)bmem);
17629 + memset(bmem, 0, sdp->sd_sb.sb_bsize);
17631 + list_add(&lb->lb_list, &tr->tr_bufs);
17633 + log_incr_head(sdp, &tr->tr_log_head);
17639 + * gfs_log_fake_buf - Build a fake buffer head
17640 + * @sdp: the filesystem
17641 + * @tr: the transaction this is part of
17642 + * @data: the data the buffer should point to
17643 + * @unlock: a buffer that is unlocked as this struct gfs_log_buf is torn down
17648 +gfs_log_fake_buf(struct gfs_sbd *sdp, struct gfs_trans *tr, char *data,
17649 + struct buffer_head *unlock)
17651 + struct gfs_log_buf *lb;
17653 + if (gfs_log_is_header(sdp, tr->tr_log_head))
17654 + log_get_header(sdp, tr, FALSE);
17656 + GFS_ASSERT_SBD(tr->tr_num_free_bufs &&
17657 + !list_empty(&tr->tr_free_bufs), sdp,);
17658 + lb = list_entry(tr->tr_free_bufs.next, struct gfs_log_buf, lb_list);
17659 + list_del(&lb->lb_list);
17660 + tr->tr_num_free_bufs--;
17662 + gfs_logbh_init(sdp, &lb->lb_bh, tr->tr_log_head, data);
17663 + lb->lb_unlock = unlock;
17665 + list_add(&lb->lb_list, &tr->tr_bufs);
17667 + log_incr_head(sdp, &tr->tr_log_head);
17671 + * check_seg_usage - Check that we didn't use too many segments
17672 + * @sdp: The GFS superblock
17673 + * @tr: The transaction
17675 + * Also, make sure we don't ever get to a point where there are
17676 + * no dumps in the log (corrupting the log). Panic before we let
17682 +check_seg_usage(struct gfs_sbd *sdp, struct gfs_trans *tr)
17684 + struct gfs_jindex *jdesc = &sdp->sd_jdesc;
17685 + unsigned int dist;
17686 + unsigned int segments;
17687 + uint64_t head_off, head_wrap;
17688 + uint64_t dump_off, dump_wrap;
17690 + dist = log_distance(sdp, tr->tr_log_head, tr->tr_first_head);
17692 + segments = dist / sdp->sd_sb.sb_seg_size;
17693 + GFS_ASSERT_SBD(segments * sdp->sd_sb.sb_seg_size == dist, sdp,);
17694 + GFS_ASSERT_SBD(segments == tr->tr_seg_reserved, sdp,);
17696 + if (sdp->sd_log_dump_last) {
17697 + head_off = tr->tr_first_head +
17698 + tr->tr_seg_reserved * sdp->sd_sb.sb_seg_size;
17699 + head_wrap = sdp->sd_log_wrap;
17700 + if (head_off >= jdesc->ji_addr +
17701 + jdesc->ji_nsegment * sdp->sd_sb.sb_seg_size) {
17702 + head_off -= jdesc->ji_nsegment * sdp->sd_sb.sb_seg_size;
17706 + dump_off = sdp->sd_log_dump_last;
17707 + dump_wrap = sdp->sd_log_dump_last_wrap;
17709 + switch (head_wrap - dump_wrap) {
17714 + if (head_off < dump_off)
17716 + else if (head_off == dump_off &&
17717 + (tr->tr_flags & TRF_LOG_DUMP))
17721 + GFS_ASSERT_SBD(FALSE, sdp,
17722 + printk("head_off = %"PRIu64", head_wrap = %"PRIu64"\n",
17723 + head_off, head_wrap);
17724 + printk("dump_off = %"PRIu64", dump_wrap = %"PRIu64"\n",
17725 + dump_off, dump_wrap););
17732 + * log_free_buf - Free a struct gfs_log_buf (and possibly the data it points to)
17733 + * @sdp: the filesystem
17734 + * @lb: the log buffer
17739 +log_free_buf(struct gfs_sbd *sdp, struct gfs_log_buf *lb)
17743 + bmem = lb->lb_bh.b_data;
17744 + gfs_logbh_uninit(sdp, &lb->lb_bh);
17746 + if (lb->lb_unlock)
17747 + gfs_unlock_buffer(lb->lb_unlock);
17755 + * sync_trans - Add "last" descriptor to transaction and sync to disk
17756 + * @sdp: The GFS superblock
17757 + * @tr: The transaction
17759 + * Add the "last" descriptor on to the end of the current transaction
17760 + * and sync it out to disk. Don't commit it yet, though.
17762 + * Returns: 0 on success, -EXXX on failure
17766 +sync_trans(struct gfs_sbd *sdp, struct gfs_trans *tr)
17768 + struct list_head *tmp, *head, *prev;
17769 + struct gfs_log_descriptor desc;
17770 + struct gfs_log_buf *lb;
17772 + int error = 0, e;
17774 + /* Build LAST descriptor */
17776 + lb = gfs_log_get_buf(sdp, tr);
17778 + memset(&desc, 0, sizeof(struct gfs_log_descriptor));
17779 + desc.ld_header.mh_magic = GFS_MAGIC;
17780 + desc.ld_header.mh_type = GFS_METATYPE_LD;
17781 + desc.ld_header.mh_format = GFS_FORMAT_LD;
17782 + desc.ld_type = GFS_LOG_DESC_LAST;
17783 + desc.ld_length = 1;
17784 + for (blk = tr->tr_log_head; !gfs_log_is_header(sdp, blk); blk++)
17785 + desc.ld_length++;
17786 + gfs_desc_out(&desc, lb->lb_bh.b_data);
17788 + while (!gfs_log_is_header(sdp, tr->tr_log_head))
17789 + log_incr_head(sdp, &tr->tr_log_head);
17791 + check_seg_usage(sdp, tr);
17794 + Go in "prev" direction to start the I/O in order. */
17796 + for (head = &tr->tr_bufs, tmp = head->prev, prev = tmp->prev;
17798 + tmp = prev, prev = tmp->prev) {
17799 + lb = list_entry(tmp, struct gfs_log_buf, lb_list);
17802 + list_del(&lb->lb_list);
17803 + log_free_buf(sdp, lb);
17805 + e = gfs_logbh_start(sdp, &lb->lb_bh);
17807 + list_del(&lb->lb_list);
17808 + log_free_buf(sdp, lb);
17815 + Go in "next" direction to minimize sleeps/wakeups. */
17817 + while (!list_empty(&tr->tr_bufs)) {
17818 + lb = list_entry(tr->tr_bufs.next, struct gfs_log_buf, lb_list);
17820 + e = gfs_logbh_wait(sdp, &lb->lb_bh);
17824 + list_del(&lb->lb_list);
17825 + log_free_buf(sdp, lb);
17832 + * commit_trans - Commit the current transaction
17833 + * @sdp: The GFS superblock
17834 + * @tr: The transaction
17836 + * Write next header to commit
17838 + * Returns: 0 on success, -EXXX on failure
17842 +commit_trans(struct gfs_sbd *sdp, struct gfs_trans *tr)
17844 + struct gfs_log_buf *lb;
17847 + lb = log_get_header(sdp, tr, TRUE);
17849 + error = gfs_logbh_start(sdp, &lb->lb_bh);
17851 + error = gfs_logbh_wait(sdp, &lb->lb_bh);
17853 + log_free_buf(sdp, lb);
17859 + * disk_commit - Write a transaction to the on-disk journal
17860 + * @sdp: The GFS superblock
17861 + * @tr: The transaction
17863 + * Returns: 0 on success, -EXXX on failure
17867 +disk_commit(struct gfs_sbd *sdp, struct gfs_trans *tr)
17869 + uint64_t last_dump, last_dump_wrap;
17872 + GFS_ASSERT_SBD(!test_bit(SDF_ROFS, &sdp->sd_flags), sdp,);
17873 + tr->tr_log_head = sdp->sd_log_head;
17874 + tr->tr_first_head = tr->tr_log_head - 1;
17875 + GFS_ASSERT_SBD(gfs_log_is_header(sdp, tr->tr_first_head), sdp,);
17877 + LO_BUILD_BHLIST(sdp, tr);
17879 + GFS_ASSERT_SBD(!list_empty(&tr->tr_bufs), sdp,);
17881 + error = sync_trans(sdp, tr);
17883 + /* Eat unusable commit buffer */
17884 + log_free_buf(sdp, log_get_header(sdp, tr, TRUE));
17888 + if (tr->tr_flags & TRF_LOG_DUMP) {
17889 + /* This commit header should point to the log dump we're
17890 + committing as the current one. But save the copy of the
17891 + old one in case we have problems committing the dump. */
17893 + last_dump = sdp->sd_log_dump_last;
17894 + last_dump_wrap = sdp->sd_log_dump_last_wrap;
17896 + sdp->sd_log_dump_last = tr->tr_first_head;
17897 + sdp->sd_log_dump_last_wrap = sdp->sd_log_wrap;
17899 + error = commit_trans(sdp, tr);
17901 + sdp->sd_log_dump_last = last_dump;
17902 + sdp->sd_log_dump_last_wrap = last_dump_wrap;
17906 + error = commit_trans(sdp, tr);
17911 + if (sdp->sd_log_head > tr->tr_log_head)
17912 + sdp->sd_log_wrap++;
17913 + sdp->sd_log_head = tr->tr_log_head;
17914 + sdp->sd_sequence++;
17917 + GFS_ASSERT_SBD(!tr->tr_num_free_bufs &&
17918 + list_empty(&tr->tr_free_bufs), sdp,);
17919 + GFS_ASSERT_SBD(!tr->tr_num_free_bmem &&
17920 + list_empty(&tr->tr_free_bmem), sdp,);
17927 + * add_trans_to_ail - Add an on-disk committed transaction to the AIL
17927 + * @sdp: the filesystem
17928 + * @tr: the transaction
17933 +add_trans_to_ail(struct gfs_sbd *sdp, struct gfs_trans *tr)
17935 + struct gfs_log_element *le;
17937 + while (!list_empty(&tr->tr_elements)) {
17938 + le = list_entry(tr->tr_elements.next,
17939 + struct gfs_log_element, le_list);
17940 + LO_ADD_TO_AIL(sdp, le);
17943 + list_add(&tr->tr_list, &sdp->sd_log_ail);
17947 + * log_refund - Refund log segments to the free pool
17948 + * @sdp: The GFS superblock
17949 + * @tr: The transaction to examine
17951 + * Look at the number of segments reserved for this transaction and the
17952 + * number of segments actually needed for it. If they aren't the
17953 + * same, refund the difference to the free segment pool.
17955 + * Called with the log lock held
17959 +log_refund(struct gfs_sbd *sdp, struct gfs_trans *tr)
17961 + struct gfs_log_buf *lb;
17962 + struct list_head *bmem;
17963 + unsigned int num_bufs = 0, num_bmem = 0;
17964 + unsigned int segments;
17966 + LO_TRANS_SIZE(sdp, tr, NULL, NULL, &num_bufs, &num_bmem);
17968 + segments = gfs_blk2seg(sdp, num_bufs + 1);
17969 + num_bufs += segments + 1;
17970 + num_bmem += segments + 1;
17972 + if (tr->tr_seg_reserved > segments) {
17973 + spin_lock(&sdp->sd_log_seg_lock);
17974 + sdp->sd_log_seg_free += tr->tr_seg_reserved - segments;
17975 + GFS_ASSERT_SBD(sdp->sd_log_seg_free < sdp->sd_jdesc.ji_nsegment,
17977 + spin_unlock(&sdp->sd_log_seg_lock);
17979 + tr->tr_seg_reserved = segments;
17981 + GFS_ASSERT_SBD(tr->tr_seg_reserved == segments, sdp,);
17983 + GFS_ASSERT_SBD(tr->tr_num_free_bufs >= num_bufs, sdp,);
17984 + while (tr->tr_num_free_bufs > num_bufs) {
17985 + lb = list_entry(tr->tr_free_bufs.next,
17986 + struct gfs_log_buf, lb_list);
17987 + list_del(&lb->lb_list);
17989 + tr->tr_num_free_bufs--;
17992 + GFS_ASSERT_SBD(tr->tr_num_free_bmem >= num_bmem, sdp,);
17993 + while (tr->tr_num_free_bmem > num_bmem) {
17994 + bmem = tr->tr_free_bmem.next;
17997 + tr->tr_num_free_bmem--;
18002 + * trans_combine - combine two transactions
18003 + * @sdp: the filesystem
18004 + * @tr: the surviving transaction
18005 + * @new_tr: the transaction that gets freed
18007 + * Assumes that the two transactions are independent.
18011 +trans_combine(struct gfs_sbd *sdp, struct gfs_trans *tr,
18012 + struct gfs_trans *new_tr)
18014 + struct gfs_log_element *le;
18015 + struct gfs_log_buf *lb;
18016 + struct list_head *bmem;
18018 + tr->tr_file = __FILE__;
18019 + tr->tr_line = __LINE__;
18020 + tr->tr_seg_reserved += new_tr->tr_seg_reserved;
18021 + tr->tr_flags |= new_tr->tr_flags;
18022 + tr->tr_num_free_bufs += new_tr->tr_num_free_bufs;
18023 + tr->tr_num_free_bmem += new_tr->tr_num_free_bmem;
18025 + /* Combine the elements of the two transactions */
18027 + while (!list_empty(&new_tr->tr_elements)) {
18028 + le = list_entry(new_tr->tr_elements.next,
18029 + struct gfs_log_element, le_list);
18030 + GFS_ASSERT_SBD(le->le_trans == new_tr, sdp,);
18031 + le->le_trans = tr;
18032 + list_move(&le->le_list, &tr->tr_elements);
18035 + LO_TRANS_COMBINE(sdp, tr, new_tr);
18037 + while (!list_empty(&new_tr->tr_free_bufs)) {
18038 + lb = list_entry(new_tr->tr_free_bufs.next,
18039 + struct gfs_log_buf, lb_list);
18040 + list_move(&lb->lb_list, &tr->tr_free_bufs);
18041 + new_tr->tr_num_free_bufs--;
18043 + while (!list_empty(&new_tr->tr_free_bmem)) {
18044 + bmem = new_tr->tr_free_bmem.next;
18045 + list_move(bmem, &tr->tr_free_bmem);
18046 + new_tr->tr_num_free_bmem--;
18049 + GFS_ASSERT_SBD(!new_tr->tr_num_free_bufs, sdp,);
18050 + GFS_ASSERT_SBD(!new_tr->tr_num_free_bmem, sdp,);
18056 + * log_flush_internal - flush incore transactions
18057 + * @sdp: the filesystem
18058 + * @gl: The glock structure to flush. If NULL, flush the whole incore log
18063 +log_flush_internal(struct gfs_sbd *sdp, struct gfs_glock *gl)
18065 + struct gfs_trans *trans = NULL, *tr;
18068 + gfs_log_lock(sdp);
18070 + if (list_empty(&sdp->sd_log_incore))
18074 + if (!gl->gl_incore_le.le_trans)
18077 + trans = gl->gl_incore_le.le_trans;
18079 + list_del(&trans->tr_list);
18081 + while (!list_empty(&sdp->sd_log_incore)) {
18082 + tr = list_entry(sdp->sd_log_incore.next,
18083 + struct gfs_trans, tr_list);
18085 + list_del(&tr->tr_list);
18088 + trans_combine(sdp, trans, tr);
18094 + GFS_ASSERT_SBD(trans, sdp,);
18096 + log_refund(sdp, trans);
18098 + /* Actually do the stuff to commit the transaction */
18100 + error = disk_commit(sdp, trans);
18102 + gfs_io_error(sdp);
18104 + add_trans_to_ail(sdp, trans);
18106 + if (log_distance(sdp, sdp->sd_log_head, sdp->sd_log_dump_last) * GFS_DUMPS_PER_LOG >=
18107 + sdp->sd_jdesc.ji_nsegment * sdp->sd_sb.sb_seg_size)
18108 + set_bit(SDF_NEED_LOG_DUMP, &sdp->sd_flags);
18111 + if (list_empty(&sdp->sd_log_incore))
18112 + sdp->sd_vfs->s_dirt = FALSE;
18114 + gfs_log_unlock(sdp);
18116 + /* Dump if we need to. */
18118 + if (test_bit(SDF_NEED_LOG_DUMP, &sdp->sd_flags))
18119 + gfs_log_dump(sdp, FALSE);
18123 + * gfs_log_flush - flush the whole incore log
18124 + * @sdp: the filesystem
18129 +gfs_log_flush(struct gfs_sbd *sdp)
18131 + log_flush_internal(sdp, NULL);
18135 + * gfs_log_flush_glock - flush the incore log for a glock
18141 +gfs_log_flush_glock(struct gfs_glock *gl)
18143 + log_flush_internal(gl->gl_sbd, gl);
18147 + * incore_commit - commit a transaction in-core
18148 + * @sdp: the filesystem
18149 + * @new_tr: the transaction to commit
18151 + * Add the transaction @new_tr to the end of the incore commit list.
18152 + * Pull up and merge any previously committed transactions that share
18153 + * locks. Also pull up any rename transactions that need it.
18157 +incore_commit(struct gfs_sbd *sdp, struct gfs_trans *new_tr)
18159 + struct gfs_log_element *le;
18160 + struct gfs_trans *trans = NULL, *exist_tr;
18161 + struct gfs_log_buf *lb;
18162 + struct list_head *bmem;
18163 + struct list_head *tmp, *head, *next;
18165 + for (head = &new_tr->tr_elements, tmp = head->next;
18167 + tmp = tmp->next) {
18168 + le = list_entry(tmp, struct gfs_log_element, le_list);
18170 + exist_tr = LO_OVERLAP_TRANS(sdp, le);
18174 + if (exist_tr != trans) {
18175 + list_del(&exist_tr->tr_list);
18177 + trans_combine(sdp, trans, exist_tr);
18179 + trans = exist_tr;
18184 + trans->tr_file = __FILE__;
18185 + trans->tr_line = __LINE__;
18186 + trans->tr_seg_reserved += new_tr->tr_seg_reserved;
18187 + trans->tr_flags |= new_tr->tr_flags;
18188 + trans->tr_num_free_bufs += new_tr->tr_num_free_bufs;
18189 + trans->tr_num_free_bmem += new_tr->tr_num_free_bmem;
18191 + while (!list_empty(&new_tr->tr_free_bufs)) {
18192 + lb = list_entry(new_tr->tr_free_bufs.next,
18193 + struct gfs_log_buf, lb_list);
18194 + list_move(&lb->lb_list, &trans->tr_free_bufs);
18195 + new_tr->tr_num_free_bufs--;
18197 + while (!list_empty(&new_tr->tr_free_bmem)) {
18198 + bmem = new_tr->tr_free_bmem.next;
18199 + list_move(bmem, &trans->tr_free_bmem);
18200 + new_tr->tr_num_free_bmem--;
18205 + for (head = &new_tr->tr_elements, tmp = head->next, next = tmp->next;
18207 + tmp = next, next = next->next) {
18208 + le = list_entry(tmp, struct gfs_log_element, le_list);
18209 + LO_INCORE_COMMIT(sdp, trans, le);
18212 + if (trans != new_tr) {
18213 + GFS_ASSERT_SBD(!new_tr->tr_num_free_bufs, sdp,);
18214 + GFS_ASSERT_SBD(!new_tr->tr_num_free_bmem, sdp,);
18215 + GFS_ASSERT_SBD(list_empty(&new_tr->tr_elements), sdp,);
18219 + log_refund(sdp, trans);
18221 + list_add(&trans->tr_list, &sdp->sd_log_incore);
18225 + * gfs_log_commit - Commit a transaction to the log
18226 + * @sdp: the filesystem
18227 + * @tr: the transaction
18229 + * Returns: 0 on success, -EXXX on failure
18233 +gfs_log_commit(struct gfs_sbd *sdp, struct gfs_trans *tr)
18235 + struct gfs_log_buf *lb;
18236 + struct list_head *bmem;
18237 + unsigned int num_mblks = 0, num_eblks = 0, num_bufs = 0, num_bmem = 0;
18238 + unsigned int segments;
18240 + LO_TRANS_SIZE(sdp, tr, &num_mblks, &num_eblks, &num_bufs, &num_bmem);
18242 + GFS_ASSERT_SBD(num_mblks <= tr->tr_mblks_asked &&
18243 + num_eblks <= tr->tr_eblks_asked, sdp,
18244 + printk("type = (%s, %u)\n",
18245 + tr->tr_file, tr->tr_line);
18246 + printk("num_mblks = %u, tr->tr_mblks_asked = %u\n",
18247 + num_mblks, tr->tr_mblks_asked);
18248 + printk("num_eblks = %u, tr->tr_eblks_asked = %u\n",
18249 + num_eblks, tr->tr_eblks_asked););
18251 + segments = gfs_blk2seg(sdp, num_bufs + 1);
18252 + num_bufs += segments + 1;
18253 + num_bmem += segments + 1;
18255 + while (num_bufs--) {
18256 + lb = gmalloc(sizeof(struct gfs_log_buf));
18257 + memset(lb, 0, sizeof(struct gfs_log_buf));
18258 + list_add(&lb->lb_list, &tr->tr_free_bufs);
18259 + tr->tr_num_free_bufs++;
18261 + while (num_bmem--) {
18262 + bmem = gmalloc(sdp->sd_sb.sb_bsize);
18263 + list_add(bmem, &tr->tr_free_bmem);
18264 + tr->tr_num_free_bmem++;
18267 + gfs_log_lock(sdp);
18269 + incore_commit(sdp, tr);
18271 + if (sdp->sd_log_buffers > sdp->sd_tune.gt_incore_log_blocks) {
18272 + gfs_log_unlock(sdp);
18273 + gfs_log_flush(sdp);
18275 + sdp->sd_vfs->s_dirt = TRUE;
18276 + gfs_log_unlock(sdp);
18282 + * gfs_log_dump - make a Log Dump entry in the log
18283 + * @sdp: the filesystem
18284 + * @force: if TRUE, always make the dump even if one has been made recently
18289 +gfs_log_dump(struct gfs_sbd *sdp, int force)
18291 + struct gfs_log_element *le;
18292 + struct gfs_trans tr;
18293 + struct gfs_log_buf *lb;
18294 + struct list_head *bmem;
18295 + unsigned int num_bufs, num_bmem;
18296 + unsigned int segments;
18299 + if (test_and_set_bit(SDF_IN_LOG_DUMP, &sdp->sd_flags)) {
18300 + GFS_ASSERT_SBD(!force, sdp,);
18304 + memset(&tr, 0, sizeof(struct gfs_trans));
18305 + INIT_LIST_HEAD(&tr.tr_elements);
18306 + INIT_LIST_HEAD(&tr.tr_free_bufs);
18307 + INIT_LIST_HEAD(&tr.tr_free_bmem);
18308 + INIT_LIST_HEAD(&tr.tr_bufs);
18309 + tr.tr_flags = TRF_LOG_DUMP;
18310 + tr.tr_file = __FILE__;
18311 + tr.tr_line = __LINE__;
18314 + gfs_log_lock(sdp);
18316 + if (!force && !test_bit(SDF_NEED_LOG_DUMP, &sdp->sd_flags))
18319 + num_bufs = num_bmem = 0;
18320 + LO_DUMP_SIZE(sdp, NULL, &num_bufs, &num_bmem);
18321 + GFS_ASSERT_SBD(num_bufs, sdp,);
18322 + segments = gfs_blk2seg(sdp, num_bufs + 1);
18323 + num_bufs += segments + 1;
18324 + num_bmem += segments + 1;
18326 + if (tr.tr_seg_reserved >= segments &&
18327 + tr.tr_num_free_bufs >= num_bufs &&
18328 + tr.tr_num_free_bmem >= num_bmem)
18331 + gfs_log_unlock(sdp);
18333 + if (tr.tr_seg_reserved < segments) {
18334 + error = gfs_log_reserve(sdp,
18335 + segments - tr.tr_seg_reserved,
18337 + GFS_ASSERT_SBD(!error, sdp,);
18338 + tr.tr_seg_reserved = segments;
18340 + while (tr.tr_num_free_bufs < num_bufs) {
18341 + lb = gmalloc(sizeof(struct gfs_log_buf));
18342 + memset(lb, 0, sizeof(struct gfs_log_buf));
18343 + list_add(&lb->lb_list, &tr.tr_free_bufs);
18344 + tr.tr_num_free_bufs++;
18346 + while (tr.tr_num_free_bmem < num_bmem) {
18347 + bmem = gmalloc(sdp->sd_sb.sb_bsize);
18348 + list_add(bmem, &tr.tr_free_bmem);
18349 + tr.tr_num_free_bmem++;
18353 + if (tr.tr_seg_reserved > segments) {
18354 + spin_lock(&sdp->sd_log_seg_lock);
18355 + sdp->sd_log_seg_free += tr.tr_seg_reserved - segments;
18356 + GFS_ASSERT_SBD(sdp->sd_log_seg_free < sdp->sd_jdesc.ji_nsegment,
18358 + spin_unlock(&sdp->sd_log_seg_lock);
18359 + tr.tr_seg_reserved = segments;
18361 + while (tr.tr_num_free_bufs > num_bufs) {
18362 + lb = list_entry(tr.tr_free_bufs.next,
18363 + struct gfs_log_buf, lb_list);
18364 + list_del(&lb->lb_list);
18366 + tr.tr_num_free_bufs--;
18368 + while (tr.tr_num_free_bmem > num_bmem) {
18369 + bmem = tr.tr_free_bmem.next;
18372 + tr.tr_num_free_bmem--;
18375 + LO_BUILD_DUMP(sdp, &tr);
18377 + error = disk_commit(sdp, &tr);
18379 + gfs_io_error(sdp);
18381 + while (!list_empty(&tr.tr_elements)) {
18382 + le = list_entry(tr.tr_elements.next,
18383 + struct gfs_log_element, le_list);
18384 + LO_CLEAN_DUMP(sdp, le);
18387 + /* If there isn't anything in the AIL, we won't get back the log
18388 + space we reserved unless we do it ourselves. */
18390 + if (list_empty(&sdp->sd_log_ail)) {
18391 + spin_lock(&sdp->sd_log_seg_lock);
18392 + sdp->sd_log_seg_free += tr.tr_seg_reserved;
18393 + GFS_ASSERT_SBD(sdp->sd_log_seg_free < sdp->sd_jdesc.ji_nsegment,
18395 + spin_unlock(&sdp->sd_log_seg_lock);
18398 + clear_bit(SDF_NEED_LOG_DUMP, &sdp->sd_flags);
18401 + gfs_log_unlock(sdp);
18402 + clear_bit(SDF_IN_LOG_DUMP, &sdp->sd_flags);
18406 + * gfs_log_shutdown - write a shutdown header into a journal
18407 + * @sdp: the filesystem
18409 + * Returns: 0 on success, -EXXX on failure
18413 +gfs_log_shutdown(struct gfs_sbd *sdp)
18415 + struct gfs_log_buf *lb;
18417 + struct gfs_log_header head;
18418 + struct gfs_log_descriptor desc;
18419 + unsigned int elements = 0;
18422 + lb = gmalloc(sizeof(struct gfs_log_buf));
18423 + memset(lb, 0, sizeof(struct gfs_log_buf));
18424 + bmem = gmalloc(sdp->sd_sb.sb_bsize);
18426 + gfs_log_lock(sdp);
18428 + GFS_ASSERT_SBD(list_empty(&sdp->sd_log_ail), sdp,);
18429 + GFS_ASSERT_SBD(sdp->sd_log_seg_free == sdp->sd_jdesc.ji_nsegment - 1,
18431 + GFS_ASSERT_SBD(!sdp->sd_log_buffers, sdp,);
18432 + GFS_ASSERT_SBD(gfs_log_is_header(sdp, sdp->sd_log_head - 1), sdp,);
18434 + /* Build a "last" log descriptor */
18436 + memset(&desc, 0, sizeof(struct gfs_log_descriptor));
18437 + desc.ld_header.mh_magic = GFS_MAGIC;
18438 + desc.ld_header.mh_type = GFS_METATYPE_LD;
18439 + desc.ld_header.mh_format = GFS_FORMAT_LD;
18440 + desc.ld_type = GFS_LOG_DESC_LAST;
18441 + desc.ld_length = sdp->sd_sb.sb_seg_size - 1;
18443 + /* Write the descriptor */
18445 + gfs_logbh_init(sdp, &lb->lb_bh, sdp->sd_log_head, bmem);
18446 + memset(bmem, 0, sdp->sd_sb.sb_bsize);
18447 + gfs_desc_out(&desc, lb->lb_bh.b_data);
18448 + error = gfs_logbh_start(sdp, &lb->lb_bh);
18450 + error = gfs_logbh_wait(sdp, &lb->lb_bh);
18451 + gfs_logbh_uninit(sdp, &lb->lb_bh);
18456 + /* Move to the next header */
18458 + while (!gfs_log_is_header(sdp, sdp->sd_log_head))
18459 + log_incr_head(sdp, &sdp->sd_log_head);
18461 + LO_DUMP_SIZE(sdp, &elements, NULL, NULL);
18463 + /* Build the shutdown header */
18465 + memset(&head, 0, sizeof (struct gfs_log_header));
18466 + head.lh_header.mh_magic = GFS_MAGIC;
18467 + head.lh_header.mh_type = GFS_METATYPE_LH;
18468 + head.lh_header.mh_format = GFS_FORMAT_LH;
18469 + head.lh_flags = GFS_LOG_HEAD_UNMOUNT;
18470 + head.lh_first = sdp->sd_log_head;
18471 + head.lh_sequence = sdp->sd_sequence + 1;
18472 + /* Don't care about tail */
18473 + head.lh_last_dump = (elements) ? sdp->sd_log_dump_last : 0;
18475 + /* Write out the shutdown header */
18477 + gfs_logbh_init(sdp, &lb->lb_bh, sdp->sd_log_head, bmem);
18478 + memset(bmem, 0, sdp->sd_sb.sb_bsize);
18479 + gfs_log_header_out(&head, lb->lb_bh.b_data);
18480 + gfs_log_header_out(&head,
18481 + lb->lb_bh.b_data + GFS_BASIC_BLOCK -
18482 + sizeof(struct gfs_log_header));
18483 + error = gfs_logbh_start(sdp, &lb->lb_bh);
18485 + error = gfs_logbh_wait(sdp, &lb->lb_bh);
18486 + gfs_logbh_uninit(sdp, &lb->lb_bh);
18489 + gfs_log_unlock(sdp);
18496 diff -urN linux-orig/fs/gfs/log.h linux-patched/fs/gfs/log.h
18497 --- linux-orig/fs/gfs/log.h 1969-12-31 18:00:00.000000000 -0600
18498 +++ linux-patched/fs/gfs/log.h 2004-06-30 13:27:49.346710434 -0500
18500 +/******************************************************************************
18501 +*******************************************************************************
18503 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
18504 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
18506 +** This copyrighted material is made available to anyone wishing to use,
18507 +** modify, copy, or redistribute it subject to the terms and conditions
18508 +** of the GNU General Public License v.2.
18510 +*******************************************************************************
18511 +******************************************************************************/
18513 +#ifndef __LOG_DOT_H__
18514 +#define __LOG_DOT_H__
18517 + * gfs_log_lock - acquire the right to mess with the log manager
18518 + * @sdp: the filesystem
18522 +static __inline__ void
18523 +gfs_log_lock(struct gfs_sbd *sdp)
18525 + down(&sdp->sd_log_lock);
18529 + * gfs_log_unlock - release the right to mess with the log manager
18530 + * @sdp: the filesystem
18534 +static __inline__ void
18535 +gfs_log_unlock(struct gfs_sbd *sdp)
18537 + up(&sdp->sd_log_lock);
18540 +unsigned int gfs_struct2blk(struct gfs_sbd *sdp, unsigned int nstruct,
18541 + unsigned int ssize);
18542 +unsigned int gfs_blk2seg(struct gfs_sbd *sdp, unsigned int blocks);
18544 +int gfs_log_reserve(struct gfs_sbd *sdp, unsigned int segments, int jump_queue);
18545 +void gfs_log_release(struct gfs_sbd *sdp, unsigned int segments);
18547 +void gfs_ail_start(struct gfs_sbd *sdp, int flags);
18548 +int gfs_ail_empty(struct gfs_sbd *sdp);
18550 +void gfs_log_commit(struct gfs_sbd *sdp, struct gfs_trans *trans);
18551 +void gfs_log_flush(struct gfs_sbd *sdp);
18552 +void gfs_log_flush_glock(struct gfs_glock *gl);
18554 +int gfs_log_shutdown(struct gfs_sbd *sdp);
18556 +void gfs_log_dump(struct gfs_sbd *sdp, int force);
18558 +/* Internal crap used by the log operations */
18561 + * gfs_log_is_header - Discover if a block is a journal header
18562 + * @sdp: The GFS superblock
18563 + * @block: The block number
18565 + * Returns: TRUE if the block is on a journal segment boundary, FALSE otherwise
18568 +static __inline__ int
18569 +gfs_log_is_header(struct gfs_sbd *sdp, uint64_t block)
18571 + return !do_mod(block, sdp->sd_sb.sb_seg_size);
18574 +struct gfs_log_buf *gfs_log_get_buf(struct gfs_sbd *sdp, struct gfs_trans *tr);
18575 +void gfs_log_fake_buf(struct gfs_sbd *sdp, struct gfs_trans *tr, char *data,
18576 + struct buffer_head *unlock);
18578 +#endif /* __LOG_DOT_H__ */
18579 diff -urN linux-orig/fs/gfs/lops.c linux-patched/fs/gfs/lops.c
18580 --- linux-orig/fs/gfs/lops.c 1969-12-31 18:00:00.000000000 -0600
18581 +++ linux-patched/fs/gfs/lops.c 2004-06-30 13:27:49.348709970 -0500
18583 +/******************************************************************************
18584 +*******************************************************************************
18586 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
18587 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
18589 +** This copyrighted material is made available to anyone wishing to use,
18590 +** modify, copy, or redistribute it subject to the terms and conditions
18591 +** of the GNU General Public License v.2.
18593 +*******************************************************************************
18594 +******************************************************************************/
18596 +#include <linux/sched.h>
18597 +#include <linux/slab.h>
18598 +#include <linux/smp_lock.h>
18599 +#include <linux/spinlock.h>
18600 +#include <asm/semaphore.h>
18601 +#include <linux/completion.h>
18602 +#include <linux/buffer_head.h>
18606 +#include "glock.h"
18609 +#include "quota.h"
18610 +#include "recovery.h"
18611 +#include "trans.h"
18612 +#include "unlinked.h"
18615 + * generic_le_add - generic routine to add a log element to a transaction
18616 + * @sdp: the filesystem
18617 + * @le: the log entry
18622 +generic_le_add(struct gfs_sbd *sdp, struct gfs_log_element *le)
18624 + struct gfs_trans *tr;
18626 + GFS_ASSERT_SBD(le->le_ops &&
18628 + list_empty(&le->le_list), sdp,);
18630 + tr = current_transaction;
18631 + GFS_ASSERT_SBD(tr, sdp,);
18633 + le->le_trans = tr;
18634 + list_add(&le->le_list, &tr->tr_elements);
18638 + * glock_trans_end - drop a glock reference
18639 + * @sdp: the filesystem
18640 + * @le: the log element
18645 +glock_trans_end(struct gfs_sbd *sdp, struct gfs_log_element *le)
18647 + struct gfs_glock *gl = container_of(le, struct gfs_glock, gl_new_le);
18649 + GFS_ASSERT_GLOCK(gfs_glock_is_locked_by_me(gl) &&
18650 + gfs_glock_is_held_excl(gl), gl,);
18651 + gfs_glock_put(gl);
18655 + * glock_print - print debug info about a log element
18656 + * @sdp: the filesystem
18657 + * @le: the log element
18658 + * @where: is this a new transaction or an incore transaction
18663 +glock_print(struct gfs_sbd *sdp, struct gfs_log_element *le, unsigned int where)
18665 + struct gfs_glock *gl;
18668 + case TRANS_IS_NEW:
18669 + gl = container_of(le, struct gfs_glock, gl_new_le);
18671 + case TRANS_IS_INCORE:
18672 + gl = container_of(le, struct gfs_glock, gl_incore_le);
18675 + GFS_ASSERT_SBD(FALSE, sdp,);
18678 + printk(" Glock: (%u, %"PRIu64")\n",
18679 + gl->gl_name.ln_type,
18680 + gl->gl_name.ln_number);
18684 + * glock_overlap_trans - Find any incore transactions that might overlap with this LE
18685 + * @sdp: the filesystem
18686 + * @le: the log element
18690 +static struct gfs_trans *
18691 +glock_overlap_trans(struct gfs_sbd *sdp, struct gfs_log_element *le)
18693 + struct gfs_glock *gl = container_of(le, struct gfs_glock, gl_new_le);
18695 + return gl->gl_incore_le.le_trans;
18699 + * glock_incore_commit - commit this LE to the incore log
18700 + * @sdp: the filesystem
18701 + * @tr: the incore transaction this LE is a part of
18702 + * @le: the log element
18707 +glock_incore_commit(struct gfs_sbd *sdp, struct gfs_trans *tr,
18708 + struct gfs_log_element *le)
18710 + struct gfs_glock *gl = container_of(le, struct gfs_glock, gl_new_le);
18712 + if (gl->gl_incore_le.le_trans)
18713 + GFS_ASSERT_GLOCK(gl->gl_incore_le.le_trans == tr, gl,);
18715 + gl->gl_incore_le.le_trans = tr;
18716 + list_add(&gl->gl_incore_le.le_list, &tr->tr_elements);
18717 + if (tr != le->le_trans)
18721 + le->le_trans = NULL;
18722 + list_del_init(&le->le_list);
18726 + * glock_add_to_ail - Add this LE to the AIL
18727 + * @sdp: the filesystem
18728 + * @le: the log element
18733 +glock_add_to_ail(struct gfs_sbd *sdp, struct gfs_log_element *le)
18735 + le->le_trans = NULL;
18736 + list_del_init(&le->le_list);
18740 + * glock_trans_combine - combine two incore transactions
18741 + * @sdp: the filesystem
18742 + * @tr: the surviving transaction
18743 + * @new_tr: the transaction that's going to disappear
18748 +glock_trans_combine(struct gfs_sbd *sdp, struct gfs_trans *tr,
18749 + struct gfs_trans *new_tr)
18751 + tr->tr_num_gl += new_tr->tr_num_gl;
18755 + * buf_print - print debug info about a log element
18756 + * @sdp: the filesystem
18757 + * @le: the log element
18758 + * @where: is this a new transaction or an incore transaction
18763 +buf_print(struct gfs_sbd *sdp, struct gfs_log_element *le, unsigned int where)
18765 + struct gfs_bufdata *bd;
18768 + case TRANS_IS_NEW:
18769 + bd = container_of(le, struct gfs_bufdata, bd_new_le);
18771 + case TRANS_IS_INCORE:
18772 + bd = container_of(le, struct gfs_bufdata, bd_incore_le);
18775 + GFS_ASSERT_SBD(FALSE, sdp,);
18778 + printk(" Buffer: %"PRIu64"\n", (uint64_t)bd->bd_bh->b_blocknr);
18782 + * buf_incore_commit - commit this LE to the incore log
18783 + * @sdp: the filesystem
18784 + * @tr: the incore transaction this LE is a part of
18785 + * @le: the log element
18790 +buf_incore_commit(struct gfs_sbd *sdp, struct gfs_trans *tr,
18791 + struct gfs_log_element *le)
18793 + struct gfs_bufdata *bd = container_of(le, struct gfs_bufdata, bd_new_le);
18795 + if (bd->bd_frozen) {
18796 + kfree(bd->bd_frozen);
18797 + bd->bd_frozen = NULL;
18800 + if (bd->bd_incore_le.le_trans) {
18801 + GFS_ASSERT_SBD(bd->bd_incore_le.le_trans == tr, sdp,);
18802 + gfs_dunpin(sdp, bd->bd_bh, NULL);
18804 + bd->bd_incore_le.le_trans = tr;
18805 + list_add(&bd->bd_incore_le.le_list, &tr->tr_elements);
18806 + if (tr != le->le_trans)
18807 + tr->tr_num_buf++;
18809 + sdp->sd_log_buffers++;
18812 + le->le_trans = NULL;
18813 + list_del_init(&le->le_list);
18817 + * buf_add_to_ail - Add this LE to the AIL
18818 + * @sdp: the filesystem
18819 + * @le: the log element
18824 +buf_add_to_ail(struct gfs_sbd *sdp, struct gfs_log_element *le)
18826 + struct gfs_bufdata *bd = container_of(le,
18827 + struct gfs_bufdata,
18830 + gfs_dunpin(sdp, bd->bd_bh, le->le_trans);
18832 + le->le_trans = NULL;
18833 + list_del_init(&le->le_list);
18835 + GFS_ASSERT_SBD(sdp->sd_log_buffers, sdp,);
18836 + sdp->sd_log_buffers--;
18840 + * buf_trans_size - compute how much space the LE class takes up in a transaction
18841 + * @sdp: the filesystem
18842 + * @tr: the transaction
18843 + * @mblks: the number of regular metadata blocks
18844 + * @eblks: the number of extra blocks
18845 + * @blocks: the number of log blocks
18846 + * @bmem: the number of buffer-sized chunks of memory we need
18851 +buf_trans_size(struct gfs_sbd *sdp, struct gfs_trans *tr,
18852 + unsigned int *mblks, unsigned int *eblks,
18853 + unsigned int *blocks, unsigned int *bmem)
18855 + unsigned int cblks;
18857 + if (tr->tr_num_buf) {
18858 + cblks = gfs_struct2blk(sdp, tr->tr_num_buf,
18859 + sizeof(struct gfs_block_tag));
18862 + *mblks += tr->tr_num_buf;
18864 + *blocks += tr->tr_num_buf + cblks;
18871 + * buf_trans_combine - combine two incore transactions
18872 + * @sdp: the filesystem
18873 + * @tr: the surviving transaction
18874 + * @new_tr: the transaction that's going to disappear
18879 +buf_trans_combine(struct gfs_sbd *sdp, struct gfs_trans *tr,
18880 + struct gfs_trans *new_tr)
18882 + tr->tr_num_buf += new_tr->tr_num_buf;
18886 + * increment_generation - increment the generation number in metadata buffer
18887 + * @sdp: the filesystem
18888 + * @bd: the struct gfs_bufdata structure associated with the buffer
18893 +increment_generation(struct gfs_sbd *sdp, struct gfs_bufdata *bd)
18895 + struct gfs_meta_header *mh, *mh2;
18898 + mh = (struct gfs_meta_header *)bd->bd_bh->b_data;
18900 + tmp64 = gfs64_to_cpu(mh->mh_generation) + 1;
18901 + tmp64 = cpu_to_gfs64(tmp64);
18903 + if (bd->bd_frozen) {
18904 + mh2 = (struct gfs_meta_header *)bd->bd_frozen;
18905 + GFS_ASSERT_SBD(mh->mh_generation == mh2->mh_generation, sdp,);
18906 + mh2->mh_generation = tmp64;
18908 + mh->mh_generation = tmp64;
18912 + * buf_build_bhlist - create the buffers that will make up the ondisk part of a transaction
18913 + * @sdp: the filesystem
18914 + * @tr: the transaction
18919 +buf_build_bhlist(struct gfs_sbd *sdp, struct gfs_trans *tr)
18921 + struct list_head *tmp, *head;
18922 + struct gfs_log_element *le;
18923 + struct gfs_bufdata *bd;
18924 + struct gfs_log_descriptor desc;
18925 + struct gfs_block_tag tag;
18926 + struct gfs_log_buf *clb = NULL;
18927 + unsigned int num_ctl;
18928 + unsigned int offset = sizeof(struct gfs_log_descriptor);
18929 + unsigned int x, bufs;
18931 + if (!tr->tr_num_buf)
18934 + /* set up control buffers for descriptor and tags */
18936 + num_ctl = gfs_struct2blk(sdp, tr->tr_num_buf,
18937 + sizeof(struct gfs_block_tag));
18939 + for (x = 0; x < num_ctl; x++) {
18941 + gfs_log_get_buf(sdp, tr);
18943 + clb = gfs_log_get_buf(sdp, tr);
18946 + memset(&desc, 0, sizeof(struct gfs_log_descriptor));
18947 + desc.ld_header.mh_magic = GFS_MAGIC;
18948 + desc.ld_header.mh_type = GFS_METATYPE_LD;
18949 + desc.ld_header.mh_format = GFS_FORMAT_LD;
18950 + desc.ld_type = GFS_LOG_DESC_METADATA;
18951 + desc.ld_length = num_ctl + tr->tr_num_buf;
18952 + desc.ld_data1 = tr->tr_num_buf;
18953 + gfs_desc_out(&desc, clb->lb_bh.b_data);
18958 + for (head = &tr->tr_elements, tmp = head->next;
18960 + tmp = tmp->next) {
18961 + le = list_entry(tmp, struct gfs_log_element, le_list);
18962 + if (le->le_ops != &gfs_buf_lops)
18964 + bd = container_of(le, struct gfs_bufdata, bd_incore_le);
18966 + gfs_meta_check(sdp, bd->bd_bh);
18968 + gfs_lock_buffer(bd->bd_bh);
18970 + increment_generation(sdp, bd);
18972 + gfs_log_fake_buf(sdp, tr,
18973 + (bd->bd_frozen) ? bd->bd_frozen : bd->bd_bh->b_data,
18976 + if (offset + sizeof(struct gfs_block_tag) > sdp->sd_sb.sb_bsize) {
18977 + clb = list_entry(clb->lb_list.prev,
18978 + struct gfs_log_buf, lb_list);
18979 + if (gfs_log_is_header(sdp, clb->lb_bh.b_blocknr))
18980 + clb = list_entry(clb->lb_list.prev,
18981 + struct gfs_log_buf, lb_list);
18986 + memset(&tag, 0, sizeof(struct gfs_block_tag));
18987 + tag.bt_blkno = bd->bd_bh->b_blocknr;
18989 + gfs_block_tag_out(&tag, clb->lb_bh.b_data + offset);
18991 + offset += sizeof(struct gfs_block_tag);
18995 + GFS_ASSERT_SBD(x == num_ctl, sdp,);
18996 + GFS_ASSERT_SBD(bufs == tr->tr_num_buf, sdp,);
19000 + * buf_before_scan - called before journal replay
19001 + * @sdp: the filesystem
19002 + * @jid: the journal ID about to be replayed
19003 + * @head: the current head of the log
19004 + * @pass: the pass through the journal
19009 +buf_before_scan(struct gfs_sbd *sdp, unsigned int jid,
19010 + struct gfs_log_header *head, unsigned int pass)
19012 + if (pass == GFS_RECPASS_A1)
19013 + sdp->sd_recovery_replays =
19014 + sdp->sd_recovery_skips =
19015 + sdp->sd_recovery_sames = 0;
19019 + * replay_block - Replay a single metadata block
19020 + * @sdp: the filesystem
19021 + * @jdesc: the struct gfs_jindex structure for the journal being replayed
19022 + * @gl: the journal's glock
19023 + * @tag: the block tag describing the inplace location of the block
19024 + * @blkno: the location of the log's copy of the block
19026 + * Returns: 0 on success, -EXXX on failure
19030 +replay_block(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
19031 + struct gfs_glock *gl, struct gfs_block_tag *tag, uint64_t blkno)
19033 + struct buffer_head *inplace_bh, *log_bh;
19034 + struct gfs_meta_header inplace_mh, log_mh;
19035 + int replay_block = TRUE;
19038 + gfs_replay_check(sdp);
19040 + /* Warning: Using a real buffer here instead of a tempbh can be bad
19041 + on an OS that won't support multiple simultaneous buffers for the
19042 + same block on different glocks. */
19044 + error = gfs_dread(sdp, tag->bt_blkno, gl,
19045 + DIO_START | DIO_WAIT, &inplace_bh);
19048 + gfs_meta_check(sdp, inplace_bh);
19049 + gfs_meta_header_in(&inplace_mh, inplace_bh->b_data);
19051 + error = gfs_dread(sdp, blkno, gl, DIO_START | DIO_WAIT, &log_bh);
19053 + brelse(inplace_bh);
19056 + gfs_meta_check(sdp, log_bh);
19057 + gfs_meta_header_in(&log_mh, log_bh->b_data);
19059 + if (log_mh.mh_generation < inplace_mh.mh_generation) {
19060 + replay_block = FALSE;
19061 + sdp->sd_recovery_skips++;
19062 + } else if (log_mh.mh_generation == inplace_mh.mh_generation) {
19063 + if (memcmp(log_bh->b_data,
19064 + inplace_bh->b_data,
19065 + sdp->sd_sb.sb_bsize) == 0) {
19066 + replay_block = FALSE;
19067 + sdp->sd_recovery_sames++;
19071 + if (replay_block) {
19072 + memcpy(inplace_bh->b_data,
19074 + sdp->sd_sb.sb_bsize);
19076 + error = gfs_replay_buf(gl, inplace_bh);
19078 + sdp->sd_recovery_replays++;
19082 + brelse(inplace_bh);
19088 + * buf_scan_elements - Replay a metadata log descriptor
19089 + * @sdp: the filesystem
19090 + * @jdesc: the struct gfs_jindex structure for the journal being replayed
19091 + * @gl: the journal's glock
19092 + * @start: the starting block of the descriptor
19093 + * @desc: the descriptor structure
19094 + * @pass: the pass through the journal
19096 + * Returns: 0 on success, -EXXX on failure
19100 +buf_scan_elements(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
19101 + struct gfs_glock *gl, uint64_t start,
19102 + struct gfs_log_descriptor *desc, unsigned int pass)
19104 + struct gfs_block_tag tag;
19105 + struct buffer_head *bh;
19106 + uint64_t cblk = start;
19107 + unsigned int num_tags = desc->ld_data1;
19108 + unsigned int offset = sizeof(struct gfs_log_descriptor);
19112 + if (pass != GFS_RECPASS_A1)
19114 + if (desc->ld_type != GFS_LOG_DESC_METADATA)
19117 + x = gfs_struct2blk(sdp, num_tags, sizeof(struct gfs_block_tag));
19119 + error = gfs_increment_blkno(sdp, jdesc, gl, &start, TRUE);
19125 + GFS_ASSERT_SBD(num_tags, sdp,);
19127 + error = gfs_dread(sdp, cblk, gl, DIO_START | DIO_WAIT, &bh);
19131 + /* Do readahead for the inplace blocks in this control block */
19133 + unsigned int o2 = offset;
19134 + unsigned int nt2 = num_tags;
19136 + while (o2 + sizeof(struct gfs_block_tag) <=
19137 + sdp->sd_sb.sb_bsize) {
19138 + gfs_block_tag_in(&tag, bh->b_data + o2);
19139 + gfs_start_ra(gl, tag.bt_blkno, 1);
19142 + o2 += sizeof(struct gfs_block_tag);
19146 + while (offset + sizeof(struct gfs_block_tag) <=
19147 + sdp->sd_sb.sb_bsize) {
19148 + gfs_block_tag_in(&tag, bh->b_data + offset);
19150 + error = replay_block(sdp, jdesc, gl, &tag, start);
19157 + error = gfs_increment_blkno(sdp, jdesc, gl, &start, TRUE);
19161 + offset += sizeof(struct gfs_block_tag);
19166 + error = gfs_increment_blkno(sdp, jdesc, gl, &cblk, TRUE);
19182 + * buf_after_scan - called after journal replay
19183 + * @sdp: the filesystem
19184 + * @jid: the journal ID that was replayed
19185 + * @pass: the pass through the journal
19190 +buf_after_scan(struct gfs_sbd *sdp, unsigned int jid, unsigned int pass)
19192 + if (pass == GFS_RECPASS_A1) {
19193 + printk("GFS: fsid=%s: jid=%u: Replayed %u of %u blocks\n",
19194 + sdp->sd_fsname, jid,
19195 + sdp->sd_recovery_replays,
19196 + sdp->sd_recovery_replays + sdp->sd_recovery_skips +
19197 + sdp->sd_recovery_sames);
19198 + printk("GFS: fsid=%s: jid=%u: replays = %u, skips = %u, sames = %u\n",
19199 + sdp->sd_fsname, jid, sdp->sd_recovery_replays,
19200 + sdp->sd_recovery_skips, sdp->sd_recovery_sames);
19205 + * unlinked_print - print debug info about a log element
19206 + * @sdp: the filesystem
19207 + * @le: the log element
19208 + * @where: is this a new transaction or an incore transaction
19213 +unlinked_print(struct gfs_sbd *sdp, struct gfs_log_element *le,
19214 + unsigned int where)
19216 + struct gfs_unlinked *ul;
19220 + case TRANS_IS_NEW:
19221 + ul = container_of(le, struct gfs_unlinked, ul_new_le);
19222 + type = (test_bit(ULF_NEW_UL, &ul->ul_flags)) ?
19223 + "unlink" : "dealloc";
19225 + case TRANS_IS_INCORE:
19226 + ul = container_of(le, struct gfs_unlinked, ul_incore_le);
19227 + type = (test_bit(ULF_INCORE_UL, &ul->ul_flags)) ?
19228 + "unlink" : "dealloc";
19231 + GFS_ASSERT_SBD(FALSE, sdp,);
19234 + printk(" unlinked: %"PRIu64"/%"PRIu64", %s\n",
19235 + ul->ul_inum.no_formal_ino, ul->ul_inum.no_addr,
19240 + * unlinked_incore_commit - commit this LE to the incore log
19241 + * @sdp: the filesystem
19242 + * @tr: the incore transaction this LE is a part of
19243 + * @le: the log element
19248 +unlinked_incore_commit(struct gfs_sbd *sdp, struct gfs_trans *tr,
19249 + struct gfs_log_element *le)
19251 + struct gfs_unlinked *ul = container_of(le,
19252 + struct gfs_unlinked,
19254 + int n = !!test_bit(ULF_NEW_UL, &ul->ul_flags);
19255 + int i = !!test_bit(ULF_INCORE_UL, &ul->ul_flags);
19257 + if (ul->ul_incore_le.le_trans) {
19258 + GFS_ASSERT_SBD(ul->ul_incore_le.le_trans == tr, sdp,);
19259 + GFS_ASSERT_SBD(n != i, sdp,);
19261 + ul->ul_incore_le.le_trans = NULL;
19262 + list_del_init(&ul->ul_incore_le.le_list);
19263 + gfs_unlinked_put(sdp, ul);
19266 + GFS_ASSERT_SBD(tr->tr_num_iul, sdp,);
19267 + tr->tr_num_iul--;
19269 + GFS_ASSERT_SBD(tr->tr_num_ida, sdp,);
19270 + tr->tr_num_ida--;
19273 + gfs_unlinked_hold(sdp, ul);
19274 + ul->ul_incore_le.le_trans = tr;
19275 + list_add(&ul->ul_incore_le.le_list, &tr->tr_elements);
19278 + set_bit(ULF_INCORE_UL, &ul->ul_flags);
19279 + if (tr != le->le_trans)
19280 + tr->tr_num_iul++;
19282 + clear_bit(ULF_INCORE_UL, &ul->ul_flags);
19283 + if (tr != le->le_trans)
19284 + tr->tr_num_ida++;
19289 + gfs_unlinked_hold(sdp, ul);
19290 + GFS_ASSERT_SBD(!test_bit(ULF_IC_LIST, &ul->ul_flags), sdp,);
19291 + set_bit(ULF_IC_LIST, &ul->ul_flags);
19292 + atomic_inc(&sdp->sd_unlinked_ic_count);
19294 + GFS_ASSERT_SBD(test_bit(ULF_IC_LIST, &ul->ul_flags), sdp,);
19295 + clear_bit(ULF_IC_LIST, &ul->ul_flags);
19296 + gfs_unlinked_put(sdp, ul);
19297 + GFS_ASSERT_SBD(atomic_read(&sdp->sd_unlinked_ic_count), sdp,);
19298 + atomic_dec(&sdp->sd_unlinked_ic_count);
19301 + le->le_trans = NULL;
19302 + list_del_init(&le->le_list);
19303 + gfs_unlinked_put(sdp, ul);
19307 + * unlinked_add_to_ail - Add this LE to the AIL
19308 + * @sdp: the filesystem
19309 + * @le: the log element
19314 +unlinked_add_to_ail(struct gfs_sbd *sdp, struct gfs_log_element *le)
19316 + struct gfs_unlinked *ul = container_of(le,
19317 + struct gfs_unlinked,
19319 + int i = !!test_bit(ULF_INCORE_UL, &ul->ul_flags);
19322 + gfs_unlinked_hold(sdp, ul);
19323 + GFS_ASSERT_SBD(!test_bit(ULF_OD_LIST, &ul->ul_flags), sdp,);
19324 + set_bit(ULF_OD_LIST, &ul->ul_flags);
19325 + atomic_inc(&sdp->sd_unlinked_od_count);
19327 + GFS_ASSERT_SBD(test_bit(ULF_OD_LIST, &ul->ul_flags), sdp,);
19328 + clear_bit(ULF_OD_LIST, &ul->ul_flags);
19329 + gfs_unlinked_put(sdp, ul);
19330 + GFS_ASSERT_SBD(atomic_read(&sdp->sd_unlinked_od_count), sdp,);
19331 + atomic_dec(&sdp->sd_unlinked_od_count);
19334 + le->le_trans = NULL;
19335 + list_del_init(&le->le_list);
19336 + gfs_unlinked_put(sdp, ul);
19340 + * unlinked_clean_dump - clean up a LE after a log dump
19341 + * @sdp: the filesystem
19342 + * @le: the log element
19347 +unlinked_clean_dump(struct gfs_sbd *sdp, struct gfs_log_element *le)
19349 + le->le_trans = NULL;
19350 + list_del_init(&le->le_list);
19354 + * unlinked_trans_size - compute how much space the LE class takes up in a transaction
19355 + * @sdp: the filesystem
19356 + * @tr: the transaction
19357 + * @mblks: the number of regular metadata blocks
19358 + * @eblks: the number of extra blocks
19359 + * @blocks: the number of log blocks
19360 + * @bmem: the number of buffer-sized chunks of memory we need
19365 +unlinked_trans_size(struct gfs_sbd *sdp, struct gfs_trans *tr,
19366 + unsigned int *mblks, unsigned int *eblks,
19367 + unsigned int *blocks, unsigned int *bmem)
19369 + unsigned int ublks = 0;
19371 + if (tr->tr_num_iul)
19372 + ublks = gfs_struct2blk(sdp, tr->tr_num_iul,
19373 + sizeof(struct gfs_inum));
19374 + if (tr->tr_num_ida)
19375 + ublks += gfs_struct2blk(sdp, tr->tr_num_ida,
19376 + sizeof(struct gfs_inum));
19381 + *blocks += ublks;
19387 + * unlinked_trans_combine - combine two incore transactions
19388 + * @sdp: the filesystem
19389 + * @tr: the surviving transaction
19390 + * @new_tr: the transaction that's going to disappear
19395 +unlinked_trans_combine(struct gfs_sbd *sdp, struct gfs_trans *tr,
19396 + struct gfs_trans *new_tr)
19398 + tr->tr_num_iul += new_tr->tr_num_iul;
19399 + tr->tr_num_ida += new_tr->tr_num_ida;
19403 + * unlinked_build_bhlist - create the buffers that will make up the ondisk part of a transaction
19404 + * @sdp: the filesystem
19405 + * @tr: the transaction
19410 +unlinked_build_bhlist(struct gfs_sbd *sdp, struct gfs_trans *tr)
19412 + struct list_head *tmp, *head;
19413 + struct gfs_log_element *le;
19414 + struct gfs_unlinked *ul;
19415 + struct gfs_log_descriptor desc;
19416 + struct gfs_log_buf *lb;
19417 + unsigned int pass = 2;
19418 + unsigned int type, number;
19419 + unsigned int offset, entries;
19422 + if (tr->tr_flags & TRF_LOG_DUMP) {
19424 + type = GFS_LOG_DESC_IUL;
19425 + number = tr->tr_num_iul;
19430 + type = GFS_LOG_DESC_IUL;
19431 + number = tr->tr_num_iul;
19433 + type = GFS_LOG_DESC_IDA;
19434 + number = tr->tr_num_ida;
19441 + lb = gfs_log_get_buf(sdp, tr);
19443 + memset(&desc, 0, sizeof(struct gfs_log_descriptor));
19444 + desc.ld_header.mh_magic = GFS_MAGIC;
19445 + desc.ld_header.mh_type = GFS_METATYPE_LD;
19446 + desc.ld_header.mh_format = GFS_FORMAT_LD;
19447 + desc.ld_type = type;
19448 + desc.ld_length = gfs_struct2blk(sdp, number, sizeof(struct gfs_inum));
19449 + desc.ld_data1 = (tr->tr_flags & TRF_LOG_DUMP) ? TRUE : FALSE;
19450 + gfs_desc_out(&desc, lb->lb_bh.b_data);
19452 + offset = sizeof(struct gfs_log_descriptor);
19455 + for (head = &tr->tr_elements, tmp = head->next;
19457 + tmp = tmp->next) {
19458 + le = list_entry(tmp, struct gfs_log_element, le_list);
19459 + if (le->le_ops != &gfs_unlinked_lops)
19461 + if (tr->tr_flags & TRF_LOG_DUMP)
19462 + ul = container_of(le,
19463 + struct gfs_unlinked,
19466 + ul = container_of(le,
19467 + struct gfs_unlinked,
19469 + if (!!test_bit(ULF_INCORE_UL, &ul->ul_flags) != pass)
19473 + if (offset + sizeof(struct gfs_inum) > sdp->sd_sb.sb_bsize) {
19475 + lb = gfs_log_get_buf(sdp, tr);
19478 + gfs_inum_out(&ul->ul_inum,
19479 + lb->lb_bh.b_data + offset);
19481 + offset += sizeof(struct gfs_inum);
19485 + GFS_ASSERT_SBD(entries == number, sdp,);
19490 + * unlinked_dump_size - compute how much space the LE class takes up in a log dump
19491 + * @sdp: the filesystem
19492 + * @elements: the number of log elements in the dump
19493 + * @blocks: the number of blocks in the dump
19494 + * @bmem: the number of buffer-sized chunks of memory we need
19499 +unlinked_dump_size(struct gfs_sbd *sdp, unsigned int *elements,
19500 + unsigned int *blocks, unsigned int *bmem)
19502 + unsigned int c = atomic_read(&sdp->sd_unlinked_od_count);
19503 + unsigned int b = gfs_struct2blk(sdp, c, sizeof(struct gfs_inum));
19514 + * unlinked_build_dump - create a transaction that represents a log dump for this LE class
19515 + * @sdp: the filesystem
19516 + * @tr: the transaction to fill
19521 +unlinked_build_dump(struct gfs_sbd *sdp, struct gfs_trans *tr)
19523 + struct list_head *tmp, *head;
19524 + struct gfs_unlinked *ul;
19525 + unsigned int x = 0;
19527 + tr->tr_num_iul = atomic_read(&sdp->sd_unlinked_od_count);
19529 + spin_lock(&sdp->sd_unlinked_lock);
19531 + for (head = &sdp->sd_unlinked_list, tmp = head->next;
19533 + tmp = tmp->next) {
19534 + ul = list_entry(tmp, struct gfs_unlinked, ul_list);
19535 + if (!test_bit(ULF_OD_LIST, &ul->ul_flags))
19538 + GFS_ASSERT_SBD(!ul->ul_ondisk_le.le_trans, sdp,);
19539 + ul->ul_ondisk_le.le_trans = tr;
19540 + list_add(&ul->ul_ondisk_le.le_list, &tr->tr_elements);
19545 + spin_unlock(&sdp->sd_unlinked_lock);
19547 + GFS_ASSERT_SBD(x == atomic_read(&sdp->sd_unlinked_od_count), sdp,);
19551 + * unlinked_before_scan - called before a log dump is recovered
19552 + * @sdp: the filesystem
19553 + * @jid: the journal ID about to be scanned
19554 + * @head: the current head of the log
19555 + * @pass: the pass through the journal
19560 +unlinked_before_scan(struct gfs_sbd *sdp, unsigned int jid,
19561 + struct gfs_log_header *head, unsigned int pass)
19563 + if (pass == GFS_RECPASS_B1)
19564 + clear_bit(SDF_FOUND_UL_DUMP, &sdp->sd_flags);
19568 + * unlinked_scan_elements - scan unlinked inodes from the journal
19569 + * @sdp: the filesystem
19570 + * @jdesc: the struct gfs_jindex structure for the journal being scanned
19571 + * @gl: the journal's glock
19572 + * @start: the starting block of the descriptor
19573 + * @desc: the descriptor structure
19574 + * @pass: the pass through the journal
19576 + * Returns: 0 on success, -EXXX on failure
19580 +unlinked_scan_elements(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
19581 + struct gfs_glock *gl, uint64_t start,
19582 + struct gfs_log_descriptor *desc, unsigned int pass)
19584 + struct gfs_inum inum;
19585 + struct buffer_head *bh;
19586 + unsigned int offset = sizeof(struct gfs_log_descriptor);
19590 + if (pass != GFS_RECPASS_B1)
19593 + switch (desc->ld_type) {
19594 + case GFS_LOG_DESC_IUL:
19595 + if (test_bit(SDF_FOUND_UL_DUMP, &sdp->sd_flags))
19596 + GFS_ASSERT_SBD(!desc->ld_data1, sdp,);
19598 + GFS_ASSERT_SBD(desc->ld_data1, sdp,);
19599 + set_bit(SDF_FOUND_UL_DUMP, &sdp->sd_flags);
19603 + case GFS_LOG_DESC_IDA:
19604 + GFS_ASSERT_SBD(test_bit(SDF_FOUND_UL_DUMP, &sdp->sd_flags),
19612 + for (x = 0; x < desc->ld_length; x++) {
19613 + error = gfs_dread(sdp, start, gl, DIO_START | DIO_WAIT, &bh);
19618 + offset + sizeof(struct gfs_inum) <= sdp->sd_sb.sb_bsize;
19619 + offset += sizeof(struct gfs_inum)) {
19620 + gfs_inum_in(&inum, bh->b_data + offset);
19622 + if (inum.no_addr)
19623 + gfs_unlinked_merge(sdp, desc->ld_type, &inum);
19628 + error = gfs_increment_blkno(sdp, jdesc, gl, &start, TRUE);
19639 + * unlinked_after_scan - called after a log dump is recovered
19640 + * @sdp: the filesystem
19641 + * @jid: the journal ID that was scanned
19642 + * @pass: the pass through the journal
19647 +unlinked_after_scan(struct gfs_sbd *sdp, unsigned int jid, unsigned int pass)
19649 + if (pass == GFS_RECPASS_B1) {
19650 + GFS_ASSERT_SBD(test_bit(SDF_FOUND_UL_DUMP, &sdp->sd_flags),
19652 + printk("GFS: fsid=%s: Found %d unlinked inodes\n",
19653 + sdp->sd_fsname, atomic_read(&sdp->sd_unlinked_ic_count));
19658 + * quota_print - print debug info about a log element
19659 + * @sdp: the filesystem
19660 + * @le: the log element
19661 + * @where: is this a new transaction or an incore transaction
19666 +quota_print(struct gfs_sbd *sdp, struct gfs_log_element *le, unsigned int where)
19668 + struct gfs_quota_le *ql;
19670 + ql = container_of(le, struct gfs_quota_le, ql_le);
19671 + printk(" quota: %s %u: %"PRId64" blocks\n",
19672 + (test_bit(QDF_USER, &ql->ql_data->qd_flags)) ? "user" : "group",
19673 + ql->ql_data->qd_id, ql->ql_change);
19677 + * quota_incore_commit - commit this LE to the incore log
19678 + * @sdp: the filesystem
19679 + * @tr: the incore transaction this LE is a part of
19680 + * @le: the log element
19685 +quota_incore_commit(struct gfs_sbd *sdp, struct gfs_trans *tr,
19686 + struct gfs_log_element *le)
19688 + struct gfs_quota_le *ql = container_of(le, struct gfs_quota_le, ql_le);
19689 + struct gfs_quota_data *qd = ql->ql_data;
19691 + GFS_ASSERT_SBD(ql->ql_change, sdp,);
19693 + /* Make this change under the sd_quota_lock, so other processes
19694 + checking qd_change_ic don't have to acquire the log lock. */
19696 + spin_lock(&sdp->sd_quota_lock);
19697 + qd->qd_change_new -= ql->ql_change;
19698 + qd->qd_change_ic += ql->ql_change;
19699 + spin_unlock(&sdp->sd_quota_lock);
19701 + if (le->le_trans == tr)
19702 + list_add(&ql->ql_data_list, &qd->qd_le_list);
19704 + struct list_head *tmp, *head;
19705 + struct gfs_quota_le *tmp_ql;
19706 + int found = FALSE;
19708 + for (head = &qd->qd_le_list, tmp = head->next;
19710 + tmp = tmp->next) {
19711 + tmp_ql = list_entry(tmp, struct gfs_quota_le, ql_data_list);
19712 + if (tmp_ql->ql_le.le_trans != tr)
19715 + tmp_ql->ql_change += ql->ql_change;
19717 + list_del(&le->le_list);
19718 + gfs_quota_put(sdp, qd);
19721 + if (!tmp_ql->ql_change) {
19722 + list_del(&tmp_ql->ql_data_list);
19723 + list_del(&tmp_ql->ql_le.le_list);
19724 + gfs_quota_put(sdp, tmp_ql->ql_data);
19734 + le->le_trans = tr;
19735 + list_move(&le->le_list, &tr->tr_elements);
19737 + list_add(&ql->ql_data_list, &qd->qd_le_list);
19743 + * quota_add_to_ail - Add this LE to the AIL
19744 + * @sdp: the filesystem
19745 + * @le: the log element
19750 +quota_add_to_ail(struct gfs_sbd *sdp, struct gfs_log_element *le)
19752 + struct gfs_quota_le *ql = container_of(le, struct gfs_quota_le, ql_le);
19753 + struct gfs_quota_data *qd = ql->ql_data;
19755 + qd->qd_change_od += ql->ql_change;
19756 + if (qd->qd_change_od) {
19757 + if (!test_bit(QDF_OD_LIST, &qd->qd_flags)) {
19758 + gfs_quota_hold(sdp, qd);
19759 + set_bit(QDF_OD_LIST, &qd->qd_flags);
19760 + atomic_inc(&sdp->sd_quota_od_count);
19763 + GFS_ASSERT_SBD(test_bit(QDF_OD_LIST, &qd->qd_flags), sdp,);
19764 + clear_bit(QDF_OD_LIST, &qd->qd_flags);
19765 + gfs_quota_put(sdp, qd);
19766 + GFS_ASSERT_SBD(atomic_read(&sdp->sd_quota_od_count), sdp,);
19767 + atomic_dec(&sdp->sd_quota_od_count);
19770 + list_del(&ql->ql_data_list);
19771 + list_del(&le->le_list);
19772 + gfs_quota_put(sdp, qd);
19777 + * quota_clean_dump - clean up a LE after a log dump
19778 + * @sdp: the filesystem
19779 + * @le: the log element
19784 +quota_clean_dump(struct gfs_sbd *sdp, struct gfs_log_element *le)
19786 + le->le_trans = NULL;
19787 + list_del_init(&le->le_list);
19791 + * quota_trans_size - compute how much space the LE class takes up in a transaction
19792 + * @sdp: the filesystem
19793 + * @tr: the transaction
19794 + * @mblks: the number of regular metadata blocks
19795 + * @eblks: the number of extra blocks
19796 + * @blocks: the number of log blocks
19797 + * @bmem: the number of buffer-sized chunks of memory we need
19802 +quota_trans_size(struct gfs_sbd *sdp, struct gfs_trans *tr,
19803 + unsigned int *mblks, unsigned int *eblks,
19804 + unsigned int *blocks, unsigned int *bmem)
19806 + unsigned int qblks;
19808 + if (tr->tr_num_q) {
19809 + qblks = gfs_struct2blk(sdp, tr->tr_num_q,
19810 + sizeof(struct gfs_quota_tag));
19815 + *blocks += qblks;
19822 + * quota_trans_combine - combine two incore transactions
19823 + * @sdp: the filesystem
19824 + * @tr: the surviving transaction
19825 + * @new_tr: the transaction that's going to disappear
19830 +quota_trans_combine(struct gfs_sbd *sdp, struct gfs_trans *tr,
19831 + struct gfs_trans *new_tr)
19833 + tr->tr_num_q += new_tr->tr_num_q;
19837 + * quota_build_bhlist - create the buffers that will make up the ondisk part of a transaction
19838 + * @sdp: the filesystem
19839 + * @tr: the transaction
19844 +quota_build_bhlist(struct gfs_sbd *sdp, struct gfs_trans *tr)
19846 + struct list_head *tmp, *head;
19847 + struct gfs_log_element *le;
19848 + struct gfs_quota_le *ql;
19849 + struct gfs_log_descriptor desc;
19850 + struct gfs_quota_tag tag;
19851 + struct gfs_log_buf *lb;
19852 + unsigned int offset = sizeof(struct gfs_log_descriptor), entries = 0;
19854 + if (!tr->tr_num_q && !(tr->tr_flags & TRF_LOG_DUMP))
19857 + lb = gfs_log_get_buf(sdp, tr);
19859 + memset(&desc, 0, sizeof(struct gfs_log_descriptor));
19860 + desc.ld_header.mh_magic = GFS_MAGIC;
19861 + desc.ld_header.mh_type = GFS_METATYPE_LD;
19862 + desc.ld_header.mh_format = GFS_FORMAT_LD;
19863 + desc.ld_type = GFS_LOG_DESC_Q;
19864 + desc.ld_length = gfs_struct2blk(sdp, tr->tr_num_q,
19865 + sizeof(struct gfs_quota_tag));
19866 + desc.ld_data1 = tr->tr_num_q;
19867 + desc.ld_data2 = (tr->tr_flags & TRF_LOG_DUMP) ? TRUE : FALSE;
19868 + gfs_desc_out(&desc, lb->lb_bh.b_data);
19870 + for (head = &tr->tr_elements, tmp = head->next;
19872 + tmp = tmp->next) {
19873 + le = list_entry(tmp, struct gfs_log_element, le_list);
19874 + if (le->le_ops != &gfs_quota_lops)
19877 + ql = container_of(le, struct gfs_quota_le, ql_le);
19879 + if (offset + sizeof(struct gfs_quota_tag) >
19880 + sdp->sd_sb.sb_bsize) {
19882 + lb = gfs_log_get_buf(sdp, tr);
19885 + memset(&tag, 0, sizeof(struct gfs_quota_tag));
19886 + tag.qt_change = ql->ql_change;
19887 + tag.qt_flags = (test_bit(QDF_USER, &ql->ql_data->qd_flags)) ?
19888 + GFS_QTF_USER : 0;
19889 + tag.qt_id = ql->ql_data->qd_id;
19891 + gfs_quota_tag_out(&tag, lb->lb_bh.b_data + offset);
19893 + offset += sizeof(struct gfs_quota_tag);
19897 + GFS_ASSERT_SBD(entries == tr->tr_num_q, sdp,);
19901 + * quota_dump_size - compute how much space the LE class takes up in a log dump
19902 + * @sdp: the filesystem
19903 + * @elements: the number of log elements in the dump
19904 + * @blocks: the number of blocks in the dump
19905 + * @bmem: the number of buffer-sized chunks of memory we need
19910 +quota_dump_size(struct gfs_sbd *sdp, unsigned int *elements,
19911 + unsigned int *blocks, unsigned int *bmem)
19913 + unsigned int c = atomic_read(&sdp->sd_quota_od_count);
19914 + unsigned int b = gfs_struct2blk(sdp, c, sizeof(struct gfs_quota_tag));
19925 + * quota_build_dump - create a transaction that represents a log dump for this LE class
19926 + * @sdp: the filesystem
19927 + * @tr: the transaction to fill
19932 +quota_build_dump(struct gfs_sbd *sdp, struct gfs_trans *tr)
19934 + struct list_head *tmp, *head;
19935 + struct gfs_quota_data *qd;
19936 + struct gfs_quota_le *ql;
19937 + unsigned int x = 0;
19939 + tr->tr_num_q = atomic_read(&sdp->sd_quota_od_count);
19941 + spin_lock(&sdp->sd_quota_lock);
19943 + for (head = &sdp->sd_quota_list, tmp = head->next;
19945 + tmp = tmp->next) {
19946 + qd = list_entry(tmp, struct gfs_quota_data, qd_list);
19947 + if (!test_bit(QDF_OD_LIST, &qd->qd_flags))
19950 + ql = &qd->qd_ondisk_ql;
19952 + ql->ql_le.le_ops = &gfs_quota_lops;
19953 + GFS_ASSERT_SBD(!ql->ql_le.le_trans, sdp,);
19954 + ql->ql_le.le_trans = tr;
19955 + list_add(&ql->ql_le.le_list, &tr->tr_elements);
19957 + ql->ql_data = qd;
19958 + ql->ql_change = qd->qd_change_od;
19963 + spin_unlock(&sdp->sd_quota_lock);
19965 + GFS_ASSERT_SBD(x == atomic_read(&sdp->sd_quota_od_count), sdp,);
19969 + * quota_before_scan - called before a log dump is recovered
19970 + * @sdp: the filesystem
19971 + * @jid: the journal ID about to be scanned
19972 + * @head: the current head of the log
19973 + * @pass: the pass through the journal
19978 +quota_before_scan(struct gfs_sbd *sdp, unsigned int jid,
19979 + struct gfs_log_header *head, unsigned int pass)
19981 + if (pass == GFS_RECPASS_B1)
19982 + clear_bit(SDF_FOUND_Q_DUMP, &sdp->sd_flags);
19986 + * quota_scan_elements - scan quota inodes from the journal
19987 + * @sdp: the filesystem
19988 + * @jdesc: the struct gfs_jindex structure for the journal being scanned
19989 + * @gl: the journal's glock
19990 + * @start: the starting block of the descriptor
19991 + * @desc: the descriptor structure
19992 + * @pass: the pass through the journal
19994 + * Returns: 0 on success, -EXXX on failure
19998 +quota_scan_elements(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
19999 + struct gfs_glock *gl, uint64_t start,
20000 + struct gfs_log_descriptor *desc, unsigned int pass)
20002 + struct gfs_quota_tag tag;
20003 + struct buffer_head *bh;
20004 + unsigned int num_tags = desc->ld_data1;
20005 + unsigned int offset = sizeof(struct gfs_log_descriptor);
20009 + if (pass != GFS_RECPASS_B1)
20011 + if (desc->ld_type != GFS_LOG_DESC_Q)
20014 + if (test_bit(SDF_FOUND_Q_DUMP, &sdp->sd_flags))
20015 + GFS_ASSERT_SBD(!desc->ld_data2, sdp,);
20017 + GFS_ASSERT_SBD(desc->ld_data2, sdp,);
20018 + set_bit(SDF_FOUND_Q_DUMP, &sdp->sd_flags);
20024 + for (x = 0; x < desc->ld_length; x++) {
20025 + error = gfs_dread(sdp, start, gl, DIO_START | DIO_WAIT, &bh);
20029 + while (offset + sizeof(struct gfs_quota_tag) <=
20030 + sdp->sd_sb.sb_bsize) {
20031 + gfs_quota_tag_in(&tag, bh->b_data + offset);
20033 + error = gfs_quota_merge(sdp, &tag);
20040 + offset += sizeof(struct gfs_quota_tag);
20045 + error = gfs_increment_blkno(sdp, jdesc, gl, &start, TRUE);
20061 + * quota_after_scan - called after a log dump is recovered
20062 + * @sdp: the filesystem
20063 + * @jid: the journal ID about to be scanned
20064 + * @pass: the pass through the journal
20069 +quota_after_scan(struct gfs_sbd *sdp, unsigned int jid, unsigned int pass)
20071 + if (pass == GFS_RECPASS_B1) {
20072 + GFS_ASSERT_SBD(!sdp->sd_sb.sb_quota_di.no_formal_ino ||
20073 + test_bit(SDF_FOUND_Q_DUMP, &sdp->sd_flags),
20075 + printk("GFS: fsid=%s: Found quota changes for %d IDs\n",
20076 + sdp->sd_fsname, atomic_read(&sdp->sd_quota_od_count));
20080 +struct gfs_log_operations gfs_glock_lops = {
20081 + .lo_add = generic_le_add,
20082 + .lo_trans_end = glock_trans_end,
20083 + .lo_print = glock_print,
20084 + .lo_overlap_trans = glock_overlap_trans,
20085 + .lo_incore_commit = glock_incore_commit,
20086 + .lo_add_to_ail = glock_add_to_ail,
20087 + .lo_trans_combine = glock_trans_combine,
20088 + .lo_name = "glock"
20091 +struct gfs_log_operations gfs_buf_lops = {
20092 + .lo_add = generic_le_add,
20093 + .lo_print = buf_print,
20094 + .lo_incore_commit = buf_incore_commit,
20095 + .lo_add_to_ail = buf_add_to_ail,
20096 + .lo_trans_size = buf_trans_size,
20097 + .lo_trans_combine = buf_trans_combine,
20098 + .lo_build_bhlist = buf_build_bhlist,
20099 + .lo_before_scan = buf_before_scan,
20100 + .lo_scan_elements = buf_scan_elements,
20101 + .lo_after_scan = buf_after_scan,
20105 +struct gfs_log_operations gfs_unlinked_lops = {
20106 + .lo_add = generic_le_add,
20107 + .lo_print = unlinked_print,
20108 + .lo_incore_commit = unlinked_incore_commit,
20109 + .lo_add_to_ail = unlinked_add_to_ail,
20110 + .lo_clean_dump = unlinked_clean_dump,
20111 + .lo_trans_size = unlinked_trans_size,
20112 + .lo_trans_combine = unlinked_trans_combine,
20113 + .lo_build_bhlist = unlinked_build_bhlist,
20114 + .lo_dump_size = unlinked_dump_size,
20115 + .lo_build_dump = unlinked_build_dump,
20116 + .lo_before_scan = unlinked_before_scan,
20117 + .lo_scan_elements = unlinked_scan_elements,
20118 + .lo_after_scan = unlinked_after_scan,
20119 + .lo_name = "unlinked"
20122 +struct gfs_log_operations gfs_quota_lops = {
20123 + .lo_add = generic_le_add,
20124 + .lo_print = quota_print,
20125 + .lo_incore_commit = quota_incore_commit,
20126 + .lo_add_to_ail = quota_add_to_ail,
20127 + .lo_clean_dump = quota_clean_dump,
20128 + .lo_trans_size = quota_trans_size,
20129 + .lo_trans_combine = quota_trans_combine,
20130 + .lo_build_bhlist = quota_build_bhlist,
20131 + .lo_dump_size = quota_dump_size,
20132 + .lo_build_dump = quota_build_dump,
20133 + .lo_before_scan = quota_before_scan,
20134 + .lo_scan_elements = quota_scan_elements,
20135 + .lo_after_scan = quota_after_scan,
20136 + .lo_name = "quota"
20139 +struct gfs_log_operations *gfs_log_ops[] = {
20142 + &gfs_unlinked_lops,
20146 diff -urN linux-orig/fs/gfs/lops.h linux-patched/fs/gfs/lops.h
20147 --- linux-orig/fs/gfs/lops.h 1969-12-31 18:00:00.000000000 -0600
20148 +++ linux-patched/fs/gfs/lops.h 2004-06-30 13:27:49.348709970 -0500
20150 +/******************************************************************************
20151 +*******************************************************************************
20153 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
20154 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
20156 +** This copyrighted material is made available to anyone wishing to use,
20157 +** modify, copy, or redistribute it subject to the terms and conditions
20158 +** of the GNU General Public License v.2.
20160 +*******************************************************************************
20161 +******************************************************************************/
20163 +#ifndef __LOPS_DOT_H__
20164 +#define __LOPS_DOT_H__
20166 +extern struct gfs_log_operations gfs_glock_lops;
20167 +extern struct gfs_log_operations gfs_buf_lops;
20168 +extern struct gfs_log_operations gfs_unlinked_lops;
20169 +extern struct gfs_log_operations gfs_quota_lops;
20171 +extern struct gfs_log_operations *gfs_log_ops[];
20173 +#define INIT_LE(le, lops) \
20176 + (le)->le_ops = (lops); \
20177 + (le)->le_trans = NULL; \
20178 + INIT_LIST_HEAD(&(le)->le_list); \
20182 +#define LO_ADD(sdp, le) \
20185 + if ((le)->le_ops->lo_add) \
20186 + (le)->le_ops->lo_add((sdp), (le)); \
20190 +#define LO_TRANS_END(sdp, le) \
20193 + if ((le)->le_ops->lo_trans_end) \
20194 + (le)->le_ops->lo_trans_end((sdp), (le)); \
20198 +#define LO_PRINT(sdp, le, where) \
20201 + if ((le)->le_ops->lo_print) \
20202 + (le)->le_ops->lo_print((sdp), (le), (where)); \
20206 +static __inline__ struct gfs_trans *
20207 +LO_OVERLAP_TRANS(struct gfs_sbd *sdp, struct gfs_log_element *le)
20209 + if (le->le_ops->lo_overlap_trans)
20210 + return le->le_ops->lo_overlap_trans(sdp, le);
20215 +#define LO_INCORE_COMMIT(sdp, tr, le) \
20218 + if ((le)->le_ops->lo_incore_commit) \
20219 + (le)->le_ops->lo_incore_commit((sdp), (tr), (le)); \
20223 +#define LO_ADD_TO_AIL(sdp, le) \
20226 + if ((le)->le_ops->lo_add_to_ail) \
20227 + (le)->le_ops->lo_add_to_ail((sdp), (le)); \
20231 +#define LO_CLEAN_DUMP(sdp, le) \
20234 + if ((le)->le_ops->lo_clean_dump) \
20235 + (le)->le_ops->lo_clean_dump((sdp), (le)); \
20239 +#define LO_TRANS_SIZE(sdp, tr, mblks, eblks, blocks, bmem) \
20243 + for (__lops_x = 0; gfs_log_ops[__lops_x]; __lops_x++) \
20244 + if (gfs_log_ops[__lops_x]->lo_trans_size) \
20245 + gfs_log_ops[__lops_x]->lo_trans_size((sdp), (tr), (mblks), (eblks), (blocks), (bmem)); \
20249 +#define LO_TRANS_COMBINE(sdp, tr, new_tr) \
20253 + for (__lops_x = 0; gfs_log_ops[__lops_x]; __lops_x++) \
20254 + if (gfs_log_ops[__lops_x]->lo_trans_combine) \
20255 + gfs_log_ops[__lops_x]->lo_trans_combine((sdp), (tr), (new_tr)); \
20259 +#define LO_BUILD_BHLIST(sdp, tr) \
20263 + for (__lops_x = 0; gfs_log_ops[__lops_x]; __lops_x++) \
20264 + if (gfs_log_ops[__lops_x]->lo_build_bhlist) \
20265 + gfs_log_ops[__lops_x]->lo_build_bhlist((sdp), (tr)); \
20269 +#define LO_DUMP_SIZE(sdp, elements, blocks, bmem) \
20273 + for (__lops_x = 0; gfs_log_ops[__lops_x]; __lops_x++) \
20274 + if (gfs_log_ops[__lops_x]->lo_dump_size) \
20275 + gfs_log_ops[__lops_x]->lo_dump_size((sdp), (elements), (blocks), (bmem)); \
20279 +#define LO_BUILD_DUMP(sdp, tr) \
20283 + for (__lops_x = 0; gfs_log_ops[__lops_x]; __lops_x++) \
20284 + if (gfs_log_ops[__lops_x]->lo_build_dump) \
20285 + gfs_log_ops[__lops_x]->lo_build_dump((sdp), (tr)); \
20289 +#define LO_BEFORE_SCAN(sdp, jid, head, pass) \
20293 + for (__lops_x = 0; gfs_log_ops[__lops_x]; __lops_x++) \
20294 + if (gfs_log_ops[__lops_x]->lo_before_scan) \
20295 + gfs_log_ops[__lops_x]->lo_before_scan((sdp), (jid), (head), (pass)); \
20299 +static __inline__ int
20300 +LO_SCAN_ELEMENTS(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
20301 + struct gfs_glock *gl, uint64_t start,
20302 + struct gfs_log_descriptor *desc, unsigned int pass)
20307 + for (x = 0; gfs_log_ops[x]; x++)
20308 + if (gfs_log_ops[x]->lo_scan_elements) {
20309 + error = gfs_log_ops[x]->lo_scan_elements(sdp, jdesc, gl,
20310 + start, desc, pass);
20318 +#define LO_AFTER_SCAN(sdp, jid, pass) \
20322 + for (__lops_x = 0; gfs_log_ops[__lops_x]; __lops_x++) \
20323 + if (gfs_log_ops[__lops_x]->lo_after_scan) \
20324 + gfs_log_ops[__lops_x]->lo_after_scan((sdp), (jid), (pass)); \
20328 +#endif /* __LOPS_DOT_H__ */
20329 diff -urN linux-orig/fs/gfs/lvb.c linux-patched/fs/gfs/lvb.c
20330 --- linux-orig/fs/gfs/lvb.c 1969-12-31 18:00:00.000000000 -0600
20331 +++ linux-patched/fs/gfs/lvb.c 2004-06-30 13:27:49.349709738 -0500
20333 +/******************************************************************************
20334 +*******************************************************************************
20336 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
20337 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
20339 +** This copyrighted material is made available to anyone wishing to use,
20340 +** modify, copy, or redistribute it subject to the terms and conditions
20341 +** of the GNU General Public License v.2.
20343 +*******************************************************************************
20344 +******************************************************************************/
20346 +#include <linux/sched.h>
20347 +#include <linux/slab.h>
20348 +#include <linux/smp_lock.h>
20349 +#include <linux/spinlock.h>
20350 +#include <asm/semaphore.h>
20351 +#include <linux/completion.h>
20352 +#include <linux/buffer_head.h>
20356 +#define pv(struct, member, fmt) printk(" "#member" = "fmt"\n", struct->member);
20358 +#define CPIN_08(s1, s2, member, count) {memcpy((s1->member), (s2->member), (count));}
20359 +#define CPOUT_08(s1, s2, member, count) {memcpy((s2->member), (s1->member), (count));}
20360 +#define CPIN_16(s1, s2, member) {(s1->member) = gfs16_to_cpu((s2->member));}
20361 +#define CPOUT_16(s1, s2, member) {(s2->member) = cpu_to_gfs16((s1->member));}
20362 +#define CPIN_32(s1, s2, member) {(s1->member) = gfs32_to_cpu((s2->member));}
20363 +#define CPOUT_32(s1, s2, member) {(s2->member) = cpu_to_gfs32((s1->member));}
20364 +#define CPIN_64(s1, s2, member) {(s1->member) = gfs64_to_cpu((s2->member));}
20365 +#define CPOUT_64(s1, s2, member) {(s2->member) = cpu_to_gfs64((s1->member));}
20368 + * gfs_rgrp_lvb_in - Read in rgrp data
20369 + * @rb: the cpu-order structure
20375 +gfs_rgrp_lvb_in(struct gfs_rgrp_lvb *rb, char *lvb)
20377 + struct gfs_rgrp_lvb *str = (struct gfs_rgrp_lvb *)lvb;
20379 + CPIN_32(rb, str, rb_magic);
20380 + CPIN_32(rb, str, rb_free);
20381 + CPIN_32(rb, str, rb_useddi);
20382 + CPIN_32(rb, str, rb_freedi);
20383 + CPIN_32(rb, str, rb_usedmeta);
20384 + CPIN_32(rb, str, rb_freemeta);
20388 + * gfs_rgrp_lvb_out - Write out rgrp data
20389 + * @rb: the cpu-order structure
20395 +gfs_rgrp_lvb_out(struct gfs_rgrp_lvb *rb, char *lvb)
20397 + struct gfs_rgrp_lvb *str = (struct gfs_rgrp_lvb *)lvb;
20399 + CPOUT_32(rb, str, rb_magic);
20400 + CPOUT_32(rb, str, rb_free);
20401 + CPOUT_32(rb, str, rb_useddi);
20402 + CPOUT_32(rb, str, rb_freedi);
20403 + CPOUT_32(rb, str, rb_usedmeta);
20404 + CPOUT_32(rb, str, rb_freemeta);
20408 + * gfs_rgrp_lvb_print - Print out rgrp data
20409 + * @rb: the cpu-order structure
20410 + * @console - TRUE if this should be printed to the console,
20411 + * FALSE if it should be just printed to the incore debug
20416 +gfs_rgrp_lvb_print(struct gfs_rgrp_lvb *rb)
20418 + pv(rb, rb_magic, "%u");
20419 + pv(rb, rb_free, "%u");
20420 + pv(rb, rb_useddi, "%u");
20421 + pv(rb, rb_freedi, "%u");
20422 + pv(rb, rb_usedmeta, "%u");
20423 + pv(rb, rb_freemeta, "%u");
20427 + * gfs_quota_lvb_in - Read in quota data
20428 + * @rb: the cpu-order structure
20434 +gfs_quota_lvb_in(struct gfs_quota_lvb *qb, char *lvb)
20436 + struct gfs_quota_lvb *str = (struct gfs_quota_lvb *)lvb;
20438 + CPIN_32(qb, str, qb_magic);
20439 + CPIN_32(qb, str, qb_pad);
20440 + CPIN_64(qb, str, qb_limit);
20441 + CPIN_64(qb, str, qb_warn);
20442 + CPIN_64(qb, str, qb_value);
20446 + * gfs_quota_lvb_out - Write out quota data
20447 + * @rb: the cpu-order structure
20453 +gfs_quota_lvb_out(struct gfs_quota_lvb *qb, char *lvb)
20455 + struct gfs_quota_lvb *str = (struct gfs_quota_lvb *)lvb;
20457 + CPOUT_32(qb, str, qb_magic);
20458 + CPOUT_32(qb, str, qb_pad);
20459 + CPOUT_64(qb, str, qb_limit);
20460 + CPOUT_64(qb, str, qb_warn);
20461 + CPOUT_64(qb, str, qb_value);
20465 + * gfs_quota_lvb_print - Print out quota data
20466 + * @rb: the cpu-order structure
20467 + * @console - TRUE if this should be printed to the console,
20468 + * FALSE if it should be just printed to the incore debug
20473 +gfs_quota_lvb_print(struct gfs_quota_lvb *qb)
20475 + pv(qb, qb_magic, "%u");
20476 + pv(qb, qb_pad, "%u");
20477 + pv(qb, qb_limit, "%"PRIu64);
20478 + pv(qb, qb_warn, "%"PRIu64);
20479 + pv(qb, qb_value, "%"PRId64);
20481 diff -urN linux-orig/fs/gfs/lvb.h linux-patched/fs/gfs/lvb.h
20482 --- linux-orig/fs/gfs/lvb.h 1969-12-31 18:00:00.000000000 -0600
20483 +++ linux-patched/fs/gfs/lvb.h 2004-06-30 13:27:49.349709738 -0500
20485 +/******************************************************************************
20486 +*******************************************************************************
20488 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
20489 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
20491 +** This copyrighted material is made available to anyone wishing to use,
20492 +** modify, copy, or redistribute it subject to the terms and conditions
20493 +** of the GNU General Public License v.2.
20495 +*******************************************************************************
20496 +******************************************************************************/
20498 +#ifndef __LVB_DOT_H__
20499 +#define __LVB_DOT_H__
20501 +#define GFS_MIN_LVB_SIZE (32)
20503 +struct gfs_rgrp_lvb {
20504 + uint32_t rb_magic;
20505 + uint32_t rb_free;
20506 + uint32_t rb_useddi;
20507 + uint32_t rb_freedi;
20508 + uint32_t rb_usedmeta;
20509 + uint32_t rb_freemeta;
20512 +struct gfs_quota_lvb {
20513 + uint32_t qb_magic;
20515 + uint64_t qb_limit;
20516 + uint64_t qb_warn;
20517 + int64_t qb_value;
20520 +/* Translation functions */
20522 +void gfs_rgrp_lvb_in(struct gfs_rgrp_lvb *rb, char *lvb);
20523 +void gfs_rgrp_lvb_out(struct gfs_rgrp_lvb *rb, char *lvb);
20524 +void gfs_quota_lvb_in(struct gfs_quota_lvb *qb, char *lvb);
20525 +void gfs_quota_lvb_out(struct gfs_quota_lvb *qb, char *lvb);
20527 +/* Printing functions */
20529 +void gfs_rgrp_lvb_print(struct gfs_rgrp_lvb *rb);
20530 +void gfs_quota_lvb_print(struct gfs_quota_lvb *qb);
20532 +#endif /* __LVB_DOT_H__ */
20533 diff -urN linux-orig/fs/gfs/main.c linux-patched/fs/gfs/main.c
20534 --- linux-orig/fs/gfs/main.c 1969-12-31 18:00:00.000000000 -0600
20535 +++ linux-patched/fs/gfs/main.c 2004-06-30 13:27:49.349709738 -0500
20537 +/******************************************************************************
20538 +*******************************************************************************
20540 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
20541 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
20543 +** This copyrighted material is made available to anyone wishing to use,
20544 +** modify, copy, or redistribute it subject to the terms and conditions
20545 +** of the GNU General Public License v.2.
20547 +*******************************************************************************
20548 +******************************************************************************/
20550 +#include <linux/sched.h>
20551 +#include <linux/slab.h>
20552 +#include <linux/smp_lock.h>
20553 +#include <linux/spinlock.h>
20554 +#include <asm/semaphore.h>
20555 +#include <linux/completion.h>
20556 +#include <linux/buffer_head.h>
20557 +#include <linux/proc_fs.h>
20558 +#include <linux/module.h>
20559 +#include <linux/init.h>
20562 +#include "mount.h"
20563 +#include "ops_fstype.h"
20565 +struct proc_dir_entry *gfs_proc_entry = NULL;
20568 + * init_gfs_fs - Register GFS as a filesystem
20570 + * Returns: 0 on success, error code on failure
20578 + init_MUTEX(&gfs_mount_args_lock);
20580 + gfs_proc_entry = create_proc_read_entry("fs/gfs", S_IFREG | 0200, NULL, NULL, NULL);
20581 + if (!gfs_proc_entry) {
20582 + printk("GFS: can't register /proc/fs/gfs\n");
20586 + gfs_proc_entry->write_proc = gfs_proc_write;
20588 + gfs_random_number = xtime.tv_nsec;
20590 + gfs_glock_cachep = kmem_cache_create("gfs_glock", sizeof(struct gfs_glock),
20593 + if (!gfs_glock_cachep)
20596 + gfs_inode_cachep = kmem_cache_create("gfs_inode", sizeof(struct gfs_inode),
20599 + if (!gfs_inode_cachep)
20602 + gfs_bufdata_cachep = kmem_cache_create("gfs_bufdata", sizeof(struct gfs_bufdata),
20605 + if (!gfs_bufdata_cachep)
20608 + gfs_mhc_cachep = kmem_cache_create("gfs_meta_header_cache", sizeof(struct gfs_meta_header_cache),
20611 + if (!gfs_mhc_cachep)
20614 + error = register_filesystem(&gfs_fs_type);
20618 + printk("GFS %s (built %s %s) installed\n",
20619 + GFS_RELEASE_NAME, __DATE__, __TIME__);
20624 + if (gfs_mhc_cachep)
20625 + kmem_cache_destroy(gfs_mhc_cachep);
20627 + if (gfs_bufdata_cachep)
20628 + kmem_cache_destroy(gfs_bufdata_cachep);
20630 + if (gfs_inode_cachep)
20631 + kmem_cache_destroy(gfs_inode_cachep);
20633 + if (gfs_glock_cachep)
20634 + kmem_cache_destroy(gfs_glock_cachep);
20636 + down(&gfs_mount_args_lock);
20637 + if (gfs_mount_args) {
20638 + kfree(gfs_mount_args);
20639 + gfs_mount_args = NULL;
20641 + up(&gfs_mount_args_lock);
20642 + remove_proc_entry("fs/gfs", NULL);
20649 + * exit_gfs_fs - Unregister the file system
20656 + unregister_filesystem(&gfs_fs_type);
20658 + kmem_cache_destroy(gfs_mhc_cachep);
20659 + kmem_cache_destroy(gfs_bufdata_cachep);
20660 + kmem_cache_destroy(gfs_inode_cachep);
20661 + kmem_cache_destroy(gfs_glock_cachep);
20663 + down(&gfs_mount_args_lock);
20664 + if (gfs_mount_args) {
20665 + kfree(gfs_mount_args);
20666 + gfs_mount_args = NULL;
20668 + up(&gfs_mount_args_lock);
20669 + remove_proc_entry("fs/gfs", NULL);
20672 +MODULE_DESCRIPTION("Global File System " GFS_RELEASE_NAME);
20673 +MODULE_AUTHOR("Red Hat, Inc.");
20674 +MODULE_LICENSE("GPL");
20676 +module_init(init_gfs_fs);
20677 +module_exit(exit_gfs_fs);
20679 diff -urN linux-orig/fs/gfs/mount.c linux-patched/fs/gfs/mount.c
20680 --- linux-orig/fs/gfs/mount.c 1969-12-31 18:00:00.000000000 -0600
20681 +++ linux-patched/fs/gfs/mount.c 2004-06-30 13:27:49.349709738 -0500
20683 +/******************************************************************************
20684 +*******************************************************************************
20686 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
20687 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
20689 +** This copyrighted material is made available to anyone wishing to use,
20690 +** modify, copy, or redistribute it subject to the terms and conditions
20691 +** of the GNU General Public License v.2.
20693 +*******************************************************************************
20694 +******************************************************************************/
20696 +#include <linux/sched.h>
20697 +#include <linux/slab.h>
20698 +#include <linux/smp_lock.h>
20699 +#include <linux/spinlock.h>
20700 +#include <asm/semaphore.h>
20701 +#include <linux/completion.h>
20702 +#include <linux/buffer_head.h>
20703 +#include <linux/module.h>
20704 +#include <asm/uaccess.h>
20707 +#include "mount.h"
20709 +char *gfs_mount_args = NULL;
20710 +struct semaphore gfs_mount_args_lock;
20713 + * gfs_make_args - Parse mount arguments
20717 + * Return: 0 on success, -EXXX on failure
20721 +gfs_make_args(char *data, struct gfs_args *args)
20723 + char *options, *x, *y;
20724 + int do_free = FALSE;
20727 + /* If someone preloaded options, use those instead */
20729 + down(&gfs_mount_args_lock);
20730 + if (gfs_mount_args) {
20731 + data = gfs_mount_args;
20732 + gfs_mount_args = NULL;
20735 + up(&gfs_mount_args_lock);
20737 + /* Set some defaults */
20739 + memset(args, 0, sizeof(struct gfs_args));
20740 + args->ar_num_glockd = GFS_GLOCKD_DEFAULT;
20742 + /* Split the options into tokens with the "," character and
20745 + for (options = data; (x = strsep(&options, ",")); ) {
20749 + y = strchr(x, '=');
20753 + if (!strcmp(x, "lockproto")) {
20755 + printk("GFS: need argument to lockproto\n");
20759 + strncpy(args->ar_lockproto, y, 256);
20760 + args->ar_lockproto[255] = 0;
20763 + else if (!strcmp(x, "locktable")) {
20765 + printk("GFS: need argument to locktable\n");
20769 + strncpy(args->ar_locktable, y, 256);
20770 + args->ar_locktable[255] = 0;
20773 + else if (!strcmp(x, "hostdata")) {
20775 + printk("GFS: need argument to hostdata\n");
20779 + strncpy(args->ar_hostdata, y, 256);
20780 + args->ar_hostdata[255] = 0;
20783 + else if (!strcmp(x, "ignore_local_fs"))
20784 + args->ar_ignore_local_fs = TRUE;
20786 + else if (!strcmp(x, "localflocks"))
20787 + args->ar_localflocks = TRUE;
20789 + else if (!strcmp(x, "localcaching"))
20790 + args->ar_localcaching = TRUE;
20792 + else if (!strcmp(x, "upgrade"))
20793 + args->ar_upgrade = TRUE;
20795 + else if (!strcmp(x, "num_glockd")) {
20797 + printk("GFS: need argument to num_glockd\n");
20801 + sscanf(y, "%u", &args->ar_num_glockd);
20802 + if (!args->ar_num_glockd || args->ar_num_glockd > GFS_GLOCKD_MAX) {
20803 + printk("GFS: 0 < num_glockd <= %u (not %u)\n",
20804 + GFS_GLOCKD_MAX, args->ar_num_glockd);
20810 + else if (!strcmp(x, "acl"))
20811 + args->ar_posixacls = TRUE;
20816 + printk("GFS: unknown option: %s\n", x);
20823 + printk("GFS: invalid mount option(s)\n");
20832 + * gfs_proc_write - Read in some mount options
20834 + * @buffer: a buffer of mount options
20835 + * @count: the length of the mount options
20838 + * Called when someone writes to /proc/fs/gfs.
20839 + * It allows you to specify mount options when you can't do it
20840 + * from mount, i.e. from an initial ramdisk
20842 + * Returns: 0 on success, -EXXX on failure
20846 +gfs_proc_write(struct file *file,
20847 + const char *buffer, unsigned long count,
20853 + if (!try_module_get(THIS_MODULE))
20854 + return -EAGAIN; /* Huh!?! */
20855 + down(&gfs_mount_args_lock);
20857 + if (gfs_mount_args) {
20858 + kfree(gfs_mount_args);
20859 + gfs_mount_args = NULL;
20867 + gfs_mount_args = gmalloc(count + 1);
20870 + if (copy_from_user(gfs_mount_args, buffer, count))
20873 + gfs_mount_args[count] = 0;
20875 + /* Get rid of extra newlines */
20877 + for (p = gfs_mount_args; *p; p++)
20881 + up(&gfs_mount_args_lock);
20882 + module_put(THIS_MODULE);
20887 + kfree(gfs_mount_args);
20888 + gfs_mount_args = NULL;
20891 + up(&gfs_mount_args_lock);
20892 + module_put(THIS_MODULE);
20895 diff -urN linux-orig/fs/gfs/mount.h linux-patched/fs/gfs/mount.h
20896 --- linux-orig/fs/gfs/mount.h 1969-12-31 18:00:00.000000000 -0600
20897 +++ linux-patched/fs/gfs/mount.h 2004-06-30 13:27:49.349709738 -0500
20899 +/******************************************************************************
20900 +*******************************************************************************
20902 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
20903 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
20905 +** This copyrighted material is made available to anyone wishing to use,
20906 +** modify, copy, or redistribute it subject to the terms and conditions
20907 +** of the GNU General Public License v.2.
20909 +*******************************************************************************
20910 +******************************************************************************/
20912 +#ifndef __MOUNT_DOT_H__
20913 +#define __MOUNT_DOT_H__
20915 +int gfs_make_args(char *data, struct gfs_args *args);
20917 +/* Allow args to be passed to GFS when using an initial ram disk */
20919 +extern char *gfs_mount_args;
20920 +extern struct semaphore gfs_mount_args_lock;
20922 +int gfs_proc_write(struct file *file, const char *buffer,
20923 + unsigned long count, void *data);
20925 +#endif /* __MOUNT_DOT_H__ */
20926 diff -urN linux-orig/fs/gfs/ondisk.c linux-patched/fs/gfs/ondisk.c
20927 --- linux-orig/fs/gfs/ondisk.c 1969-12-31 18:00:00.000000000 -0600
20928 +++ linux-patched/fs/gfs/ondisk.c 2004-06-30 13:27:49.350709506 -0500
20930 +/******************************************************************************
20931 +*******************************************************************************
20933 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
20934 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
20936 +** This copyrighted material is made available to anyone wishing to use,
20937 +** modify, copy, or redistribute it subject to the terms and conditions
20938 +** of the GNU General Public License v.2.
20940 +*******************************************************************************
20941 +******************************************************************************/
20943 +#include <linux/sched.h>
20944 +#include <linux/slab.h>
20945 +#include <linux/smp_lock.h>
20946 +#include <linux/spinlock.h>
20947 +#include <asm/semaphore.h>
20948 +#include <linux/completion.h>
20949 +#include <linux/buffer_head.h>
20953 +#define pv(struct, member, fmt) printk(" "#member" = "fmt"\n", struct->member);
20955 +#define WANT_GFS_CONVERSION_FUNCTIONS
20956 +#include <linux/gfs_ondisk.h>
20958 diff -urN linux-orig/fs/gfs/ops_address.c linux-patched/fs/gfs/ops_address.c
20959 --- linux-orig/fs/gfs/ops_address.c 1969-12-31 18:00:00.000000000 -0600
20960 +++ linux-patched/fs/gfs/ops_address.c 2004-06-30 13:27:49.350709506 -0500
20962 +/******************************************************************************
20963 +*******************************************************************************
20965 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
20966 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
20968 +** This copyrighted material is made available to anyone wishing to use,
20969 +** modify, copy, or redistribute it subject to the terms and conditions
20970 +** of the GNU General Public License v.2.
20972 +*******************************************************************************
20973 +******************************************************************************/
20975 +#include <linux/sched.h>
20976 +#include <linux/slab.h>
20977 +#include <linux/smp_lock.h>
20978 +#include <linux/spinlock.h>
20979 +#include <asm/semaphore.h>
20980 +#include <linux/completion.h>
20981 +#include <linux/buffer_head.h>
20982 +#include <linux/pagemap.h>
20988 +#include "glock.h"
20989 +#include "inode.h"
20990 +#include "ops_address.h"
20992 +#include "quota.h"
20993 +#include "trans.h"
20996 + * get_block - Fills in a buffer head with details about a block
20997 + * @inode: The inode
20998 + * @lblock: The block number to look up
20999 + * @bh_result: The buffer head to return the result in
21000 + * @create: Non-zero if we may add block to the file
21006 +get_block(struct inode *inode, sector_t lblock,
21007 + struct buffer_head *bh_result, int create)
21009 + struct gfs_inode *ip = vn2ip(inode);
21010 + int new = create;
21014 + error = gfs_block_map(ip, lblock, &new, &dblock, NULL);
21018 + GFS_ASSERT_INODE(dblock || !create, ip,);
21023 + map_bh(bh_result, inode->i_sb, dblock);
21025 + set_buffer_new(bh_result);
21031 + * get_block_noalloc - Fills in a buffer head with details about a block
21032 + * @inode: The inode
21033 + * @lblock: The block number to look up
21034 + * @bh_result: The buffer head to return the result in
21035 + * @create: Non-zero if we may add block to the file
21041 +get_block_noalloc(struct inode *inode, sector_t lblock,
21042 + struct buffer_head *bh_result, int create)
21046 + error = get_block(inode, lblock, bh_result, FALSE);
21048 + GFS_ASSERT_INODE(!create || buffer_mapped(bh_result),
21066 +get_blocks(struct inode *inode, sector_t lblock,
21067 + unsigned long max_blocks,
21068 + struct buffer_head *bh_result, int create)
21070 + struct gfs_inode *ip = vn2ip(inode);
21071 + int new = create;
21076 + error = gfs_block_map(ip, lblock, &new, &dblock, &extlen);
21080 + GFS_ASSERT_INODE(dblock || !create, ip,);
21085 + map_bh(bh_result, inode->i_sb, dblock);
21087 + set_buffer_new(bh_result);
21089 + if (extlen > max_blocks)
21090 + extlen = max_blocks;
21091 + bh_result->b_size = extlen << inode->i_blkbits;
21097 + * get_blocks_noalloc -
21108 +get_blocks_noalloc(struct inode *inode, sector_t lblock,
21109 + unsigned long max_blocks,
21110 + struct buffer_head *bh_result, int create)
21114 + error = get_blocks(inode, lblock, max_blocks, bh_result, FALSE);
21116 + GFS_ASSERT_INODE(!create || buffer_mapped(bh_result),
21123 + * gfs_writepage - Write complete page
21124 + * @page: Page to write
21130 +gfs_writepage(struct page *page, struct writeback_control *wbc)
21132 + struct gfs_inode *ip = vn2ip(page->mapping->host);
21135 + atomic_inc(&ip->i_sbd->sd_ops_address);
21137 + GFS_ASSERT_INODE(gfs_glock_is_held_excl(ip->i_gl) &&
21138 + !gfs_is_stuffed(ip), ip,);
21140 + error = block_write_full_page(page, get_block_noalloc, wbc);
21142 + gfs_flush_meta_cache(ip);
21144 + if (error == -EIO)
21145 + gfs_io_error_inode(ip);
21151 + * stuffed_readpage - Fill in a Linux page with stuffed file data
21153 + * @page: the page
21159 +stuffed_readpage(struct gfs_inode *ip, struct page *page)
21161 + struct buffer_head *dibh;
21165 + GFS_ASSERT_INODE(PageLocked(page), ip,);
21167 + error = gfs_get_inode_buffer(ip, &dibh);
21169 + kaddr = kmap(page);
21170 + memcpy((char *)kaddr,
21171 + dibh->b_data + sizeof(struct gfs_dinode),
21172 + ip->i_di.di_size);
21173 + memset((char *)kaddr + ip->i_di.di_size,
21175 + PAGE_CACHE_SIZE - ip->i_di.di_size);
21180 + SetPageUptodate(page);
21187 + * readi_readpage - readpage that goes through gfs_internal_read()
21188 + * @page: The page to read
21194 +readi_readpage(struct page *page)
21196 + struct gfs_inode *ip = vn2ip(page->mapping->host);
21200 + kaddr = kmap(page);
21202 + ret = gfs_internal_read(ip, kaddr,
21203 + (uint64_t)page->index << PAGE_CACHE_SHIFT,
21204 + PAGE_CACHE_SIZE);
21206 + if (ret < PAGE_CACHE_SIZE)
21207 + memset(kaddr + ret, 0, PAGE_CACHE_SIZE - ret);
21208 + SetPageUptodate(page);
21214 + unlock_page(page);
21220 + * gfs_readpage - readpage with locking
21221 + * @file: The file to read a page for
21222 + * @page: The page to read
21228 +gfs_readpage(struct file *file, struct page *page)
21230 + struct gfs_inode *ip = vn2ip(page->mapping->host);
21233 + atomic_inc(&ip->i_sbd->sd_ops_address);
21235 + if (!gfs_glock_is_locked_by_me(ip->i_gl)) {
21236 + unlock_page(page);
21237 + bitch_about(ip->i_sbd, &ip->i_sbd->sd_last_unlocked_aop,
21238 + "unlocked readpage request");
21242 + if (!gfs_is_jdata(ip)) {
21243 + if (gfs_is_stuffed(ip) && !page->index) {
21244 + error = stuffed_readpage(ip, page);
21245 + unlock_page(page);
21247 + error = block_read_full_page(page, get_block);
21249 + error = readi_readpage(page);
21251 + if (error == -EIO)
21252 + gfs_io_error_inode(ip);
21258 + * gfs_prepare_write - Prepare to write to a file
21259 + * @file: The file to write to
21260 + * @page: The page which is to be prepared for writing
21261 + * @from: From (byte range within page)
21262 + * @to: To (byte range within page)
21268 +gfs_prepare_write(struct file *file, struct page *page,
21269 + unsigned from, unsigned to)
21271 + struct gfs_inode *ip = vn2ip(page->mapping->host);
21272 + struct gfs_sbd *sdp = ip->i_sbd;
21275 + atomic_inc(&sdp->sd_ops_address);
21277 + if (!gfs_glock_is_locked_by_me(ip->i_gl)) {
21278 + bitch_about(sdp, &sdp->sd_last_unlocked_aop,
21279 + "unlocked prepare_write request");
21283 + if (gfs_is_stuffed(ip)) {
21284 + uint64_t file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to;
21286 + if (file_size > sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode)) {
21287 + error = gfs_unstuff_dinode(ip, gfs_unstuffer_page, page);
21289 + error = block_prepare_write(page, from, to, get_block);
21290 + } else if (!PageUptodate(page))
21291 + error = stuffed_readpage(ip, page);
21293 + error = block_prepare_write(page, from, to, get_block);
21295 + if (error == -EIO)
21296 + gfs_io_error_inode(ip);
21302 + * gfs_commit_write - Commit write to a file
21303 + * @file: The file to write to
21304 + * @page: The page containing the data
21305 + * @from: From (byte range within page)
21306 + * @to: To (byte range within page)
21312 +gfs_commit_write(struct file *file, struct page *page,
21313 + unsigned from, unsigned to)
21315 + struct inode *inode = page->mapping->host;
21316 + struct gfs_inode *ip = vn2ip(inode);
21317 + struct gfs_sbd *sdp = ip->i_sbd;
21320 + atomic_inc(&sdp->sd_ops_address);
21322 + if (gfs_is_stuffed(ip)) {
21323 + struct buffer_head *dibh;
21324 + uint64_t file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to;
21327 + GFS_ASSERT_INODE(PageLocked(page), ip,);
21329 + error = gfs_get_inode_buffer(ip, &dibh);
21333 + gfs_trans_add_bh(ip->i_gl, dibh);
21335 + kaddr = kmap(page);
21336 + memcpy(dibh->b_data + sizeof(struct gfs_dinode) + from,
21337 + (char *)kaddr + from,
21343 + SetPageUptodate(page);
21345 + if (inode->i_size < file_size)
21346 + i_size_write(inode, file_size);
21348 + error = generic_commit_write(file, page, from, to);
21356 + ClearPageUptodate(page);
21362 + * gfs_bmap - Block map function
21363 + * @mapping: Address space info
21364 + * @lblock: The block to map
21366 + * Returns: The disk address for the block or 0 on hole or error
21370 +gfs_bmap(struct address_space *mapping, sector_t lblock)
21372 + struct gfs_inode *ip = vn2ip(mapping->host);
21373 + struct gfs_holder i_gh;
21377 + atomic_inc(&ip->i_sbd->sd_ops_address);
21379 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
21383 + if (!gfs_is_stuffed(ip))
21384 + dblock = generic_block_bmap(mapping, lblock, get_block);
21386 + gfs_glock_dq_uninit(&i_gh);
21392 + * gfs_direct_IO -
21403 +gfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
21404 + loff_t offset, unsigned long nr_segs)
21406 + struct file *file = iocb->ki_filp;
21407 + struct inode *inode = file->f_mapping->host;
21408 + struct gfs_inode *ip = vn2ip(inode);
21409 + get_blocks_t *gb = get_blocks;
21412 + atomic_inc(&ip->i_sbd->sd_ops_address);
21414 + GFS_ASSERT_INODE(gfs_glock_is_locked_by_me(ip->i_gl), ip,);
21415 + GFS_ASSERT_INODE(!gfs_is_stuffed(ip), ip,);
21417 + if (rw == WRITE && !current_transaction)
21418 + gb = get_blocks_noalloc;
21420 + error = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
21421 + offset, nr_segs, gb, NULL);
21423 + if (error == -EIO)
21424 + gfs_io_error_inode(ip);
21429 +struct address_space_operations gfs_file_aops = {
21430 + .writepage = gfs_writepage,
21431 + .readpage = gfs_readpage,
21432 + .sync_page = block_sync_page,
21433 + .prepare_write = gfs_prepare_write,
21434 + .commit_write = gfs_commit_write,
21435 + .bmap = gfs_bmap,
21436 + .direct_IO = gfs_direct_IO,
21438 diff -urN linux-orig/fs/gfs/ops_address.h linux-patched/fs/gfs/ops_address.h
21439 --- linux-orig/fs/gfs/ops_address.h 1969-12-31 18:00:00.000000000 -0600
21440 +++ linux-patched/fs/gfs/ops_address.h 2004-06-30 13:27:49.350709506 -0500
21442 +/******************************************************************************
21443 +*******************************************************************************
21445 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
21446 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
21448 +** This copyrighted material is made available to anyone wishing to use,
21449 +** modify, copy, or redistribute it subject to the terms and conditions
21450 +** of the GNU General Public License v.2.
21452 +*******************************************************************************
21453 +******************************************************************************/
21455 +#ifndef __OPS_ADDRESS_DOT_H__
21456 +#define __OPS_ADDRESS_DOT_H__
21458 +extern struct address_space_operations gfs_file_aops;
21460 +#endif /* __OPS_ADDRESS_DOT_H__ */
21461 diff -urN linux-orig/fs/gfs/ops_dentry.c linux-patched/fs/gfs/ops_dentry.c
21462 --- linux-orig/fs/gfs/ops_dentry.c 1969-12-31 18:00:00.000000000 -0600
21463 +++ linux-patched/fs/gfs/ops_dentry.c 2004-06-30 13:27:49.350709506 -0500
21465 +/******************************************************************************
21466 +*******************************************************************************
21468 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
21469 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
21471 +** This copyrighted material is made available to anyone wishing to use,
21472 +** modify, copy, or redistribute it subject to the terms and conditions
21473 +** of the GNU General Public License v.2.
21475 +*******************************************************************************
21476 +******************************************************************************/
21478 +#include <linux/sched.h>
21479 +#include <linux/slab.h>
21480 +#include <linux/smp_lock.h>
21481 +#include <linux/spinlock.h>
21482 +#include <asm/semaphore.h>
21483 +#include <linux/completion.h>
21484 +#include <linux/buffer_head.h>
21488 +#include "glock.h"
21489 +#include "ops_dentry.h"
21492 + * gfs_drevalidate - Check directory lookup consistency
21493 + * @dentry: the mapping to check
21496 + * Check to make sure the lookup necessary to arrive at this inode from its
21497 + * parent is still good.
21499 + * Returns: 1 if the dentry is ok, 0 if it isn't
21503 +gfs_drevalidate(struct dentry *dentry, struct nameidata *nd)
21505 + struct dentry *parent = dget_parent(dentry);
21506 + struct gfs_inode *dip;
21507 + struct inode *inode;
21508 + struct gfs_holder d_gh;
21509 + struct gfs_inode *ip;
21510 + struct gfs_inum inum;
21511 + unsigned int type;
21516 + dip = vn2ip(parent->d_inode);
21517 + GFS_ASSERT(dip,);
21519 + atomic_inc(&dip->i_sbd->sd_ops_dentry);
21521 + if (dip->i_sbd->sd_args.ar_localcaching)
21524 + inode = dentry->d_inode;
21525 + if (inode && is_bad_inode(inode))
21528 + error = gfs_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
21532 + error = gfs_dir_search(dip, &dentry->d_name, &inum, &type);
21536 + goto invalid_gunlock;
21540 + goto valid_gunlock;
21541 + goto invalid_gunlock;
21543 + goto fail_gunlock;
21546 + ip = vn2ip(inode);
21547 + GFS_ASSERT_SBD(ip, dip->i_sbd,);
21549 + if (ip->i_num.no_formal_ino != inum.no_formal_ino)
21550 + goto invalid_gunlock;
21552 + GFS_ASSERT_INODE(ip->i_di.di_type == type, ip,);
21555 + gfs_glock_dq_uninit(&d_gh);
21563 + gfs_glock_dq_uninit(&d_gh);
21566 + if (inode && S_ISDIR(inode->i_mode)) {
21567 + if (have_submounts(dentry))
21569 + shrink_dcache_parent(dentry);
21578 + gfs_glock_dq_uninit(&d_gh);
21586 +struct dentry_operations gfs_dops = {
21587 + .d_revalidate = gfs_drevalidate,
21589 diff -urN linux-orig/fs/gfs/ops_dentry.h linux-patched/fs/gfs/ops_dentry.h
21590 --- linux-orig/fs/gfs/ops_dentry.h 1969-12-31 18:00:00.000000000 -0600
21591 +++ linux-patched/fs/gfs/ops_dentry.h 2004-06-30 13:27:49.351709274 -0500
21593 +/******************************************************************************
21594 +*******************************************************************************
21596 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
21597 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
21599 +** This copyrighted material is made available to anyone wishing to use,
21600 +** modify, copy, or redistribute it subject to the terms and conditions
21601 +** of the GNU General Public License v.2.
21603 +*******************************************************************************
21604 +******************************************************************************/
21606 +#ifndef __OPS_DENTRY_DOT_H__
21607 +#define __OPS_DENTRY_DOT_H__
21609 +extern struct dentry_operations gfs_dops;
21611 +#endif /* __OPS_DENTRY_DOT_H__ */
21612 diff -urN linux-orig/fs/gfs/ops_export.c linux-patched/fs/gfs/ops_export.c
21613 --- linux-orig/fs/gfs/ops_export.c 1969-12-31 18:00:00.000000000 -0600
21614 +++ linux-patched/fs/gfs/ops_export.c 2004-06-30 13:27:49.351709274 -0500
21616 +/******************************************************************************
21617 +*******************************************************************************
21619 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
21620 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
21622 +** This copyrighted material is made available to anyone wishing to use,
21623 +** modify, copy, or redistribute it subject to the terms and conditions
21624 +** of the GNU General Public License v.2.
21626 +*******************************************************************************
21627 +******************************************************************************/
21629 +#include <linux/sched.h>
21630 +#include <linux/slab.h>
21631 +#include <linux/smp_lock.h>
21632 +#include <linux/spinlock.h>
21633 +#include <asm/semaphore.h>
21634 +#include <linux/completion.h>
21635 +#include <linux/buffer_head.h>
21640 +#include "glock.h"
21641 +#include "glops.h"
21642 +#include "inode.h"
21643 +#include "ops_export.h"
21646 +struct inode_cookie
21648 + uint64_t formal_ino;
21653 +struct get_name_filldir
21655 + uint64_t formal_ino;
21660 + * gfs_decode_fh -
21661 + * @param1: description
21662 + * @param2: description
21663 + * @param3: description
21665 + * Function description
21667 + * Returns: what is returned
21671 +gfs_decode_fh(struct super_block *sb, __u32 *fh, int fh_len, int fh_type,
21672 + int (*acceptable)(void *context, struct dentry *dentry),
21675 + struct inode_cookie this, parent;
21677 + atomic_inc(&vfs2sdp(sb)->sd_ops_export);
21679 + if (fh_type != fh_len)
21682 + memset(&parent, 0, sizeof(struct inode_cookie));
21684 + switch (fh_type) {
21686 + parent.gen_valid = TRUE;
21687 + parent.gen = fh[5];
21689 + parent.formal_ino = ((uint64_t)gfs32_to_cpu(fh[3])) << 32;
21690 + parent.formal_ino |= (uint64_t)gfs32_to_cpu(fh[4]);
21692 + this.gen_valid = TRUE;
21693 + this.gen = gfs32_to_cpu(fh[2]);
21694 + this.formal_ino = ((uint64_t)gfs32_to_cpu(fh[0])) << 32;
21695 + this.formal_ino |= (uint64_t)gfs32_to_cpu(fh[1]);
21701 + return gfs_export_ops.find_exported_dentry(sb, &this, &parent,
21702 + acceptable, context);
21706 + * gfs_encode_fh -
21707 + * @param1: description
21708 + * @param2: description
21709 + * @param3: description
21711 + * Function description
21713 + * Returns: what is returned
21717 +gfs_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
21720 + struct inode *inode = dentry->d_inode;
21721 + struct gfs_inode *ip = vn2ip(inode);
21722 + int maxlen = *len;
21724 + atomic_inc(&ip->i_sbd->sd_ops_export);
21729 + fh[0] = cpu_to_gfs32((uint32_t)(ip->i_num.no_formal_ino >> 32));
21730 + fh[1] = cpu_to_gfs32((uint32_t)(ip->i_num.no_formal_ino & 0xFFFFFFFF));
21731 + fh[2] = cpu_to_gfs32(inode->i_generation);
21734 + if (maxlen < 5 || !connectable)
21737 + spin_lock(&dentry->d_lock);
21739 + inode = dentry->d_parent->d_inode;
21740 + ip = vn2ip(inode);
21742 + fh[3] = cpu_to_gfs32((uint32_t)(ip->i_num.no_formal_ino >> 32));
21743 + fh[4] = cpu_to_gfs32((uint32_t)(ip->i_num.no_formal_ino & 0xFFFFFFFF));
21746 + if (maxlen < 6) {
21747 + spin_unlock(&dentry->d_lock);
21751 + fh[5] = cpu_to_gfs32(inode->i_generation);
21753 + spin_unlock(&dentry->d_lock);
21761 + * get_name_filldir -
21762 + * @param1: description
21763 + * @param2: description
21764 + * @param3: description
21766 + * Function description
21768 + * Returns: what is returned
21772 +get_name_filldir(void *opaque,
21773 + const char *name, unsigned int length,
21775 + struct gfs_inum *inum, unsigned int type)
21777 + struct get_name_filldir *gnfd = (struct get_name_filldir *)opaque;
21779 + if (inum->no_formal_ino != gnfd->formal_ino)
21782 + memcpy(gnfd->name, name, length);
21783 + gnfd->name[length] = 0;
21790 + * @param1: description
21791 + * @param2: description
21792 + * @param3: description
21794 + * Function description
21796 + * Returns: what is returned
21799 +int gfs_get_name(struct dentry *parent, char *name,
21800 + struct dentry *child)
21802 + struct inode *dir = parent->d_inode;
21803 + struct inode *inode = child->d_inode;
21804 + struct gfs_inode *dip, *ip;
21805 + struct get_name_filldir gnfd;
21806 + struct gfs_holder gh;
21807 + uint64_t offset = 0;
21813 + atomic_inc(&vfs2sdp(dir->i_sb)->sd_ops_export);
21815 + if (!S_ISDIR(dir->i_mode) || !inode)
21818 + dip = vn2ip(dir);
21819 + ip = vn2ip(inode);
21822 + gnfd.formal_ino = ip->i_num.no_formal_ino;
21823 + gnfd.name = name;
21825 + error = gfs_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh);
21829 + error = gfs_dir_read(dip, &offset, &gnfd, get_name_filldir);
21831 + gfs_glock_dq_uninit(&gh);
21833 + if (!error & !*name)
21840 + * gfs_get_parent -
21841 + * @param1: description
21842 + * @param2: description
21843 + * @param3: description
21845 + * Function description
21847 + * Returns: what is returned
21851 +gfs_get_parent(struct dentry *child)
21853 + struct gfs_inode *dip = vn2ip(child->d_inode);
21854 + struct gfs_holder d_gh, i_gh;
21855 + struct qstr dotdot = { .name = "..", .len = 2 };
21856 + struct gfs_inode *ip;
21857 + struct inode *inode;
21858 + struct dentry *dentry;
21861 + atomic_inc(&dip->i_sbd->sd_ops_export);
21863 + gfs_holder_init(dip->i_gl, 0, 0, &d_gh);
21864 + error = gfs_lookupi(&d_gh, &dotdot, TRUE, &i_gh);
21872 + ip = gl2ip(i_gh.gh_gl);
21874 + gfs_glock_dq_uninit(&d_gh);
21875 + gfs_glock_dq_uninit(&i_gh);
21877 + inode = gfs_iget(ip, CREATE);
21878 + gfs_inode_put(ip);
21881 + return ERR_PTR(-ENOMEM);
21883 + dentry = d_alloc_anon(inode);
21886 + return ERR_PTR(-ENOMEM);
21892 + gfs_holder_uninit(&d_gh);
21893 + return ERR_PTR(error);
21897 + * gfs_get_dentry -
21898 + * @param1: description
21899 + * @param2: description
21900 + * @param3: description
21902 + * Function description
21904 + * Returns: what is returned
21908 +gfs_get_dentry(struct super_block *sb, void *inump)
21910 + struct gfs_sbd *sdp = vfs2sdp(sb);
21911 + struct inode_cookie *cookie = (struct inode_cookie *)inump;
21912 + struct gfs_inum inum;
21913 + struct gfs_holder i_gh, ri_gh, rgd_gh;
21914 + struct gfs_rgrpd *rgd;
21915 + struct buffer_head *bh;
21916 + struct gfs_dinode *di;
21917 + struct gfs_inode *ip;
21918 + struct inode *inode;
21919 + struct dentry *dentry;
21922 + atomic_inc(&sdp->sd_ops_export);
21924 + if (!cookie->formal_ino ||
21925 + cookie->formal_ino == sdp->sd_jiinode->i_num.no_formal_ino ||
21926 + cookie->formal_ino == sdp->sd_riinode->i_num.no_formal_ino ||
21927 + cookie->formal_ino == sdp->sd_qinode->i_num.no_formal_ino ||
21928 + cookie->formal_ino == sdp->sd_linode->i_num.no_formal_ino)
21929 + return ERR_PTR(-EINVAL);
21931 + inum.no_formal_ino = cookie->formal_ino;
21932 + inum.no_addr = cookie->formal_ino;
21934 + error = gfs_glock_nq_num(sdp,
21935 + inum.no_formal_ino, &gfs_inode_glops,
21936 + LM_ST_SHARED, LM_FLAG_ANY | GL_LOCAL_EXCL,
21939 + return ERR_PTR(error);
21941 + error = gfs_inode_get(i_gh.gh_gl, &inum, NO_CREATE, &ip);
21947 + error = gfs_rindex_hold(sdp, &ri_gh);
21952 + rgd = gfs_blk2rgrpd(sdp, inum.no_addr);
21954 + goto fail_rindex;
21956 + error = gfs_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
21958 + goto fail_rindex;
21961 + if (gfs_get_block_type(rgd, inum.no_addr) != GFS_BLKST_USEDMETA)
21964 + error = gfs_dread(sdp, inum.no_addr, i_gh.gh_gl,
21965 + DIO_START | DIO_WAIT, &bh);
21969 + di = (struct gfs_dinode *)bh->b_data;
21972 + if (gfs32_to_cpu(di->di_header.mh_magic) != GFS_MAGIC ||
21973 + gfs32_to_cpu(di->di_header.mh_type) != GFS_METATYPE_DI ||
21974 + (gfs32_to_cpu(di->di_flags) & GFS_DIF_UNUSED))
21978 + gfs_glock_dq_uninit(&rgd_gh);
21979 + gfs_glock_dq_uninit(&ri_gh);
21981 + error = gfs_inode_get(i_gh.gh_gl, &inum, CREATE, &ip);
21985 + atomic_inc(&sdp->sd_fh2dentry_misses);
21988 + gfs_glock_dq_uninit(&i_gh);
21990 + inode = gfs_iget(ip, CREATE);
21991 + gfs_inode_put(ip);
21994 + return ERR_PTR(-ENOMEM);
21996 + if (cookie->gen_valid && cookie->gen != inode->i_generation) {
21998 + return ERR_PTR(-ESTALE);
22001 + dentry = d_alloc_anon(inode);
22004 + return ERR_PTR(-ENOMEM);
22013 + gfs_glock_dq_uninit(&rgd_gh);
22016 + gfs_glock_dq_uninit(&ri_gh);
22019 + gfs_glock_dq_uninit(&i_gh);
22020 + return ERR_PTR(error);
22023 +struct export_operations gfs_export_ops = {
22024 + .decode_fh = gfs_decode_fh,
22025 + .encode_fh = gfs_encode_fh,
22026 + .get_name = gfs_get_name,
22027 + .get_parent = gfs_get_parent,
22028 + .get_dentry = gfs_get_dentry,
22031 diff -urN linux-orig/fs/gfs/ops_export.h linux-patched/fs/gfs/ops_export.h
22032 --- linux-orig/fs/gfs/ops_export.h 1969-12-31 18:00:00.000000000 -0600
22033 +++ linux-patched/fs/gfs/ops_export.h 2004-06-30 13:27:49.351709274 -0500
22035 +/******************************************************************************
22036 +*******************************************************************************
22038 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
22039 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
22041 +** This copyrighted material is made available to anyone wishing to use,
22042 +** modify, copy, or redistribute it subject to the terms and conditions
22043 +** of the GNU General Public License v.2.
22045 +*******************************************************************************
22046 +******************************************************************************/
22048 +#ifndef __OPS_EXPORT_DOT_H__
22049 +#define __OPS_EXPORT_DOT_H__
22051 +extern struct export_operations gfs_export_ops;
22053 +#endif /* __OPS_EXPORT_DOT_H__ */
22054 diff -urN linux-orig/fs/gfs/ops_file.c linux-patched/fs/gfs/ops_file.c
22055 --- linux-orig/fs/gfs/ops_file.c 1969-12-31 18:00:00.000000000 -0600
22056 +++ linux-patched/fs/gfs/ops_file.c 2004-06-30 13:27:49.352709042 -0500
22058 +/******************************************************************************
22059 +*******************************************************************************
22061 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
22062 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
22064 +** This copyrighted material is made available to anyone wishing to use,
22065 +** modify, copy, or redistribute it subject to the terms and conditions
22066 +** of the GNU General Public License v.2.
22068 +*******************************************************************************
22069 +******************************************************************************/
22071 +#include <linux/sched.h>
22072 +#include <linux/slab.h>
22073 +#include <linux/smp_lock.h>
22074 +#include <linux/spinlock.h>
22075 +#include <asm/semaphore.h>
22076 +#include <linux/completion.h>
22077 +#include <linux/buffer_head.h>
22078 +#include <asm/uaccess.h>
22079 +#include <linux/pagemap.h>
22080 +#include <linux/uio.h>
22081 +#include <linux/blkdev.h>
22082 +#include <linux/mm.h>
22089 +#include "flock.h"
22090 +#include "glock.h"
22091 +#include "glops.h"
22092 +#include "inode.h"
22093 +#include "ioctl.h"
22095 +#include "ops_file.h"
22096 +#include "ops_vm.h"
22097 +#include "quota.h"
22099 +#include "trans.h"
22101 +struct filldir_bad_entry {
22103 + unsigned int fbe_length;
22104 + uint64_t fbe_offset;
22105 + struct gfs_inum fbe_inum;
22106 + unsigned int fbe_type;
22109 +struct filldir_bad {
22110 + struct gfs_sbd *fdb_sbd;
22111 + int fdb_prefetch;
22113 + struct filldir_bad_entry *fdb_entry;
22114 + unsigned int fdb_entry_num;
22115 + unsigned int fdb_entry_off;
22118 + unsigned int fdb_name_size;
22119 + unsigned int fdb_name_off;
22122 +struct filldir_reg {
22123 + struct gfs_sbd *fdr_sbd;
22124 + int fdr_prefetch;
22126 + filldir_t fdr_filldir;
22127 + void *fdr_opaque;
22130 +typedef ssize_t(*do_rw_t) (struct file * file,
22132 + size_t size, loff_t * offset,
22133 + unsigned int num_gh, struct gfs_holder * ghs);
22136 + * gfs_llseek - seek to a location in a file
22137 + * @file: the file
22138 + * @offset: the offset
22139 + * @origin: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END)
22141 + * SEEK_END requires the glock for the file because it references the
22144 + * Returns: The new offset, or -EXXX on error
22148 +gfs_llseek(struct file *file, loff_t offset, int origin)
22150 + struct gfs_inode *ip = vn2ip(file->f_mapping->host);
22151 + struct gfs_holder i_gh;
22154 + atomic_inc(&ip->i_sbd->sd_ops_file);
22156 + if (origin == 2) {
22157 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
22159 + error = remote_llseek(file, offset, origin);
22160 + gfs_glock_dq_uninit(&i_gh);
22163 + error = remote_llseek(file, offset, origin);
22168 +#define vma2state(vma) \
22169 +((((vma)->vm_flags & (VM_MAYWRITE | VM_MAYSHARE)) == \
22170 + (VM_MAYWRITE | VM_MAYSHARE)) ? \
22171 + LM_ST_EXCLUSIVE : LM_ST_SHARED) \
22174 + * functionname - summary
22175 + * @param1: description
22176 + * @param2: description
22177 + * @param3: description
22179 + * Function description
22181 + * Returns: what is returned
22185 +walk_vm_hard(struct file *file, char *buf, size_t size, loff_t *offset,
22186 + do_rw_t operation)
22188 + struct gfs_holder *ghs;
22189 + unsigned int num_gh = 0;
22193 + struct super_block *sb = file->f_dentry->d_inode->i_sb;
22194 + struct mm_struct *mm = current->mm;
22195 + struct vm_area_struct *vma;
22196 + unsigned long start = (unsigned long)buf;
22197 + unsigned long end = start + size;
22198 + int dumping = (current->flags & PF_DUMPCORE);
22199 + unsigned int x = 0;
22201 + for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
22202 + if (end <= vma->vm_start)
22204 + if (vma->vm_file &&
22205 + vma->vm_file->f_dentry->d_inode->i_sb == sb) {
22210 + ghs = kmalloc((num_gh + 1) * sizeof(struct gfs_holder), GFP_KERNEL);
22213 + up_read(&mm->mmap_sem);
22217 + for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
22218 + if (end <= vma->vm_start)
22220 + if (vma->vm_file) {
22221 + struct inode *inode = vma->vm_file->f_dentry->d_inode;
22222 + if (inode->i_sb == sb)
22223 + gfs_holder_init(vn2ip(inode)->i_gl,
22230 + up_read(&mm->mmap_sem);
22232 + GFS_ASSERT_SBD(x == num_gh, vfs2sdp(sb),);
22235 + count = operation(file, buf, size, offset, num_gh, ghs);
22238 + gfs_holder_uninit(&ghs[num_gh]);
22245 + * walk_vma - Walk the vmas associated with a buffer for read or write.
22246 + * If any of them are gfs, pass the gfs inode down to the read/write
22247 + * worker function so that locks can be acquired in the correct order.
22248 + * @file: The file to read/write from/to
22249 + * @buf: The buffer to copy to/from
22250 + * @size: The amount of data requested
22251 + * @offset: The current file offset
22252 + * @operation: The read or write worker function
22254 + * Outputs: Offset - updated according to number of bytes written
22256 + * Returns: The number of bytes written, -errno on failure
22260 +walk_vm(struct file *file, char *buf, size_t size, loff_t *offset,
22261 + do_rw_t operation)
22263 + if (current->mm) {
22264 + struct super_block *sb = file->f_dentry->d_inode->i_sb;
22265 + struct mm_struct *mm = current->mm;
22266 + struct vm_area_struct *vma;
22267 + unsigned long start = (unsigned long)buf;
22268 + unsigned long end = start + size;
22269 + int dumping = (current->flags & PF_DUMPCORE);
22272 + down_read(&mm->mmap_sem);
22274 + for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
22275 + if (end <= vma->vm_start)
22277 + if (vma->vm_file &&
22278 + vma->vm_file->f_dentry->d_inode->i_sb == sb)
22283 + up_read(&mm->mmap_sem);
22287 + struct gfs_holder gh;
22288 + return operation(file, buf, size, offset, 0, &gh);
22292 + return walk_vm_hard(file, buf, size, offset, operation);
22296 + * functionname - summary
22297 + * @param1: description
22298 + * @param2: description
22299 + * @param3: description
22301 + * Function description
22303 + * Returns: what is returned
22307 +do_read_readi(struct file *file, char *buf, size_t size, loff_t *offset)
22309 + struct gfs_inode *ip = vn2ip(file->f_mapping->host);
22310 + ssize_t count = 0;
22314 + if (!access_ok(VERIFY_WRITE, buf, size))
22317 + if (!(file->f_flags & O_LARGEFILE)) {
22318 + if (*offset >= 0x7FFFFFFFull)
22320 + if (*offset + size > 0x7FFFFFFFull)
22321 + size = 0x7FFFFFFFull - *offset;
22324 + count = gfs_readi(ip, buf, *offset, size, gfs_copy2user);
22327 + *offset += count;
22333 + * do_read_direct - Read bytes from a file
22334 + * @file: The file to read from
22335 + * @buf: The buffer to copy into
22336 + * @size: The amount of data requested
22337 + * @offset: The current file offset
22338 + * @num_gh: The number of other locks we need to do the read
22339 + * @ghs: the locks we need plus one for our lock
22341 + * Outputs: Offset - updated according to number of bytes read
22343 + * Returns: The number of bytes read, -EXXX on failure
22347 +do_read_direct(struct file *file, char *buf, size_t size, loff_t *offset,
22348 + unsigned int num_gh, struct gfs_holder *ghs)
22350 + struct inode *inode = file->f_mapping->host;
22351 + struct gfs_inode *ip = vn2ip(inode);
22352 + unsigned int state = LM_ST_DEFERRED;
22355 + ssize_t count = 0;
22358 + for (x = 0; x < num_gh; x++)
22359 + if (ghs[x].gh_gl == ip->i_gl) {
22360 + state = LM_ST_SHARED;
22361 + flags |= GL_LOCAL_EXCL;
22365 + gfs_holder_init(ip->i_gl, state, flags, &ghs[num_gh]);
22367 + error = gfs_glock_nq_m(num_gh + 1, ghs);
22372 + if (gfs_is_jdata(ip))
22373 + goto out_gunlock;
22375 + if (gfs_is_stuffed(ip)) {
22376 + size_t mask = bdev_hardsect_size(inode->i_sb->s_bdev) - 1;
22378 + if (((*offset) & mask) || (((unsigned long)buf) & mask))
22379 + goto out_gunlock;
22381 + count = do_read_readi(file, buf, size & ~mask, offset);
22384 + count = generic_file_read(file, buf, size, offset);
22389 + gfs_glock_dq_m(num_gh + 1, ghs);
22392 + gfs_holder_uninit(&ghs[num_gh]);
22394 + return (count) ? count : error;
22398 + * do_read_buf - Read bytes from a file
22399 + * @file: The file to read from
22400 + * @buf: The buffer to copy into
22401 + * @size: The amount of data requested
22402 + * @offset: The current file offset
22403 + * @num_gh: The number of other locks we need to do the read
22404 + * @ghs: the locks we need plus one for our lock
22406 + * Outputs: Offset - updated according to number of bytes read
22408 + * Returns: The number of bytes read, -EXXX on failure
22412 +do_read_buf(struct file *file, char *buf, size_t size, loff_t *offset,
22413 + unsigned int num_gh, struct gfs_holder *ghs)
22415 + struct gfs_inode *ip = vn2ip(file->f_mapping->host);
22416 + ssize_t count = 0;
22419 + gfs_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &ghs[num_gh]);
22421 + error = gfs_glock_nq_m_atime(num_gh + 1, ghs);
22425 + if (gfs_is_jdata(ip) ||
22426 + (gfs_is_stuffed(ip) && !test_bit(GIF_PAGED, &ip->i_flags)))
22427 + count = do_read_readi(file, buf, size, offset);
22429 + count = generic_file_read(file, buf, size, offset);
22431 + gfs_glock_dq_m(num_gh + 1, ghs);
22434 + gfs_holder_uninit(&ghs[num_gh]);
22436 + return (count) ? count : error;
22440 + * gfs_read - Read bytes from a file
22441 + * @file: The file to read from
22442 + * @buf: The buffer to copy into
22443 + * @size: The amount of data requested
22444 + * @offset: The current file offset
22446 + * Outputs: Offset - updated according to number of bytes read
22448 + * Returns: The number of bytes read, -EXXX on failure
22452 +gfs_read(struct file *file, char *buf, size_t size, loff_t *offset)
22454 + atomic_inc(&vfs2sdp(file->f_mapping->host->i_sb)->sd_ops_file);
22456 + if (file->f_flags & O_DIRECT)
22457 + return walk_vm(file, buf, size, offset, do_read_direct);
22459 + return walk_vm(file, buf, size, offset, do_read_buf);
22463 + * grope_mapping - touch each page of a mapping that needs to be written
22464 + * @buf: the start of the memory to be written
22465 + * @size: the size of the memory to be written
22467 + * We do this after acquiring the locks on the mapping,
22468 + * but before starting the write transaction. We need to make
22469 + * sure that we don't cause recursive transactions if blocks
22470 + * need to be allocated to the file backing the mapping.
22472 + * Returns: 0 on success, -EXXX on failure
22476 +grope_mapping(char *buf, size_t size)
22478 + unsigned long start = (unsigned long)buf;
22479 + unsigned long stop = start + size;
22482 + while (start < stop) {
22483 + if (copy_from_user(&c, (char *)start, 1))
22486 + start += PAGE_CACHE_SIZE;
22487 + start &= PAGE_CACHE_MASK;
22494 + * do_write_direct_alloc - Write bytes to a file
22495 + * @file: The file to write to
22496 + * @buf: The buffer to copy from
22497 + * @size: The amount of data requested
22498 + * @offset: The current file offset
22500 + * Outputs: Offset - updated according to number of bytes written
22502 + * Returns: The number of bytes written, -EXXX on failure
22506 +do_write_direct_alloc(struct file *file, char *buf, size_t size, loff_t *offset)
22508 + struct inode *inode = file->f_mapping->host;
22509 + struct gfs_inode *ip = vn2ip(inode);
22510 + struct gfs_sbd *sdp = ip->i_sbd;
22511 + struct gfs_alloc *al = NULL;
22512 + struct iovec local_iov = { .iov_base = buf, .iov_len = size };
22513 + struct buffer_head *dibh;
22514 + unsigned int data_blocks, ind_blocks;
22518 + gfs_write_calc_reserv(ip, size, &data_blocks, &ind_blocks);
22520 + al = gfs_alloc_get(ip);
22522 + error = gfs_quota_lock_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
22526 + error = gfs_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
22528 + goto fail_gunlock_q;
22530 + al->al_requested_meta = ind_blocks;
22531 + al->al_requested_data = data_blocks;
22533 + error = gfs_inplace_reserve(ip);
22535 + goto fail_gunlock_q;
22537 + /* Trans may require:
22538 + All blocks for a RG bitmap, whatever indirect blocks we
22539 + need, a modified dinode, and a quota change. */
22541 + error = gfs_trans_begin(sdp,
22542 + 1 + al->al_rgd->rd_ri.ri_length + ind_blocks,
22547 + if ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)) {
22548 + error = gfs_get_inode_buffer(ip, &dibh);
22550 + goto fail_end_trans;
22552 + ip->i_di.di_mode &= (ip->i_di.di_mode & S_IXGRP) ? (~(S_ISUID | S_ISGID)) : (~S_ISUID);
22554 + gfs_trans_add_bh(ip->i_gl, dibh);
22555 + gfs_dinode_out(&ip->i_di, dibh->b_data);
22559 + if (gfs_is_stuffed(ip)) {
22560 + error = gfs_unstuff_dinode(ip, gfs_unstuffer_sync, NULL);
22562 + goto fail_end_trans;
22565 + count = generic_file_write_nolock(file, &local_iov, 1, offset);
22568 + goto fail_end_trans;
22571 + error = gfs_get_inode_buffer(ip, &dibh);
22573 + goto fail_end_trans;
22575 + if (ip->i_di.di_size < inode->i_size)
22576 + ip->i_di.di_size = inode->i_size;
22577 + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
22579 + gfs_trans_add_bh(ip->i_gl, dibh);
22580 + gfs_dinode_out(&ip->i_di, dibh->b_data);
22583 + gfs_trans_end(sdp);
22585 + if (file->f_flags & O_SYNC)
22586 + gfs_log_flush_glock(ip->i_gl);
22588 + gfs_inplace_release(ip);
22589 + gfs_quota_unlock_m(ip);
22590 + gfs_alloc_put(ip);
22595 + gfs_trans_end(sdp);
22598 + gfs_inplace_release(ip);
22601 + gfs_quota_unlock_m(ip);
22604 + gfs_alloc_put(ip);
22610 + * do_write_direct - Write bytes to a file
22611 + * @file: The file to write to
22612 + * @buf: The buffer to copy from
22613 + * @size: The amount of data requested
22614 + * @offset: The current file offset
22615 + * @num_gh: The number of other locks we need to do the write
22616 + * @ghs: the locks we need plus one for our lock
22618 + * Outputs: Offset - updated according to number of bytes written
22620 + * Returns: The number of bytes written, -EXXX on failure
22624 +do_write_direct(struct file *file, char *buf, size_t size, loff_t *offset,
22625 + unsigned int num_gh, struct gfs_holder *ghs)
22627 + struct gfs_inode *ip = vn2ip(file->f_mapping->host);
22628 + struct gfs_sbd *sdp = ip->i_sbd;
22629 + struct gfs_file *fp = vf2fp(file);
22630 + unsigned int state = LM_ST_DEFERRED;
22631 + int alloc_required;
22634 + ssize_t count = 0;
22637 + if (test_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags))
22638 + state = LM_ST_EXCLUSIVE;
22640 + for (x = 0; x < num_gh; x++)
22641 + if (ghs[x].gh_gl == ip->i_gl) {
22642 + state = LM_ST_EXCLUSIVE;
22647 + gfs_holder_init(ip->i_gl, state, 0, &ghs[num_gh]);
22649 + error = gfs_glock_nq_m(num_gh + 1, ghs);
22654 + if (gfs_is_jdata(ip))
22655 + goto out_gunlock;
22658 + error = grope_mapping(buf, size);
22660 + goto out_gunlock;
22663 + if (file->f_flags & O_APPEND)
22664 + *offset = ip->i_di.di_size;
22666 + if (!(file->f_flags & O_LARGEFILE)) {
22668 + if (*offset >= 0x7FFFFFFFull)
22669 + goto out_gunlock;
22670 + if (*offset + size > 0x7FFFFFFFull)
22671 + size = 0x7FFFFFFFull - *offset;
22674 + if (gfs_is_stuffed(ip) ||
22675 + *offset + size > ip->i_di.di_size ||
22676 + ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)))
22677 + alloc_required = TRUE;
22679 + error = gfs_write_alloc_required(ip, *offset, size,
22680 + &alloc_required);
22682 + goto out_gunlock;
22685 + if (alloc_required && state != LM_ST_EXCLUSIVE) {
22686 + gfs_glock_dq_m(num_gh + 1, ghs);
22687 + gfs_holder_uninit(&ghs[num_gh]);
22688 + state = LM_ST_EXCLUSIVE;
22692 + if (alloc_required) {
22693 + set_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags);
22696 + s = sdp->sd_tune.gt_max_atomic_write;
22700 + error = do_write_direct_alloc(file, buf, s, offset);
22702 + goto out_gunlock;
22709 + struct iovec local_iov = { .iov_base = buf, .iov_len = size };
22710 + struct gfs_holder t_gh;
22712 + clear_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags);
22714 + error = gfs_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &t_gh);
22716 + goto out_gunlock;
22718 + count = generic_file_write_nolock(file, &local_iov, 1, offset);
22720 + gfs_glock_dq_uninit(&t_gh);
22726 + gfs_glock_dq_m(num_gh + 1, ghs);
22729 + gfs_holder_uninit(&ghs[num_gh]);
22731 + return (count) ? count : error;
22735 + * do_do_write_buf - Write bytes to a file
22736 + * @file: The file to write to
22737 + * @buf: The buffer to copy from
22738 + * @size: The amount of data requested
22739 + * @offset: The current file offset
22741 + * Outputs: Offset - updated according to number of bytes written
22743 + * Returns: The number of bytes written, -EXXX on failure
22747 +do_do_write_buf(struct file *file, char *buf, size_t size, loff_t *offset)
22749 + struct inode *inode = file->f_mapping->host;
22750 + struct gfs_inode *ip = vn2ip(inode);
22751 + struct gfs_sbd *sdp = ip->i_sbd;
22752 + struct gfs_alloc *al = NULL;
22753 + struct buffer_head *dibh;
22754 + unsigned int data_blocks, ind_blocks;
22755 + int alloc_required, journaled;
22759 + journaled = gfs_is_jdata(ip);
22761 + gfs_write_calc_reserv(ip, size, &data_blocks, &ind_blocks);
22763 + error = gfs_write_alloc_required(ip, *offset, size, &alloc_required);
22767 + if (alloc_required) {
22768 + al = gfs_alloc_get(ip);
22770 + error = gfs_quota_lock_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
22774 + error = gfs_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
22776 + goto fail_gunlock_q;
22779 + al->al_requested_meta = ind_blocks + data_blocks;
22781 + al->al_requested_meta = ind_blocks;
22782 + al->al_requested_data = data_blocks;
22785 + error = gfs_inplace_reserve(ip);
22787 + goto fail_gunlock_q;
22789 + /* Trans may require:
22790 + All blocks for a RG bitmap, whatever indirect blocks we
22791 + need, a modified dinode, and a quota change. */
22793 + error = gfs_trans_begin(sdp,
22794 + 1 + al->al_rgd->rd_ri.ri_length +
22796 + ((journaled) ? data_blocks : 0), 1);
22800 + /* Trans may require:
22801 + A modified dinode. */
22803 + error = gfs_trans_begin(sdp,
22804 + 1 + ((journaled) ? data_blocks : 0), 0);
22809 + if ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)) {
22810 + error = gfs_get_inode_buffer(ip, &dibh);
22812 + goto fail_end_trans;
22814 + ip->i_di.di_mode &= (ip->i_di.di_mode & S_IXGRP) ? (~(S_ISUID | S_ISGID)) : (~S_ISUID);
22816 + gfs_trans_add_bh(ip->i_gl, dibh);
22817 + gfs_dinode_out(&ip->i_di, dibh->b_data);
22822 + (gfs_is_stuffed(ip) && !test_bit(GIF_PAGED, &ip->i_flags) &&
22823 + *offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode))) {
22825 + count = gfs_writei(ip, buf, *offset, size, gfs_copy_from_user);
22828 + goto fail_end_trans;
22831 + *offset += count;
22833 + struct iovec local_iov = { .iov_base = buf, .iov_len = size };
22835 + count = generic_file_write_nolock(file, &local_iov, 1, offset);
22838 + goto fail_end_trans;
22841 + error = gfs_get_inode_buffer(ip, &dibh);
22843 + goto fail_end_trans;
22845 + if (ip->i_di.di_size < inode->i_size)
22846 + ip->i_di.di_size = inode->i_size;
22847 + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
22849 + gfs_trans_add_bh(ip->i_gl, dibh);
22850 + gfs_dinode_out(&ip->i_di, dibh->b_data);
22854 + gfs_trans_end(sdp);
22856 + if (file->f_flags & O_SYNC)
22857 + gfs_log_flush_glock(ip->i_gl);
22859 + if (alloc_required) {
22860 + GFS_ASSERT_INODE(count != size ||
22861 + al->al_alloced_meta ||
22862 + al->al_alloced_data, ip,);
22863 + gfs_inplace_release(ip);
22864 + gfs_quota_unlock_m(ip);
22865 + gfs_alloc_put(ip);
22871 + gfs_trans_end(sdp);
22874 + if (alloc_required)
22875 + gfs_inplace_release(ip);
22878 + if (alloc_required)
22879 + gfs_quota_unlock_m(ip);
22882 + if (alloc_required)
22883 + gfs_alloc_put(ip);
22889 + * do_write_buf - Write bytes to a file
22890 + * @file: The file to write to
22891 + * @buf: The buffer to copy from
22892 + * @size: The amount of data requested
22893 + * @offset: The current file offset
22894 + * @num_gh: The number of other locks we need to do the write
22895 + * @ghs: the locks we need plus one for our lock
22897 + * Outputs: Offset - updated according to number of bytes written
22899 + * Returns: The number of bytes written, -EXXX on failure
22903 +do_write_buf(struct file *file,
22904 + char *buf, size_t size, loff_t *offset,
22905 + unsigned int num_gh, struct gfs_holder *ghs)
22907 + struct gfs_inode *ip = vn2ip(file->f_mapping->host);
22908 + struct gfs_sbd *sdp = ip->i_sbd;
22910 + ssize_t count = 0;
22913 + gfs_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[num_gh]);
22915 + error = gfs_glock_nq_m(num_gh + 1, ghs);
22920 + error = grope_mapping(buf, size);
22922 + goto out_gunlock;
22925 + if (file->f_flags & O_APPEND)
22926 + *offset = ip->i_di.di_size;
22928 + if (!(file->f_flags & O_LARGEFILE)) {
22930 + if (*offset >= 0x7FFFFFFFull)
22931 + goto out_gunlock;
22932 + if (*offset + size > 0x7FFFFFFFull)
22933 + size = 0x7FFFFFFFull - *offset;
22937 + s = sdp->sd_tune.gt_max_atomic_write;
22941 + error = do_do_write_buf(file, buf, s, offset);
22943 + goto out_gunlock;
22953 + gfs_glock_dq_m(num_gh + 1, ghs);
22956 + gfs_holder_uninit(&ghs[num_gh]);
22958 + return (count) ? count : error;
22962 + * gfs_write - Write bytes to a file
22963 + * @file: The file to write to
22964 + * @buf: The buffer to copy from
22965 + * @size: The amount of data requested
22966 + * @offset: The current file offset
22968 + * Outputs: Offset - updated according to number of bytes written
22970 + * Returns: The number of bytes written, -EXXX on failure
22974 +gfs_write(struct file *file, const char *buf, size_t size, loff_t *offset)
22976 + struct inode *inode = file->f_mapping->host;
22979 + atomic_inc(&vfs2sdp(inode->i_sb)->sd_ops_file);
22983 + if (!access_ok(VERIFY_READ, buf, size))
22986 + down(&inode->i_sem);
22987 + if (file->f_flags & O_DIRECT)
22988 + count = walk_vm(file, (char *)buf, size, offset, do_write_direct);
22990 + count = walk_vm(file, (char *)buf, size, offset, do_write_buf);
22991 + up(&inode->i_sem);
22997 + * filldir_reg_func - Report a directory entry to the caller of gfs_dir_read()
22998 + * @opaque: opaque data used by the function
22999 + * @name: the name of the directory entry
23000 + * @length: the length of the name
23001 + * @offset: the entry's offset in the directory
23002 + * @inum: the inode number the entry points to
23003 + * @type: the type of inode the entry points to
23005 + * Returns: 0 on success, 1 if buffer full
23009 +filldir_reg_func(void *opaque,
23010 + const char *name, unsigned int length,
23012 + struct gfs_inum *inum, unsigned int type)
23014 + struct filldir_reg *fdr = (struct filldir_reg *)opaque;
23015 + struct gfs_sbd *sdp = fdr->fdr_sbd;
23016 + unsigned int vfs_type;
23020 + case GFS_FILE_NON:
23021 + vfs_type = DT_UNKNOWN;
23023 + case GFS_FILE_REG:
23024 + vfs_type = DT_REG;
23026 + case GFS_FILE_DIR:
23027 + vfs_type = DT_DIR;
23029 + case GFS_FILE_LNK:
23030 + vfs_type = DT_LNK;
23032 + case GFS_FILE_BLK:
23033 + vfs_type = DT_BLK;
23035 + case GFS_FILE_CHR:
23036 + vfs_type = DT_CHR;
23038 + case GFS_FILE_FIFO:
23039 + vfs_type = DT_FIFO;
23041 + case GFS_FILE_SOCK:
23042 + vfs_type = DT_SOCK;
23045 + GFS_ASSERT_SBD(FALSE, sdp,
23046 + printk("type = %u\n", type););
23049 + error = fdr->fdr_filldir(fdr->fdr_opaque, name, length, offset,
23050 + inum->no_formal_ino, vfs_type);
23054 + if (fdr->fdr_prefetch && !(length == 1 && *name == '.')) {
23055 + gfs_glock_prefetch_num(sdp,
23056 + inum->no_formal_ino, &gfs_inode_glops,
23057 + LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY);
23058 + gfs_glock_prefetch_num(sdp,
23059 + inum->no_addr, &gfs_iopen_glops,
23060 + LM_ST_SHARED, LM_FLAG_TRY);
23067 + * readdir_reg - Read directory entries from a directory
23068 + * @file: The directory to read from
23069 + * @dirent: Buffer for dirents
23070 + * @filldir: Function used to do the copying
23072 + * Returns: 0 on success, -EXXXX on failure
23076 +readdir_reg(struct file *file, void *dirent, filldir_t filldir)
23078 + struct gfs_inode *dip = vn2ip(file->f_mapping->host);
23079 + struct filldir_reg fdr;
23080 + struct gfs_holder d_gh;
23081 + uint64_t offset = file->f_pos;
23084 + fdr.fdr_sbd = dip->i_sbd;
23085 + fdr.fdr_prefetch = GFS_ASYNC_LM(dip->i_sbd);
23086 + fdr.fdr_filldir = filldir;
23087 + fdr.fdr_opaque = dirent;
23089 + gfs_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh);
23090 + error = gfs_glock_nq_atime(&d_gh);
23092 + gfs_holder_uninit(&d_gh);
23096 + error = gfs_dir_read(dip, &offset, &fdr, filldir_reg_func);
23098 + gfs_glock_dq_uninit(&d_gh);
23100 + file->f_pos = offset;
23106 + * filldir_bad_func - Report a directory entry to the caller of gfs_dir_read()
23107 + * @opaque: opaque data used by the function
23108 + * @name: the name of the directory entry
23109 + * @length: the length of the name
23110 + * @offset: the entry's offset in the directory
23111 + * @inum: the inode number the entry points to
23112 + * @type: the type of inode the entry points to
23114 + * Returns: 0 on success, 1 if buffer full
23118 +filldir_bad_func(void *opaque,
23119 + const char *name, unsigned int length,
23121 + struct gfs_inum *inum, unsigned int type)
23123 + struct filldir_bad *fdb = (struct filldir_bad *)opaque;
23124 + struct gfs_sbd *sdp = fdb->fdb_sbd;
23125 + struct filldir_bad_entry *fbe;
23127 + if (fdb->fdb_entry_off == fdb->fdb_entry_num ||
23128 + fdb->fdb_name_off + length > fdb->fdb_name_size)
23131 + fbe = &fdb->fdb_entry[fdb->fdb_entry_off];
23132 + fbe->fbe_name = fdb->fdb_name + fdb->fdb_name_off;
23133 + memcpy(fbe->fbe_name, name, length);
23134 + fbe->fbe_length = length;
23135 + fbe->fbe_offset = offset;
23136 + fbe->fbe_inum = *inum;
23137 + fbe->fbe_type = type;
23139 + fdb->fdb_entry_off++;
23140 + fdb->fdb_name_off += length;
23142 + if (fdb->fdb_prefetch && !(length == 1 && *name == '.')) {
23143 + gfs_glock_prefetch_num(sdp,
23144 + inum->no_formal_ino, &gfs_inode_glops,
23145 + LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY);
23146 + gfs_glock_prefetch_num(sdp,
23147 + inum->no_addr, &gfs_iopen_glops,
23148 + LM_ST_SHARED, LM_FLAG_TRY);
23155 + * readdir_bad - Read directory entries from a directory
23156 + * @file: The directory to read from
23157 + * @dirent: Buffer for dirents
23158 + * @filldir: Function used to do the copying
23160 + * Returns: 0 on success, -EXXXX on failure
23164 +readdir_bad(struct file *file, void *dirent, filldir_t filldir)
23166 + struct gfs_inode *dip = vn2ip(file->f_mapping->host);
23167 + struct gfs_sbd *sdp = dip->i_sbd;
23168 + struct filldir_reg fdr;
23169 + unsigned int entries, size;
23170 + struct filldir_bad *fdb;
23171 + struct gfs_holder d_gh;
23172 + uint64_t offset = file->f_pos;
23174 + struct filldir_bad_entry *fbe;
23177 + entries = sdp->sd_tune.gt_entries_per_readdir;
23178 + size = sizeof(struct filldir_bad) +
23179 + entries * (sizeof(struct filldir_bad_entry) + GFS_FAST_NAME_SIZE);
23181 + fdb = gmalloc(size);
23182 + memset(fdb, 0, size);
23184 + fdb->fdb_sbd = sdp;
23185 + fdb->fdb_prefetch = GFS_ASYNC_LM(sdp);
23186 + fdb->fdb_entry = (struct filldir_bad_entry *)(fdb + 1);
23187 + fdb->fdb_entry_num = entries;
23188 + fdb->fdb_name = ((char *)fdb) + sizeof(struct filldir_bad) +
23189 + entries * sizeof(struct filldir_bad_entry);
23190 + fdb->fdb_name_size = entries * GFS_FAST_NAME_SIZE;
23192 + gfs_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh);
23193 + error = gfs_glock_nq_atime(&d_gh);
23195 + gfs_holder_uninit(&d_gh);
23199 + error = gfs_dir_read(dip, &offset, fdb, filldir_bad_func);
23201 + gfs_glock_dq_uninit(&d_gh);
23203 + fdr.fdr_sbd = sdp;
23204 + fdr.fdr_prefetch = FALSE;
23205 + fdr.fdr_filldir = filldir;
23206 + fdr.fdr_opaque = dirent;
23208 + for (x = 0; x < fdb->fdb_entry_off; x++) {
23209 + fbe = &fdb->fdb_entry[x];
23211 + error = filldir_reg_func(&fdr,
23212 + fbe->fbe_name, fbe->fbe_length,
23214 + &fbe->fbe_inum, fbe->fbe_type);
23216 + file->f_pos = fbe->fbe_offset;
23222 + file->f_pos = offset;
23231 + * gfs_readdir - Read directory entries from a directory
23232 + * @file: The directory to read from
23233 + * @dirent: Buffer for dirents
23234 + * @filldir: Function used to do the copying
23236 + * Returns: 0 on success, -EXXXX on failure
23240 +gfs_readdir(struct file *file, void *dirent, filldir_t filldir)
23244 + atomic_inc(&vfs2sdp(file->f_mapping->host->i_sb)->sd_ops_file);
23246 + if (strcmp(current->comm, "nfsd") != 0)
23247 + error = readdir_reg(file, dirent, filldir);
23249 + error = readdir_bad(file, dirent, filldir);
23255 + * gfs_ioctl - do an ioctl on a file
23256 + * @inode: the inode
23257 + * @file: the file pointer
23258 + * @cmd: the ioctl command
23259 + * @arg: the argument
23261 + * Returns: 0 on success, -EXXXX on failure
23265 +gfs_ioctl(struct inode *inode, struct file *file,
23266 + unsigned int cmd, unsigned long arg)
23268 + struct gfs_inode *ip = vn2ip(inode);
23269 + atomic_inc(&ip->i_sbd->sd_ops_file);
23270 + return gfs_ioctli(ip, cmd, (void *)arg);
23274 + * gfs_open - open a file
23275 + * @inode: the inode to open
23276 + * @file: the struct file for this opening
23278 + * Returns: 0 on success, -EXXX on failure
23282 +gfs_open(struct inode *inode, struct file *file)
23284 + struct gfs_inode *ip = vn2ip(inode);
23285 + struct gfs_holder i_gh;
23286 + struct gfs_file *fp;
23289 + atomic_inc(&ip->i_sbd->sd_ops_file);
23291 + fp = gmalloc(sizeof(struct gfs_file));
23292 + memset(fp, 0, sizeof(struct gfs_file));
23294 + init_MUTEX(&fp->f_fl_lock);
23296 + fp->f_inode = ip;
23297 + fp->f_vfile = file;
23299 + GFS_ASSERT_INODE(!vf2fp(file), ip,);
23300 + vf2fp(file) = fp;
23302 + if (ip->i_di.di_type == GFS_FILE_REG) {
23303 + error = gfs_glock_nq_init(ip->i_gl,
23304 + LM_ST_SHARED, LM_FLAG_ANY,
23309 + if (!(file->f_flags & O_LARGEFILE) &&
23310 + ip->i_di.di_size > 0x7FFFFFFFull) {
23312 + goto fail_gunlock;
23315 + /* If this is an exclusive create, make sure our gfs_create()
23316 + says we created the file. The O_EXCL flag isn't passed
23317 + to gfs_create(), so we have to check it here. */
23319 + if (file->f_flags & O_CREAT) {
23320 + if (ip->i_creat_task == current &&
23321 + ip->i_creat_pid == current->pid) {
23322 + ip->i_creat_task = NULL;
23323 + ip->i_creat_pid = 0;
23324 + } else if (file->f_flags & O_EXCL) {
23326 + goto fail_gunlock;
23330 + /* Listen to the Direct I/O flag */
23332 + if (ip->i_di.di_flags & GFS_DIF_DIRECTIO)
23333 + file->f_flags |= O_DIRECT;
23335 + /* Don't let the user open O_DIRECT on a jdata file */
23337 + if ((file->f_flags & O_DIRECT) && gfs_is_jdata(ip)) {
23339 + goto fail_gunlock;
23342 + gfs_glock_dq_uninit(&i_gh);
23348 + gfs_glock_dq_uninit(&i_gh);
23351 + vf2fp(file) = NULL;
23358 + * gfs_close - called to close a struct file
23359 + * @inode: the inode the struct file belongs to
23360 + * @file: the struct file being closed
23362 + * Returns: 0 on success, -EXXX on failure
23366 +gfs_close(struct inode *inode, struct file *file)
23368 + struct gfs_file *fp;
23370 + atomic_inc(&vfs2sdp(inode->i_sb)->sd_ops_file);
23372 + fp = vf2fp(file);
23373 + vf2fp(file) = NULL;
23383 + * gfs_fsync - sync the dirty data for a file (across the cluster)
23384 + * @file: the file that points to the dentry (Huh?)
23385 + * @dentry: the dentry that points to the inode to sync
23387 + * Returns: 0 on success, -EXXX on failure
23391 +gfs_fsync(struct file *file, struct dentry *dentry, int datasync)
23393 + struct gfs_inode *ip = vn2ip(dentry->d_inode);
23394 + struct gfs_holder i_gh;
23397 + atomic_inc(&ip->i_sbd->sd_ops_file);
23399 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
23403 + if (gfs_is_jdata(ip))
23404 + gfs_log_flush_glock(ip->i_gl);
23406 + i_gh.gh_flags |= GL_SYNC;
23408 + gfs_glock_dq_uninit(&i_gh);
23414 + * gfs_lock - acquire/release a flock or posix lock on a file
23415 + * @file: the file pointer
23416 + * @cmd: either modify or retrieve lock state, possibly wait
23417 + * @fl: type and range of lock
23419 + * Returns: 0 on success, -EXXX on failure
23423 +gfs_lock(struct file *file, int cmd, struct file_lock *fl)
23425 + struct gfs_inode *ip = vn2ip(file->f_mapping->host);
23426 + struct gfs_sbd *sdp = ip->i_sbd;
23427 + struct lm_lockname name;
23428 + uint64_t start = fl->fl_start, end = fl->fl_end;
23429 + pid_t pid = fl->fl_pid;
23430 + int plock = (fl->fl_flags & FL_POSIX);
23431 + int flock = (fl->fl_flags & FL_FLOCK);
23432 + int get, set, wait, ex, sh, un;
23435 + atomic_inc(&sdp->sd_ops_file);
23437 + if (sdp->sd_args.ar_localflocks)
23438 + return LOCK_USE_CLNT;
23440 + if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
23443 + if (!flock && !plock)
23446 + get = (IS_GETLK(cmd)) ? TRUE : FALSE;
23447 + set = (IS_SETLK(cmd)) ? TRUE : FALSE;
23448 + wait = (IS_SETLKW(cmd)) ? TRUE : FALSE;
23450 + if ((flock && (get || (!set && !wait))) ||
23451 + (plock && (!get && !set && !wait)))
23454 + ex = (fl->fl_type == F_WRLCK) ? TRUE : FALSE;
23455 + sh = (fl->fl_type == F_RDLCK) ? TRUE : FALSE;
23456 + un = (fl->fl_type == F_UNLCK) ? TRUE : FALSE;
23458 + if (!ex && !sh && !un)
23462 + struct gfs_file *fp = vf2fp(file);
23466 + error = gfs_funlock(fp);
23468 + error = gfs_flock(fp, ex, wait);
23470 + name.ln_number = ip->i_num.no_formal_ino;
23471 + name.ln_type = LM_TYPE_PLOCK;
23473 + error = sdp->sd_lockstruct.ls_ops->lm_plock_get(
23474 + sdp->sd_lockstruct.ls_lockspace,
23475 + &name, (unsigned long)fl->fl_owner,
23476 + &start, &end, &ex, (unsigned long*)&pid);
23480 + fl->fl_type = F_UNLCK;
23484 + fl->fl_start = start;
23485 + fl->fl_end = end;
23486 + fl->fl_pid = pid;
23487 + fl->fl_type = (ex) ? F_WRLCK : F_RDLCK;
23491 + error = sdp->sd_lockstruct.ls_ops->lm_punlock(
23492 + sdp->sd_lockstruct.ls_lockspace,
23493 + &name, (unsigned long)fl->fl_owner,
23496 + error = sdp->sd_lockstruct.ls_ops->lm_plock(
23497 + sdp->sd_lockstruct.ls_lockspace,
23498 + &name, (unsigned long)fl->fl_owner,
23499 + wait, ex, start, end);
23506 + * gfs_sendfile - Send bytes to a file or socket
23507 + * @in_file: The file to read from
23508 + * @out_file: The file to write to
23509 + * @count: The amount of data
23510 + * @offset: The beginning file offset
23512 + * Outputs: offset - updated according to number of bytes read
23514 + * Returns: The number of bytes sent, -EXXX on failure
23518 +gfs_sendfile(struct file *in_file, loff_t *offset, size_t count, read_actor_t actor, void __user *target)
23520 + struct gfs_inode *ip = vn2ip(in_file->f_mapping->host);
23521 + struct gfs_holder gh;
23524 + atomic_inc(&ip->i_sbd->sd_ops_file);
23526 + gfs_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
23528 + retval = gfs_glock_nq_atime(&gh);
23532 + if (gfs_is_jdata(ip))
23533 + retval = -ENOSYS;
23535 + retval = generic_file_sendfile(in_file, offset, count, actor, target);
23537 + gfs_glock_dq(&gh);
23540 + gfs_holder_uninit(&gh);
23546 + * gfs_mmap - We don't support shared writable mappings right now
23547 + * @file: The file to map
23548 + * @vma: The VMA which described the mapping
23550 + * Returns: 0 or error code
23554 +gfs_mmap(struct file *file, struct vm_area_struct *vma)
23556 + struct gfs_inode *ip = vn2ip(file->f_mapping->host);
23557 + struct gfs_holder i_gh;
23560 + atomic_inc(&ip->i_sbd->sd_ops_file);
23562 + gfs_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
23563 + error = gfs_glock_nq_atime(&i_gh);
23565 + gfs_holder_uninit(&i_gh);
23569 + if (gfs_is_jdata(ip)) {
23570 + if (vma->vm_flags & VM_MAYSHARE)
23573 + vma->vm_ops = &gfs_vm_ops_private;
23575 + /* This is VM_MAYWRITE instead of VM_WRITE because a call
23576 + to mprotect() can turn on VM_WRITE later. */
23578 + if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) == (VM_MAYSHARE | VM_MAYWRITE))
23579 + vma->vm_ops = &gfs_vm_ops_sharewrite;
23581 + vma->vm_ops = &gfs_vm_ops_private;
23584 + gfs_glock_dq_uninit(&i_gh);
23589 +struct file_operations gfs_file_fops = {
23590 + .llseek = gfs_llseek,
23591 + .read = gfs_read,
23592 + .write = gfs_write,
23593 + .ioctl = gfs_ioctl,
23594 + .mmap = gfs_mmap,
23595 + .open = gfs_open,
23596 + .release = gfs_close,
23597 + .fsync = gfs_fsync,
23598 + .lock = gfs_lock,
23599 + .sendfile = gfs_sendfile,
23602 +struct file_operations gfs_dir_fops = {
23603 + .readdir = gfs_readdir,
23604 + .ioctl = gfs_ioctl,
23605 + .open = gfs_open,
23606 + .release = gfs_close,
23607 + .fsync = gfs_fsync,
23608 + .lock = gfs_lock,
23610 diff -urN linux-orig/fs/gfs/ops_file.h linux-patched/fs/gfs/ops_file.h
23611 --- linux-orig/fs/gfs/ops_file.h 1969-12-31 18:00:00.000000000 -0600
23612 +++ linux-patched/fs/gfs/ops_file.h 2004-06-30 13:27:49.352709042 -0500
23614 +/******************************************************************************
23615 +*******************************************************************************
23617 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
23618 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
23620 +** This copyrighted material is made available to anyone wishing to use,
23621 +** modify, copy, or redistribute it subject to the terms and conditions
23622 +** of the GNU General Public License v.2.
23624 +*******************************************************************************
23625 +******************************************************************************/
23627 +#ifndef __OPS_FILE_DOT_H__
23628 +#define __OPS_FILE_DOT_H__
23630 +extern struct file_operations gfs_file_fops;
23631 +extern struct file_operations gfs_dir_fops;
23633 +#endif /* __OPS_FILE_DOT_H__ */
23634 diff -urN linux-orig/fs/gfs/ops_fstype.c linux-patched/fs/gfs/ops_fstype.c
23635 --- linux-orig/fs/gfs/ops_fstype.c 1969-12-31 18:00:00.000000000 -0600
23636 +++ linux-patched/fs/gfs/ops_fstype.c 2004-06-30 13:27:49.353708810 -0500
23638 +/******************************************************************************
23639 +*******************************************************************************
23641 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
23642 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
23644 +** This copyrighted material is made available to anyone wishing to use,
23645 +** modify, copy, or redistribute it subject to the terms and conditions
23646 +** of the GNU General Public License v.2.
23648 +*******************************************************************************
23649 +******************************************************************************/
23651 +#include <linux/sched.h>
23652 +#include <linux/slab.h>
23653 +#include <linux/smp_lock.h>
23654 +#include <linux/spinlock.h>
23655 +#include <asm/semaphore.h>
23656 +#include <linux/completion.h>
23657 +#include <linux/buffer_head.h>
23658 +#include <linux/vmalloc.h>
23659 +#include <linux/blkdev.h>
23662 +#include "daemon.h"
23663 +#include "glock.h"
23664 +#include "glops.h"
23665 +#include "inode.h"
23666 +#include "locking.h"
23667 +#include "mount.h"
23668 +#include "ops_export.h"
23669 +#include "ops_fstype.h"
23670 +#include "ops_super.h"
23671 +#include "quota.h"
23672 +#include "recovery.h"
23674 +#include "super.h"
23675 +#include "unlinked.h"
22678 + * fill_super - Read in superblock
22679 + * @sb: The VFS superblock
22680 + * @data: Mount options
22681 + * @silent: Don't complain if it's not a GFS filesystem
22683 + * Returns: 0 on success, -errno on failure
23687 +fill_super(struct super_block *sb, void *data, int silent)
23689 + struct gfs_sbd *sdp;
23690 + struct gfs_holder mount_gh, sb_gh, ji_gh;
23691 + struct inode *inode;
23692 + int super = TRUE, jindex = TRUE;
23697 + sdp = vmalloc(sizeof(struct gfs_sbd));
23701 + memset(sdp, 0, sizeof(struct gfs_sbd));
23703 + vfs2sdp(sb) = sdp;
23704 + sdp->sd_vfs = sb;
23706 + /* Init rgrp variables */
23708 + INIT_LIST_HEAD(&sdp->sd_rglist);
23709 + init_MUTEX(&sdp->sd_rindex_lock);
23710 + INIT_LIST_HEAD(&sdp->sd_rg_mru_list);
23711 + spin_lock_init(&sdp->sd_rg_mru_lock);
23712 + INIT_LIST_HEAD(&sdp->sd_rg_recent);
23713 + spin_lock_init(&sdp->sd_rg_recent_lock);
23714 + spin_lock_init(&sdp->sd_rg_forward_lock);
23716 + for (x = 0; x < GFS_GL_HASH_SIZE; x++) {
23717 + sdp->sd_gl_hash[x].hb_lock = RW_LOCK_UNLOCKED;
23718 + INIT_LIST_HEAD(&sdp->sd_gl_hash[x].hb_list);
23721 + INIT_LIST_HEAD(&sdp->sd_reclaim_list);
23722 + spin_lock_init(&sdp->sd_reclaim_lock);
23723 + init_waitqueue_head(&sdp->sd_reclaim_wchan);
23725 + for (x = 0; x < GFS_MHC_HASH_SIZE; x++)
23726 + INIT_LIST_HEAD(&sdp->sd_mhc[x]);
23727 + INIT_LIST_HEAD(&sdp->sd_mhc_single);
23728 + spin_lock_init(&sdp->sd_mhc_lock);
23730 + for (x = 0; x < GFS_DEPEND_HASH_SIZE; x++)
23731 + INIT_LIST_HEAD(&sdp->sd_depend[x]);
23732 + spin_lock_init(&sdp->sd_depend_lock);
23734 + init_MUTEX(&sdp->sd_freeze_lock);
23736 + init_MUTEX(&sdp->sd_thread_lock);
23737 + init_completion(&sdp->sd_thread_completion);
23739 + spin_lock_init(&sdp->sd_log_seg_lock);
23740 + INIT_LIST_HEAD(&sdp->sd_log_seg_list);
23741 + init_waitqueue_head(&sdp->sd_log_seg_wait);
23742 + INIT_LIST_HEAD(&sdp->sd_log_ail);
23743 + INIT_LIST_HEAD(&sdp->sd_log_incore);
23744 + init_MUTEX(&sdp->sd_log_lock);
23745 + INIT_LIST_HEAD(&sdp->sd_unlinked_list);
23746 + spin_lock_init(&sdp->sd_unlinked_lock);
23747 + INIT_LIST_HEAD(&sdp->sd_quota_list);
23748 + spin_lock_init(&sdp->sd_quota_lock);
23750 + INIT_LIST_HEAD(&sdp->sd_dirty_j);
23751 + spin_lock_init(&sdp->sd_dirty_j_lock);
23753 + spin_lock_init(&sdp->sd_ail_lock);
23754 + INIT_LIST_HEAD(&sdp->sd_recovery_bufs);
23756 + gfs_init_tune_data(sdp);
23758 + error = gfs_make_args((char *)data, &sdp->sd_args);
23760 + printk("GFS: can't parse mount arguments\n");
23764 + /* Copy out mount flags */
23766 + if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME))
23767 + set_bit(SDF_NOATIME, &sdp->sd_flags);
23768 + if (sb->s_flags & MS_RDONLY)
23769 + set_bit(SDF_ROFS, &sdp->sd_flags);
23771 + /* Set up Virtual Super Block */
23773 + sb->s_magic = GFS_MAGIC;
23774 + sb->s_op = &gfs_super_ops;
23775 + sb->s_export_op = &gfs_export_ops;
23776 + sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
23777 + sb->s_maxbytes = ~0ULL;
23779 + if (sdp->sd_args.ar_posixacls)
23780 + sb->s_flags |= MS_POSIXACL;
23782 + /* Set up the buffer cache and fill in some fake values
23783 + to allow us to read in the superblock. */
23785 + sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, GFS_BASIC_BLOCK);
23786 + sdp->sd_sb.sb_bsize_shift = sb->s_blocksize_bits;
23787 + sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - GFS_BASIC_BLOCK_SHIFT;
23788 + sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
23790 + GFS_ASSERT_SBD(sizeof(struct gfs_sb) <= sdp->sd_sb.sb_bsize, sdp,);
23792 + error = gfs_mount_lockproto(sdp, silent);
23796 + printk("GFS: fsid=%s: Joined cluster. Now mounting FS...\n",
23799 + if ((sdp->sd_lockstruct.ls_flags & LM_LSFLAG_LOCAL) &&
23800 + !sdp->sd_args.ar_ignore_local_fs) {
23801 + /* Force local [p|f]locks */
23802 + sdp->sd_args.ar_localflocks = TRUE;
23804 + /* Force local read ahead and caching */
23805 + sdp->sd_args.ar_localcaching = TRUE;
23808 + /* Start up the scand thread */
23810 + error = kernel_thread(gfs_scand, sdp, 0);
23812 + printk("GFS: fsid=%s: can't start scand thread: %d\n",
23813 + sdp->sd_fsname, error);
23814 + goto fail_lockproto;
23816 + wait_for_completion(&sdp->sd_thread_completion);
23818 + /* Start up the glockd thread */
23820 + for (sdp->sd_glockd_num = 0;
23821 + sdp->sd_glockd_num < sdp->sd_args.ar_num_glockd;
23822 + sdp->sd_glockd_num++) {
23823 + error = kernel_thread(gfs_glockd, sdp, 0);
23825 + printk("GFS: fsid=%s: can't start glockd thread: %d\n",
23826 + sdp->sd_fsname, error);
23827 + goto fail_glockd;
23829 + wait_for_completion(&sdp->sd_thread_completion);
23832 + error = gfs_glock_nq_num(sdp,
23833 + GFS_MOUNT_LOCK, &gfs_nondisk_glops,
23834 + LM_ST_EXCLUSIVE, LM_FLAG_NOEXP | GL_NOCACHE,
23837 + printk("GFS: fsid=%s: can't acquire mount glock: %d\n",
23838 + sdp->sd_fsname, error);
23839 + goto fail_glockd;
23842 + error = gfs_glock_nq_num(sdp,
23843 + GFS_LIVE_LOCK, &gfs_nondisk_glops,
23844 + LM_ST_SHARED, LM_FLAG_NOEXP | GL_EXACT,
23845 + &sdp->sd_live_gh);
23847 + printk("GFS: fsid=%s: can't acquire live glock: %d\n",
23848 + sdp->sd_fsname, error);
23849 + goto fail_gunlock_mount;
23852 + sdp->sd_live_gh.gh_owner = NULL;
23854 + error = gfs_glock_nq_num(sdp,
23855 + GFS_SB_LOCK, &gfs_meta_glops,
23856 + (sdp->sd_args.ar_upgrade) ? LM_ST_EXCLUSIVE : LM_ST_SHARED,
23859 + printk("GFS: fsid=%s: can't acquire superblock glock: %d\n",
23860 + sdp->sd_fsname, error);
23861 + goto fail_gunlock_live;
23864 + error = gfs_read_sb(sdp, sb_gh.gh_gl, silent);
23866 + printk("GFS: fsid=%s: can't read superblock: %d\n",
23867 + sdp->sd_fsname, error);
23868 + goto fail_gunlock_sb;
23871 + /* Set up the buffer cache and SB for real */
23874 + if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) {
23875 + printk("GFS: fsid=%s: FS block size (%u) is too small for device block size (%u)\n",
23876 + sdp->sd_fsname, sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev));
23877 + goto fail_gunlock_sb;
23879 + if (sdp->sd_sb.sb_bsize > PAGE_SIZE) {
23880 + printk("GFS: fsid=%s: FS block size (%u) is too big for machine page size (%u)\n",
23881 + sdp->sd_fsname, sdp->sd_sb.sb_bsize,
23882 + (unsigned int)PAGE_SIZE);
23883 + goto fail_gunlock_sb;
23886 + /* Get rid of buffers from the original block size */
23887 + sb_gh.gh_gl->gl_ops->go_inval(sb_gh.gh_gl, DIO_METADATA | DIO_DATA);
23888 + sb_gh.gh_gl->gl_aspace->i_blkbits = sdp->sd_sb.sb_bsize_shift;
23890 + sb_set_blocksize(sb, sdp->sd_sb.sb_bsize);
23892 + /* Read in journal index inode */
23894 + error = gfs_get_jiinode(sdp);
23896 + printk("GFS: fsid=%s: can't get journal index inode: %d\n",
23897 + sdp->sd_fsname, error);
23898 + goto fail_gunlock_sb;
23901 + init_MUTEX(&sdp->sd_jindex_lock);
23903 + /* Get a handle on the transaction glock */
23905 + error = gfs_glock_get(sdp, GFS_TRANS_LOCK, &gfs_trans_glops,
23906 + CREATE, &sdp->sd_trans_gl);
23908 + goto fail_ji_free;
23909 + set_bit(GLF_STICKY, &sdp->sd_trans_gl->gl_flags);
23911 + /* Upgrade version numbers if we need to */
23913 + if (sdp->sd_args.ar_upgrade) {
23914 + error = gfs_do_upgrade(sdp, sb_gh.gh_gl);
23916 + goto fail_trans_gl;
23919 + /* Load in the journal index */
23921 + error = gfs_jindex_hold(sdp, &ji_gh);
23923 + printk("GFS: fsid=%s: can't read journal index: %d\n",
23924 + sdp->sd_fsname, error);
23925 + goto fail_trans_gl;
23929 + if (sdp->sd_lockstruct.ls_jid >= sdp->sd_journals) {
23930 + printk("GFS: fsid=%s: can't mount journal #%u\n",
23931 + sdp->sd_fsname, sdp->sd_lockstruct.ls_jid);
23932 + printk("GFS: fsid=%s: there are only %u journals (0 - %u)\n",
23933 + sdp->sd_fsname, sdp->sd_journals, sdp->sd_journals - 1);
23934 + goto fail_gunlock_ji;
23936 + sdp->sd_jdesc = sdp->sd_jindex[sdp->sd_lockstruct.ls_jid];
23937 + sdp->sd_log_seg_free = sdp->sd_jdesc.ji_nsegment - 1;
23939 + error = gfs_glock_nq_num(sdp,
23940 + sdp->sd_jdesc.ji_addr, &gfs_meta_glops,
23941 + LM_ST_EXCLUSIVE, LM_FLAG_NOEXP,
23942 + &sdp->sd_journal_gh);
23944 + printk("GFS: fsid=%s: can't acquire the journal glock: %d\n",
23945 + sdp->sd_fsname, error);
23946 + goto fail_gunlock_ji;
23949 + if (sdp->sd_lockstruct.ls_first) {
23950 + for (x = 0; x < sdp->sd_journals; x++) {
23951 + error = gfs_recover_journal(sdp,
23952 + x, sdp->sd_jindex + x,
23955 + printk("GFS: fsid=%s: error recovering journal %u: %d\n",
23956 + sdp->sd_fsname, x, error);
23957 + goto fail_gunlock_journal;
23961 + sdp->sd_lockstruct.ls_ops->lm_others_may_mount(sdp->sd_lockstruct.ls_lockspace);
23962 + sdp->sd_lockstruct.ls_first = FALSE;
23964 + error = gfs_recover_journal(sdp,
23965 + sdp->sd_lockstruct.ls_jid, &sdp->sd_jdesc,
23968 + printk("GFS: fsid=%s: error recovering my journal: %d\n",
23969 + sdp->sd_fsname, error);
23970 + goto fail_gunlock_journal;
23974 + gfs_glock_dq_uninit(&ji_gh);
23977 + /* Disown my Journal glock */
23979 + sdp->sd_journal_gh.gh_owner = NULL;
23981 + /* Drop our cache and reread all the things we read before the replay. */
23983 + error = gfs_read_sb(sdp, sb_gh.gh_gl, FALSE);
23985 + printk("GFS: fsid=%s: can't read superblock: %d\n",
23986 + sdp->sd_fsname, error);
23987 + goto fail_gunlock_journal;
23990 + gfs_glock_force_drop(sdp->sd_jiinode->i_gl);
23992 + error = gfs_jindex_hold(sdp, &ji_gh);
23994 + printk("GFS: fsid=%s: can't read journal index: %d\n",
23995 + sdp->sd_fsname, error);
23996 + goto fail_gunlock_journal;
23998 + gfs_glock_dq_uninit(&ji_gh);
24000 + /* Make the FS read/write */
24002 + if (!test_bit(SDF_ROFS, &sdp->sd_flags)) {
24003 + error = gfs_make_fs_rw(sdp);
24005 + printk("GFS: fsid=%s: can't make FS RW: %d\n",
24006 + sdp->sd_fsname, error);
24007 + goto fail_gunlock_journal;
24011 + /* Start up the recover thread */
24013 + error = kernel_thread(gfs_recoverd, sdp, 0);
24015 + printk("GFS: fsid=%s: can't start recoverd thread: %d\n",
24016 + sdp->sd_fsname, error);
24017 + goto fail_recover_dump;
24019 + wait_for_completion(&sdp->sd_thread_completion);
24021 + /* Read in the resource index inode */
24023 + error = gfs_get_riinode(sdp);
24025 + printk("GFS: fsid=%s: can't get resource index inode: %d\n",
24026 + sdp->sd_fsname, error);
24027 + goto fail_recoverd;
24030 + /* Get the root inode */
24032 + error = gfs_get_rootinode(sdp);
24034 + printk("GFS: fsid=%s: can't read in root inode: %d\n",
24035 + sdp->sd_fsname, error);
24036 + goto fail_ri_free;
24039 + /* Read in the quota inode */
24041 + error = gfs_get_qinode(sdp);
24043 + printk("GFS: fsid=%s: can't get quota file inode: %d\n",
24044 + sdp->sd_fsname, error);
24045 + goto fail_root_free;
24048 + /* Read in the license inode */
24050 + error = gfs_get_linode(sdp);
24052 + printk("GFS: fsid=%s: can't get license file inode: %d\n",
24053 + sdp->sd_fsname, error);
24054 + goto fail_qi_free;
24057 + /* We're through with the superblock lock */
24059 + gfs_glock_dq_uninit(&sb_gh);
24062 + /* Get the inode/dentry */
24064 + inode = gfs_iget(sdp->sd_rooti, CREATE);
24066 + printk("GFS: fsid=%s: can't get root inode\n", sdp->sd_fsname);
24068 + goto fail_li_free;
24071 + sb->s_root = d_alloc_root(inode);
24072 + if (!sb->s_root) {
24074 + printk("GFS: fsid=%s: can't get root dentry\n", sdp->sd_fsname);
24076 + goto fail_li_free;
24079 + /* Start up the logd thread */
24081 + sdp->sd_jindex_refresh_time = jiffies;
24083 + error = kernel_thread(gfs_logd, sdp, 0);
24085 + printk("GFS: fsid=%s: can't start logd thread: %d\n",
24086 + sdp->sd_fsname, error);
24089 + wait_for_completion(&sdp->sd_thread_completion);
24091 + /* Start up the quotad thread */
24093 + error = kernel_thread(gfs_quotad, sdp, 0);
24095 + printk("GFS: fsid=%s: can't start quotad thread: %d\n",
24096 + sdp->sd_fsname, error);
24099 + wait_for_completion(&sdp->sd_thread_completion);
24101 + /* Start up the inoded thread */
24103 + error = kernel_thread(gfs_inoded, sdp, 0);
24105 + printk("GFS: fsid=%s: can't start inoded thread: %d\n",
24106 + sdp->sd_fsname, error);
24107 + goto fail_quotad;
24109 + wait_for_completion(&sdp->sd_thread_completion);
24111 + /* Get a handle on the rename lock */
24113 + error = gfs_glock_get(sdp, GFS_RENAME_LOCK, &gfs_nondisk_glops,
24114 + CREATE, &sdp->sd_rename_gl);
24116 + goto fail_inoded;
24118 + gfs_glock_dq_uninit(&mount_gh);
24123 + down(&sdp->sd_thread_lock);
24124 + clear_bit(SDF_INODED_RUN, &sdp->sd_flags);
24125 + wake_up_process(sdp->sd_inoded_process);
24126 + up(&sdp->sd_thread_lock);
24127 + wait_for_completion(&sdp->sd_thread_completion);
24130 + down(&sdp->sd_thread_lock);
24131 + clear_bit(SDF_QUOTAD_RUN, &sdp->sd_flags);
24132 + wake_up_process(sdp->sd_quotad_process);
24133 + up(&sdp->sd_thread_lock);
24134 + wait_for_completion(&sdp->sd_thread_completion);
24137 + down(&sdp->sd_thread_lock);
24138 + clear_bit(SDF_LOGD_RUN, &sdp->sd_flags);
24139 + wake_up_process(sdp->sd_logd_process);
24140 + up(&sdp->sd_thread_lock);
24141 + wait_for_completion(&sdp->sd_thread_completion);
24144 + dput(sb->s_root);
24147 + gfs_inode_put(sdp->sd_linode);
24150 + gfs_inode_put(sdp->sd_qinode);
24153 + gfs_inode_put(sdp->sd_rooti);
24156 + gfs_inode_put(sdp->sd_riinode);
24157 + gfs_clear_rgrpd(sdp);
24160 + down(&sdp->sd_thread_lock);
24161 + clear_bit(SDF_RECOVERD_RUN, &sdp->sd_flags);
24162 + wake_up_process(sdp->sd_recoverd_process);
24163 + up(&sdp->sd_thread_lock);
24164 + wait_for_completion(&sdp->sd_thread_completion);
24166 + fail_recover_dump:
24167 + clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
24168 + gfs_unlinked_cleanup(sdp);
24169 + gfs_quota_cleanup(sdp);
24171 + fail_gunlock_journal:
24172 + gfs_glock_dq_uninit(&sdp->sd_journal_gh);
24176 + gfs_glock_dq_uninit(&ji_gh);
24179 + gfs_glock_put(sdp->sd_trans_gl);
24182 + gfs_inode_put(sdp->sd_jiinode);
24183 + gfs_clear_journals(sdp);
24187 + gfs_glock_dq_uninit(&sb_gh);
24189 + fail_gunlock_live:
24190 + gfs_glock_dq_uninit(&sdp->sd_live_gh);
24192 + fail_gunlock_mount:
24193 + gfs_glock_dq_uninit(&mount_gh);
24196 + clear_bit(SDF_GLOCKD_RUN, &sdp->sd_flags);
24197 + wake_up(&sdp->sd_reclaim_wchan);
24198 + while (sdp->sd_glockd_num--)
24199 + wait_for_completion(&sdp->sd_thread_completion);
24201 + down(&sdp->sd_thread_lock);
24202 + clear_bit(SDF_SCAND_RUN, &sdp->sd_flags);
24203 + wake_up_process(sdp->sd_scand_process);
24204 + up(&sdp->sd_thread_lock);
24205 + wait_for_completion(&sdp->sd_thread_completion);
24208 + gfs_gl_hash_clear(sdp, TRUE);
24209 + gfs_unmount_lockproto(sdp);
24210 + gfs_clear_dirty_j(sdp);
24211 + while (invalidate_inodes(sb))
24218 + vfs2sdp(sb) = NULL;
24229 + * Returns: the new superblock
24232 +struct super_block *gfs_get_sb(struct file_system_type *fs_type, int flags,
24233 + const char *dev_name, void *data)
24235 + return get_sb_bdev(fs_type, flags, dev_name, data, fill_super);
24238 +struct file_system_type gfs_fs_type = {
24240 + .fs_flags = FS_REQUIRES_DEV,
24241 + .get_sb = gfs_get_sb,
24242 + .kill_sb = kill_block_super,
24243 + .owner = THIS_MODULE,
24245 diff -urN linux-orig/fs/gfs/ops_fstype.h linux-patched/fs/gfs/ops_fstype.h
24246 --- linux-orig/fs/gfs/ops_fstype.h 1969-12-31 18:00:00.000000000 -0600
24247 +++ linux-patched/fs/gfs/ops_fstype.h 2004-06-30 13:27:49.353708810 -0500
24249 +/******************************************************************************
24250 +*******************************************************************************
24252 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
24253 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
24255 +** This copyrighted material is made available to anyone wishing to use,
24256 +** modify, copy, or redistribute it subject to the terms and conditions
24257 +** of the GNU General Public License v.2.
24259 +*******************************************************************************
24260 +******************************************************************************/
24262 +#ifndef __OPS_FSTYPE_DOT_H__
24263 +#define __OPS_FSTYPE_DOT_H__
24265 +extern struct file_system_type gfs_fs_type;
24267 +#endif /* __OPS_FSTYPE_DOT_H__ */
24268 diff -urN linux-orig/fs/gfs/ops_inode.c linux-patched/fs/gfs/ops_inode.c
24269 --- linux-orig/fs/gfs/ops_inode.c 1969-12-31 18:00:00.000000000 -0600
24270 +++ linux-patched/fs/gfs/ops_inode.c 2004-06-30 13:27:49.354708578 -0500
24272 +/******************************************************************************
24273 +*******************************************************************************
24275 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
24276 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
24278 +** This copyrighted material is made available to anyone wishing to use,
24279 +** modify, copy, or redistribute it subject to the terms and conditions
24280 +** of the GNU General Public License v.2.
24282 +*******************************************************************************
24283 +******************************************************************************/
24285 +#include <linux/sched.h>
24286 +#include <linux/slab.h>
24287 +#include <linux/smp_lock.h>
24288 +#include <linux/spinlock.h>
24289 +#include <asm/semaphore.h>
24290 +#include <linux/completion.h>
24291 +#include <linux/buffer_head.h>
24292 +#include <linux/namei.h>
24293 +#include <linux/utsname.h>
24294 +#include <asm/uaccess.h>
24295 +#include <linux/xattr.h>
24296 +#include <linux/mm.h>
24297 +#include <linux/posix_acl.h>
24304 +#include "eattr.h"
24305 +#include "glock.h"
24306 +#include "inode.h"
24307 +#include "ops_dentry.h"
24308 +#include "ops_inode.h"
24310 +#include "quota.h"
24312 +#include "trans.h"
24313 +#include "unlinked.h"
24316 + * gfs_create - Create a file
24317 + * @dir: The directory in which to create the file
24318 + * @dentry: The dentry of the new file
24319 + * @mode: The mode of the new file
24321 + * Returns: 0 on success, -EXXXX on failure
24325 +gfs_create(struct inode *dir, struct dentry *dentry,
24326 + int mode, struct nameidata *nd)
24328 + struct gfs_inode *dip = vn2ip(dir), *ip;
24329 + struct gfs_sbd *sdp = dip->i_sbd;
24330 + struct gfs_holder d_gh, i_gh;
24331 + struct inode *inode;
24335 + atomic_inc(&sdp->sd_ops_inode);
24337 + gfs_unlinked_limit(sdp);
24339 + gfs_holder_init(dip->i_gl, 0, 0, &d_gh);
24342 + error = gfs_createi(&d_gh, &dentry->d_name,
24343 + GFS_FILE_REG, mode,
24347 + else if (error != -EEXIST) {
24348 + gfs_holder_uninit(&d_gh);
24352 + error = gfs_lookupi(&d_gh, &dentry->d_name,
24355 + if (i_gh.gh_gl) {
24360 + gfs_holder_uninit(&d_gh);
24365 + GFS_ASSERT_SBD(i_gh.gh_gl, sdp,);
24366 + ip = gl2ip(i_gh.gh_gl);
24369 + gfs_trans_end(sdp);
24370 + if (dip->i_alloc->al_rgd)
24371 + gfs_inplace_release(dip);
24372 + gfs_quota_unlock_m(dip);
24373 + gfs_unlinked_unlock(sdp, dip->i_alloc->al_ul);
24374 + gfs_alloc_put(dip);
24376 + ip->i_creat_task = current;
24377 + ip->i_creat_pid = current->pid;
24380 + gfs_glock_dq_uninit(&d_gh);
24381 + gfs_glock_dq_uninit(&i_gh);
24383 + inode = gfs_iget(ip, CREATE);
24384 + gfs_inode_put(ip);
24389 + d_instantiate(dentry, inode);
24391 + mark_inode_dirty(inode);
24397 + * lookup_cdpn_sub_at - Maybe lookup a Context Dependent Pathname
24398 + * @sdp: the filesystem
24399 + * @dentry: the original dentry to lookup
24400 + * @new_dentry: the new dentry, if this was a substitutable path.
24405 +lookup_cdpn_sub_at(struct gfs_sbd *sdp, struct dentry *dentry,
24406 + struct dentry **new_dentry)
24408 + struct dentry *parent = dget_parent(dentry);
24409 + char *buf = gmalloc(2 * __NEW_UTS_LEN + 2);
24411 + if (gfs_filecmp(&dentry->d_name, "@hostname", 9))
24412 + *new_dentry = lookup_one_len(system_utsname.nodename,
24414 + strlen(system_utsname.nodename));
24415 + else if (gfs_filecmp(&dentry->d_name, "@mach", 5))
24416 + *new_dentry = lookup_one_len(system_utsname.machine,
24418 + strlen(system_utsname.machine));
24419 + else if (gfs_filecmp(&dentry->d_name, "@os", 3))
24420 + *new_dentry = lookup_one_len(system_utsname.sysname,
24422 + strlen(system_utsname.sysname));
24423 + else if (gfs_filecmp(&dentry->d_name, "@uid", 4))
24424 + *new_dentry = lookup_one_len(buf,
24426 + sprintf(buf, "%u", current->fsuid));
24427 + else if (gfs_filecmp(&dentry->d_name, "@gid", 4))
24428 + *new_dentry = lookup_one_len(buf,
24430 + sprintf(buf, "%u", current->fsgid));
24431 + else if (gfs_filecmp(&dentry->d_name, "@sys", 4))
24432 + *new_dentry = lookup_one_len(buf,
24434 + sprintf(buf, "%s_%s",
24435 + system_utsname.machine,
24436 + system_utsname.sysname));
24437 + else if (gfs_filecmp(&dentry->d_name, "@jid", 4))
24438 + *new_dentry = lookup_one_len(buf,
24440 + sprintf(buf, "%u",
24441 + sdp->sd_lockstruct.ls_jid));
24448 + * lookup_cdpn_sub_brace - Maybe lookup a Context Dependent Pathname
24449 + * @sdp: the filesystem
24450 + * @dentry: the original dentry to lookup
24451 + * @new_dentry: the new dentry, if this was a substitutable path.
24456 +lookup_cdpn_sub_brace(struct gfs_sbd *sdp, struct dentry *dentry,
24457 + struct dentry **new_dentry)
24459 + struct dentry *parent = dget_parent(dentry);
24460 + char *buf = gmalloc(2 * __NEW_UTS_LEN + 2);
24462 + if (gfs_filecmp(&dentry->d_name, "{hostname}", 10))
24463 + *new_dentry = lookup_one_len(system_utsname.nodename,
24465 + strlen(system_utsname.nodename));
24466 + else if (gfs_filecmp(&dentry->d_name, "{mach}", 6))
24467 + *new_dentry = lookup_one_len(system_utsname.machine,
24469 + strlen(system_utsname.machine));
24470 + else if (gfs_filecmp(&dentry->d_name, "{os}", 4))
24471 + *new_dentry = lookup_one_len(system_utsname.sysname,
24473 + strlen(system_utsname.sysname));
24474 + else if (gfs_filecmp(&dentry->d_name, "{uid}", 5))
24475 + *new_dentry = lookup_one_len(buf,
24477 + sprintf(buf, "%u", current->fsuid));
24478 + else if (gfs_filecmp(&dentry->d_name, "{gid}", 5))
24479 + *new_dentry = lookup_one_len(buf,
24481 + sprintf(buf, "%u", current->fsgid));
24482 + else if (gfs_filecmp(&dentry->d_name, "{sys}", 5))
24483 + *new_dentry = lookup_one_len(buf,
24485 + sprintf(buf, "%s_%s",
24486 + system_utsname.machine,
24487 + system_utsname.sysname));
24488 + else if (gfs_filecmp(&dentry->d_name, "{jid}", 5))
24489 + *new_dentry = lookup_one_len(buf,
24491 + sprintf(buf, "%u",
24492 + sdp->sd_lockstruct.ls_jid));
24499 + * gfs_lookup - Look up a filename in a directory and return its inode
24500 + * @dir: The directory inode
24501 + * @dentry: The dentry of the new inode
24503 + * Called by the VFS layer. Lock dir and call gfs_lookupi()
24505 + * Returns: 0 on success, -EXXXX on failure
24508 +static struct dentry *
24509 +gfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
24511 + struct gfs_inode *dip = vn2ip(dir), *ip;
24512 + struct gfs_holder d_gh, i_gh;
24513 + struct inode *inode = NULL;
24516 + atomic_inc(&dip->i_sbd->sd_ops_inode);
24518 + /* Do Context Dependent Path Name expansion */
24520 + if (*dentry->d_name.name == '@' && dentry->d_name.len > 1) {
24521 + struct dentry *new_dentry = NULL;
24522 + lookup_cdpn_sub_at(dip->i_sbd, dentry, &new_dentry);
24524 + return new_dentry;
24525 + } else if (*dentry->d_name.name == '{' && dentry->d_name.len > 2) {
24526 + struct dentry *new_dentry = NULL;
24527 + lookup_cdpn_sub_brace(dip->i_sbd, dentry, &new_dentry);
24529 + return new_dentry;
24532 + dentry->d_op = &gfs_dops;
24534 + gfs_holder_init(dip->i_gl, 0, 0, &d_gh);
24536 + error = gfs_lookupi(&d_gh, &dentry->d_name, FALSE, &i_gh);
24538 + gfs_holder_uninit(&d_gh);
24539 + return ERR_PTR(error);
24542 + if (i_gh.gh_gl) {
24543 + ip = gl2ip(i_gh.gh_gl);
24545 + gfs_glock_dq_uninit(&d_gh);
24546 + gfs_glock_dq_uninit(&i_gh);
24548 + inode = gfs_iget(ip, CREATE);
24549 + gfs_inode_put(ip);
24552 + return ERR_PTR(-ENOMEM);
24554 + gfs_holder_uninit(&d_gh);
24557 + return d_splice_alias(inode, dentry);
24558 + d_add(dentry, inode);
24563 + * gfs_link - Link to a file
24564 + * @old_dentry: The inode to link
24565 + * @dir: Add link to this directory
24566 + * @dentry: The name of the link
24568 + * Link the inode in "old_dentry" into the directory "dir" with the
24569 + * name in "dentry".
24571 + * Returns: 0 on success, -EXXXX on failure
24575 +gfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
24577 + struct gfs_inode *dip = vn2ip(dir);
24578 + struct gfs_sbd *sdp = dip->i_sbd;
24579 + struct inode *inode = old_dentry->d_inode;
24580 + struct gfs_inode *ip = vn2ip(inode);
24581 + struct gfs_alloc *al = NULL;
24582 + struct gfs_holder ghs[2];
24583 + int alloc_required;
24586 + atomic_inc(&sdp->sd_ops_inode);
24588 + if (ip->i_di.di_type == GFS_FILE_DIR)
24591 + gfs_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[0]);
24592 + gfs_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[1]);
24594 + error = gfs_glock_nq_m(2, ghs);
24598 + error = permission(dir, MAY_WRITE | MAY_EXEC, NULL);
24600 + goto fail_gunlock;
24602 + error = gfs_dir_search(dip, &dentry->d_name, NULL, NULL);
24609 + goto fail_gunlock;
24612 + if (!dip->i_di.di_nlink) {
24614 + goto fail_gunlock;
24616 + if (dip->i_di.di_entries == (uint32_t)-1) {
24618 + goto fail_gunlock;
24620 + if (!ip->i_di.di_nlink) {
24622 + goto fail_gunlock;
24624 + if (ip->i_di.di_nlink == (uint32_t)-1) {
24626 + goto fail_gunlock;
24629 + error = gfs_diradd_alloc_required(dip, &dentry->d_name, &alloc_required);
24631 + goto fail_gunlock;
24633 + if (alloc_required) {
24634 + al = gfs_alloc_get(dip);
24636 + error = gfs_quota_lock_m(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
24640 + error = gfs_quota_check(dip, dip->i_di.di_uid, dip->i_di.di_gid);
24642 + goto fail_gunlock_q;
24644 + al->al_requested_meta = sdp->sd_max_dirres;
24646 + error = gfs_inplace_reserve(dip);
24648 + goto fail_gunlock_q;
24650 + /* Trans may require:
24651 + two dinode blocks, directory modifications to add an entry,
24652 + RG bitmap blocks to allocate from, and quota change */
24654 + error = gfs_trans_begin(sdp,
24655 + 2 + sdp->sd_max_dirres +
24656 + al->al_rgd->rd_ri.ri_length,
24661 + /* Trans may require:
24662 + Two dinode blocks and a leaf block. */
24664 + error = gfs_trans_begin(sdp, 3, 0);
24669 + error = gfs_dir_add(dip, &dentry->d_name, &ip->i_num, ip->i_di.di_type);
24671 + goto fail_end_trans;
24673 + error = gfs_change_nlink(ip, +1);
24675 + goto fail_end_trans;
24677 + gfs_trans_end(sdp);
24679 + if (alloc_required) {
24680 + GFS_ASSERT_INODE(al->al_alloced_meta, dip,);
24681 + gfs_inplace_release(dip);
24682 + gfs_quota_unlock_m(dip);
24683 + gfs_alloc_put(dip);
24686 + gfs_glock_dq_m(2, ghs);
24688 + gfs_holder_uninit(&ghs[0]);
24689 + gfs_holder_uninit(&ghs[1]);
24691 + atomic_inc(&inode->i_count);
24693 + d_instantiate(dentry, inode);
24694 + mark_inode_dirty(inode);
24699 + gfs_trans_end(sdp);
24702 + if (alloc_required)
24703 + gfs_inplace_release(dip);
24706 + if (alloc_required)
24707 + gfs_quota_unlock_m(dip);
24710 + if (alloc_required)
24711 + gfs_alloc_put(dip);
24714 + gfs_glock_dq_m(2, ghs);
24717 + gfs_holder_uninit(&ghs[0]);
24718 + gfs_holder_uninit(&ghs[1]);
24724 + * gfs_unlink - Unlink a file
24725 + * @dir: The inode of the directory containing the file to unlink
24726 + * @dentry: The file itself
24728 + * Unlink a file. Call gfs_unlinki()
24730 + * Returns: 0 on success, -EXXXX on failure
24734 +gfs_unlink(struct inode *dir, struct dentry *dentry)
24736 + struct gfs_inode *dip = vn2ip(dir);
24737 + struct gfs_sbd *sdp = dip->i_sbd;
24738 + struct gfs_inode *ip = vn2ip(dentry->d_inode);
24739 + struct gfs_holder ghs[2];
24742 + atomic_inc(&sdp->sd_ops_inode);
24744 + gfs_unlinked_limit(sdp);
24746 + gfs_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[0]);
24747 + gfs_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[1]);
24749 + error = gfs_glock_nq_m(2, ghs);
24753 + error = permission(dir, MAY_WRITE | MAY_EXEC, NULL);
24755 + goto fail_gunlock;
24757 + if ((dip->i_di.di_mode & S_ISVTX) &&
24758 + dip->i_di.di_uid != current->fsuid &&
24759 + ip->i_di.di_uid != current->fsuid &&
24760 + !capable(CAP_FOWNER)) {
24762 + goto fail_gunlock;
24765 + error = gfs_revalidate(dip, &dentry->d_name, ip);
24767 + goto fail_gunlock;
24769 + /* Trans may require:
24770 + Two dinode blocks and one modified directory leaf block
24771 + and one unlinked tag. */
24773 + error = gfs_trans_begin(sdp, 3, 1);
24775 + goto fail_gunlock;
24777 + error = gfs_unlinki(dip, &dentry->d_name, ip);
24779 + goto fail_end_trans;
24781 + gfs_trans_end(sdp);
24783 + gfs_glock_dq_m(2, ghs);
24785 + gfs_holder_uninit(&ghs[0]);
24786 + gfs_holder_uninit(&ghs[1]);
24791 + gfs_trans_end(sdp);
24794 + gfs_glock_dq_m(2, ghs);
24797 + gfs_holder_uninit(&ghs[0]);
24798 + gfs_holder_uninit(&ghs[1]);
24804 + * gfs_symlink - Create a symlink
24805 + * @dir: The directory to create the symlink in
24806 + * @dentry: The dentry to put the symlink in
24807 + * @symname: The thing which the link points to
24809 + * Returns: 0 on success, -EXXXX on failure
24813 +gfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
24815 + struct gfs_inode *dip = vn2ip(dir), *ip;
24816 + struct gfs_sbd *sdp = dip->i_sbd;
24817 + struct gfs_holder d_gh, i_gh;
24818 + struct inode *inode;
24819 + struct buffer_head *dibh;
24823 + atomic_inc(&sdp->sd_ops_inode);
24825 + gfs_unlinked_limit(sdp);
24827 + /* Must be stuffed with a null terminator for gfs_follow_link() */
24828 + size = strlen(symname);
24829 + if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode) - 1)
24830 + return -ENAMETOOLONG;
24832 + gfs_holder_init(dip->i_gl, 0, 0, &d_gh);
24834 + error = gfs_createi(&d_gh, &dentry->d_name,
24835 + GFS_FILE_LNK, 0777,
24838 + gfs_holder_uninit(&d_gh);
24842 + GFS_ASSERT_SBD(i_gh.gh_gl, sdp,);
24843 + ip = gl2ip(i_gh.gh_gl);
24845 + ip->i_di.di_size = size;
24847 + error = gfs_get_inode_buffer(ip, &dibh);
24848 + GFS_ASSERT_INODE(!error, ip,);
24850 + gfs_dinode_out(&ip->i_di, dibh->b_data);
24851 + memcpy(dibh->b_data + sizeof(struct gfs_dinode), symname, size);
24855 + gfs_trans_end(sdp);
24856 + if (dip->i_alloc->al_rgd)
24857 + gfs_inplace_release(dip);
24858 + gfs_quota_unlock_m(dip);
24859 + gfs_unlinked_unlock(sdp, dip->i_alloc->al_ul);
24860 + gfs_alloc_put(dip);
24862 + gfs_glock_dq_uninit(&d_gh);
24863 + gfs_glock_dq_uninit(&i_gh);
24865 + inode = gfs_iget(ip, CREATE);
24866 + gfs_inode_put(ip);
24871 + d_instantiate(dentry, inode);
24872 + mark_inode_dirty(inode);
24878 + * gfs_mkdir - Make a directory
24879 + * @dir: The parent directory of the new one
24880 + * @dentry: The dentry of the new directory
24881 + * @mode: The mode of the new directory
24883 + * Returns: 0 on success, -EXXXX on failure
24887 +gfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
24889 + struct gfs_inode *dip = vn2ip(dir), *ip;
24890 + struct gfs_sbd *sdp = dip->i_sbd;
24891 + struct gfs_holder d_gh, i_gh;
24892 + struct inode *inode;
24893 + struct buffer_head *dibh;
24894 + struct gfs_dinode *di;
24895 + struct gfs_dirent *dent;
24898 + atomic_inc(&sdp->sd_ops_inode);
24900 + gfs_unlinked_limit(sdp);
24902 + gfs_holder_init(dip->i_gl, 0, 0, &d_gh);
24904 + error = gfs_createi(&d_gh, &dentry->d_name,
24905 + GFS_FILE_DIR, mode,
24908 + gfs_holder_uninit(&d_gh);
24912 + GFS_ASSERT_SBD(i_gh.gh_gl, sdp,);
24913 + ip = gl2ip(i_gh.gh_gl);
24915 + ip->i_di.di_nlink = 2;
24916 + ip->i_di.di_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode);
24917 + ip->i_di.di_flags |= GFS_DIF_JDATA;
24918 + ip->i_di.di_payload_format = GFS_FORMAT_DE;
24919 + ip->i_di.di_entries = 2;
24921 + error = gfs_get_inode_buffer(ip, &dibh);
24922 + GFS_ASSERT_INODE(!error, ip,);
24924 + di = (struct gfs_dinode *)dibh->b_data;
24926 + error = gfs_dirent_alloc(ip, dibh, 1, &dent);
24927 + GFS_ASSERT_INODE(!error, ip,); /* This should never fail */
24929 + dent->de_inum = di->di_num; /* already GFS endian */
24930 + dent->de_hash = gfs_dir_hash(".", 1);
24931 + dent->de_hash = cpu_to_gfs32(dent->de_hash);
24932 + dent->de_type = cpu_to_gfs16(GFS_FILE_DIR);
24933 + memcpy((char *) (dent + 1), ".", 1);
24934 + di->di_entries = cpu_to_gfs32(1);
24936 + error = gfs_dirent_alloc(ip, dibh, 2, &dent);
24937 + GFS_ASSERT_INODE(!error, ip,); /* This should never fail */
24939 + gfs_inum_out(&dip->i_num, (char *) &dent->de_inum);
24940 + dent->de_hash = gfs_dir_hash("..", 2);
24941 + dent->de_hash = cpu_to_gfs32(dent->de_hash);
24942 + dent->de_type = cpu_to_gfs16(GFS_FILE_DIR);
24943 + memcpy((char *) (dent + 1), "..", 2);
24945 + gfs_dinode_out(&ip->i_di, (char *)di);
24949 + error = gfs_change_nlink(dip, +1);
24950 + GFS_ASSERT_INODE(!error, dip,); /* dip already pinned */
24952 + gfs_trans_end(sdp);
24953 + if (dip->i_alloc->al_rgd)
24954 + gfs_inplace_release(dip);
24955 + gfs_quota_unlock_m(dip);
24956 + gfs_unlinked_unlock(sdp, dip->i_alloc->al_ul);
24957 + gfs_alloc_put(dip);
24959 + gfs_glock_dq_uninit(&d_gh);
24960 + gfs_glock_dq_uninit(&i_gh);
24962 + inode = gfs_iget(ip, CREATE);
24963 + gfs_inode_put(ip);
24968 + d_instantiate(dentry, inode);
24969 + mark_inode_dirty(inode);
24975 + * gfs_rmdir - Remove a directory
24976 + * @dir: The parent directory of the directory to be removed
24977 + * @dentry: The dentry of the directory to remove
24979 + * Remove a directory. Call gfs_rmdiri()
24981 + * Returns: 0 on success, -EXXXX on failure
24985 +gfs_rmdir(struct inode *dir, struct dentry *dentry)
24987 + struct gfs_inode *dip = vn2ip(dir);
24988 + struct gfs_sbd *sdp = dip->i_sbd;
24989 + struct gfs_inode *ip = vn2ip(dentry->d_inode);
24990 + struct gfs_holder ghs[2];
24993 + atomic_inc(&sdp->sd_ops_inode);
24995 + gfs_unlinked_limit(sdp);
24997 + gfs_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[0]);
24998 + gfs_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[1]);
25000 + error = gfs_glock_nq_m(2, ghs);
25004 + error = permission(dir, MAY_WRITE | MAY_EXEC, NULL);
25006 + goto fail_gunlock;
25008 + if ((dip->i_di.di_mode & S_ISVTX) &&
25009 + dip->i_di.di_uid != current->fsuid &&
25010 + ip->i_di.di_uid != current->fsuid &&
25011 + !capable(CAP_FOWNER)) {
25013 + goto fail_gunlock;
25016 + error = gfs_revalidate(dip, &dentry->d_name, ip);
25018 + goto fail_gunlock;
25020 + GFS_ASSERT_INODE(ip->i_di.di_entries >= 2, ip,
25021 + gfs_dinode_print(&ip->i_di););
25023 + if (ip->i_di.di_entries > 2) {
25024 + error = -ENOTEMPTY;
25025 + goto fail_gunlock;
25028 + /* Trans may require:
25029 + Two dinode blocks, one directory leaf block containing the
25030 + entry to be rmdired, two leaf blocks containing . and .. of
25031 + the directory being rmdired, and one unlinked tag */
25033 + error = gfs_trans_begin(sdp, 5, 1);
25035 + goto fail_gunlock;
25037 + error = gfs_rmdiri(dip, &dentry->d_name, ip);
25039 + goto fail_end_trans;
25041 + gfs_trans_end(sdp);
25043 + gfs_glock_dq_m(2, ghs);
25045 + gfs_holder_uninit(&ghs[0]);
25046 + gfs_holder_uninit(&ghs[1]);
25051 + gfs_trans_end(sdp);
25054 + gfs_glock_dq_m(2, ghs);
25057 + gfs_holder_uninit(&ghs[0]);
25058 + gfs_holder_uninit(&ghs[1]);
25064 + * gfs_mknod - Make a special file
25065 + * @dir: The directory in which the special file will reside
25066 + * @dentry: The dentry of the special file
25067 + * @mode: The mode of the special file
25068 + * @rdev: The device specification of the special file
25073 +gfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
25075 + struct gfs_inode *dip = vn2ip(dir), *ip;
25076 + struct gfs_sbd *sdp = dip->i_sbd;
25077 + struct gfs_holder d_gh, i_gh;
25078 + struct inode *inode;
25079 + struct buffer_head *dibh;
25080 + uint16_t type = 0;
25081 + uint32_t major = 0, minor = 0;
25084 + atomic_inc(&sdp->sd_ops_inode);
25086 + gfs_unlinked_limit(sdp);
25088 + switch (mode & S_IFMT) {
25090 + type = GFS_FILE_BLK;
25091 + major = MAJOR(dev);
25092 + minor = MINOR(dev);
25095 + type = GFS_FILE_CHR;
25096 + major = MAJOR(dev);
25097 + minor = MINOR(dev);
25100 + type = GFS_FILE_FIFO;
25103 + type = GFS_FILE_SOCK;
25106 + GFS_ASSERT_SBD(FALSE, sdp,
25107 + printk("mode = %d\n", mode););
25111 + gfs_holder_init(dip->i_gl, 0, 0, &d_gh);
25113 + error = gfs_createi(&d_gh, &dentry->d_name,
25117 + gfs_holder_uninit(&d_gh);
25121 + GFS_ASSERT_SBD(i_gh.gh_gl, sdp,);
25122 + ip = gl2ip(i_gh.gh_gl);
25124 + ip->i_di.di_major = major;
25125 + ip->i_di.di_minor = minor;
25127 + error = gfs_get_inode_buffer(ip, &dibh);
25128 + GFS_ASSERT_INODE(!error, ip,);
25130 + gfs_dinode_out(&ip->i_di, dibh->b_data);
25134 + gfs_trans_end(sdp);
25135 + if (dip->i_alloc->al_rgd)
25136 + gfs_inplace_release(dip);
25137 + gfs_quota_unlock_m(dip);
25138 + gfs_unlinked_unlock(sdp, dip->i_alloc->al_ul);
25139 + gfs_alloc_put(dip);
25141 + gfs_glock_dq_uninit(&d_gh);
25142 + gfs_glock_dq_uninit(&i_gh);
25144 + inode = gfs_iget(ip, CREATE);
25145 + gfs_inode_put(ip);
25150 + d_instantiate(dentry, inode);
25151 + mark_inode_dirty(inode);
25157 + * gfs_rename - Rename a file
25158 + * @odir: Parent directory of old file name
25159 + * @odentry: The old dentry of the file
25160 + * @ndir: Parent directory of new file name
25161 + * @ndentry: The new dentry of the file
25163 + * Returns: 0 on success, -EXXXX on failure
25167 +gfs_rename(struct inode *odir, struct dentry *odentry,
25168 + struct inode *ndir, struct dentry *ndentry)
25170 + struct gfs_inode *odip = vn2ip(odir);
25171 + struct gfs_inode *ndip = vn2ip(ndir);
25172 + struct gfs_inode *ip = vn2ip(odentry->d_inode);
25173 + struct gfs_inode *nip = NULL;
25174 + struct gfs_sbd *sdp = odip->i_sbd;
25175 + struct qstr name;
25176 + struct gfs_alloc *al;
25177 + struct gfs_holder ghs[4], r_gh;
25178 + unsigned int num_gh;
25179 + int dir_rename = FALSE;
25180 + int alloc_required;
25184 + atomic_inc(&sdp->sd_ops_inode);
25186 + gfs_unlinked_limit(sdp);
25188 + if (ndentry->d_inode) {
25189 + nip = vn2ip(ndentry->d_inode);
25194 +	/* Make sure we aren't trying to move a directory into its subdir */
25196 + if (ip->i_di.di_type == GFS_FILE_DIR && odip != ndip) {
25197 + dir_rename = TRUE;
25199 + error = gfs_glock_nq_init(sdp->sd_rename_gl,
25200 + LM_ST_EXCLUSIVE, 0,
25205 + error = gfs_ok_to_move(ip, ndip);
25210 + gfs_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[0]);
25211 + gfs_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[1]);
25215 + gfs_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[num_gh++]);
25218 + gfs_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[num_gh++]);
25220 + error = gfs_glock_nq_m(num_gh, ghs);
25222 + goto fail_uninit;
25224 + /* Check out the old directory */
25226 + error = permission(odir, MAY_WRITE | MAY_EXEC, NULL);
25228 + goto fail_gunlock;
25230 + if ((odip->i_di.di_mode & S_ISVTX) &&
25231 + odip->i_di.di_uid != current->fsuid &&
25232 + ip->i_di.di_uid != current->fsuid &&
25233 + !capable(CAP_FOWNER)) {
25235 + goto fail_gunlock;
25238 + error = gfs_revalidate(odip, &odentry->d_name, ip);
25240 + goto fail_gunlock;
25242 + /* Check out the new directory */
25244 + error = permission(ndir, MAY_WRITE | MAY_EXEC, NULL);
25246 + goto fail_gunlock;
25249 + if ((ndip->i_di.di_mode & S_ISVTX) &&
25250 + ndip->i_di.di_uid != current->fsuid &&
25251 + nip->i_di.di_uid != current->fsuid &&
25252 + !capable(CAP_FOWNER)) {
25254 + goto fail_gunlock;
25257 + error = gfs_revalidate(ndip, &ndentry->d_name, nip);
25259 + goto fail_gunlock;
25261 + if (nip->i_di.di_type == GFS_FILE_DIR) {
25262 + GFS_ASSERT_INODE(nip->i_di.di_entries >= 2, ip,
25263 + gfs_dinode_print(&nip->i_di););
25264 + if (nip->i_di.di_entries > 2) {
25265 + error = -ENOTEMPTY;
25266 + goto fail_gunlock;
25270 + error = gfs_dir_search(ndip, &ndentry->d_name, NULL, NULL);
25278 + goto fail_gunlock;
25281 + if (odip != ndip) {
25282 + if (!ndip->i_di.di_nlink) {
25284 + goto fail_gunlock;
25286 + if (ndip->i_di.di_entries == (uint32_t)-1) {
25288 + goto fail_gunlock;
25290 + if (ip->i_di.di_type == GFS_FILE_DIR &&
25291 + ndip->i_di.di_nlink == (uint32_t)-1) {
25293 + goto fail_gunlock;
25298 + error = gfs_diradd_alloc_required(ndip, &ndentry->d_name, &alloc_required);
25300 + goto fail_gunlock;
25302 + if (alloc_required) {
25303 + al = gfs_alloc_get(ndip);
25305 + error = gfs_quota_lock_m(ndip,
25306 + NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
25310 + error = gfs_quota_check(ndip, ndip->i_di.di_uid, ndip->i_di.di_gid);
25312 + goto fail_gunlock_q;
25314 + al->al_requested_meta = sdp->sd_max_dirres;
25316 + error = gfs_inplace_reserve(ndip);
25318 + goto fail_gunlock_q;
25320 + /* Trans may require:
25321 + Dinodes for the srcdir, srcino, dstdir, dstino. Blocks for
25322 + adding the entry to dstdir. RG bitmaps for that allocation.
25323 + One leaf block in the srcdir for removal of the entry.
25324 + One leaf block for changing .. in srcino (if it's a directory).
25325 + Two leaf blocks for removing . and .. from dstino (if it exists
25326 + and it's a directory), one unlinked tag, and one quota block. */
25328 + error = gfs_trans_begin(sdp,
25329 + 8 + sdp->sd_max_dirres +
25330 + al->al_rgd->rd_ri.ri_length,
25335 + /* Trans may require:
25336 + Dinodes for the srcdir, srcino, dstdir, dstino. One block for
25337 + adding the entry to dstdir.
25338 + One leaf block in the srcdir for removal of the entry.
25339 + One leaf block for changing .. in srcino (if it's a directory).
25340 + Two leaf blocks for removing . and .. from dstino (if it exists
25341 + and it's a directory), and one unlinked tag. */
25343 + error = gfs_trans_begin(sdp, 9, 1);
25348 + /* Remove the target file, if it exists */
25351 + if (nip->i_di.di_type == GFS_FILE_DIR)
25352 + error = gfs_rmdiri(ndip, &ndentry->d_name, nip);
25354 + error = gfs_unlinki(ndip, &ndentry->d_name, nip);
25357 + goto fail_end_trans;
25360 + if (dir_rename) {
25361 + error = gfs_change_nlink(ndip, +1);
25363 + goto fail_end_trans;
25364 + error = gfs_change_nlink(odip, -1);
25366 + goto fail_end_trans;
25369 + name.name = "..";
25371 + error = gfs_dir_mvino(ip, &name, &ndip->i_num, GFS_FILE_DIR);
25373 + goto fail_end_trans;
25376 + error = gfs_dir_del(odip, &odentry->d_name);
25378 + goto fail_end_trans;
25380 + error = gfs_dir_add(ndip, &ndentry->d_name, &ip->i_num, ip->i_di.di_type);
25382 + goto fail_end_trans;
25385 + gfs_trans_add_gl(sdp->sd_rename_gl);
25387 + gfs_trans_end(sdp);
25389 + if (alloc_required) {
25390 + /* Don't check al->al_alloced_meta and friends. */
25391 + gfs_inplace_release(ndip);
25392 + gfs_quota_unlock_m(ndip);
25393 + gfs_alloc_put(ndip);
25396 + gfs_glock_dq_m(num_gh, ghs);
25398 + for (x = 0; x < num_gh; x++)
25399 + gfs_holder_uninit(&ghs[x]);
25402 + gfs_glock_dq_uninit(&r_gh);
25407 + gfs_trans_end(sdp);
25410 + if (alloc_required)
25411 + gfs_inplace_release(ndip);
25414 + if (alloc_required)
25415 + gfs_quota_unlock_m(ndip);
25418 + if (alloc_required)
25419 + gfs_alloc_put(ndip);
25422 + gfs_glock_dq_m(num_gh, ghs);
25425 + for (x = 0; x < num_gh; x++)
25426 + gfs_holder_uninit(&ghs[x]);
25430 + gfs_glock_dq_uninit(&r_gh);
25436 + * gfs_readlink - Read the value of a symlink
25437 + * @dentry: the symlink
25438 + * @buf: the buffer to read the symlink data into
25439 + * @size: the size of the buffer
25441 + * Returns: 0 on success, -EXXX on failure
25445 +gfs_readlink(struct dentry *dentry, char *user_buf, int user_size)
25447 + struct gfs_inode *ip = vn2ip(dentry->d_inode);
25448 + char array[GFS_FAST_NAME_SIZE], *buf = array;
25449 + unsigned int len = GFS_FAST_NAME_SIZE;
25452 + atomic_inc(&ip->i_sbd->sd_ops_inode);
25454 + error = gfs_readlinki(ip, &buf, &len);
25458 + GFS_ASSERT_INODE(len, ip,);
25460 + if (user_size > len - 1)
25461 + user_size = len - 1;
25463 + if (copy_to_user(user_buf, buf, user_size))
25466 + error = user_size;
25468 + if (buf != array)
25475 + * gfs_follow_link - Follow a symbolic link
25476 + * @dentry: The dentry of the link
25477 + * @nd: Data that we pass to vfs_follow_link()
25479 + * This can handle symlinks of any size. It is optimised for symlinks
25480 + * under GFS_FAST_NAME_SIZE.
25482 + * Returns: 0 on success or error code
25486 +gfs_follow_link(struct dentry *dentry, struct nameidata *nd)
25488 + struct gfs_inode *ip = vn2ip(dentry->d_inode);
25489 + char array[GFS_FAST_NAME_SIZE], *buf = array;
25490 + unsigned int len = GFS_FAST_NAME_SIZE;
25493 + atomic_inc(&ip->i_sbd->sd_ops_inode);
25495 + error = gfs_readlinki(ip, &buf, &len);
25497 + error = vfs_follow_link(nd, buf);
25498 + if (buf != array)
25506 + * gfs_permission -
25515 +gfs_permission(struct inode *inode, int mask, struct nameidata *nd)
25517 + struct gfs_inode *ip = vn2ip(inode);
25518 + struct gfs_holder i_gh;
25519 + struct posix_acl *acl;
25520 + umode_t mode = inode->i_mode;
25523 + atomic_inc(&ip->i_sbd->sd_ops_inode);
25525 + error = gfs_glock_nq_init(ip->i_gl,
25526 + LM_ST_SHARED, LM_FLAG_ANY,
25531 + if (mask & MAY_WRITE) {
25532 + if (IS_RDONLY(inode) &&
25533 + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
25537 + if (IS_IMMUTABLE(inode)) {
25543 + if (capable(CAP_DAC_OVERRIDE))
25544 + if (!(mask & MAY_EXEC) || (mode & S_IXUGO))
25547 + if (capable(CAP_DAC_READ_SEARCH) &&
25548 + (mask == MAY_READ ||
25549 + (!(mask & MAY_WRITE) && S_ISDIR(mode))))
25552 + if (inode->i_uid == current->fsuid) {
25553 + if ((mask & (mode >> 6)) != mask)
25558 + if ((mask & (mode >> 3)) == mask) {
25559 + error = gfs_getacl(inode, TRUE, &acl);
25561 + error = posix_acl_permission(inode, acl, mask);
25563 + } else if (error && error != -ENODATA)
25566 + if (in_group_p(inode->i_gid)) {
25570 + } else if (in_group_p(inode->i_gid)) {
25575 + if ((mask & mode) == mask)
25581 + gfs_glock_dq_uninit(&i_gh);
25587 + * gfs_setattr - Change attributes on an inode
25588 + * @dentry: The dentry which is changing
25589 + * @attr: The structure describing the change
25591 + * The VFS layer wants to change one or more of an inode's attributes.  Write
25592 + * that change out to disk.
25594 + * Returns: 0 on success, -EXXXX on failure
25598 +gfs_setattr(struct dentry *dentry, struct iattr *attr)
25600 + struct inode *inode = dentry->d_inode;
25601 + struct gfs_inode *ip = vn2ip(inode);
25602 + struct gfs_sbd *sdp = ip->i_sbd;
25603 + struct gfs_holder i_gh;
25604 + struct gfs_alloc *al;
25605 + struct buffer_head *dibh;
25606 + uint32_t ouid, ogid, nuid, ngid;
25609 + atomic_inc(&sdp->sd_ops_inode);
25611 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
25615 + error = inode_change_ok(inode, attr);
25619 + if (attr->ia_valid & ATTR_SIZE) {
25620 + error = permission(inode, MAY_WRITE, NULL);
25624 + if (attr->ia_size != ip->i_di.di_size) {
25625 + error = vmtruncate(inode, attr->ia_size);
25630 + error = gfs_truncatei(ip, attr->ia_size, gfs_truncator_page);
25634 + if ((sdp->sd_vfs->s_flags & MS_SYNCHRONOUS) &&
25635 + !gfs_is_jdata(ip))
25636 + i_gh.gh_flags |= GL_SYNC;
25639 + else if (attr->ia_valid & (ATTR_UID | ATTR_GID)) {
25640 + ouid = ip->i_di.di_uid;
25641 + ogid = ip->i_di.di_gid;
25642 + nuid = attr->ia_uid;
25643 + ngid = attr->ia_gid;
25645 + if (!(attr->ia_valid & ATTR_UID) || ouid == nuid)
25646 + ouid = nuid = NO_QUOTA_CHANGE;
25647 + if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
25648 + ogid = ngid = NO_QUOTA_CHANGE;
25650 + al = gfs_alloc_get(ip);
25652 + error = gfs_quota_lock_m(ip, nuid, ngid);
25656 + if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
25657 + error = gfs_quota_check(ip, nuid, ngid);
25659 + goto fail_gunlock_q;
25662 + /* Trans may require:
25663 + one dinode block and one quota change block */
25665 + error = gfs_trans_begin(sdp, 1, 1);
25667 + goto fail_gunlock_q;
25669 + error = gfs_get_inode_buffer(ip, &dibh);
25671 + goto fail_end_trans;
25673 + if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
25674 + gfs_trans_add_quota(sdp, -ip->i_di.di_blocks,
25676 + gfs_trans_add_quota(sdp, ip->i_di.di_blocks,
25680 + inode_setattr(inode, attr);
25681 + gfs_inode_attr_out(ip);
25683 + gfs_trans_add_bh(ip->i_gl, dibh);
25684 + gfs_dinode_out(&ip->i_di, dibh->b_data);
25687 + gfs_trans_end(sdp);
25689 + gfs_quota_unlock_m(ip);
25690 + gfs_alloc_put(ip);
25694 + /* Trans may require:
25695 + one dinode block plus changes for acl. */
25697 + error = gfs_trans_begin(sdp,
25698 + 1 + GFS_MAX_EA_ACL_BLKS, 0);
25702 + error = gfs_get_inode_buffer(ip, &dibh);
25704 + inode_setattr(inode, attr);
25705 + gfs_inode_attr_out(ip);
25707 + if (attr->ia_valid & ATTR_MODE)
25708 + error = gfs_acl_setattr(inode);
25710 + gfs_trans_add_bh(ip->i_gl, dibh);
25711 + gfs_dinode_out(&ip->i_di, dibh->b_data);
25715 + gfs_trans_end(sdp);
25718 + gfs_glock_dq_uninit(&i_gh);
25720 + mark_inode_dirty(inode);
25725 + gfs_trans_end(sdp);
25728 + gfs_quota_unlock_m(ip);
25731 + gfs_alloc_put(ip);
25734 + gfs_glock_dq_uninit(&i_gh);
25740 + * gfs_getattr - Read out an inode's attributes
25742 + * @dentry: The dentry to stat
25743 + * @stat: The inode's stats
25745 + * Returns: 0 on success, -EXXXX on failure
25749 +gfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
25751 + struct inode *inode = dentry->d_inode;
25752 + struct gfs_inode *ip = vn2ip(inode);
25753 + struct gfs_holder gh;
25756 + atomic_inc(&ip->i_sbd->sd_ops_inode);
25758 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
25761 + generic_fillattr(inode, stat);
25762 + gfs_glock_dq_uninit(&gh);
25769 + * get_eatype - get the type of the ea, and truncate the type from the name
25770 + * @name: ea name, possibly with type appended
25772 + * Returns: GFS_EATYPE_XXX
25776 +get_eatype(const char *name, char **truncated_name)
25780 + if (strncmp(name, "system.", 7) == 0) {
25781 + type = GFS_EATYPE_SYS;
25782 + *truncated_name = strchr(name, '.') + 1;
25783 + } else if (strncmp(name, "user.", 5) == 0) {
25784 + type = GFS_EATYPE_USR;
25785 + *truncated_name = strchr(name, '.') + 1;
25787 + type = GFS_EATYPE_UNUSED;
25788 + *truncated_name = NULL;
25795 + * gfs_setxattr - Set (or create or replace) an inode's extended attribute
25796 + * @dentry: inode's dentry
25797 + * @name: name of the extended attribute
25798 + * @data: the value of the extended attribute
25799 + * @size: the size of data
25800 + * @flags: used to specify create or replace actions
25802 + * Returns: 0 on success, -EXXX on error
25806 +gfs_setxattr(struct dentry *dentry, const char *name,
25807 + const void *data, size_t size,
25810 + struct inode *inode = dentry->d_inode;
25811 + struct gfs_inode *ip = vn2ip(inode);
25812 + struct gfs_sbd *sdp = ip->i_sbd;
25813 + struct gfs_easet_io req;
25814 + char *truncated_name;
25817 + atomic_inc(&sdp->sd_ops_inode);
25819 + req.es_type = get_eatype(name, &truncated_name);
25821 + if (req.es_type == GFS_EATYPE_UNUSED)
25822 + error = -EOPNOTSUPP;
25824 + req.es_data = data;
25825 + req.es_name = truncated_name;
25826 + req.es_data_len = size;
25827 + req.es_name_len = strlen(truncated_name);
25828 + if (flags & XATTR_CREATE)
25829 + req.es_cmd = GFS_EACMD_CREATE;
25830 + else if (flags & XATTR_REPLACE)
25831 + req.es_cmd = GFS_EACMD_REPLACE;
25833 + req.es_cmd = GFS_EACMD_SET;
25834 + error = gfs_set_eattr(sdp, ip, &req);
25847 + * Returns: 0 on success, -EXXX on error
25851 +gfs_getxattr(struct dentry *dentry, const char *name,
25852 + void *data, size_t size)
25854 + struct inode *inode = dentry->d_inode;
25855 + struct gfs_inode *ip = vn2ip(inode);
25856 + struct gfs_sbd *sdp = ip->i_sbd;
25857 + struct gfs_eaget_io req;
25858 + char *truncated_name;
25861 + atomic_inc(&sdp->sd_ops_inode);
25863 + req.eg_type = get_eatype(name, &truncated_name);
25865 + if (req.eg_type == GFS_EATYPE_UNUSED)
25866 + error = -EOPNOTSUPP;
25868 + req.eg_name = truncated_name;
25869 + req.eg_name_len = strlen(truncated_name);
25870 + req.eg_data = data;
25871 + req.eg_data_len = size;
25872 + req.eg_len = NULL;
25873 + error = gfs_get_eattr(sdp, ip, &req, gfs_ea_memcpy);
25880 + * gfs_listxattr -
25885 + * Returns: 0 on success, -EXXX on error
25889 +gfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
25891 + struct inode *inode = dentry->d_inode;
25892 + struct gfs_inode *ip = vn2ip(inode);
25893 + struct gfs_sbd *sdp = ip->i_sbd;
25894 + struct gfs_eaget_io req;
25896 + atomic_inc(&sdp->sd_ops_inode);
25899 + req.eg_name = NULL;
25900 + req.eg_name_len = 0;
25901 + req.eg_data = buffer;
25902 + req.eg_data_len = size;
25903 + req.eg_len = NULL;
25905 + return gfs_get_eattr(sdp, ip, &req, gfs_ea_memcpy);
25909 + * gfs_removexattr -
25913 + * Returns: 0 on success, -EXXX on error
25917 +gfs_removexattr(struct dentry *dentry, const char *name)
25919 + struct inode *inode = dentry->d_inode;
25920 + struct gfs_inode *ip = vn2ip(inode);
25921 + struct gfs_sbd *sdp = ip->i_sbd;
25922 + struct gfs_easet_io req;
25923 + char *truncated_name;
25926 + atomic_inc(&sdp->sd_ops_inode);
25928 + req.es_type = get_eatype(name, &truncated_name);
25930 + if (req.es_type == GFS_EATYPE_UNUSED)
25931 + error = -EOPNOTSUPP;
25933 + req.es_name = truncated_name;
25934 + req.es_data = NULL;
25935 + req.es_data_len = 0;
25936 + req.es_name_len = strlen(truncated_name);
25937 + req.es_cmd = GFS_EACMD_REMOVE;
25938 + error = gfs_set_eattr(sdp, ip, &req);
25944 +struct inode_operations gfs_file_iops = {
25945 + .permission = gfs_permission,
25946 + .setattr = gfs_setattr,
25947 + .getattr = gfs_getattr,
25948 + .setxattr = gfs_setxattr,
25949 + .getxattr = gfs_getxattr,
25950 + .listxattr = gfs_listxattr,
25951 + .removexattr = gfs_removexattr,
25954 +struct inode_operations gfs_dev_iops = {
25955 + .permission = gfs_permission,
25956 + .setattr = gfs_setattr,
25957 + .getattr = gfs_getattr,
25958 + .setxattr = gfs_setxattr,
25959 + .getxattr = gfs_getxattr,
25960 + .listxattr = gfs_listxattr,
25961 + .removexattr = gfs_removexattr,
25964 +struct inode_operations gfs_dir_iops = {
25965 + .create = gfs_create,
25966 + .lookup = gfs_lookup,
25967 + .link = gfs_link,
25968 + .unlink = gfs_unlink,
25969 + .symlink = gfs_symlink,
25970 + .mkdir = gfs_mkdir,
25971 + .rmdir = gfs_rmdir,
25972 + .mknod = gfs_mknod,
25973 + .rename = gfs_rename,
25974 + .permission = gfs_permission,
25975 + .setattr = gfs_setattr,
25976 + .getattr = gfs_getattr,
25977 + .setxattr = gfs_setxattr,
25978 + .getxattr = gfs_getxattr,
25979 + .listxattr = gfs_listxattr,
25980 + .removexattr = gfs_removexattr,
25983 +struct inode_operations gfs_symlink_iops = {
25984 + .readlink = gfs_readlink,
25985 + .follow_link = gfs_follow_link,
25986 + .permission = gfs_permission,
25987 + .setattr = gfs_setattr,
25988 + .getattr = gfs_getattr,
25989 + .setxattr = gfs_setxattr,
25990 + .getxattr = gfs_getxattr,
25991 + .listxattr = gfs_listxattr,
25992 + .removexattr = gfs_removexattr,
25995 diff -urN linux-orig/fs/gfs/ops_inode.h linux-patched/fs/gfs/ops_inode.h
25996 --- linux-orig/fs/gfs/ops_inode.h 1969-12-31 18:00:00.000000000 -0600
25997 +++ linux-patched/fs/gfs/ops_inode.h 2004-06-30 13:27:49.354708578 -0500
25999 +/******************************************************************************
26000 +*******************************************************************************
26002 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
26003 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
26005 +** This copyrighted material is made available to anyone wishing to use,
26006 +** modify, copy, or redistribute it subject to the terms and conditions
26007 +** of the GNU General Public License v.2.
26009 +*******************************************************************************
26010 +******************************************************************************/
26012 +#ifndef __OPS_INODE_DOT_H__
26013 +#define __OPS_INODE_DOT_H__
26015 +extern struct inode_operations gfs_file_iops;
26016 +extern struct inode_operations gfs_dir_iops;
26017 +extern struct inode_operations gfs_symlink_iops;
26018 +extern struct inode_operations gfs_dev_iops;
26020 +#endif /* __OPS_INODE_DOT_H__ */
26021 diff -urN linux-orig/fs/gfs/ops_super.c linux-patched/fs/gfs/ops_super.c
26022 --- linux-orig/fs/gfs/ops_super.c 1969-12-31 18:00:00.000000000 -0600
26023 +++ linux-patched/fs/gfs/ops_super.c 2004-06-30 13:27:49.354708578 -0500
26025 +/******************************************************************************
26026 +*******************************************************************************
26028 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
26029 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
26031 +** This copyrighted material is made available to anyone wishing to use,
26032 +** modify, copy, or redistribute it subject to the terms and conditions
26033 +** of the GNU General Public License v.2.
26035 +*******************************************************************************
26036 +******************************************************************************/
26038 +#include <linux/sched.h>
26039 +#include <linux/slab.h>
26040 +#include <linux/smp_lock.h>
26041 +#include <linux/spinlock.h>
26042 +#include <asm/semaphore.h>
26043 +#include <linux/completion.h>
26044 +#include <linux/buffer_head.h>
26045 +#include <linux/vmalloc.h>
26046 +#include <linux/statfs.h>
26047 +#include <linux/seq_file.h>
26048 +#include <linux/mount.h>
26052 +#include "glock.h"
26053 +#include "inode.h"
26054 +#include "locking.h"
26056 +#include "ops_super.h"
26058 +#include "quota.h"
26059 +#include "recovery.h"
26061 +#include "super.h"
26064 + * gfs_write_inode - Make sure the inode is stable on the disk
26065 + * @inode: The inode
26066 + * @sync: synchronous write flag
26071 +gfs_write_inode(struct inode *inode, int sync)
26073 + struct gfs_inode *ip = vn2ip(inode);
26075 + atomic_inc(&ip->i_sbd->sd_ops_super);
26077 + if (ip && sync && !gfs_in_panic)
26078 + gfs_log_flush_glock(ip->i_gl);
26082 + * gfs_put_inode - put an inode
26083 + * @inode: The inode
26085 + * If i_nlink is zero, any dirty data for the inode is thrown away.
26086 + * If a process on another machine has the file open, it may need that
26087 + * data. So, sync it out.
26091 +gfs_put_inode(struct inode *inode)
26093 + struct gfs_sbd *sdp = vfs2sdp(inode->i_sb);
26094 + struct gfs_inode *ip = vn2ip(inode);
26096 + atomic_inc(&sdp->sd_ops_super);
26099 + !inode->i_nlink &&
26100 + S_ISREG(inode->i_mode) &&
26101 + !sdp->sd_args.ar_localcaching)
26102 + gfs_sync_page_i(inode, DIO_START | DIO_WAIT);
26106 + * gfs_put_super - Unmount the filesystem
26107 + * @sb: The VFS superblock
26112 +gfs_put_super(struct super_block *sb)
26114 + struct gfs_sbd *sdp = vfs2sdp(sb);
26117 + atomic_inc(&sdp->sd_ops_super);
26119 + /* Unfreeze the filesystem, if we need to */
26121 + down(&sdp->sd_freeze_lock);
26122 + if (sdp->sd_freeze_count)
26123 + gfs_glock_dq_uninit(&sdp->sd_freeze_gh);
26124 + up(&sdp->sd_freeze_lock);
26126 + /* Kill off the inode thread */
26127 + down(&sdp->sd_thread_lock);
26128 + clear_bit(SDF_INODED_RUN, &sdp->sd_flags);
26129 + wake_up_process(sdp->sd_inoded_process);
26130 + up(&sdp->sd_thread_lock);
26131 + wait_for_completion(&sdp->sd_thread_completion);
26133 + /* Kill off the quota thread */
26134 + down(&sdp->sd_thread_lock);
26135 + clear_bit(SDF_QUOTAD_RUN, &sdp->sd_flags);
26136 + wake_up_process(sdp->sd_quotad_process);
26137 + up(&sdp->sd_thread_lock);
26138 + wait_for_completion(&sdp->sd_thread_completion);
26140 + /* Kill off the log thread */
26141 + down(&sdp->sd_thread_lock);
26142 + clear_bit(SDF_LOGD_RUN, &sdp->sd_flags);
26143 + wake_up_process(sdp->sd_logd_process);
26144 + up(&sdp->sd_thread_lock);
26145 + wait_for_completion(&sdp->sd_thread_completion);
26147 + /* Kill off the recoverd thread */
26148 + down(&sdp->sd_thread_lock);
26149 + clear_bit(SDF_RECOVERD_RUN, &sdp->sd_flags);
26150 + wake_up_process(sdp->sd_recoverd_process);
26151 + up(&sdp->sd_thread_lock);
26152 + wait_for_completion(&sdp->sd_thread_completion);
26154 + /* Kill off the glockd threads */
26155 + clear_bit(SDF_GLOCKD_RUN, &sdp->sd_flags);
26156 + wake_up(&sdp->sd_reclaim_wchan);
26157 + while (sdp->sd_glockd_num--)
26158 + wait_for_completion(&sdp->sd_thread_completion);
26160 + /* Kill off the scand thread */
26161 + down(&sdp->sd_thread_lock);
26162 + clear_bit(SDF_SCAND_RUN, &sdp->sd_flags);
26163 + wake_up_process(sdp->sd_scand_process);
26164 + up(&sdp->sd_thread_lock);
26165 + wait_for_completion(&sdp->sd_thread_completion);
26167 + if (!test_bit(SDF_ROFS, &sdp->sd_flags)) {
26168 + gfs_log_flush(sdp);
26169 + gfs_quota_sync(sdp);
26170 + gfs_quota_sync(sdp);
26172 + error = gfs_make_fs_ro(sdp);
26174 + gfs_io_error(sdp);
26177 + /* At this point, we're through modifying the disk */
26179 + /* Release stuff */
26181 + gfs_inode_put(sdp->sd_riinode);
26182 + gfs_inode_put(sdp->sd_jiinode);
26183 + gfs_inode_put(sdp->sd_rooti);
26184 + gfs_inode_put(sdp->sd_qinode);
26185 + gfs_inode_put(sdp->sd_linode);
26187 + gfs_glock_put(sdp->sd_trans_gl);
26188 + gfs_glock_put(sdp->sd_rename_gl);
26190 + gfs_glock_dq_uninit(&sdp->sd_journal_gh);
26192 + gfs_glock_dq_uninit(&sdp->sd_live_gh);
26194 + /* Get rid of rgrp bitmap structures */
26195 + gfs_clear_rgrpd(sdp);
26196 + gfs_clear_journals(sdp);
26198 + /* Take apart glock structures and buffer lists */
26199 + gfs_gl_hash_clear(sdp, TRUE);
26201 + /* Unmount the locking protocol */
26202 + gfs_unmount_lockproto(sdp);
26204 + /* At this point, we're through participating in the lockspace */
26206 + gfs_clear_dirty_j(sdp);
26208 + /* Get rid of any extra inodes */
26209 + while (invalidate_inodes(sb))
26214 + vfs2sdp(sb) = NULL;
26218 + * gfs_write_super - disk commit all incore transactions
26219 + * @sb: the filesystem
26221 + * This function is called every time sync(2) is called.
26222 + * After this exits, all dirty buffers are synced.
26226 +gfs_write_super(struct super_block *sb)
26228 + struct gfs_sbd *sdp = vfs2sdp(sb);
26230 + atomic_inc(&sdp->sd_ops_super);
26232 + if (!gfs_in_panic)
26233 + gfs_log_flush(sdp);
26237 + * gfs_write_super_lockfs - prevent further writes to the filesystem
26238 + * @sb: the VFS structure for the filesystem
26243 +gfs_write_super_lockfs(struct super_block *sb)
26245 + struct gfs_sbd *sdp = vfs2sdp(sb);
26248 + atomic_inc(&sdp->sd_ops_super);
26251 + error = gfs_freeze_fs(sdp);
26257 + printk("GFS: fsid=%s: waiting for recovery before freeze\n",
26262 + printk("GFS: fsid=%s: error freezing FS: %d\n",
26263 + sdp->sd_fsname, error);
26267 + printk("GFS: fsid=%s: retrying...\n", sdp->sd_fsname);
26269 + current->state = TASK_UNINTERRUPTIBLE;
26270 + schedule_timeout(HZ);
26275 + * gfs_unlockfs - reallow writes to the filesystem
26276 + * @sb: the VFS structure for the filesystem
26281 +gfs_unlockfs(struct super_block *sb)
26283 + struct gfs_sbd *sdp = vfs2sdp(sb);
26285 + atomic_inc(&sdp->sd_ops_super);
26287 + gfs_unfreeze_fs(sdp);
26291 + * gfs_statfs - Gather and return stats about the filesystem
26292 + * @sb: The superblock
26293 + * @statfsbuf: The buffer
26295 + * Returns: 0 on success or error code
26299 +gfs_statfs(struct super_block *sb, struct kstatfs *buf)
26301 + struct gfs_sbd *sdp = vfs2sdp(sb);
26302 + struct gfs_usage usage;
26305 + atomic_inc(&sdp->sd_ops_super);
26307 + error = gfs_stat_gfs(sdp, &usage, TRUE);
26311 + memset(buf, 0, sizeof(struct kstatfs));
26313 + buf->f_type = GFS_MAGIC;
26314 + buf->f_bsize = usage.gu_block_size;
26315 + buf->f_blocks = usage.gu_total_blocks;
26316 + buf->f_bfree = usage.gu_free + usage.gu_free_dinode + usage.gu_free_meta;
26317 + buf->f_bavail = usage.gu_free + usage.gu_free_dinode + usage.gu_free_meta;
26318 + buf->f_files = usage.gu_used_dinode + usage.gu_free_dinode + usage.gu_free_meta + usage.gu_free;
26319 + buf->f_ffree = usage.gu_free_dinode + usage.gu_free_meta + usage.gu_free;
26320 + buf->f_namelen = GFS_FNAMESIZE;
26326 + * gfs_remount_fs - called when the FS is remounted
26327 + * @sb: the filesystem
26328 + * @flags: the remount flags
26329 + * @data: extra data passed in (not used right now)
26331 + * Returns: 0 on success, -EXXX on failure
26335 +gfs_remount_fs(struct super_block *sb, int *flags, char *data)
26337 + struct gfs_sbd *sdp = vfs2sdp(sb);
26340 + atomic_inc(&sdp->sd_ops_super);
26342 + if (*flags & (MS_NOATIME | MS_NODIRATIME))
26343 + set_bit(SDF_NOATIME, &sdp->sd_flags);
26345 + clear_bit(SDF_NOATIME, &sdp->sd_flags);
26347 + if (*flags & MS_RDONLY) {
26348 + if (!test_bit(SDF_ROFS, &sdp->sd_flags))
26349 + error = gfs_make_fs_ro(sdp);
26350 + } else if (!(*flags & MS_RDONLY) &&
26351 + test_bit(SDF_ROFS, &sdp->sd_flags)) {
26352 + error = gfs_make_fs_rw(sdp);
26355 + /* Don't let the VFS update atimes. */
26356 + *flags |= MS_NOATIME | MS_NODIRATIME;
26362 + * gfs_clear_inode - Deallocate an inode when VFS is done with it
26363 + * @inode: The VFS inode
26368 +gfs_clear_inode(struct inode *inode)
26370 + struct gfs_inode *ip = vn2ip(inode);
26372 + atomic_inc(&vfs2sdp(inode->i_sb)->sd_ops_super);
26375 + spin_lock(&ip->i_lock);
26376 + ip->i_vnode = NULL;
26377 + vn2ip(inode) = NULL;
26378 + spin_unlock(&ip->i_lock);
26380 + gfs_glock_schedule_for_reclaim(ip->i_gl);
26381 + gfs_inode_put(ip);
26386 + * gfs_show_options - Show mount options for /proc/mounts
26387 + * @s: seq_file structure
26390 + * Returns: 0 on success or error code
26394 +gfs_show_options(struct seq_file *s, struct vfsmount *mnt)
26396 + struct gfs_sbd *sdp = vfs2sdp(mnt->mnt_sb);
26397 + struct gfs_args *args = &sdp->sd_args;
26399 + atomic_inc(&sdp->sd_ops_super);
26401 + if (args->ar_lockproto[0]) {
26402 + seq_printf(s, ",lockproto=");
26403 + seq_puts(s, args->ar_lockproto);
26405 + if (args->ar_locktable[0]) {
26406 + seq_printf(s, ",locktable=");
26407 + seq_puts(s, args->ar_locktable);
26409 + if (args->ar_hostdata[0]) {
26410 + seq_printf(s, ",hostdata=");
26411 + seq_puts(s, args->ar_hostdata);
26413 + if (args->ar_ignore_local_fs)
26414 + seq_printf(s, ",ignore_local_fs");
26415 + if (args->ar_localflocks)
26416 + seq_printf(s, ",localflocks");
26417 + if (args->ar_localcaching)
26418 + seq_printf(s, ",localcaching");
26419 + if (args->ar_upgrade)
26420 + seq_printf(s, ",upgrade");
26421 + if (args->ar_num_glockd != GFS_GLOCKD_DEFAULT)
26422 + seq_printf(s, ",num_glockd=%u", args->ar_num_glockd);
26423 + if (args->ar_posixacls)
26424 + seq_printf(s, ",acl");
26429 +struct super_operations gfs_super_ops = {
26430 + .write_inode = gfs_write_inode,
26431 + .put_inode = gfs_put_inode,
26432 + .put_super = gfs_put_super,
26433 + .write_super = gfs_write_super,
26434 + .write_super_lockfs = gfs_write_super_lockfs,
26435 + .unlockfs = gfs_unlockfs,
26436 + .statfs = gfs_statfs,
26437 + .remount_fs = gfs_remount_fs,
26438 + .clear_inode = gfs_clear_inode,
26439 + .show_options = gfs_show_options,
26441 diff -urN linux-orig/fs/gfs/ops_super.h linux-patched/fs/gfs/ops_super.h
26442 --- linux-orig/fs/gfs/ops_super.h 1969-12-31 18:00:00.000000000 -0600
26443 +++ linux-patched/fs/gfs/ops_super.h 2004-06-30 13:27:49.354708578 -0500
26445 +/******************************************************************************
26446 +*******************************************************************************
26448 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
26449 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
26451 +** This copyrighted material is made available to anyone wishing to use,
26452 +** modify, copy, or redistribute it subject to the terms and conditions
26453 +** of the GNU General Public License v.2.
26455 +*******************************************************************************
26456 +******************************************************************************/
26458 +#ifndef __OPS_SUPER_DOT_H__
26459 +#define __OPS_SUPER_DOT_H__
26461 +extern struct super_operations gfs_super_ops;
26463 +#endif /* __OPS_SUPER_DOT_H__ */
26464 diff -urN linux-orig/fs/gfs/ops_vm.c linux-patched/fs/gfs/ops_vm.c
26465 --- linux-orig/fs/gfs/ops_vm.c 1969-12-31 18:00:00.000000000 -0600
26466 +++ linux-patched/fs/gfs/ops_vm.c 2004-06-30 13:27:49.355708346 -0500
26468 +/******************************************************************************
26469 +*******************************************************************************
26471 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
26472 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
26474 +** This copyrighted material is made available to anyone wishing to use,
26475 +** modify, copy, or redistribute it subject to the terms and conditions
26476 +** of the GNU General Public License v.2.
26478 +*******************************************************************************
26479 +******************************************************************************/
26481 +#include <linux/sched.h>
26482 +#include <linux/slab.h>
26483 +#include <linux/smp_lock.h>
26484 +#include <linux/spinlock.h>
26485 +#include <asm/semaphore.h>
26486 +#include <linux/completion.h>
26487 +#include <linux/buffer_head.h>
26488 +#include <linux/mm.h>
26489 +#include <linux/pagemap.h>
26493 +#include "glock.h"
26494 +#include "inode.h"
26495 +#include "ops_vm.h"
26497 +#include "quota.h"
26499 +#include "trans.h"
26502 + * gfs_private_nopage -
26507 + * Returns: the page
26510 +static struct page *
26511 +gfs_private_nopage(struct vm_area_struct *area,
26512 + unsigned long address, int *type)
26514 + struct gfs_inode *ip = vn2ip(area->vm_file->f_mapping->host);
26515 + struct gfs_holder i_gh;
26516 + struct page *result;
26519 + atomic_inc(&ip->i_sbd->sd_ops_vm);
26521 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
26525 + set_bit(GIF_PAGED, &ip->i_flags);
26527 + result = filemap_nopage(area, address, type);
26529 + gfs_glock_dq_uninit(&i_gh);
26535 + * alloc_page_backing -
26543 +alloc_page_backing(struct gfs_inode *ip, unsigned long index)
26545 + struct gfs_sbd *sdp = ip->i_sbd;
26546 + uint64_t lblock = index << (PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift);
26547 + unsigned int blocks = PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift;
26548 + struct gfs_alloc *al;
26552 + al = gfs_alloc_get(ip);
26554 + error = gfs_quota_lock_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
26558 + error = gfs_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
26560 + goto out_gunlock_q;
26562 + gfs_write_calc_reserv(ip, PAGE_CACHE_SIZE,
26563 + &al->al_requested_data, &al->al_requested_meta);
26565 + error = gfs_inplace_reserve(ip);
26567 + goto out_gunlock_q;
26569 + /* Trans may require:
26570 + a dinode block, RG bitmaps to allocate from,
26571 + indirect blocks, and a quota block */
26573 + error = gfs_trans_begin(sdp,
26574 + 1 + al->al_rgd->rd_ri.ri_length +
26575 + al->al_requested_meta, 1);
26579 + if (gfs_is_stuffed(ip)) {
26580 + error = gfs_unstuff_dinode(ip, gfs_unstuffer_page, NULL);
26585 + for (x = 0; x < blocks; ) {
26587 + unsigned int extlen;
26590 + error = gfs_block_map(ip, lblock, &new, &dblock, &extlen);
26593 + GFS_ASSERT_INODE(dblock, ip,);
26595 + lblock += extlen;
26599 + GFS_ASSERT_INODE(al->al_alloced_meta || al->al_alloced_data, ip,);
26602 + gfs_trans_end(sdp);
26605 + gfs_inplace_release(ip);
26608 + gfs_quota_unlock_m(ip);
26611 + gfs_alloc_put(ip);
26617 + * gfs_sharewrite_nopage -
26622 + * Returns: the page
26625 +static struct page *
26626 +gfs_sharewrite_nopage(struct vm_area_struct *area,
26627 + unsigned long address, int *type)
26629 + struct gfs_inode *ip = vn2ip(area->vm_file->f_mapping->host);
26630 + struct gfs_holder i_gh;
26631 + struct page *result = NULL;
26632 + unsigned long index = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
26633 + int alloc_required;
26636 + atomic_inc(&ip->i_sbd->sd_ops_vm);
26638 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
26642 + if (gfs_is_jdata(ip))
26645 + set_bit(GIF_PAGED, &ip->i_flags);
26646 + set_bit(GIF_SW_PAGED, &ip->i_flags);
26648 + error = gfs_write_alloc_required(ip, (uint64_t)index << PAGE_CACHE_SHIFT,
26649 + PAGE_CACHE_SIZE, &alloc_required);
26653 + result = filemap_nopage(area, address, type);
26654 + if (!result || result == NOPAGE_OOM)
26657 + if (alloc_required) {
26658 + error = alloc_page_backing(ip, index);
26660 + page_cache_release(result);
26663 + set_page_dirty(result);
26667 + gfs_glock_dq_uninit(&i_gh);
26672 +struct vm_operations_struct gfs_vm_ops_private = {
26673 + .nopage = gfs_private_nopage,
26676 +struct vm_operations_struct gfs_vm_ops_sharewrite = {
26677 + .nopage = gfs_sharewrite_nopage,
26680 diff -urN linux-orig/fs/gfs/ops_vm.h linux-patched/fs/gfs/ops_vm.h
26681 --- linux-orig/fs/gfs/ops_vm.h 1969-12-31 18:00:00.000000000 -0600
26682 +++ linux-patched/fs/gfs/ops_vm.h 2004-06-30 13:27:49.355708346 -0500
26684 +/******************************************************************************
26685 +*******************************************************************************
26687 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
26688 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
26690 +** This copyrighted material is made available to anyone wishing to use,
26691 +** modify, copy, or redistribute it subject to the terms and conditions
26692 +** of the GNU General Public License v.2.
26694 +*******************************************************************************
26695 +******************************************************************************/
26697 +#ifndef __OPS_VM_DOT_H__
26698 +#define __OPS_VM_DOT_H__
26700 +extern struct vm_operations_struct gfs_vm_ops_private;
26701 +extern struct vm_operations_struct gfs_vm_ops_sharewrite;
26703 +#endif /* __OPS_VM_DOT_H__ */
26704 diff -urN linux-orig/fs/gfs/page.c linux-patched/fs/gfs/page.c
26705 --- linux-orig/fs/gfs/page.c 1969-12-31 18:00:00.000000000 -0600
26706 +++ linux-patched/fs/gfs/page.c 2004-06-30 13:27:49.355708346 -0500
26708 +/******************************************************************************
26709 +*******************************************************************************
26711 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
26712 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
26714 +** This copyrighted material is made available to anyone wishing to use,
26715 +** modify, copy, or redistribute it subject to the terms and conditions
26716 +** of the GNU General Public License v.2.
26718 +*******************************************************************************
26719 +******************************************************************************/
26721 +#include <linux/sched.h>
26722 +#include <linux/slab.h>
26723 +#include <linux/smp_lock.h>
26724 +#include <linux/spinlock.h>
26725 +#include <asm/semaphore.h>
26726 +#include <linux/completion.h>
26727 +#include <linux/buffer_head.h>
26728 +#include <linux/pagemap.h>
26729 +#include <linux/mm.h>
26733 +#include "inode.h"
26737 + * gfs_inval_pte - Sync and invalidate all PTEs associated with a glock
26743 +gfs_inval_pte(struct gfs_glock *gl)
26745 + struct gfs_inode *ip;
26746 + struct inode *inode;
26750 + ip->i_di.di_type != GFS_FILE_REG)
26753 + if (!test_bit(GIF_PAGED, &ip->i_flags))
26756 + inode = gfs_iget(ip, NO_CREATE);
26758 + unmap_shared_mapping_range(inode->i_mapping, 0, 0);
26761 + if (test_bit(GIF_SW_PAGED, &ip->i_flags))
26762 + set_bit(GLF_DIRTY, &gl->gl_flags);
26765 + clear_bit(GIF_SW_PAGED, &ip->i_flags);
26769 + * gfs_inval_page - Invalidate all pages associated with a glock
26775 +gfs_inval_page(struct gfs_glock *gl)
26777 + struct gfs_inode *ip;
26778 + struct inode *inode;
26782 + ip->i_di.di_type != GFS_FILE_REG)
26785 + inode = gfs_iget(ip, NO_CREATE);
26787 + struct address_space *mapping = inode->i_mapping;
26789 + truncate_inode_pages(mapping, 0);
26790 + GFS_ASSERT_INODE(!mapping->nrpages, ip,);
26795 + clear_bit(GIF_PAGED, &ip->i_flags);
26799 + * gfs_sync_page_i - Sync the pages for a struct inode
26800 + * @inode: the inode
26801 + * @flags: DIO_START | DIO_WAIT
26806 +gfs_sync_page_i(struct inode *inode, int flags)
26808 + struct address_space *mapping = inode->i_mapping;
26811 + if (flags & DIO_START)
26812 + error = filemap_fdatawrite(mapping);
26813 + if (!error && (flags & DIO_WAIT))
26814 + filemap_fdatawait(mapping);
26817 + gfs_io_error_inode(vn2ip(inode));
26821 + * gfs_sync_page - sync the pages associated with a glock
26823 + * @flags: DIO_START | DIO_WAIT
26828 +gfs_sync_page(struct gfs_glock *gl, int flags)
26830 + struct gfs_inode *ip;
26831 + struct inode *inode;
26835 + ip->i_di.di_type != GFS_FILE_REG)
26838 + inode = gfs_iget(ip, NO_CREATE);
26840 + gfs_sync_page_i(inode, flags);
26846 + * gfs_unstuffer_page - unstuff a stuffed inode into a block cached by a page
26848 + * @dibh: the dinode buffer
26849 + * @block: the block number that was allocated
26850 + * @private: any locked page held by the caller process
26852 + * Returns: 0 on success, -EXXX on failure
26856 +gfs_unstuffer_page(struct gfs_inode *ip, struct buffer_head *dibh,
26857 + uint64_t block, void *private)
26859 + struct inode *inode = ip->i_vnode;
26860 + struct page *page = (struct page *)private;
26861 + struct buffer_head *bh;
26862 + int release = FALSE;
26864 + if (!page || page->index) {
26865 + RETRY_MALLOC(page = grab_cache_page(inode->i_mapping, 0), page);
26869 + GFS_ASSERT_INODE(PageLocked(page), ip,);
26871 + if (!PageUptodate(page)) {
26872 + void *kaddr = kmap(page);
26875 + dibh->b_data + sizeof(struct gfs_dinode),
26876 + ip->i_di.di_size);
26877 + memset(kaddr + ip->i_di.di_size,
26879 + PAGE_CACHE_SIZE - ip->i_di.di_size);
26882 + SetPageUptodate(page);
26885 + if (!page_has_buffers(page))
26886 + create_empty_buffers(page, 1 << inode->i_blkbits,
26887 + (1 << BH_Uptodate));
26889 + bh = page_buffers(page);
26891 + if (!buffer_mapped(bh))
26892 + map_bh(bh, inode->i_sb, block);
26894 + GFS_ASSERT_INODE(bh->b_bdev == inode->i_sb->s_bdev &&
26895 + bh->b_blocknr == block,
26898 + set_buffer_uptodate(bh);
26899 + mark_buffer_dirty(bh);
26902 + unlock_page(page);
26903 + page_cache_release(page);
26910 + * gfs_truncator_page - truncate a partial data block in the page cache
26912 + * @size: the size the file should be
26914 + * Returns: 0 on success, -EXXX on failure
26918 +gfs_truncator_page(struct gfs_inode *ip, uint64_t size)
26920 + struct inode *inode = ip->i_vnode;
26921 + struct page *page;
26922 + struct buffer_head *bh;
26924 + uint64_t lbn, dbn;
26925 + unsigned long index;
26926 + unsigned int offset;
26927 + unsigned int bufnum;
26931 + lbn = size >> inode->i_blkbits;
26932 + error = gfs_block_map(ip,
26935 + if (error || !dbn)
26938 + index = size >> PAGE_CACHE_SHIFT;
26939 + offset = size & (PAGE_CACHE_SIZE - 1);
26940 + bufnum = lbn - (index << (PAGE_CACHE_SHIFT - inode->i_blkbits));
26942 + /* Not in a transaction here -- a non-disk-I/O error is ok. */
26944 + page = read_cache_page(inode->i_mapping, index,
26945 + (filler_t *)inode->i_mapping->a_ops->readpage,
26947 + if (IS_ERR(page))
26948 + return PTR_ERR(page);
26952 + if (!PageUptodate(page) || PageError(page)) {
26957 + kaddr = kmap(page);
26958 + memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
26961 + if (!page_has_buffers(page))
26962 + create_empty_buffers(page, 1 << inode->i_blkbits,
26963 + (1 << BH_Uptodate));
26965 + for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
26966 + /* Do nothing */;
26968 + if (!buffer_mapped(bh))
26969 + map_bh(bh, inode->i_sb, dbn);
26971 + GFS_ASSERT_INODE(bh->b_bdev == inode->i_sb->s_bdev &&
26972 + bh->b_blocknr == dbn,
26975 + set_buffer_uptodate(bh);
26976 + mark_buffer_dirty(bh);
26979 + unlock_page(page);
26980 + page_cache_release(page);
26984 diff -urN linux-orig/fs/gfs/page.h linux-patched/fs/gfs/page.h
26985 --- linux-orig/fs/gfs/page.h 1969-12-31 18:00:00.000000000 -0600
26986 +++ linux-patched/fs/gfs/page.h 2004-06-30 13:27:49.355708346 -0500
26988 +/******************************************************************************
26989 +*******************************************************************************
26991 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
26992 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
26994 +** This copyrighted material is made available to anyone wishing to use,
26995 +** modify, copy, or redistribute it subject to the terms and conditions
26996 +** of the GNU General Public License v.2.
26998 +*******************************************************************************
26999 +******************************************************************************/
27001 +#ifndef __PAGE_DOT_H__
27002 +#define __PAGE_DOT_H__
27004 +void gfs_inval_pte(struct gfs_glock *gl);
27005 +void gfs_inval_page(struct gfs_glock *gl);
27006 +void gfs_sync_page_i(struct inode *inode, int flags);
27007 +void gfs_sync_page(struct gfs_glock *gl, int flags);
27009 +int gfs_unstuffer_page(struct gfs_inode *ip, struct buffer_head *dibh,
27010 + uint64_t block, void *private);
27011 +int gfs_truncator_page(struct gfs_inode *ip, uint64_t size);
27013 +#endif /* __PAGE_DOT_H__ */
27014 diff -urN linux-orig/fs/gfs/quota.c linux-patched/fs/gfs/quota.c
27015 --- linux-orig/fs/gfs/quota.c 1969-12-31 18:00:00.000000000 -0600
27016 +++ linux-patched/fs/gfs/quota.c 2004-06-30 13:27:49.356708115 -0500
27018 +/******************************************************************************
27019 +*******************************************************************************
27021 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
27022 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
27024 +** This copyrighted material is made available to anyone wishing to use,
27025 +** modify, copy, or redistribute it subject to the terms and conditions
27026 +** of the GNU General Public License v.2.
27028 +*******************************************************************************
27029 +******************************************************************************/
27031 +#include <linux/sched.h>
27032 +#include <linux/slab.h>
27033 +#include <linux/smp_lock.h>
27034 +#include <linux/spinlock.h>
27035 +#include <asm/semaphore.h>
27036 +#include <linux/completion.h>
27037 +#include <linux/buffer_head.h>
27038 +#include <linux/tty.h>
27039 +#include <asm/uaccess.h>
27044 +#include "glock.h"
27045 +#include "glops.h"
27047 +#include "quota.h"
27049 +#include "super.h"
27050 +#include "trans.h"
27053 + * gfs_quota_get - Get a structure to represent a quota change
27054 + * @sdp: the filesystem
27055 + * @user: TRUE if this is a user quota
27056 + * @id: the uid or gid
27057 + * @create: if TRUE, create the structure, otherwise return NULL
27058 + * @qdp: the returned quota structure
27060 + * Returns: 0 on success, -EXXX on failure
27064 +gfs_quota_get(struct gfs_sbd *sdp, int user, uint32_t id, int create,
27065 + struct gfs_quota_data **qdp)
27067 + struct gfs_quota_data *qd = NULL, *new_qd = NULL;
27068 + struct list_head *tmp, *head;
27072 + spin_lock(&sdp->sd_quota_lock);
27074 + for (head = &sdp->sd_quota_list, tmp = head->next;
27076 + tmp = tmp->next) {
27077 + qd = list_entry(tmp, struct gfs_quota_data, qd_list);
27078 + if (qd->qd_id == id &&
27079 + !test_bit(QDF_USER, &qd->qd_flags) == !user) {
27088 + if (!qd && new_qd) {
27090 + list_add(&qd->qd_list, &sdp->sd_quota_list);
27094 + spin_unlock(&sdp->sd_quota_lock);
27096 + if (qd || !create) {
27098 + gfs_lvb_unhold(new_qd->qd_gl);
27100 + atomic_dec(&sdp->sd_quota_count);
27105 + new_qd = gmalloc(sizeof(struct gfs_quota_data));
27106 + memset(new_qd, 0, sizeof(struct gfs_quota_data));
27108 + new_qd->qd_count = 1;
27110 + new_qd->qd_id = id;
27112 + set_bit(QDF_USER, &new_qd->qd_flags);
27114 + INIT_LIST_HEAD(&new_qd->qd_le_list);
27116 + error = gfs_glock_get(sdp, 2 * (uint64_t)id + ((user) ? 0 : 1),
27117 + &gfs_quota_glops, CREATE,
27124 + error = gfs_lvb_hold(new_qd->qd_gl);
27126 + gfs_glock_put(new_qd->qd_gl);
27133 + atomic_inc(&sdp->sd_quota_count);
27143 + * gfs_quota_hold - increment the usage count on a struct gfs_quota_data
27144 + * @sdp: the filesystem
27145 + * @qd: the structure
27150 +gfs_quota_hold(struct gfs_sbd *sdp, struct gfs_quota_data *qd)
27152 + spin_lock(&sdp->sd_quota_lock);
27154 + spin_unlock(&sdp->sd_quota_lock);
27158 + * gfs_quota_put - decrement the usage count on a struct gfs_quota_data
27159 + * @sdp: the filesystem
27160 + * @qd: the structure
27162 + * Free the structure if its reference count hits zero.
27167 +gfs_quota_put(struct gfs_sbd *sdp, struct gfs_quota_data *qd)
27169 + spin_lock(&sdp->sd_quota_lock);
27170 + GFS_ASSERT_SBD(qd->qd_count, sdp,);
27172 + spin_unlock(&sdp->sd_quota_lock);
27176 + * quota_find - Find a quota change to sync to the quota file
27177 + * @sdp: the filesystem
27179 + * The returned structure is locked and needs to be unlocked
27180 + * with quota_unlock().
27182 + * Returns: A quota structure, or NULL
27185 +static struct gfs_quota_data *
27186 +quota_find(struct gfs_sbd *sdp)
27188 + struct list_head *tmp, *head;
27189 + struct gfs_quota_data *qd = NULL;
27191 + if (test_bit(SDF_ROFS, &sdp->sd_flags))
27194 + gfs_log_lock(sdp);
27195 + spin_lock(&sdp->sd_quota_lock);
27197 + if (!atomic_read(&sdp->sd_quota_od_count))
27200 + for (head = &sdp->sd_quota_list, tmp = head->next;
27202 + tmp = tmp->next) {
27203 + qd = list_entry(tmp, struct gfs_quota_data, qd_list);
27205 + if (test_bit(QDF_LOCK, &qd->qd_flags))
27207 + if (!test_bit(QDF_OD_LIST, &qd->qd_flags))
27209 + if (qd->qd_sync_gen >= sdp->sd_quota_sync_gen)
27212 + list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
27214 + set_bit(QDF_LOCK, &qd->qd_flags);
27216 + qd->qd_change_sync = qd->qd_change_od;
27224 + spin_unlock(&sdp->sd_quota_lock);
27225 + gfs_log_unlock(sdp);
27231 + * quota_trylock - Try to lock a given quota entry
27232 + * @sdp: the filesystem
27233 + * @qd: the quota data structure
27235 + * Returns: TRUE if the lock was successful, FALSE, otherwise
27239 +quota_trylock(struct gfs_sbd *sdp, struct gfs_quota_data *qd)
27243 + if (test_bit(SDF_ROFS, &sdp->sd_flags))
27246 + gfs_log_lock(sdp);
27247 + spin_lock(&sdp->sd_quota_lock);
27249 + if (test_bit(QDF_LOCK, &qd->qd_flags))
27251 + if (!test_bit(QDF_OD_LIST, &qd->qd_flags))
27254 + list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
27256 + set_bit(QDF_LOCK, &qd->qd_flags);
27258 + qd->qd_change_sync = qd->qd_change_od;
27263 + spin_unlock(&sdp->sd_quota_lock);
27264 + gfs_log_unlock(sdp);
27270 + * quota_unlock - drop and a reference on a quota structure
27271 + * @sdp: the filesystem
27272 + * @qd: the quota inode structure
27277 +quota_unlock(struct gfs_sbd *sdp, struct gfs_quota_data *qd)
27279 + spin_lock(&sdp->sd_quota_lock);
27281 + GFS_ASSERT_SBD(test_bit(QDF_LOCK, &qd->qd_flags), sdp,);
27282 + clear_bit(QDF_LOCK, &qd->qd_flags);
27284 + GFS_ASSERT_SBD(qd->qd_count, sdp,);
27287 + spin_unlock(&sdp->sd_quota_lock);
27291 + * gfs_quota_merge - add/remove a quota change from the in-memory list
27292 + * @sdp: the filesystem
27293 + * @tag: the quota change tag
27295 + * Returns: 0 on success, -EXXX on failure
27299 +gfs_quota_merge(struct gfs_sbd *sdp, struct gfs_quota_tag *tag)
27301 + struct gfs_quota_data *qd;
27304 + error = gfs_quota_get(sdp,
27305 + tag->qt_flags & GFS_QTF_USER, tag->qt_id,
27310 + GFS_ASSERT_SBD(qd->qd_change_ic == qd->qd_change_od, sdp,);
27312 + gfs_log_lock(sdp);
27314 + qd->qd_change_ic += tag->qt_change;
27315 + qd->qd_change_od += tag->qt_change;
27317 + if (qd->qd_change_od) {
27318 + if (!test_bit(QDF_OD_LIST, &qd->qd_flags)) {
27319 + gfs_quota_hold(sdp, qd);
27320 + set_bit(QDF_OD_LIST, &qd->qd_flags);
27321 + atomic_inc(&sdp->sd_quota_od_count);
27324 + GFS_ASSERT_SBD(test_bit(QDF_OD_LIST, &qd->qd_flags), sdp,);
27325 + clear_bit(QDF_OD_LIST, &qd->qd_flags);
27326 + gfs_quota_put(sdp, qd);
27327 + GFS_ASSERT_SBD(atomic_read(&sdp->sd_quota_od_count), sdp,);
27328 + atomic_dec(&sdp->sd_quota_od_count);
27331 + gfs_log_unlock(sdp);
27333 + gfs_quota_put(sdp, qd);
27339 + * gfs_quota_scan - Look for unused struct gfs_quota_data structures to throw away
27340 + * @sdp: the filesystem
27345 +gfs_quota_scan(struct gfs_sbd *sdp)
27347 + struct list_head *head, *tmp, *next;
27348 + struct gfs_quota_data *qd;
27351 + spin_lock(&sdp->sd_quota_lock);
27353 + for (head = &sdp->sd_quota_list, tmp = head->next, next = tmp->next;
27355 + tmp = next, next = next->next) {
27356 + qd = list_entry(tmp, struct gfs_quota_data, qd_list);
27357 + if (!qd->qd_count)
27358 + list_move(&qd->qd_list, &dead);
27361 + spin_unlock(&sdp->sd_quota_lock);
27363 + while (!list_empty(&dead)) {
27364 + qd = list_entry(dead.next, struct gfs_quota_data, qd_list);
27366 + GFS_ASSERT_SBD(!qd->qd_count, sdp,);
27367 + GFS_ASSERT_SBD(!test_bit(QDF_OD_LIST, &qd->qd_flags) &&
27368 + !test_bit(QDF_LOCK, &qd->qd_flags), sdp,);
27369 + GFS_ASSERT_SBD(!qd->qd_change_new && !qd->qd_change_ic &&
27370 + !qd->qd_change_od, sdp,);
27372 + list_del(&qd->qd_list);
27373 + gfs_lvb_unhold(qd->qd_gl);
27375 + atomic_dec(&sdp->sd_quota_count);
27380 + * gfs_quota_cleanup - get rid of any extra struct gfs_quota_data structures
27381 + * @sdp: the filesystem
27386 +gfs_quota_cleanup(struct gfs_sbd *sdp)
27388 + struct gfs_quota_data *qd;
27391 + gfs_log_lock(sdp);
27393 + spin_lock(&sdp->sd_quota_lock);
27395 + while (!list_empty(&sdp->sd_quota_list)) {
27396 + qd = list_entry(sdp->sd_quota_list.next,
27397 + struct gfs_quota_data,
27400 + if (qd->qd_count > 1) {
27401 + spin_unlock(&sdp->sd_quota_lock);
27402 + gfs_log_unlock(sdp);
27403 + current->state = TASK_UNINTERRUPTIBLE;
27404 + schedule_timeout(HZ);
27407 + } else if (qd->qd_count) {
27408 + GFS_ASSERT_SBD(test_bit(QDF_OD_LIST, &qd->qd_flags) &&
27409 + !test_bit(QDF_LOCK, &qd->qd_flags),
27411 + GFS_ASSERT_SBD(qd->qd_change_od &&
27412 + qd->qd_change_od == qd->qd_change_ic,
27414 + GFS_ASSERT_SBD(!qd->qd_change_new, sdp,);
27416 + list_del(&qd->qd_list);
27417 + atomic_dec(&sdp->sd_quota_od_count);
27419 + spin_unlock(&sdp->sd_quota_lock);
27420 + gfs_lvb_unhold(qd->qd_gl);
27422 + atomic_dec(&sdp->sd_quota_count);
27423 + spin_lock(&sdp->sd_quota_lock);
27426 + GFS_ASSERT_SBD(!test_bit(QDF_OD_LIST, &qd->qd_flags) &&
27427 + !test_bit(QDF_LOCK, &qd->qd_flags), sdp,);
27428 + GFS_ASSERT_SBD(!qd->qd_change_new &&
27429 + !qd->qd_change_ic &&
27430 + !qd->qd_change_od, sdp,);
27432 + list_del(&qd->qd_list);
27434 + spin_unlock(&sdp->sd_quota_lock);
27435 + gfs_lvb_unhold(qd->qd_gl);
27437 + atomic_dec(&sdp->sd_quota_count);
27438 + spin_lock(&sdp->sd_quota_lock);
27442 + spin_unlock(&sdp->sd_quota_lock);
27444 + GFS_ASSERT_SBD(!atomic_read(&sdp->sd_quota_od_count), sdp,);
27446 + gfs_log_unlock(sdp);
27450 + * sort_qd - figure out the order between two quota data structures
27451 + * @a: first quota data structure
27452 + * @b: second quota data structure
27454 + * Returns: -1 if @a comes before @b, 0 if @a equals @b, 1 if @b comes before @a
27458 +sort_qd(const void *a, const void *b)
27460 + struct gfs_quota_data *qd_a = *(struct gfs_quota_data **)a;
27461 + struct gfs_quota_data *qd_b = *(struct gfs_quota_data **)b;
27464 + if (!test_bit(QDF_USER, &qd_a->qd_flags) !=
27465 + !test_bit(QDF_USER, &qd_b->qd_flags)) {
27466 + if (test_bit(QDF_USER, &qd_a->qd_flags))
27471 + if (qd_a->qd_id < qd_b->qd_id)
27473 + else if (qd_a->qd_id > qd_b->qd_id)
27481 + * do_quota_sync - Sync a bunch quota changes to the quota file
27482 + * @sdp: the filesystem
27483 + * @qda: an array of struct gfs_quota_data structures to be synced
27484 + * @num_qd: the number of elements in @qda
27486 + * Returns: 0 on success, -EXXX on failure
27490 +do_quota_sync(struct gfs_sbd *sdp, struct gfs_quota_data **qda,
27491 + unsigned int num_qd)
27493 + struct gfs_inode *ip = sdp->sd_qinode;
27494 + struct gfs_alloc *al = NULL;
27495 + struct gfs_holder i_gh, *ghs;
27496 + struct gfs_quota q;
27497 + char buf[sizeof(struct gfs_quota)];
27499 + unsigned int qx, x;
27501 + unsigned int nalloc = 0;
27502 + unsigned int data_blocks, ind_blocks;
27505 + gfs_write_calc_reserv(ip, sizeof(struct gfs_quota), &data_blocks,
27508 + ghs = gmalloc(num_qd * sizeof(struct gfs_holder));
27510 + gfs_sort(qda, num_qd, sizeof (struct gfs_quota_data *), sort_qd);
27511 + for (qx = 0; qx < num_qd; qx++) {
27512 + error = gfs_glock_nq_init(qda[qx]->qd_gl,
27514 + GL_NOCACHE, &ghs[qx]);
27519 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
27523 + for (x = 0; x < num_qd; x++) {
27524 + offset = (2 * (uint64_t)qda[x]->qd_id +
27525 + ((test_bit(QDF_USER, &qda[x]->qd_flags)) ? 0 : 1)) *
27526 + sizeof(struct gfs_quota);
27528 + error = gfs_write_alloc_required(ip, offset,
27529 + sizeof(struct gfs_quota),
27532 + goto fail_gunlock;
27539 + al = gfs_alloc_get(ip);
27542 + gfs_quota_hold_m(ip, NO_QUOTA_CHANGE,
27543 + NO_QUOTA_CHANGE);
27547 + al->al_requested_meta = nalloc * (data_blocks + ind_blocks);
27549 + error = gfs_inplace_reserve(ip);
27553 + /* Trans may require:
27554 + two (journaled) data blocks, a dinode block, RG bitmaps to allocate from,
27555 + indirect blocks, and a quota block */
27557 + error = gfs_trans_begin(sdp,
27558 + 1 + al->al_rgd->rd_ri.ri_length +
27559 + num_qd * data_blocks +
27560 + nalloc * ind_blocks,
27561 + gfs_struct2blk(sdp, num_qd + 2,
27562 + sizeof(struct gfs_quota_tag)));
27566 + /* Trans may require:
27567 + Data blocks, a dinode block, and quota blocks */
27569 + error = gfs_trans_begin(sdp,
27570 + 1 + data_blocks * num_qd,
27571 + gfs_struct2blk(sdp, num_qd,
27572 + sizeof(struct gfs_quota_tag)));
27574 + goto fail_gunlock;
27577 + for (x = 0; x < num_qd; x++) {
27578 + offset = (2 * (uint64_t)qda[x]->qd_id +
27579 + ((test_bit(QDF_USER, &qda[x]->qd_flags)) ? 0 : 1)) *
27580 + sizeof(struct gfs_quota);
27582 + /* The quota file may not be a multiple of sizeof(struct gfs_quota) bytes. */
27583 + memset(buf, 0, sizeof(struct gfs_quota));
27585 + error = gfs_internal_read(ip, buf, offset,
27586 + sizeof(struct gfs_quota));
27588 + goto fail_end_trans;
27590 + gfs_quota_in(&q, buf);
27591 + q.qu_value += qda[x]->qd_change_sync;
27592 + gfs_quota_out(&q, buf);
27594 + error = gfs_internal_write(ip, buf, offset,
27595 + sizeof(struct gfs_quota));
27597 + goto fail_end_trans;
27598 + else if (error != sizeof(struct gfs_quota)) {
27600 + goto fail_end_trans;
27603 + if (test_bit(QDF_USER, &qda[x]->qd_flags))
27604 + gfs_trans_add_quota(sdp, -qda[x]->qd_change_sync,
27605 + qda[x]->qd_id, NO_QUOTA_CHANGE);
27607 + gfs_trans_add_quota(sdp, -qda[x]->qd_change_sync,
27608 + NO_QUOTA_CHANGE, qda[x]->qd_id);
27610 + memset(&qda[x]->qd_qb, 0, sizeof(struct gfs_quota_lvb));
27611 + qda[x]->qd_qb.qb_magic = GFS_MAGIC;
27612 + qda[x]->qd_qb.qb_limit = q.qu_limit;
27613 + qda[x]->qd_qb.qb_warn = q.qu_warn;
27614 + qda[x]->qd_qb.qb_value = q.qu_value;
27616 + gfs_quota_lvb_out(&qda[x]->qd_qb, qda[x]->qd_gl->gl_lvb);
27617 + clear_bit(GLF_LVB_INVALID, &qda[x]->qd_gl->gl_flags);
27620 + gfs_trans_end(sdp);
27623 + GFS_ASSERT_SBD(al->al_alloced_meta, sdp,);
27624 + gfs_inplace_release(ip);
27625 + gfs_quota_unhold_m(ip);
27626 + gfs_alloc_put(ip);
27629 + gfs_glock_dq_uninit(&i_gh);
27631 + for (x = 0; x < num_qd; x++)
27632 + gfs_glock_dq_uninit(&ghs[x]);
27636 + gfs_log_flush_glock(ip->i_gl);
27641 + gfs_trans_end(sdp);
27645 + gfs_inplace_release(ip);
27649 + gfs_quota_unhold_m(ip);
27653 + gfs_alloc_put(ip);
27656 + gfs_glock_dq_uninit(&i_gh);
27660 + gfs_glock_dq_uninit(&ghs[qx]);
27668 + * glock_q - Acquire a lock for a quota entry
27669 + * @sdp: the filesystem
27670 + * @qd: the quota data structure to glock
27671 + * @force_refresh: If TRUE, always read from the quota file
27672 + * @q_gh: the glock holder for the quota lock
27674 + * Returns: 0 on success, -EXXX on failure
27678 +glock_q(struct gfs_sbd *sdp, struct gfs_quota_data *qd, int force_refresh,
27679 + struct gfs_holder *q_gh)
27681 + struct gfs_holder i_gh;
27682 + struct gfs_quota q;
27683 + char buf[sizeof(struct gfs_quota)];
27687 + error = gfs_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh);
27691 + gfs_quota_lvb_in(&qd->qd_qb, qd->qd_gl->gl_lvb);
27693 + if (force_refresh ||
27694 + qd->qd_qb.qb_magic != GFS_MAGIC ||
27695 + test_bit(GLF_LVB_INVALID, &qd->qd_gl->gl_flags)) {
27696 + gfs_glock_dq_uninit(q_gh);
27697 + error = gfs_glock_nq_init(qd->qd_gl,
27698 + LM_ST_EXCLUSIVE, GL_NOCACHE,
27703 + error = gfs_glock_nq_init(sdp->sd_qinode->i_gl,
27709 + memset(buf, 0, sizeof(struct gfs_quota));
27711 + error = gfs_internal_read(sdp->sd_qinode, buf,
27712 + (2 * (uint64_t)qd->qd_id +
27713 + ((test_bit(QDF_USER, &qd->qd_flags)) ? 0 : 1)) *
27714 + sizeof(struct gfs_quota),
27715 + sizeof(struct gfs_quota));
27717 + goto fail_gunlock;
27719 + gfs_glock_dq_uninit(&i_gh);
27721 + gfs_quota_in(&q, buf);
27723 + memset(&qd->qd_qb, 0, sizeof(struct gfs_quota_lvb));
27724 + qd->qd_qb.qb_magic = GFS_MAGIC;
27725 + qd->qd_qb.qb_limit = q.qu_limit;
27726 + qd->qd_qb.qb_warn = q.qu_warn;
27727 + qd->qd_qb.qb_value = q.qu_value;
27729 + gfs_quota_lvb_out(&qd->qd_qb, qd->qd_gl->gl_lvb);
27730 + clear_bit(GLF_LVB_INVALID, &qd->qd_gl->gl_flags);
27732 + gfs_glock_dq_uninit(q_gh);
27733 + force_refresh = FALSE;
27740 + gfs_glock_dq_uninit(&i_gh);
27743 + gfs_glock_dq_uninit(q_gh);
27749 + * gfs_quota_hold_m - Hold the quota structures for up to 4 IDs
27750 + * @ip: Two of the IDs are the UID and GID from this file
27751 + * @uid: a UID or the constant NO_QUOTA_CHANGE
27752 + * @gid: a GID or the constant NO_QUOTA_CHANGE
27754 + * The struct gfs_quota_data structures representing the locks are
27755 + * stored in the ip->i_alloc->al_qd array.
27757 + * Returns: 0 on success, -EXXX on failure
27761 +gfs_quota_hold_m(struct gfs_inode *ip, uint32_t uid, uint32_t gid)
27763 + struct gfs_sbd *sdp = ip->i_sbd;
27764 + struct gfs_alloc *al = ip->i_alloc;
27765 + unsigned int x = 0;
27768 + GFS_ASSERT_INODE(al && !al->al_qd_num &&
27769 + !test_bit(GIF_QD_LOCKED, &ip->i_flags), ip,);
27771 + if (!sdp->sd_tune.gt_quota_account)
27774 + error = gfs_quota_get(sdp, TRUE, ip->i_di.di_uid,
27775 + CREATE, &al->al_qd[x]);
27780 + error = gfs_quota_get(sdp, FALSE, ip->i_di.di_gid,
27781 + CREATE, &al->al_qd[x]);
27786 + if (uid != NO_QUOTA_CHANGE) {
27787 + error = gfs_quota_get(sdp, TRUE, uid,
27788 + CREATE, &al->al_qd[x]);
27794 + if (gid != NO_QUOTA_CHANGE) {
27795 + error = gfs_quota_get(sdp, FALSE, gid,
27796 + CREATE, &al->al_qd[x]);
27802 + al->al_qd_num = x;
27808 + al->al_qd_num = x;
27809 + gfs_quota_unhold_m(ip);
27816 + * gfs_quota_unhold_m - throw away some quota locks
27817 + * @ip: the inode whose ip->i_alloc->al_qd array holds the structures
27822 +gfs_quota_unhold_m(struct gfs_inode *ip)
27824 + struct gfs_sbd *sdp = ip->i_sbd;
27825 + struct gfs_alloc *al = ip->i_alloc;
27828 + GFS_ASSERT_INODE(al &&
27829 + !test_bit(GIF_QD_LOCKED, &ip->i_flags), ip,);
27831 + for (x = 0; x < al->al_qd_num; x++) {
27832 + gfs_quota_put(sdp, al->al_qd[x]);
27833 + al->al_qd[x] = NULL;
27835 + al->al_qd_num = 0;
27839 + * gfs_quota_lock_m - Acquire the quota locks for up to 4 IDs
27840 + * @ip: Two of the IDs are the UID and GID from this file
27841 + * @uid: a UID or the constant NO_QUOTA_CHANGE
27842 + * @gid: a GID or the constant NO_QUOTA_CHANGE
27844 + * The struct gfs_quota_data structures representing the locks are
27845 + * stored in the ip->i_alloc->al_qd array.
27847 + * Returns: 0 on success, -EXXX on failure
27851 +gfs_quota_lock_m(struct gfs_inode *ip, uint32_t uid, uint32_t gid)
27853 + struct gfs_sbd *sdp = ip->i_sbd;
27854 + struct gfs_alloc *al = ip->i_alloc;
27858 + gfs_quota_hold_m(ip, uid, gid);
27860 + if (!sdp->sd_tune.gt_quota_enforce)
27862 + if (capable(CAP_SYS_RESOURCE))
27865 + gfs_sort(al->al_qd, al->al_qd_num,
27866 + sizeof(struct gfs_quota_data *), sort_qd);
27868 + for (x = 0; x < al->al_qd_num; x++) {
27869 + error = glock_q(sdp, al->al_qd[x], FALSE, &al->al_qd_ghs[x]);
27874 + set_bit(GIF_QD_LOCKED, &ip->i_flags);
27880 + gfs_glock_dq_uninit(&al->al_qd_ghs[x]);
27886 + * gfs_quota_unlock_m - drop some quota locks
27887 + * @ip: the inode whose ip->i_alloc->al_qd array holds the locks
27892 +gfs_quota_unlock_m(struct gfs_inode *ip)
27894 + struct gfs_sbd *sdp = ip->i_sbd;
27895 + struct gfs_alloc *al = ip->i_alloc;
27896 + struct gfs_quota_data *qd, *qda[4];
27898 + unsigned int count = 0;
27902 + if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags))
27905 + for (x = 0; x < al->al_qd_num; x++) {
27906 + qd = al->al_qd[x];
27908 + spin_lock(&sdp->sd_quota_lock);
27909 + value = qd->qd_change_new + qd->qd_change_ic;
27910 + spin_unlock(&sdp->sd_quota_lock);
27913 + if (!qd->qd_qb.qb_limit)
27915 + else if (qd->qd_qb.qb_value >= (int64_t)qd->qd_qb.qb_limit)
27919 + v = value * gfs_num_journals(sdp) * sdp->sd_tune.gt_quota_scale_num;
27920 + do_div(v, sdp->sd_tune.gt_quota_scale_den);
27921 + v += qd->qd_qb.qb_value;
27922 + if (v < (int64_t)qd->qd_qb.qb_limit)
27926 + gfs_glock_dq_uninit(&al->al_qd_ghs[x]);
27929 + gfs_log_flush(sdp);
27930 + if (quota_trylock(sdp, qd))
27931 + qda[count++] = qd;
27936 + do_quota_sync(sdp, qda, count);
27938 + for (x = 0; x < count; x++)
27939 + quota_unlock(sdp, qda[x]);
27943 + gfs_quota_unhold_m(ip);
27947 + * print_quota_message - print a message to the user's tty about quotas
27948 + * @sdp: the filesystem
27949 + * @qd: the quota ID that the message is about
27950 + * @type: the type of message ("exceeded" or "warning")
27955 +print_quota_message(struct gfs_sbd *sdp, struct gfs_quota_data *qd, char *type)
27957 + char *line = gmalloc(256);
27959 + struct tty_struct *tty;
27961 + len = snprintf(line, 256, "GFS: fsid=%s: quota %s for %s %u\r\n",
27962 + sdp->sd_fsname, type,
27963 + (test_bit(QDF_USER, &qd->qd_flags)) ? "user" : "group",
27966 + if (current->signal) {
27967 + tty = current->signal->tty;
27968 + if (tty && tty->driver->write)
27969 + tty->driver->write(tty, 0, line, len);
27976 + * gfs_quota_check - Check to see if a block allocation is possible
27977 + * @ip: the inode whose ip->i_res.ir_qd array holds the quota locks
27978 + * @uid: the UID the block is allocated for
27979 + * @gid: the GID the block is allocated for
27984 +gfs_quota_check(struct gfs_inode *ip, uint32_t uid, uint32_t gid)
27986 + struct gfs_sbd *sdp = ip->i_sbd;
27987 + struct gfs_alloc *al = ip->i_alloc;
27988 + struct gfs_quota_data *qd;
27996 + for (x = 0; x < al->al_qd_num; x++) {
27997 + qd = al->al_qd[x];
27999 + if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
28000 + (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))))
28003 + spin_lock(&sdp->sd_quota_lock);
28004 + value = qd->qd_change_new + qd->qd_change_ic;
28005 + spin_unlock(&sdp->sd_quota_lock);
28006 + value += qd->qd_qb.qb_value;
28008 + if (qd->qd_qb.qb_limit && (int64_t)qd->qd_qb.qb_limit < value) {
28009 + print_quota_message(sdp, qd, "exceeded");
28012 + } else if (qd->qd_qb.qb_warn &&
28013 + (int64_t)qd->qd_qb.qb_warn < value &&
28014 + time_after_eq(jiffies,
28015 + qd->qd_last_warn +
28016 + sdp->sd_tune.gt_quota_warn_period * HZ)) {
28017 + print_quota_message(sdp, qd, "warning");
28018 + qd->qd_last_warn = jiffies;
28026 + * gfs_quota_sync - Sync quota changes to the quota file
28027 + * @sdp: the filesystem
28029 + * Returns: 0 on success, -EXXX on failure
28033 +gfs_quota_sync(struct gfs_sbd *sdp)
28035 + struct gfs_quota_data **qda;
28036 + unsigned int max_qd = sdp->sd_tune.gt_quota_simul_sync;
28037 + unsigned int num_qd;
28041 + sdp->sd_quota_sync_gen++;
28043 + qda = gmalloc(max_qd * sizeof(struct gfs_quota_data *));
28045 + memset(qda, 0, max_qd * sizeof(struct gfs_quota_data *));
28051 + qda[num_qd] = quota_find(sdp);
28052 + if (!qda[num_qd])
28055 + if (++num_qd == max_qd)
28060 + error = do_quota_sync(sdp, qda, num_qd);
28062 + for (x = 0; x < num_qd; x++)
28063 + qda[x]->qd_sync_gen =
28064 + sdp->sd_quota_sync_gen;
28066 + for (x = 0; x < num_qd; x++)
28067 + quota_unlock(sdp, qda[x]);
28070 + while (!error && num_qd == max_qd);
28078 + * gfs_quota_refresh - Refresh the LVB for a given quota ID
28079 + * @sdp: the filesystem
28080 + * @arg: a pointer to a struct gfs_quota_name in user space
28082 + * Returns: 0 on success, -EXXX on failure
28086 +gfs_quota_refresh(struct gfs_sbd *sdp, void *arg)
28088 + struct gfs_quota_name qn;
28089 + struct gfs_quota_data *qd;
28090 + struct gfs_holder q_gh;
28093 + if (copy_from_user(&qn, arg, sizeof(struct gfs_quota_name)))
28096 + error = gfs_quota_get(sdp, qn.qn_user, qn.qn_id, CREATE, &qd);
28100 + error = glock_q(sdp, qd, TRUE, &q_gh);
28102 + gfs_glock_dq_uninit(&q_gh);
28104 + gfs_quota_put(sdp, qd);
28110 + * gfs_quota_read - Read the info for a given quota ID
28111 + * @sdp: the filesystem
28112 + * @arg: a pointer to a gfs_quota_refresh_t in user space
28114 + * Returns: 0 on success, -EXXX on failure
28118 +gfs_quota_read(struct gfs_sbd *sdp, void *arg)
28120 + struct gfs_quota_name qn;
28121 + struct gfs_quota_data *qd;
28122 + struct gfs_holder q_gh;
28123 + struct gfs_quota q;
28126 + if (copy_from_user(&qn, arg, sizeof(struct gfs_quota_name)))
28129 + if (((qn.qn_user) ?
28130 + (qn.qn_id != current->fsuid) :
28131 + (!in_group_p(qn.qn_id))) &&
28132 + !capable(CAP_SYS_ADMIN))
28135 + error = gfs_quota_get(sdp, qn.qn_user, qn.qn_id, CREATE, &qd);
28139 + error = glock_q(sdp, qd, FALSE, &q_gh);
28143 + memset(&q, 0, sizeof(struct gfs_quota));
28144 + q.qu_limit = qd->qd_qb.qb_limit;
28145 + q.qu_warn = qd->qd_qb.qb_warn;
28146 + q.qu_value = qd->qd_qb.qb_value;
28148 + spin_lock(&sdp->sd_quota_lock);
28149 + q.qu_value += qd->qd_change_new + qd->qd_change_ic;
28150 + spin_unlock(&sdp->sd_quota_lock);
28152 + gfs_glock_dq_uninit(&q_gh);
28155 + gfs_quota_put(sdp, qd);
28158 + copy_to_user((char *)arg + sizeof(struct gfs_quota_name),
28159 + &q, sizeof(struct gfs_quota)))
28164 diff -urN linux-orig/fs/gfs/quota.h linux-patched/fs/gfs/quota.h
28165 --- linux-orig/fs/gfs/quota.h 1969-12-31 18:00:00.000000000 -0600
28166 +++ linux-patched/fs/gfs/quota.h 2004-06-30 13:27:49.356708115 -0500
28168 +/******************************************************************************
28169 +*******************************************************************************
28171 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
28172 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
28174 +** This copyrighted material is made available to anyone wishing to use,
28175 +** modify, copy, or redistribute it subject to the terms and conditions
28176 +** of the GNU General Public License v.2.
28178 +*******************************************************************************
28179 +******************************************************************************/
28181 +#ifndef __QUOTA_DOT_H__
28182 +#define __QUOTA_DOT_H__
28184 +#define NO_QUOTA_CHANGE ((uint32_t)-1)
28186 +int gfs_quota_get(struct gfs_sbd *sdp, int user, uint32_t id, int create,
28187 + struct gfs_quota_data **qdp);
28188 +void gfs_quota_hold(struct gfs_sbd *sdp, struct gfs_quota_data *qd);
28189 +void gfs_quota_put(struct gfs_sbd *sdp, struct gfs_quota_data *qd);
28191 +int gfs_quota_merge(struct gfs_sbd *sdp, struct gfs_quota_tag *tag);
28192 +void gfs_quota_scan(struct gfs_sbd *sdp);
28193 +void gfs_quota_cleanup(struct gfs_sbd *sdp);
28195 +int gfs_quota_hold_m(struct gfs_inode *ip, uint32_t uid, uint32_t gid);
28196 +void gfs_quota_unhold_m(struct gfs_inode *ip);
28198 +int gfs_quota_lock_m(struct gfs_inode *ip, uint32_t uid, uint32_t gid);
28199 +void gfs_quota_unlock_m(struct gfs_inode *ip);
28201 +int gfs_quota_check(struct gfs_inode *ip, uint32_t uid, uint32_t gid);
28203 +int gfs_quota_sync(struct gfs_sbd *sdp);
28204 +int gfs_quota_refresh(struct gfs_sbd *sdp, void *arg);
28205 +int gfs_quota_read(struct gfs_sbd *sdp, void *arg);
28207 +#endif /* __QUOTA_DOT_H__ */
28208 diff -urN linux-orig/fs/gfs/recovery.c linux-patched/fs/gfs/recovery.c
28209 --- linux-orig/fs/gfs/recovery.c 1969-12-31 18:00:00.000000000 -0600
28210 +++ linux-patched/fs/gfs/recovery.c 2004-06-30 13:27:49.357707883 -0500
28212 +/******************************************************************************
28213 +*******************************************************************************
28215 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
28216 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
28218 +** This copyrighted material is made available to anyone wishing to use,
28219 +** modify, copy, or redistribute it subject to the terms and conditions
28220 +** of the GNU General Public License v.2.
28222 +*******************************************************************************
28223 +******************************************************************************/
28225 +#include <linux/sched.h>
28226 +#include <linux/slab.h>
28227 +#include <linux/smp_lock.h>
28228 +#include <linux/spinlock.h>
28229 +#include <asm/semaphore.h>
28230 +#include <linux/completion.h>
28231 +#include <linux/buffer_head.h>
28235 +#include "glock.h"
28236 +#include "glops.h"
28238 +#include "recovery.h"
28240 +#define bn2seg(bn) (((uint32_t)((bn) - jdesc->ji_addr)) / sdp->sd_sb.sb_seg_size)
28241 +#define seg2bn(seg) ((seg) * sdp->sd_sb.sb_seg_size + jdesc->ji_addr)
28244 + struct list_head dj_list;
28245 + unsigned int dj_jid;
28246 + struct gfs_jindex dj_desc;
28250 + * gfs_add_dirty_j - add a jid to the list of dirty journals
28251 + * @sdp: the filesystem
28252 + * @jid: the journal ID number
28257 +gfs_add_dirty_j(struct gfs_sbd *sdp, unsigned int jid)
28259 + struct dirty_j *dj;
28261 + dj = gmalloc(sizeof(struct dirty_j));
28262 + memset(dj, 0, sizeof(struct dirty_j));
28264 + dj->dj_jid = jid;
28266 + spin_lock(&sdp->sd_dirty_j_lock);
28267 + list_add(&dj->dj_list, &sdp->sd_dirty_j);
28268 + spin_unlock(&sdp->sd_dirty_j_lock);
28272 + * get_dirty_j - return a dirty journal from the list
28273 + * @sdp: the filesystem
28275 + * Returns: a struct dirty_j or NULL
28278 +static struct dirty_j *
28279 +get_dirty_j(struct gfs_sbd *sdp)
28281 + struct dirty_j *dj = NULL;
28283 + spin_lock(&sdp->sd_dirty_j_lock);
28284 + if (!list_empty(&sdp->sd_dirty_j)) {
28285 + dj = list_entry(sdp->sd_dirty_j.prev, struct dirty_j, dj_list);
28286 + list_del(&dj->dj_list);
28288 + spin_unlock(&sdp->sd_dirty_j_lock);
28294 + * gfs_clear_dirty_j - destroy the list of dirty journals
28295 + * @sdp: the filesystem
28300 +gfs_clear_dirty_j(struct gfs_sbd *sdp)
28302 + struct dirty_j *dj;
28304 + dj = get_dirty_j(sdp);
28312 + * gfs_log_header - read the log header for a given segment
28313 + * @sdp: the filesystem
28314 + * @jdesc: the journal
28315 + * @gl: the journal's glock
28316 + * @seg: the segment to look at
28317 + * @lh: the log header to return
28319 + * Read the log header for a given segment in a given journal. Do a few
28320 + * sanity checks on it.
28322 + * Returns: 0 on success, 1 if the header was invalid or incomplete, and -EXXX on error
28326 +get_log_header(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
28327 + struct gfs_glock *gl, uint32_t seg, struct gfs_log_header *lh)
28329 + struct buffer_head *bh;
28330 + struct gfs_log_header lh2;
28333 + error = gfs_dread(sdp, seg2bn(seg), gl, DIO_START | DIO_WAIT, &bh);
28337 + gfs_log_header_in(lh, bh->b_data);
28338 + gfs_log_header_in(&lh2,
28339 + bh->b_data + GFS_BASIC_BLOCK -
28340 + sizeof(struct gfs_log_header));
28344 + if (memcmp(lh, &lh2, sizeof(struct gfs_log_header)) != 0 ||
28345 + lh->lh_header.mh_magic != GFS_MAGIC ||
28346 + lh->lh_header.mh_type != GFS_METATYPE_LH)
28353 + * find_good_lh - find a good log header
28354 + * @sdp: the filesystem
28355 + * @jdesc: the journal
28356 + * @gl: the journal's glock
28357 + * @seg: the segment to start searching from (it's also filled in with a new value.)
28358 + * @lh: the log header to fill in
28359 + * @forward: if true search forward in the log, else search backward
28361 + * Call get_log_header() to get a log header for a segment, but if the
28362 + * segment is bad, either scan forward or backward until we find a good one.
28364 + * Returns: 0 on success, -EXXX on failure
28368 +find_good_lh(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
28369 + struct gfs_glock *gl, uint32_t *seg, struct gfs_log_header *lh,
28373 + uint32_t orig_seg = *seg;
28376 + error = get_log_header(sdp, jdesc, gl, *seg, lh);
28381 + if (++*seg == jdesc->ji_nsegment)
28385 + *seg = jdesc->ji_nsegment - 1;
28388 + GFS_ASSERT_SBD(*seg != orig_seg, sdp,);
28393 + * verify_jhead - make sure we've found the head of the log
28394 + * @sdp: the filesystem
28395 + * @jdesc: the journal
28396 + * @gl: the journal's glock
28397 + * @head: this is filled in with the log descriptor of the head
28399 + * At this point, seg and lh should be either the head of the log or just
28400 + * before. Scan forward until we find the head.
28402 + * Returns: 0 on success, -EXXX on failure
28406 +verify_jhead(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
28407 + struct gfs_glock *gl, struct gfs_log_header *head)
28409 + struct gfs_log_header lh;
28413 + seg = bn2seg(head->lh_first);
28416 + if (++seg == jdesc->ji_nsegment)
28419 + error = get_log_header(sdp, jdesc, gl, seg, &lh);
28425 + if (lh.lh_sequence == head->lh_sequence)
28428 + if (lh.lh_sequence < head->lh_sequence)
28431 + memcpy(head, &lh, sizeof(struct gfs_log_header));
28438 + * gfs_find_jhead - find the head of a log
28439 + * @sdp: the filesystem
28440 + * @jdesc: the journal
28441 + * @gl: the journal's glock
28442 + * @head: the log descriptor for the head of the log is returned here
28444 + * Do a binary search of a journal and find the valid log entry with the
28445 + * highest sequence number. (i.e. the log head)
28447 + * Returns: 0 on success, -EXXX on failure
28451 +gfs_find_jhead(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
28452 + struct gfs_glock *gl, struct gfs_log_header *head)
28454 + struct gfs_log_header lh1, lh_m;
28455 + uint32_t seg1, seg2, seg_m;
28459 + seg2 = jdesc->ji_nsegment - 1;
28462 + seg_m = (seg1 + seg2) / 2;
28464 + error = find_good_lh(sdp, jdesc, gl, &seg1, &lh1, TRUE);
28468 + if (seg1 == seg_m) {
28469 + error = verify_jhead(sdp, jdesc, gl, &lh1);
28470 + memcpy(head, &lh1, sizeof(struct gfs_log_header));
28474 + error = find_good_lh(sdp, jdesc, gl, &seg_m, &lh_m, FALSE);
28478 + if (lh1.lh_sequence <= lh_m.lh_sequence)
28488 + * gfs_increment_blkno - move to the next block in a journal
28489 + * @sdp: the filesystem
28490 + * @jdesc: the journal
28491 + * @gl: the journal's glock
28492 + * @addr: the block number to increment
28493 + * @skip_header: if this is TRUE, skip log headers
28495 + * Replace @addr with the location of the next block in the log.
28496 + * Take care of journal wrap and skip of log header if necessary.
28498 + * Returns: 0 on success, -EXXX on failure
28502 +gfs_increment_blkno(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
28503 + struct gfs_glock *gl, uint64_t *addr, int skip_headers)
28505 + struct gfs_log_header header;
28510 + /* Handle journal wrap */
28512 + if (*addr == seg2bn(jdesc->ji_nsegment))
28513 + *addr -= jdesc->ji_nsegment * sdp->sd_sb.sb_seg_size;
28515 + gfs_start_ra(gl, *addr,
28517 + jdesc->ji_nsegment * sdp->sd_sb.sb_seg_size - *addr);
28519 + /* Handle landing on a header block */
28521 + if (skip_headers && !do_mod(*addr, sdp->sd_sb.sb_seg_size)) {
28522 + error = get_log_header(sdp, jdesc, gl, bn2seg(*addr), &header);
28526 + GFS_ASSERT_SBD(!error, sdp,); /* Corrupt headers here are bad */
28527 + GFS_ASSERT_SBD(header.lh_first != *addr, sdp,
28528 + gfs_log_header_print(&header);
28529 + printk("*addr = %"PRIu64"\n", *addr););
28532 + /* Can't wrap here */
28539 + * foreach_descriptor - go through the active part of the log
28540 + * @sdp: the filesystem
28541 + * @jdesc: the journal
28542 + * @gl: the journal's glock
28543 + * @start: the first log header in the active region
28544 + * @end: the last log header (don't process the contents of this entry))
28545 + * @pass: the recovery pass
28547 + * Call a given function once for every log descriptor in the active
28548 + * portion of the log.
28550 + * Returns: 0 on success, -EXXX on failure
28554 +foreach_descriptor(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
28555 + struct gfs_glock *gl, uint64_t start, uint64_t end,
28556 + unsigned int pass)
28558 + struct gfs_log_header header;
28559 + struct gfs_log_descriptor desc;
28560 + struct buffer_head *bh;
28563 + while (start != end) {
28564 + GFS_ASSERT_SBD(!do_mod(start, sdp->sd_sb.sb_seg_size), sdp,);
28566 + error = get_log_header(sdp, jdesc, gl, bn2seg(start), &header);
28570 + GFS_ASSERT_SBD(!error, sdp,); /* Corrupt headers are bad */
28571 + GFS_ASSERT_SBD(header.lh_first == start, sdp,
28572 + gfs_log_header_print(&header);
28573 + printk("start = %"PRIu64"\n", start););
28578 + error = gfs_dread(sdp, start, gl, DIO_START | DIO_WAIT, &bh);
28582 + gfs_metatype_check(sdp, bh, GFS_METATYPE_LD);
28583 + gfs_desc_in(&desc, bh->b_data);
28587 + if (desc.ld_type != GFS_LOG_DESC_LAST) {
28588 + error = LO_SCAN_ELEMENTS(sdp, jdesc, gl, start,
28593 + while (desc.ld_length--) {
28594 + error = gfs_increment_blkno(sdp, jdesc, gl,
28600 + while (desc.ld_length--) {
28601 + error = gfs_increment_blkno(sdp, jdesc, gl,
28603 + !!desc.ld_length);
28617 + * clean_journal - mark a dirty journal as being clean
28618 + * @sdp: the filesystem
28619 + * @jdesc: the journal
28620 + * @gl: the journal's glock
28621 + * @head: the head journal to start from
28623 + * Returns: 0 on success, -EXXX on failure
28627 +clean_journal(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
28628 + struct gfs_glock *gl, struct gfs_log_header *head)
28630 + struct gfs_log_header lh;
28631 + struct gfs_log_descriptor desc;
28632 + struct buffer_head *bh;
28637 + seg = bn2seg(head->lh_first);
28640 + if (++seg == jdesc->ji_nsegment)
28643 + error = get_log_header(sdp, jdesc, gl, seg, &lh);
28647 + /* Rewrite corrupt header blocks */
28649 + if (error == 1) {
28650 + bh = gfs_dgetblk(sdp, seg2bn(seg), gl);
28652 + gfs_prep_new_buffer(bh);
28653 + gfs_buffer_clear(bh);
28654 + gfs_log_header_out(head, bh->b_data);
28655 + gfs_log_header_out(head,
28656 + bh->b_data + GFS_BASIC_BLOCK -
28657 + sizeof(struct gfs_log_header));
28659 + error = gfs_dwrite(sdp, bh, DIO_DIRTY | DIO_START | DIO_WAIT);
28665 + /* Stop when we get to the end of the log. */
28667 + if (lh.lh_sequence < head->lh_sequence)
28671 + /* Build a "last" descriptor for the transaction we are
28672 + about to commit by writing the shutdown header. */
28674 + memset(&desc, 0, sizeof(struct gfs_log_descriptor));
28675 + desc.ld_header.mh_magic = GFS_MAGIC;
28676 + desc.ld_header.mh_type = GFS_METATYPE_LD;
28677 + desc.ld_header.mh_format = GFS_FORMAT_LD;
28678 + desc.ld_type = GFS_LOG_DESC_LAST;
28679 + desc.ld_length = 0;
28681 + for (blkno = head->lh_first + 1; blkno != seg2bn(seg);) {
28682 + if (do_mod(blkno, sdp->sd_sb.sb_seg_size))
28683 + desc.ld_length++;
28684 + if (++blkno == seg2bn(jdesc->ji_nsegment))
28685 + blkno -= jdesc->ji_nsegment * sdp->sd_sb.sb_seg_size;
28688 + /* Write the descriptor */
28690 + bh = gfs_dgetblk(sdp, head->lh_first + 1, gl);
28692 + gfs_prep_new_buffer(bh);
28693 + gfs_buffer_clear(bh);
28694 + gfs_desc_out(&desc, bh->b_data);
28696 + error = gfs_dwrite(sdp, bh, DIO_DIRTY | DIO_START | DIO_WAIT);
28701 + /* Build a log header that says the journal is clean */
28703 + memset(&lh, 0, sizeof(struct gfs_log_header));
28704 + lh.lh_header.mh_magic = GFS_MAGIC;
28705 + lh.lh_header.mh_type = GFS_METATYPE_LH;
28706 + lh.lh_header.mh_format = GFS_FORMAT_LH;
28707 + lh.lh_flags = GFS_LOG_HEAD_UNMOUNT;
28708 + lh.lh_first = seg2bn(seg);
28709 + lh.lh_sequence = head->lh_sequence + 1;
28710 + /* Don't care about tail */
28711 + lh.lh_last_dump = head->lh_last_dump;
28713 + /* Write the header */
28715 + bh = gfs_dgetblk(sdp, lh.lh_first, gl);
28717 + gfs_prep_new_buffer(bh);
28718 + gfs_buffer_clear(bh);
28719 + gfs_log_header_out(&lh, bh->b_data);
28720 + gfs_log_header_out(&lh,
28721 + bh->b_data + GFS_BASIC_BLOCK -
28722 + sizeof(struct gfs_log_header));
28724 + error = gfs_dwrite(sdp, bh, DIO_DIRTY | DIO_START | DIO_WAIT);
28731 + * gfs_recover_journal - recover a given journal
28732 + * @sdp: the filesystem
28733 + * @jid: the number of the journal to recover
28734 + * @jdesc: the struct gfs_jindex describing the journal
28735 + * @wait: Don't return until the journal is clean (or an error is encountered)
28737 + * Acquire a journal's lock, check to see if the journal is clean, and
28738 + * do recovery if necessary.
28740 + * Returns: 0 on success, -EXXX on failure
28744 +gfs_recover_journal(struct gfs_sbd *sdp,
28745 + unsigned int jid, struct gfs_jindex *jdesc,
28748 + struct gfs_log_header head;
28749 + struct gfs_holder j_gh, t_gh;
28753 + printk("GFS: fsid=%s: jid=%u: Trying to acquire journal lock...\n",
28754 + sdp->sd_fsname, jid);
28756 + /* Acquire the journal lock so we can do recovery */
28758 + error = gfs_glock_nq_num(sdp,
28759 + jdesc->ji_addr, &gfs_meta_glops,
28762 + ((wait) ? 0 : LM_FLAG_TRY) |
28763 + GL_NOCACHE, &j_gh);
28768 + case GLR_TRYFAILED:
28769 + GFS_ASSERT_SBD(!wait, sdp,);
28770 + printk("GFS: fsid=%s: jid=%u: Busy\n", sdp->sd_fsname, jid);
28777 + printk("GFS: fsid=%s: jid=%u: Looking at journal...\n",
28778 + sdp->sd_fsname, jid);
28780 + error = gfs_find_jhead(sdp, jdesc, j_gh.gh_gl, &head);
28782 + goto fail_gunlock;
28784 + if (!(head.lh_flags & GFS_LOG_HEAD_UNMOUNT)) {
28785 + if (test_bit(SDF_ROFS, &sdp->sd_flags)) {
28786 + printk("GFS: fsid=%s: jid=%u: Can't replay: read-only FS\n",
28787 + sdp->sd_fsname, jid);
28789 + goto fail_gunlock;
28792 + printk("GFS: fsid=%s: jid=%u: Acquiring the transaction lock...\n",
28793 + sdp->sd_fsname, jid);
28797 + /* Acquire an exclusive hold on the transaction lock */
28799 + error = gfs_glock_nq_init(sdp->sd_trans_gl,
28802 + LM_FLAG_PRIORITY |
28806 + goto fail_gunlock;
28808 + if (test_bit(SDF_ROFS, &sdp->sd_flags)) {
28809 + printk("GFS: fsid=%s: jid=%u: Can't replay: read-only FS\n",
28810 + sdp->sd_fsname, jid);
28812 + goto fail_gunlock_tr;
28815 + printk("GFS: fsid=%s: jid=%u: Replaying journal...\n",
28816 + sdp->sd_fsname, jid);
28818 + set_bit(GLF_DIRTY, &j_gh.gh_gl->gl_flags);
28820 + LO_BEFORE_SCAN(sdp, jid, &head, GFS_RECPASS_A1);
28822 + error = foreach_descriptor(sdp, jdesc, j_gh.gh_gl,
28823 + head.lh_tail, head.lh_first,
28826 + goto fail_gunlock_tr;
28828 + LO_AFTER_SCAN(sdp, jid, GFS_RECPASS_A1);
28830 + gfs_replay_wait(sdp);
28832 + error = clean_journal(sdp, jdesc, j_gh.gh_gl, &head);
28834 + goto fail_gunlock_tr;
28836 + gfs_glock_dq_uninit(&t_gh);
28838 + t = DIV_RU(jiffies - t, HZ);
28840 + printk("GFS: fsid=%s: jid=%u: Journal replayed in %lus\n",
28841 + sdp->sd_fsname, jid, t);
28844 + sdp->sd_lockstruct.ls_ops->lm_recovery_done(sdp->sd_lockstruct.ls_lockspace,
28848 + gfs_glock_dq_uninit(&j_gh);
28850 + printk("GFS: fsid=%s: jid=%u: Done\n", sdp->sd_fsname, jid);
28855 + gfs_replay_wait(sdp);
28856 + gfs_glock_dq_uninit(&t_gh);
28859 + gfs_glock_dq_uninit(&j_gh);
28861 + printk("GFS: fsid=%s: jid=%u: %s\n",
28862 + sdp->sd_fsname, jid, (error) ? "Failed" : "Done");
28865 + sdp->sd_lockstruct.ls_ops->lm_recovery_done(sdp->sd_lockstruct.ls_lockspace,
28873 + * gfs_check_journals - Recover any dirty journals
28874 + * @sdp: the filesystem
28879 +gfs_check_journals(struct gfs_sbd *sdp)
28881 + struct dirty_j *dj;
28884 + dj = get_dirty_j(sdp);
28888 + down(&sdp->sd_jindex_lock);
28890 + if (dj->dj_jid != sdp->sd_lockstruct.ls_jid &&
28891 + dj->dj_jid < sdp->sd_journals) {
28892 + memcpy(&dj->dj_desc,
28893 + sdp->sd_jindex + dj->dj_jid,
28894 + sizeof(struct gfs_jindex));
28895 + up(&sdp->sd_jindex_lock);
28897 + gfs_recover_journal(sdp,
28898 + dj->dj_jid, &dj->dj_desc,
28902 + up(&sdp->sd_jindex_lock);
28903 + sdp->sd_lockstruct.ls_ops->lm_recovery_done(sdp->sd_lockstruct.ls_lockspace,
28904 + dj->dj_jid, LM_RD_GAVEUP);
28912 + * gfs_recover_dump - recover the log elements in this machine's journal
28913 + * @sdp: the filesystem
28915 + * Returns: 0 on success, -EXXX on failure
28919 +gfs_recover_dump(struct gfs_sbd *sdp)
28921 + struct gfs_log_header head;
28924 + error = gfs_find_jhead(sdp, &sdp->sd_jdesc, sdp->sd_journal_gh.gh_gl,
28929 + GFS_ASSERT_SBD(head.lh_flags & GFS_LOG_HEAD_UNMOUNT, sdp,);
28930 + if (!head.lh_last_dump)
28933 + printk("GFS: fsid=%s: Scanning for log elements...\n",
28936 + LO_BEFORE_SCAN(sdp, sdp->sd_lockstruct.ls_jid, &head, GFS_RECPASS_B1);
28938 + error = foreach_descriptor(sdp, &sdp->sd_jdesc, sdp->sd_journal_gh.gh_gl,
28939 + head.lh_last_dump, head.lh_first,
28944 + LO_AFTER_SCAN(sdp, sdp->sd_lockstruct.ls_jid, GFS_RECPASS_B1);
28946 + /* We need to make sure if we crash during the next log dump that
28947 + all intermediate headers in the transaction point to the last
28948 + log dump before the one we're making so we don't lose it. */
28950 + sdp->sd_log_dump_last = head.lh_last_dump;
28952 + printk("GFS: fsid=%s: Done\n", sdp->sd_fsname);
28957 + printk("GFS: fsid=%s: Failed\n", sdp->sd_fsname);
28961 diff -urN linux-orig/fs/gfs/recovery.h linux-patched/fs/gfs/recovery.h
28962 --- linux-orig/fs/gfs/recovery.h 1969-12-31 18:00:00.000000000 -0600
28963 +++ linux-patched/fs/gfs/recovery.h 2004-06-30 13:27:49.357707883 -0500
28965 +/******************************************************************************
28966 +*******************************************************************************
28968 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
28969 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
28971 +** This copyrighted material is made available to anyone wishing to use,
28972 +** modify, copy, or redistribute it subject to the terms and conditions
28973 +** of the GNU General Public License v.2.
28975 +*******************************************************************************
28976 +******************************************************************************/
28978 +#ifndef __RECOVERY_DOT_H__
28979 +#define __RECOVERY_DOT_H__
28981 +#define GFS_RECPASS_A1 (12)
28982 +#define GFS_RECPASS_B1 (14)
28984 +void gfs_add_dirty_j(struct gfs_sbd *sdp, unsigned int jid);
28985 +void gfs_clear_dirty_j(struct gfs_sbd *sdp);
28987 +int gfs_find_jhead(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
28988 + struct gfs_glock *gl, struct gfs_log_header *head);
28989 +int gfs_increment_blkno(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
28990 + struct gfs_glock *gl, uint64_t *addr,
28991 + int skip_headers);
28993 +int gfs_recover_journal(struct gfs_sbd *sdp,
28994 + unsigned int jid, struct gfs_jindex *jdesc,
28996 +void gfs_check_journals(struct gfs_sbd *sdp);
28998 +int gfs_recover_dump(struct gfs_sbd *sdp);
29000 +#endif /* __RECOVERY_DOT_H__ */
29001 diff -urN linux-orig/fs/gfs/rgrp.c linux-patched/fs/gfs/rgrp.c
29002 --- linux-orig/fs/gfs/rgrp.c 1969-12-31 18:00:00.000000000 -0600
29003 +++ linux-patched/fs/gfs/rgrp.c 2004-06-30 13:27:49.358707651 -0500
29005 +/******************************************************************************
29006 +*******************************************************************************
29008 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
29009 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
29011 +** This copyrighted material is made available to anyone wishing to use,
29012 +** modify, copy, or redistribute it subject to the terms and conditions
29013 +** of the GNU General Public License v.2.
29015 +*******************************************************************************
29016 +******************************************************************************/
29018 +#include <linux/sched.h>
29019 +#include <linux/slab.h>
29020 +#include <linux/smp_lock.h>
29021 +#include <linux/spinlock.h>
29022 +#include <asm/semaphore.h>
29023 +#include <linux/completion.h>
29024 +#include <linux/buffer_head.h>
29030 +#include "glock.h"
29031 +#include "glops.h"
29033 +#include "super.h"
29034 +#include "trans.h"
29037 + * mhc_hash: find the mhc hash bucket for a buffer
29038 + * @bh: the buffer
29040 + * Returns: The bucket number
29043 +static unsigned int
29044 +mhc_hash(struct buffer_head *bh)
29049 + blkno = bh->b_blocknr;
29050 + h = gfs_hash(&blkno, sizeof(uint64_t)) & GFS_MHC_HASH_MASK;
29063 +mhc_trim(struct gfs_sbd *sdp, unsigned int max)
29065 + struct gfs_meta_header_cache *mc;
29068 + spin_lock(&sdp->sd_mhc_lock);
29069 + if (list_empty(&sdp->sd_mhc_single)) {
29070 + spin_unlock(&sdp->sd_mhc_lock);
29073 + mc = list_entry(sdp->sd_mhc_single.prev,
29074 + struct gfs_meta_header_cache,
29076 + list_del(&mc->mc_list_hash);
29077 + list_del(&mc->mc_list_single);
29078 + list_del(&mc->mc_list_rgd);
29079 + spin_unlock(&sdp->sd_mhc_lock);
29081 + kmem_cache_free(gfs_mhc_cachep, mc);
29082 + atomic_dec(&sdp->sd_mhc_count);
29084 + if (atomic_read(&sdp->sd_mhc_count) <= max)
29091 + * gfs_mhc_add - add buffers to the cache of metadata
29093 + * @bh: an array of buffers
29094 + * @num: the number of buffers in the array
29099 +gfs_mhc_add(struct gfs_rgrpd *rgd,
29100 + struct buffer_head **bh, unsigned int num)
29102 + struct gfs_sbd *sdp = rgd->rd_sbd;
29103 + struct gfs_meta_header_cache *mc;
29106 + struct list_head *head;
29108 + for (x = 0; x < num; x++) {
29109 + gfs_meta_check(sdp, bh[x]);
29111 + RETRY_MALLOC(mc = kmem_cache_alloc(gfs_mhc_cachep, GFP_KERNEL), mc);
29112 + memset(mc, 0, sizeof(struct gfs_meta_header_cache));
29114 + mc->mc_block = bh[x]->b_blocknr;
29115 + memcpy(&mc->mc_mh, bh[x]->b_data,
29116 + sizeof(struct gfs_meta_header));
29118 + gen = gfs64_to_cpu(mc->mc_mh.mh_generation) + 2;
29119 + mc->mc_mh.mh_generation = cpu_to_gfs64(gen);
29121 + head = &sdp->sd_mhc[mhc_hash(bh[x])];
29123 + spin_lock(&sdp->sd_mhc_lock);
29124 + list_add(&mc->mc_list_hash, head);
29125 + list_add(&mc->mc_list_single, &sdp->sd_mhc_single);
29126 + list_add(&mc->mc_list_rgd, &rgd->rd_mhc);
29127 + spin_unlock(&sdp->sd_mhc_lock);
29129 + atomic_inc(&sdp->sd_mhc_count);
29132 + if (atomic_read(&sdp->sd_mhc_count) > sdp->sd_tune.gt_max_mhc)
29133 + mhc_trim(sdp, sdp->sd_tune.gt_max_mhc);
29137 + * gfs_mhc_fish - Try to fill in a buffer with data from the cache
29138 + * @sdp: the filesystem
29139 + * @bh: the buffer to fill in
29141 + * Returns: TRUE if the buffer was cached, FALSE otherwise
29145 +gfs_mhc_fish(struct gfs_sbd *sdp, struct buffer_head *bh)
29147 + struct list_head *tmp, *head;
29148 + struct gfs_meta_header_cache *mc;
29150 + head = &sdp->sd_mhc[mhc_hash(bh)];
29152 + spin_lock(&sdp->sd_mhc_lock);
29154 + for (tmp = head->next;
29156 + tmp = tmp->next) {
29157 + mc = list_entry(tmp, struct gfs_meta_header_cache, mc_list_hash);
29158 + if (mc->mc_block != bh->b_blocknr)
29161 + list_del(&mc->mc_list_hash);
29162 + list_del(&mc->mc_list_single);
29163 + list_del(&mc->mc_list_rgd);
29164 + spin_unlock(&sdp->sd_mhc_lock);
29166 + gfs_prep_new_buffer(bh);
29167 + memcpy(bh->b_data, &mc->mc_mh,
29168 + sizeof(struct gfs_meta_header));
29170 + kmem_cache_free(gfs_mhc_cachep, mc);
29171 + atomic_dec(&sdp->sd_mhc_count);
29176 + spin_unlock(&sdp->sd_mhc_lock);
29182 + * gfs_mhc_zap - Get rid of the data in the cache of metadata headers
29188 +gfs_mhc_zap(struct gfs_rgrpd *rgd)
29190 + struct gfs_sbd *sdp = rgd->rd_sbd;
29191 + struct gfs_meta_header_cache *mc;
29193 + spin_lock(&sdp->sd_mhc_lock);
29195 + while (!list_empty(&rgd->rd_mhc)) {
29196 + mc = list_entry(rgd->rd_mhc.next,
29197 + struct gfs_meta_header_cache,
29200 + list_del(&mc->mc_list_hash);
29201 + list_del(&mc->mc_list_single);
29202 + list_del(&mc->mc_list_rgd);
29203 + spin_unlock(&sdp->sd_mhc_lock);
29205 + kmem_cache_free(gfs_mhc_cachep, mc);
29206 + atomic_dec(&sdp->sd_mhc_count);
29208 + spin_lock(&sdp->sd_mhc_lock);
29211 + spin_unlock(&sdp->sd_mhc_lock);
29215 + * depend_hash() - Turn glock number into hash bucket number
29218 + * Returns: The number of the corresponding hash bucket
29221 +static unsigned int
29222 +depend_hash(uint64_t formal_ino)
29226 + h = gfs_hash(&formal_ino, sizeof(uint64_t));
29227 + h &= GFS_DEPEND_HASH_MASK;
29233 + * depend_sync_one -
29240 +depend_sync_one(struct gfs_sbd *sdp, struct gfs_depend *gd)
29242 + struct gfs_glock *gl;
29244 + spin_lock(&sdp->sd_depend_lock);
29245 + list_del(&gd->gd_list_hash);
29246 + spin_unlock(&sdp->sd_depend_lock);
29247 + list_del(&gd->gd_list_rgd);
29249 + gl = gfs_glock_find(sdp,
29250 + &(struct lm_lockname){gd->gd_formal_ino,
29253 + if (gl->gl_ops->go_sync)
29254 + gl->gl_ops->go_sync(gl,
29257 + gfs_glock_put(gl);
29261 + atomic_dec(&sdp->sd_depend_count);
29265 + * depend_sync_old -
29271 +depend_sync_old(struct gfs_rgrpd *rgd)
29273 + struct gfs_sbd *sdp = rgd->rd_sbd;
29274 + struct gfs_depend *gd;
29277 + gd = list_entry(rgd->rd_depend.prev,
29278 + struct gfs_depend,
29281 + if (time_before(jiffies,
29283 + sdp->sd_tune.gt_depend_secs * HZ))
29286 + depend_sync_one(sdp, gd);
29291 + * gfs_depend_add -
29298 +gfs_depend_add(struct gfs_rgrpd *rgd, uint64_t formal_ino)
29300 + struct gfs_sbd *sdp = rgd->rd_sbd;
29301 + struct list_head *head, *tmp;
29302 + struct gfs_depend *gd;
29304 + head = &sdp->sd_depend[depend_hash(formal_ino)];
29306 + spin_lock(&sdp->sd_depend_lock);
29308 + for (tmp = head->next;
29310 + tmp = tmp->next) {
29311 + gd = list_entry(tmp, struct gfs_depend, gd_list_hash);
29312 + if (gd->gd_rgd == rgd &&
29313 + gd->gd_formal_ino == formal_ino) {
29314 + list_move(&gd->gd_list_hash, head);
29315 + spin_unlock(&sdp->sd_depend_lock);
29316 + list_move(&gd->gd_list_rgd, &rgd->rd_depend);
29317 + gd->gd_time = jiffies;
29322 + spin_unlock(&sdp->sd_depend_lock);
29324 + gd = gmalloc(sizeof(struct gfs_depend));
29325 + memset(gd, 0, sizeof(struct gfs_depend));
29327 + gd->gd_rgd = rgd;
29328 + gd->gd_formal_ino = formal_ino;
29329 + gd->gd_time = jiffies;
29331 + spin_lock(&sdp->sd_depend_lock);
29332 + list_add(&gd->gd_list_hash, head);
29333 + spin_unlock(&sdp->sd_depend_lock);
29334 + list_add(&gd->gd_list_rgd, &rgd->rd_depend);
29336 + atomic_inc(&sdp->sd_depend_count);
29338 + depend_sync_old(rgd);
29342 + * gfs_depend_sync -
29348 +gfs_depend_sync(struct gfs_rgrpd *rgd)
29350 + struct gfs_sbd *sdp = rgd->rd_sbd;
29351 + struct gfs_depend *gd;
29353 + while (!list_empty(&rgd->rd_depend)) {
29354 + gd = list_entry(rgd->rd_depend.next,
29355 + struct gfs_depend,
29357 + depend_sync_one(sdp, gd);
29362 + * rgrp_verify - Verify that a resource group is consistent
29363 + * @sdp: the filesystem
29366 + * Somebody should have already called gfs_glock_rg() on this RG.
29370 +rgrp_verify(struct gfs_rgrpd *rgd)
29372 + struct gfs_bitmap *bits = NULL;
29373 + uint32_t length = rgd->rd_ri.ri_length;
29374 + uint32_t count[4], tmp;
29377 + memset(count, 0, 4 * sizeof(uint32_t));
29379 + for (buf = 0; buf < length; buf++) {
29380 + bits = &rgd->rd_bits[buf];
29381 + for (x = 0; x < 4; x++)
29382 + count[x] += gfs_bitcount(rgd,
29383 + rgd->rd_bh[buf]->b_data +
29385 + bits->bi_len, x);
29388 + GFS_ASSERT_RGRPD(count[0] == rgd->rd_rg.rg_free, rgd,
29389 + printk("free data mismatch: %u != %u\n",
29390 + count[0], rgd->rd_rg.rg_free););
29392 + tmp = rgd->rd_ri.ri_data -
29393 + (rgd->rd_rg.rg_usedmeta + rgd->rd_rg.rg_freemeta) -
29394 + (rgd->rd_rg.rg_useddi + rgd->rd_rg.rg_freedi) -
29395 + rgd->rd_rg.rg_free;
29396 + GFS_ASSERT_RGRPD(count[1] == tmp, rgd,
29397 + printk("used data mismatch: %u != %u\n",
29398 + count[1], tmp););
29400 + GFS_ASSERT_RGRPD(count[2] == rgd->rd_rg.rg_freemeta, rgd,
29401 + printk("free metadata mismatch: %u != %u\n",
29402 + count[2], rgd->rd_rg.rg_freemeta););
29404 + tmp = rgd->rd_rg.rg_usedmeta +
29405 + (rgd->rd_rg.rg_useddi + rgd->rd_rg.rg_freedi);
29406 + GFS_ASSERT_RGRPD(count[3] == tmp, rgd,
29407 + printk("used metadata mismatch: %u != %u\n",
29408 + count[3], tmp););
29412 + * gfs_blk2rgrpd - Find resource group for a given data block number
29413 + * @sdp: The GFS superblock
29414 + * @n: The data block number
29416 + * Returns: Ths resource group, or NULL if not found
29419 +struct gfs_rgrpd *
29420 +gfs_blk2rgrpd(struct gfs_sbd *sdp, uint64_t blk)
29422 + struct list_head *tmp, *head;
29423 + struct gfs_rgrpd *rgd = NULL;
29424 + struct gfs_rindex *ri;
29426 + spin_lock(&sdp->sd_rg_mru_lock);
29428 + for (head = &sdp->sd_rg_mru_list, tmp = head->next;
29430 + tmp = tmp->next) {
29431 + rgd = list_entry(tmp, struct gfs_rgrpd, rd_list_mru);
29432 + ri = &rgd->rd_ri;
29434 + if (ri->ri_data1 <= blk && blk < ri->ri_data1 + ri->ri_data) {
29435 + list_move(&rgd->rd_list_mru, &sdp->sd_rg_mru_list);
29436 + spin_unlock(&sdp->sd_rg_mru_lock);
29441 + spin_unlock(&sdp->sd_rg_mru_lock);
29447 + * gfs_rgrpd_get_first - get the first RG
29448 + * @sdp: The GFS superblock
29450 + * Returns: The first rgrp in the filesystem
29453 +struct gfs_rgrpd *
29454 +gfs_rgrpd_get_first(struct gfs_sbd *sdp)
29456 + GFS_ASSERT_SBD(!list_empty(&sdp->sd_rglist), sdp,);
29457 + return list_entry(sdp->sd_rglist.next, struct gfs_rgrpd, rd_list);
29461 + * gfs_rgrpd_get_next - get the next RG
29464 + * Returns: The next rgrp
29467 +struct gfs_rgrpd *
29468 +gfs_rgrpd_get_next(struct gfs_rgrpd *rgd)
29470 + if (rgd->rd_list.next == &rgd->rd_sbd->sd_rglist)
29472 + return list_entry(rgd->rd_list.next, struct gfs_rgrpd, rd_list);
29476 + * clear_rgrpdi - Clear up rgrps
29477 + * @sdp: The GFS superblock
29482 +clear_rgrpdi(struct gfs_sbd *sdp)
29484 + struct gfs_rgrpd *rgd;
29485 + struct gfs_glock *gl;
29487 + sdp->sd_rg_forward = NULL;
29489 + while (!list_empty(&sdp->sd_rg_recent)) {
29490 + rgd = list_entry(sdp->sd_rg_recent.next,
29491 + struct gfs_rgrpd, rd_recent);
29492 + list_del(&rgd->rd_recent);
29495 + while (!list_empty(&sdp->sd_rglist)) {
29496 + rgd = list_entry(sdp->sd_rglist.next,
29497 + struct gfs_rgrpd, rd_list);
29500 + list_del(&rgd->rd_list);
29501 + list_del(&rgd->rd_list_mru);
29504 + gfs_glock_force_drop(gl);
29505 + if (atomic_read(&gl->gl_lvb_count))
29506 + gfs_lvb_unhold(gl);
29507 + gl2rgd(gl) = NULL;
29508 + gfs_glock_put(gl);
29511 + if (rgd->rd_bits)
29512 + kfree(rgd->rd_bits);
29514 + kfree(rgd->rd_bh);
29521 + * gfs_clear_rgrpd - Clear up rgrps
29522 + * @sdp: The GFS superblock
29527 +gfs_clear_rgrpd(struct gfs_sbd *sdp)
29529 + down(&sdp->sd_rindex_lock);
29530 + clear_rgrpdi(sdp);
29531 + up(&sdp->sd_rindex_lock);
29535 + * gfs_compute_bitstructs - Compute the bitmap sizes
29536 + * @rgd: The resource group descriptor
29541 +compute_bitstructs(struct gfs_rgrpd *rgd)
29543 + struct gfs_sbd *sdp = rgd->rd_sbd;
29544 + struct gfs_bitmap *bits;
29545 + uint32_t length = rgd->rd_ri.ri_length;
29546 + uint32_t bytes_left, bytes;
29549 + rgd->rd_bits = gmalloc(length * sizeof(struct gfs_bitmap));
29550 + memset(rgd->rd_bits, 0, length * sizeof(struct gfs_bitmap));
29552 + bytes_left = rgd->rd_ri.ri_bitbytes;
29554 + for (x = 0; x < length; x++) {
29555 + bits = &rgd->rd_bits[x];
29557 + if (length == 1) {
29558 + bytes = bytes_left;
29559 + bits->bi_offset = sizeof(struct gfs_rgrp);
29560 + bits->bi_start = 0;
29561 + bits->bi_len = bytes;
29562 + } else if (x == 0) {
29563 + bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs_rgrp);
29564 + bits->bi_offset = sizeof(struct gfs_rgrp);
29565 + bits->bi_start = 0;
29566 + bits->bi_len = bytes;
29567 + } else if (x + 1 == length) {
29568 + bytes = bytes_left;
29569 + bits->bi_offset = sizeof(struct gfs_meta_header);
29570 + bits->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
29571 + bits->bi_len = bytes;
29573 + bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs_meta_header);
29574 + bits->bi_offset = sizeof(struct gfs_meta_header);
29575 + bits->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
29576 + bits->bi_len = bytes;
29579 + bytes_left -= bytes;
29582 + GFS_ASSERT_RGRPD(!bytes_left, rgd,);
29583 + GFS_ASSERT_RGRPD((rgd->rd_bits[length - 1].bi_start +
29584 + rgd->rd_bits[length - 1].bi_len) * GFS_NBBY ==
29585 + rgd->rd_ri.ri_data, rgd,
29586 + printk("start=%u len=%u offset=%u\n",
29587 + rgd->rd_bits[length - 1].bi_start,
29588 + rgd->rd_bits[length - 1].bi_len,
29589 + rgd->rd_bits[length - 1].bi_offset);
29590 + gfs_rindex_print(&rgd->rd_ri););
29592 + rgd->rd_bh = gmalloc(length * sizeof(struct buffer_head *));
29593 + memset(rgd->rd_bh, 0, length * sizeof(struct buffer_head *));
29597 + * gfs_ri_update - Pull in a new resource index from the disk
29598 + * @gl: The glock covering the rindex inode
29600 + * Returns: 0 on successful update, error code otherwise
29604 +gfs_ri_update(struct gfs_inode *ip)
29606 + struct gfs_sbd *sdp = ip->i_sbd;
29607 + struct gfs_rgrpd *rgd;
29608 + char buf[sizeof(struct gfs_rindex)];
29611 + GFS_ASSERT_SBD(!do_mod(ip->i_di.di_size, sizeof(struct gfs_rindex)),
29614 + clear_rgrpdi(sdp);
29616 + for (sdp->sd_rgcount = 0;; sdp->sd_rgcount++) {
29617 + error = gfs_internal_read(ip, buf,
29618 + sdp->sd_rgcount *
29619 + sizeof(struct gfs_rindex),
29620 + sizeof(struct gfs_rindex));
29623 + if (error != sizeof(struct gfs_rindex)) {
29629 + rgd = gmalloc(sizeof(struct gfs_rgrpd));
29630 + memset(rgd, 0, sizeof(struct gfs_rgrpd));
29632 + INIT_LIST_HEAD(&rgd->rd_mhc);
29633 + INIT_LIST_HEAD(&rgd->rd_depend);
29634 + rgd->rd_sbd = sdp;
29636 + list_add_tail(&rgd->rd_list, &sdp->sd_rglist);
29637 + list_add_tail(&rgd->rd_list_mru, &sdp->sd_rg_mru_list);
29639 + gfs_rindex_in(&rgd->rd_ri, buf);
29641 + compute_bitstructs(rgd);
29643 + error = gfs_glock_get(sdp, rgd->rd_ri.ri_addr, &gfs_rgrp_glops,
29644 + CREATE, &rgd->rd_gl);
29648 + error = gfs_lvb_hold(rgd->rd_gl);
29652 + gl2rgd(rgd->rd_gl) = rgd;
29653 + rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1;
29656 + sdp->sd_riinode_vn = ip->i_gl->gl_vn;
29661 + clear_rgrpdi(sdp);
29667 + * gfs_rindex_hold - Grab a lock on the rindex
29668 + * @sdp: The GFS superblock
29669 + * @ri_gh: the glock holder
29671 + * We grab a lock in the rindex inode to make sure that it doesn't
29672 + * change whilst we are performing an operation. We keep this lock
29673 + * for quite long periods of time compared to other locks. This
29674 + * doesn't matter, since its shared and it is very, very rarely
29675 + * accessed in the exclusive mode.
29677 + * Returns: 0 on success, error code otherwise
29681 +gfs_rindex_hold(struct gfs_sbd *sdp, struct gfs_holder *ri_gh)
29683 + struct gfs_inode *ip = sdp->sd_riinode;
29684 + struct gfs_glock *gl = ip->i_gl;
29687 + error = gfs_glock_nq_init(gl, LM_ST_SHARED, 0, ri_gh);
29691 + if (sdp->sd_riinode_vn != gl->gl_vn) {
29692 + down(&sdp->sd_rindex_lock);
29693 + if (sdp->sd_riinode_vn != gl->gl_vn) {
29694 + error = gfs_ri_update(ip);
29696 + gfs_glock_dq_uninit(ri_gh);
29698 + up(&sdp->sd_rindex_lock);
29705 + * gfs_rgrp_read - Read in a RG's bitmaps
29706 + * @rgd: the struct gfs_rgrpd describing the RG to read in
29708 + * Read in RG bitmaps. Must call gfs_rgrp_relse() it free the bitmaps.
29710 + * Returns: 0 on success, -EXXX on failure
29714 +gfs_rgrp_read(struct gfs_rgrpd *rgd)
29716 + struct gfs_sbd *sdp = rgd->rd_sbd;
29717 + struct gfs_glock *gl = rgd->rd_gl;
29718 + unsigned int x, length = rgd->rd_ri.ri_length;
29721 + for (x = 0; x < length; x++) {
29722 + GFS_ASSERT_RGRPD(!rgd->rd_bh[x], rgd,);
29723 + rgd->rd_bh[x] = gfs_dgetblk(sdp, rgd->rd_ri.ri_addr + x, gl);
29726 + for (x = 0; x < length; x++) {
29727 + error = gfs_dreread(sdp, rgd->rd_bh[x], DIO_START);
29732 + for (x = length; x--;) {
29733 + error = gfs_dreread(sdp, rgd->rd_bh[x], DIO_WAIT);
29736 + gfs_metatype_check(sdp, rgd->rd_bh[x],
29737 + (x) ? GFS_METATYPE_RB : GFS_METATYPE_RG);
29740 + if (rgd->rd_rg_vn != gl->gl_vn) {
29741 + gfs_rgrp_in(&rgd->rd_rg, (rgd->rd_bh[0])->b_data);
29742 + rgd->rd_rg_vn = gl->gl_vn;
29748 + for (x = 0; x < length; x++) {
29749 + brelse(rgd->rd_bh[x]);
29750 + rgd->rd_bh[x] = NULL;
29757 + * gfs_rgrp_relse - Release RG bitmaps read in with gfs_rgrp_read()
29758 + * @rgd: the struct gfs_rgrpd describing the RG to read in
29763 +gfs_rgrp_relse(struct gfs_rgrpd *rgd)
29765 + int x, length = rgd->rd_ri.ri_length;
29767 + for (x = 0; x < length; x++) {
29768 + brelse(rgd->rd_bh[x]);
29769 + rgd->rd_bh[x] = NULL;
29774 + * gfs_rgrp_lvb_fill - copy RG usage data out of the struct gfs_rgrp into the struct gfs_rgrp_lvb
29775 + * @rgd: the resource group data structure
29780 +gfs_rgrp_lvb_fill(struct gfs_rgrpd *rgd)
29782 + struct gfs_rgrp *rg = &rgd->rd_rg;
29783 + struct gfs_rgrp_lvb *rb = (struct gfs_rgrp_lvb *)rgd->rd_gl->gl_lvb;
29785 + rb->rb_magic = cpu_to_gfs32(GFS_MAGIC);
29786 + rb->rb_free = cpu_to_gfs32(rg->rg_free);
29787 + rb->rb_useddi = cpu_to_gfs32(rg->rg_useddi);
29788 + rb->rb_freedi = cpu_to_gfs32(rg->rg_freedi);
29789 + rb->rb_usedmeta = cpu_to_gfs32(rg->rg_usedmeta);
29790 + rb->rb_freemeta = cpu_to_gfs32(rg->rg_freemeta);
29792 + clear_bit(GLF_LVB_INVALID, &rgd->rd_gl->gl_flags);
29796 + * gfs_rgrp_lvb_init - Init the data of a RG LVB
29797 + * @rgd: the resource group data structure
29799 + * Returns: 0 on success, -EXXX on failure
29803 +gfs_rgrp_lvb_init(struct gfs_rgrpd *rgd)
29805 + struct gfs_glock *gl = rgd->rd_gl;
29806 + struct gfs_holder rgd_gh;
29809 + error = gfs_glock_nq_init(gl, LM_ST_EXCLUSIVE, 0, &rgd_gh);
29811 + gfs_rgrp_lvb_fill(rgd);
29812 + gfs_glock_dq_uninit(&rgd_gh);
29819 + * gfs_alloc_get - allocate a struct gfs_alloc structure for an inode
29822 + * Returns: the struct gfs_alloc
29825 +struct gfs_alloc *
29826 +gfs_alloc_get(struct gfs_inode *ip)
29828 + struct gfs_alloc *al = ip->i_alloc;
29830 + GFS_ASSERT_INODE(!al, ip,);
29832 + al = gmalloc(sizeof(struct gfs_alloc));
29833 + memset(al, 0, sizeof(struct gfs_alloc));
29835 + ip->i_alloc = al;
29841 + * gfs_alloc_put - throw away the struct gfs_alloc for an inode
29847 +gfs_alloc_put(struct gfs_inode *ip)
29849 + struct gfs_alloc *al = ip->i_alloc;
29851 + GFS_ASSERT_INODE(al, ip,);
29853 + ip->i_alloc = NULL;
29858 + * try_rgrp_fit - See if a given reservation will fit in a given RG
29859 + * @rgd: the RG data
29860 + * @al: the struct gfs_alloc structure describing the reservation
29862 + * Sets the $ir_datares field in @res.
29863 + * Sets the $ir_metares field in @res.
29865 + * Returns: 1 on success, 0 on failure
29869 +try_rgrp_fit(struct gfs_rgrpd *rgd, struct gfs_alloc *al)
29871 + uint32_t freeblks = rgd->rd_rg.rg_free;
29872 + uint32_t freemeta = rgd->rd_rg.rg_freemeta;
29873 + uint32_t metares = al->al_requested_meta;
29874 + uint32_t datares = al->al_requested_data;
29876 + /* First take care of the data blocks required */
29878 + if (freeblks < al->al_requested_data)
29881 + freeblks -= al->al_requested_data;
29883 + /* Then take care of the dinodes */
29885 + metares += al->al_requested_di;
29887 + /* Then take care of the metadata blocks */
29889 + while (freemeta < metares) {
29890 + if (freeblks < GFS_META_CLUMP)
29893 + freeblks -= GFS_META_CLUMP;
29894 + freemeta += GFS_META_CLUMP;
29896 + datares += GFS_META_CLUMP;
29899 + al->al_rgd = rgd;
29900 + al->al_reserved_meta = metares;
29901 + al->al_reserved_data = datares;
29907 + * recent_rgrp_first - get first RG from recent list
29908 + * @sdp: The GFS superblock
29909 + * @rglast: address of the rgrp used last
29911 + * Returns: The first rgrp in the recent list
29914 +static struct gfs_rgrpd *
29915 +recent_rgrp_first(struct gfs_sbd *sdp, uint64_t rglast)
29917 + struct list_head *tmp, *head;
29918 + struct gfs_rgrpd *rgd = NULL;
29920 + spin_lock(&sdp->sd_rg_recent_lock);
29922 + if (list_empty(&sdp->sd_rg_recent))
29928 + for (head = &sdp->sd_rg_recent, tmp = head->next;
29930 + tmp = tmp->next) {
29931 + rgd = list_entry(tmp, struct gfs_rgrpd, rd_recent);
29932 + if (rgd->rd_ri.ri_addr == rglast)
29937 + rgd = list_entry(sdp->sd_rg_recent.next, struct gfs_rgrpd, rd_recent);
29940 + spin_unlock(&sdp->sd_rg_recent_lock);
29946 + * recent_rgrp_next - get next RG from recent list
29947 + * @cur_rgd: current rgrp
29949 + * Returns: The next rgrp in the recent list
29952 +static struct gfs_rgrpd *
29953 +recent_rgrp_next(struct gfs_rgrpd *cur_rgd)
29955 + struct gfs_sbd *sdp = cur_rgd->rd_sbd;
29956 + struct list_head *tmp, *head;
29957 + struct gfs_rgrpd *rgd;
29959 + spin_lock(&sdp->sd_rg_recent_lock);
29961 + for (head = &sdp->sd_rg_recent, tmp = head->next;
29963 + tmp = tmp->next) {
29964 + rgd = list_entry(tmp, struct gfs_rgrpd, rd_recent);
29965 + if (rgd == cur_rgd) {
29966 + if (cur_rgd->rd_recent.next != &sdp->sd_rg_recent)
29967 + rgd = list_entry(cur_rgd->rd_recent.next,
29968 + struct gfs_rgrpd, rd_recent);
29979 + spin_unlock(&sdp->sd_rg_recent_lock);
29985 + * recent_rgrp_remove - remove an RG from recent list
29986 + * @rgd: The rgrp to remove
29991 +recent_rgrp_remove(struct gfs_rgrpd *rgd)
29993 + spin_lock(&rgd->rd_sbd->sd_rg_recent_lock);
29994 + list_del(&rgd->rd_recent);
29995 + spin_unlock(&rgd->rd_sbd->sd_rg_recent_lock);
29999 + * recent_rgrp_add - add an RG to recent list
30000 + * @new_rgd: The rgrp to add
30005 +recent_rgrp_add(struct gfs_rgrpd *new_rgd)
30007 + struct gfs_sbd *sdp = new_rgd->rd_sbd;
30008 + struct list_head *tmp, *head;
30009 + struct gfs_rgrpd *rgd = NULL;
30010 + unsigned int count = 0;
30011 + unsigned int max = sdp->sd_rgcount / gfs_num_journals(sdp);
30013 + spin_lock(&sdp->sd_rg_recent_lock);
30015 + for (head = &sdp->sd_rg_recent, tmp = head->next;
30017 + tmp = tmp->next) {
30018 + rgd = list_entry(tmp, struct gfs_rgrpd, rd_recent);
30019 + if (rgd == new_rgd)
30022 + if (++count >= max)
30025 + list_add_tail(&new_rgd->rd_recent, &sdp->sd_rg_recent);
30028 + spin_unlock(&sdp->sd_rg_recent_lock);
30032 + * forward_rgrp_get - get an rgrp to try next from full list
30033 + * @sdp: The GFS superblock
30035 + * Returns: The rgrp to try next
30038 +static struct gfs_rgrpd *
30039 +forward_rgrp_get(struct gfs_sbd *sdp)
30041 + struct gfs_rgrpd *rgd;
30042 + unsigned int journals = gfs_num_journals(sdp);
30043 + unsigned int rg = 0, x;
30045 + spin_lock(&sdp->sd_rg_forward_lock);
30047 + rgd = sdp->sd_rg_forward;
30049 + if (sdp->sd_rgcount >= journals)
30050 + rg = sdp->sd_rgcount *
30051 + sdp->sd_lockstruct.ls_jid /
30054 + for (x = 0, rgd = gfs_rgrpd_get_first(sdp);
30056 + x++, rgd = gfs_rgrpd_get_next(rgd))
30057 + /* Do Nothing */;
30059 + sdp->sd_rg_forward = rgd;
30062 + spin_unlock(&sdp->sd_rg_forward_lock);
30068 + * forward_rgrp_set - set the forward rgrp pointer
30069 + * @sdp: the filesystem
30070 + * @rgd: The new forward rgrp
30075 +forward_rgrp_set(struct gfs_sbd *sdp, struct gfs_rgrpd *rgd)
30077 + spin_lock(&sdp->sd_rg_forward_lock);
30078 + sdp->sd_rg_forward = rgd;
30079 + spin_unlock(&sdp->sd_rg_forward_lock);
30083 + * get_local_rgrp - Choose and lock a rgrp for allocation
30084 + * @ip: the inode to reserve space for
30085 + * @rgp: the chosen and locked rgrp
30087 + * Try to acquire rgrp in way which avoids contending with others.
30089 + * Returns: 0 on success, -EXXX on failure
30093 +get_local_rgrp(struct gfs_inode *ip)
30095 + struct gfs_sbd *sdp = ip->i_sbd;
30096 + struct gfs_rgrpd *rgd, *begin, *next = NULL;
30097 + struct gfs_alloc *al = ip->i_alloc;
30098 + int flags = LM_FLAG_TRY;
30102 + int update_recent = FALSE;
30104 + /* Try recently successful rgrps */
30106 + rgd = recent_rgrp_first(sdp, ip->i_last_rg_alloc);
30109 + error = gfs_glock_nq_init(rgd->rd_gl,
30110 + LM_ST_EXCLUSIVE, LM_FLAG_TRY,
30114 + if (try_rgrp_fit(rgd, al))
30117 + next = recent_rgrp_next(rgd);
30118 + recent_rgrp_remove(rgd);
30119 + gfs_glock_dq_uninit(&al->al_rgd_gh);
30123 + case GLR_TRYFAILED:
30124 + rgd = recent_rgrp_next(rgd);
30128 + GFS_ASSERT_RGRPD(error < 0, rgd,);
30133 + /* Go through full list of rgrps */
30135 + update_recent = TRUE;
30136 + begin = rgd = forward_rgrp_get(sdp);
30139 + error = gfs_glock_nq_init(rgd->rd_gl,
30140 + LM_ST_EXCLUSIVE, flags,
30144 + if (try_rgrp_fit(rgd, al))
30146 + gfs_glock_dq_uninit(&al->al_rgd_gh);
30149 + case GLR_TRYFAILED:
30150 + GFS_ASSERT_RGRPD(flags == LM_FLAG_TRY, rgd,);
30155 + GFS_ASSERT_RGRPD(error < 0, rgd,);
30159 + rgd = gfs_rgrpd_get_next(rgd);
30161 + rgd = gfs_rgrpd_get_first(sdp);
30163 + if (rgd == begin) {
30164 + if (++loops >= 2 || !skipped) {
30172 + ip->i_last_rg_alloc = rgd->rd_ri.ri_addr;
30174 + if (update_recent) {
30175 + recent_rgrp_add(rgd);
30176 + rgd = gfs_rgrpd_get_next(rgd);
30177 + forward_rgrp_set(sdp, rgd);
30184 + * gfs_inplace_reserve_i - Reserve space in the filesystem
30185 + * @ip: the inode to reserve space for
30187 + * Acquire resource group locks to allow for the maximum allocation
30188 + * described by "res".
30190 + * This should probably become more complex again, but for now, let's go
30191 + * for simple (one resource group) reservations.
30193 + * Returns: 0 on success, -EXXX on failure
30197 +gfs_inplace_reserve_i(struct gfs_inode *ip,
30198 + char *file, unsigned int line)
30200 + struct gfs_sbd *sdp = ip->i_sbd;
30201 + struct gfs_alloc *al = ip->i_alloc;
30204 + GFS_ASSERT_INODE(al->al_requested_di ||
30205 + al->al_requested_data ||
30206 + al->al_requested_meta, ip,);
30208 + error = gfs_rindex_hold(sdp, &al->al_ri_gh);
30212 + error = get_local_rgrp(ip);
30214 + gfs_glock_dq_uninit(&al->al_ri_gh);
30218 + gfs_depend_sync(al->al_rgd);
30220 + al->al_file = file;
30221 + al->al_line = line;
30227 + * gfs_inplace_release - release an inplace reservation
30228 + * @ip: the inode the reservation was taken out on
30230 + * Release a reservation made by gfs_inplace_reserve().
30234 +gfs_inplace_release(struct gfs_inode *ip)
30236 + struct gfs_alloc *al = ip->i_alloc;
30238 + GFS_ASSERT_INODE(al->al_alloced_di <= al->al_requested_di, ip,
30239 + printk("al_alloced_di = %u, al_requested_di = %u\n",
30240 + al->al_alloced_di, al->al_requested_di);
30241 + printk("al_file = %s, al_line = %u\n",
30242 + al->al_file, al->al_line););
30243 + GFS_ASSERT_INODE(al->al_alloced_meta <= al->al_reserved_meta, ip,
30244 + printk("al_alloced_meta = %u, al_reserved_meta = %u\n",
30245 + al->al_alloced_meta, al->al_reserved_meta);
30246 + printk("al_file = %s, al_line = %u\n",
30247 + al->al_file, al->al_line););
30248 + GFS_ASSERT_INODE(al->al_alloced_data <= al->al_reserved_data, ip,
30249 + printk("al_alloced_data = %u, al_reserved_data = %u\n",
30250 + al->al_alloced_data, al->al_reserved_data);
30251 + printk("al_file = %s, al_line = %u\n",
30252 + al->al_file, al->al_line););
30254 + al->al_rgd = NULL;
30255 + gfs_glock_dq_uninit(&al->al_rgd_gh);
30256 + gfs_glock_dq_uninit(&al->al_ri_gh);
30260 + * gfs_get_block_type - Check a block in a RG is of given type
30261 + * @rgd: the resource group holding the block
30262 + * @block: the block number
30264 + * Returns: The block type (GFS_BLKST_*)
30268 +gfs_get_block_type(struct gfs_rgrpd *rgd, uint64_t block)
30270 + struct gfs_bitmap *bits = NULL;
30271 + uint32_t length, rgrp_block, buf_block;
30272 + unsigned int buf;
30273 + unsigned char type;
30275 + length = rgd->rd_ri.ri_length;
30276 + rgrp_block = block - rgd->rd_ri.ri_data1;
30278 + for (buf = 0; buf < length; buf++) {
30279 + bits = &rgd->rd_bits[buf];
30280 + if (rgrp_block < (bits->bi_start + bits->bi_len) * GFS_NBBY)
30284 + GFS_ASSERT_RGRPD(buf < length, rgd,);
30285 + buf_block = rgrp_block - bits->bi_start * GFS_NBBY;
30287 + type = gfs_testbit(rgd,
30288 + rgd->rd_bh[buf]->b_data + bits->bi_offset,
30289 + bits->bi_len, buf_block);
30295 + * blkalloc_internal - allocate a single block
30296 + * @rgd: the resource group descriptor
30297 + * @goal: the goal block in the RG
30298 + * @old_state: the type of block to find
30299 + * @new_state: the resulting block type
30301 + * This function never fails.
30303 + * Returns: returns the block allocated
30307 +blkalloc_internal(struct gfs_rgrpd *rgd,
30309 + unsigned char old_state, unsigned char new_state)
30311 + struct gfs_bitmap *bits = NULL;
30312 + uint32_t length = rgd->rd_ri.ri_length;
30313 + uint32_t blk = 0;
30314 + unsigned int buf, x;
30316 + for (buf = 0; buf < length; buf++) {
30317 + bits = &rgd->rd_bits[buf];
30318 + if (goal < (bits->bi_start + bits->bi_len) * GFS_NBBY)
30322 + GFS_ASSERT_RGRPD(buf < length, rgd,);
30323 + goal -= bits->bi_start * GFS_NBBY;
30325 + /* "x <= length" because we're skipping over some of the first
30326 + buffer when the goal is non-zero. */
30328 + for (x = 0; x <= length; x++) {
30329 + blk = gfs_bitfit(rgd,
30330 + rgd->rd_bh[buf]->b_data + bits->bi_offset,
30331 + bits->bi_len, goal, old_state);
30332 + if (blk != BFITNOENT)
30335 + buf = (buf + 1) % length;
30336 + bits = &rgd->rd_bits[buf];
30340 + GFS_ASSERT_RGRPD(x <= length, rgd,);
30342 + gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[buf]);
30344 + rgd->rd_bh[buf]->b_data + bits->bi_offset,
30345 + bits->bi_len, blk, new_state);
30347 + return bits->bi_start * GFS_NBBY + blk;
30351 + * blkfree_internal - Free a block
30352 + * @sdp: the filesystem
30353 + * @bstart: the start of a run of blocks to free
30354 + * @blen: the length of the block run
30355 + * @new_state: the new state of the block
30359 +static struct gfs_rgrpd *
30360 +blkfree_internal(struct gfs_sbd *sdp, uint64_t bstart, uint32_t blen,
30361 + unsigned char new_state)
30363 + struct gfs_rgrpd *rgd;
30364 + struct gfs_bitmap *bits = NULL;
30365 + uint32_t length, rgrp_blk, buf_blk;
30366 + unsigned int buf;
30368 + rgd = gfs_blk2rgrpd(sdp, bstart);
30369 + GFS_ASSERT_SBD(rgd, sdp,
30370 + printk("block = %"PRIu64"\n", bstart););
30372 + length = rgd->rd_ri.ri_length;
30373 + rgrp_blk = bstart - rgd->rd_ri.ri_data1;
30376 + for (buf = 0; buf < length; buf++) {
30377 + bits = &rgd->rd_bits[buf];
30378 + if (rgrp_blk < (bits->bi_start + bits->bi_len) * GFS_NBBY)
30382 + GFS_ASSERT_RGRPD(buf < length, rgd,);
30383 + buf_blk = rgrp_blk - bits->bi_start * GFS_NBBY;
30386 + gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[buf]);
30388 + rgd->rd_bh[buf]->b_data + bits->bi_offset,
30389 + bits->bi_len, buf_blk, new_state);
30396 + * clump_alloc - Allocate a clump of metadata
30397 + * @rgd: the resource group descriptor
30398 + * @first: returns the first block allocated
30400 + * Returns: 0 on success, -EXXX on failure
30404 +clump_alloc(struct gfs_rgrpd *rgd, uint32_t *first)
30406 + struct gfs_sbd *sdp = rgd->rd_sbd;
30407 + struct gfs_meta_header mh;
30408 + struct buffer_head **bh;
30409 + uint32_t goal, blk;
30413 + memset(&mh, 0, sizeof(struct gfs_meta_header));
30414 + mh.mh_magic = GFS_MAGIC;
30415 + mh.mh_type = GFS_METATYPE_NONE;
30417 + bh = gmalloc(GFS_META_CLUMP * sizeof(struct buffer_head *));
30418 + memset(bh, 0, sizeof(GFS_META_CLUMP * sizeof(struct buffer_head *)));
30420 + goal = rgd->rd_last_alloc_data;
30422 + for (x = 0; x < GFS_META_CLUMP; x++) {
30423 + blk = blkalloc_internal(rgd, goal, GFS_BLKST_FREE,
30424 + GFS_BLKST_FREEMETA);
30428 + bh[x] = gfs_dgetblk(sdp, rgd->rd_ri.ri_data1 + blk, rgd->rd_gl);
30430 + gfs_prep_new_buffer(bh[x]);
30432 + gfs_meta_header_out(&mh, bh[x]->b_data);
30433 + ((struct gfs_meta_header *)bh[x]->b_data)->mh_generation = 0;
30435 + error = gfs_dwrite(sdp, bh[x], DIO_DIRTY | DIO_START);
30442 + rgd->rd_last_alloc_data = goal;
30444 + for (x = 0; x < GFS_META_CLUMP; x++) {
30445 + error = gfs_dwrite(sdp, bh[x], DIO_WAIT);
30450 + gfs_mhc_add(rgd, bh, GFS_META_CLUMP);
30452 + GFS_ASSERT_RGRPD(rgd->rd_rg.rg_free >= GFS_META_CLUMP, rgd,);
30453 + rgd->rd_rg.rg_free -= GFS_META_CLUMP;
30454 + rgd->rd_rg.rg_freemeta += GFS_META_CLUMP;
30457 + for (x = 0; x < GFS_META_CLUMP; x++)
30459 + gfs_dwrite(sdp, bh[x], DIO_WAIT);
30468 + * gfs_blkalloc - Allocate a data block
30469 + * @ip: the inode to allocate the data block for
30470 + * @block: the block allocated
30475 +gfs_blkalloc(struct gfs_inode *ip, uint64_t *block)
30477 + struct gfs_sbd *sdp = ip->i_sbd;
30478 + struct gfs_alloc *al = ip->i_alloc;
30479 + struct gfs_rgrpd *rgd = al->al_rgd;
30480 + uint32_t goal, blk;
30483 + GFS_ASSERT_INODE(rgd, ip,);
30485 + same = (rgd->rd_ri.ri_addr == ip->i_di.di_goal_rgrp);
30486 + goal = (same) ? ip->i_di.di_goal_dblk : rgd->rd_last_alloc_data;
30488 + blk = blkalloc_internal(rgd, goal,
30489 + GFS_BLKST_FREE, GFS_BLKST_USED);
30490 + rgd->rd_last_alloc_data = blk;
30493 + ip->i_di.di_goal_rgrp = rgd->rd_ri.ri_addr;
30494 + ip->i_di.di_goal_mblk = 0;
30496 + ip->i_di.di_goal_dblk = blk;
30498 + *block = rgd->rd_ri.ri_data1 + blk;
30500 + GFS_ASSERT_RGRPD(rgd->rd_rg.rg_free, rgd,);
30501 + rgd->rd_rg.rg_free--;
30503 + gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[0]);
30504 + gfs_rgrp_out(&rgd->rd_rg, rgd->rd_bh[0]->b_data);
30506 + al->al_alloced_data++;
30508 + gfs_trans_add_quota(sdp, +1, ip->i_di.di_uid, ip->i_di.di_gid);
30512 + * gfs_metaalloc - Allocate a metadata block to a file
30514 + * @block: the block allocated
30516 + * Returns: 0 on success, -EXXX on failure
30520 +gfs_metaalloc(struct gfs_inode *ip, uint64_t *block)
30522 + struct gfs_sbd *sdp = ip->i_sbd;
30523 + struct gfs_alloc *al = ip->i_alloc;
30524 + struct gfs_rgrpd *rgd = al->al_rgd;
30525 + uint32_t goal, blk;
30529 + GFS_ASSERT_INODE(rgd, ip,);
30531 + same = (rgd->rd_ri.ri_addr == ip->i_di.di_goal_rgrp);
30533 + if (!rgd->rd_rg.rg_freemeta) {
30534 + error = clump_alloc(rgd, &goal);
30538 + al->al_alloced_data += GFS_META_CLUMP;
30540 + goal = (same) ? ip->i_di.di_goal_mblk : rgd->rd_last_alloc_meta;
30542 + blk = blkalloc_internal(rgd, goal,
30543 + GFS_BLKST_FREEMETA, GFS_BLKST_USEDMETA);
30544 + rgd->rd_last_alloc_meta = blk;
30547 + ip->i_di.di_goal_rgrp = rgd->rd_ri.ri_addr;
30548 + ip->i_di.di_goal_dblk = 0;
30550 + ip->i_di.di_goal_mblk = blk;
30552 + *block = rgd->rd_ri.ri_data1 + blk;
30554 + GFS_ASSERT_RGRPD(rgd->rd_rg.rg_freemeta, rgd,);
30555 + rgd->rd_rg.rg_freemeta--;
30556 + rgd->rd_rg.rg_usedmeta++;
30558 + gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[0]);
30559 + gfs_rgrp_out(&rgd->rd_rg, rgd->rd_bh[0]->b_data);
30561 + al->al_alloced_meta++;
30563 + gfs_trans_add_quota(sdp, +1, ip->i_di.di_uid, ip->i_di.di_gid);
30569 + * gfs_dialloc - Allocate a dinode
30570 + * @dip: the directory that the inode is going in
30571 + * @block: the block
30577 +gfs_dialloc(struct gfs_inode *dip, uint64_t *block)
30579 + struct gfs_alloc *al = dip->i_alloc;
30580 + struct gfs_rgrpd *rgd = al->al_rgd;
30581 + uint32_t goal, blk;
30584 + GFS_ASSERT_INODE(rgd, dip,);
30586 + if (rgd->rd_rg.rg_freemeta)
30587 + goal = rgd->rd_last_alloc_meta;
30589 + error = clump_alloc(rgd, &goal);
30593 + al->al_alloced_data += GFS_META_CLUMP;
30596 + blk = blkalloc_internal(rgd, goal,
30597 + GFS_BLKST_FREEMETA, GFS_BLKST_USEDMETA);
30598 + rgd->rd_last_alloc_meta = blk;
30600 + *block = rgd->rd_ri.ri_data1 + blk;
30602 + GFS_ASSERT_RGRPD(rgd->rd_rg.rg_freemeta, rgd,);
30603 + rgd->rd_rg.rg_freemeta--;
30604 + rgd->rd_rg.rg_useddi++;
30606 + gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[0]);
30607 + gfs_rgrp_out(&rgd->rd_rg, rgd->rd_bh[0]->b_data);
30609 + al->al_alloced_di++;
30610 + al->al_alloced_meta++;
30616 + * gfs_blkfree - free a piece of data
30617 + * @ip: the inode these blocks are being free from
30618 + * @bstart: the start of a run of blocks to free
30619 + * @blen: the length of the block run
30624 +gfs_blkfree(struct gfs_inode *ip, uint64_t bstart, uint32_t blen)
30626 + struct gfs_sbd *sdp = ip->i_sbd;
30627 + struct gfs_rgrpd *rgd;
30629 + rgd = blkfree_internal(sdp, bstart, blen, GFS_BLKST_FREE);
30631 + rgd->rd_rg.rg_free += blen;
30633 + gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[0]);
30634 + gfs_rgrp_out(&rgd->rd_rg, rgd->rd_bh[0]->b_data);
30636 + gfs_trans_add_quota(sdp, -(int64_t)blen,
30638 + ip->i_di.di_gid);
30642 + * gfs_metafree - free a piece of metadata
30643 + * @ip: the inode these blocks are being free from
30644 + * @bstart: the start of a run of blocks to free
30645 + * @blen: the length of the block run
30650 +gfs_metafree(struct gfs_inode *ip, uint64_t bstart, uint32_t blen)
30652 + struct gfs_sbd *sdp = ip->i_sbd;
30653 + struct gfs_rgrpd *rgd;
30655 + rgd = blkfree_internal(sdp, bstart, blen, GFS_BLKST_FREEMETA);
30657 + GFS_ASSERT_RGRPD(rgd->rd_rg.rg_usedmeta >= blen, rgd,);
30658 + rgd->rd_rg.rg_usedmeta -= blen;
30659 + rgd->rd_rg.rg_freemeta += blen;
30661 + gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[0]);
30662 + gfs_rgrp_out(&rgd->rd_rg, rgd->rd_bh[0]->b_data);
30664 + gfs_trans_add_quota(sdp, -(int64_t)blen,
30666 + ip->i_di.di_gid);
30667 + gfs_wipe_buffers(ip, rgd, bstart, blen);
30671 + * gfs_difree_uninit - free a dinode (bitmap/RG accounting only)
30672 + * @rgd: the resource group that contains the dinode
30673 + * @addr: the dinode address
30678 +gfs_difree_uninit(struct gfs_rgrpd *rgd, uint64_t addr)
30680 + struct gfs_sbd *sdp = rgd->rd_sbd;
30681 + struct gfs_rgrpd *tmp_rgd;
30683 + tmp_rgd = blkfree_internal(sdp, addr, 1,
30684 + GFS_BLKST_FREEMETA);
30685 + GFS_ASSERT_RGRPD(rgd == tmp_rgd, rgd,);
30687 + GFS_ASSERT_RGRPD(rgd->rd_rg.rg_useddi, rgd,);
30688 + rgd->rd_rg.rg_useddi--;
30689 + rgd->rd_rg.rg_freemeta++;
30691 + gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[0]);
30692 + gfs_rgrp_out(&rgd->rd_rg, rgd->rd_bh[0]->b_data);
30696 + * gfs_difree - free a dinode
30697 + * @rgd: the resource group that contains the dinode
30698 + * @ip: the inode representing the dinode to free
30703 +gfs_difree(struct gfs_rgrpd *rgd, struct gfs_inode *ip)
30705 + gfs_difree_uninit(rgd, ip->i_num.no_addr);
30707 + gfs_trans_add_quota(ip->i_sbd, -1, ip->i_di.di_uid, ip->i_di.di_gid);
30708 + gfs_wipe_buffers(ip, rgd, ip->i_num.no_addr, 1);
30712 + * gfs_rlist_add - add a RG to a list of RGs
30713 + * @sdp: the filesystem
30714 + * @rlist: the list of resource groups
30715 + * @block: the block
30717 + * Figure out what RG a block belongs to and add that RG to the list
30722 +gfs_rlist_add(struct gfs_sbd *sdp, struct gfs_rgrp_list *rlist, uint64_t block)
30724 + struct gfs_rgrpd *rgd;
30725 + struct gfs_rgrpd **tmp;
30726 + unsigned int new_space;
30729 + GFS_ASSERT_SBD(rlist->rl_rgrps <= rlist->rl_space, sdp,);
30730 + GFS_ASSERT_SBD(!rlist->rl_ghs, sdp,);
30732 + rgd = gfs_blk2rgrpd(sdp, block);
30733 + GFS_ASSERT_SBD(rgd, sdp,
30734 + printk("block = %"PRIu64"\n", block););
30736 + for (x = 0; x < rlist->rl_rgrps; x++)
30737 + if (rlist->rl_rgd[x] == rgd)
30740 + if (rlist->rl_rgrps == rlist->rl_space) {
30741 + new_space = rlist->rl_space + 10;
30743 + tmp = gmalloc(new_space * sizeof(struct gfs_rgrpd *));
30745 + if (rlist->rl_rgd) {
30746 + memcpy(tmp, rlist->rl_rgd,
30747 + rlist->rl_space * sizeof(struct gfs_rgrpd *));
30748 + kfree(rlist->rl_rgd);
30751 + rlist->rl_space = new_space;
30752 + rlist->rl_rgd = tmp;
30755 + rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
30759 + * gfs_rlist_alloc - all RGs have been added to the rlist, allocate holders for them
30760 + * @rlist: the list of resource groups
30761 + * @state: the lock state to acquire the RG lock in
30762 + * @flags: the modifier flags for the holder structures
30767 +gfs_rlist_alloc(struct gfs_rgrp_list *rlist, unsigned int state, int flags)
30771 + rlist->rl_ghs = gmalloc(rlist->rl_rgrps * sizeof(struct gfs_holder));
30772 + for (x = 0; x < rlist->rl_rgrps; x++)
30773 + gfs_holder_init(rlist->rl_rgd[x]->rd_gl,
30775 + &rlist->rl_ghs[x]);
30779 + * gfs_rlist_free - free a resource group list
30780 + * @list: the list of resource groups
30785 +gfs_rlist_free(struct gfs_rgrp_list *rlist)
30789 + if (rlist->rl_rgd)
30790 + kfree(rlist->rl_rgd);
30792 + if (rlist->rl_ghs) {
30793 + for (x = 0; x < rlist->rl_rgrps; x++)
30794 + gfs_holder_uninit(&rlist->rl_ghs[x]);
30795 + kfree(rlist->rl_ghs);
30800 + * gfs_reclaim_metadata - reclaims unused metadata
30801 + * @sdp: the file system
30802 + * @stats: stats on reclamation
30804 + * This function will look through the resource groups and
30805 + * free the unused metadata.
30807 + * Returns: 0 on success, -EXXX on error
30811 +gfs_reclaim_metadata(struct gfs_sbd *sdp, struct gfs_reclaim_stats *stats)
30813 + struct gfs_holder ji_gh, ri_gh, rgd_gh, t_gh;
30814 + struct gfs_rgrpd *rgd;
30815 + struct gfs_rgrp *rg;
30816 + struct gfs_dinode *di;
30817 + struct gfs_inum next;
30818 + struct buffer_head *bh;
30824 + /* Acquire the jindex lock here so we don't deadlock with a
30825 + process writing the jindex inode. :-( */
30827 + error = gfs_jindex_hold(sdp, &ji_gh);
30831 + error = gfs_rindex_hold(sdp, &ri_gh);
30833 + goto fail_jindex_relse;
30835 + for (rgd = gfs_rgrpd_get_first(sdp);
30837 + rgd = gfs_rgrpd_get_next(rgd)) {
30838 + error = gfs_glock_nq_init(rgd->rd_gl,
30839 + LM_ST_EXCLUSIVE, GL_NOCACHE,
30842 + goto fail_rindex_relse;
30844 + rgrp_verify(rgd);
30846 + rg = &rgd->rd_rg;
30848 + if (!rg->rg_freedi && !rg->rg_freemeta) {
30849 + gfs_glock_dq_uninit(&rgd_gh);
30853 + gfs_mhc_zap(rgd);
30854 + gfs_depend_sync(rgd);
30856 + error = gfs_lock_fs_check_clean(sdp, LM_ST_EXCLUSIVE, &t_gh);
30858 + goto fail_gunlock_rg;
30860 + error = gfs_trans_begin(sdp, rgd->rd_ri.ri_length, 0);
30862 + goto fail_unlock_fs;
30864 + next = rg->rg_freedi_list;
30866 + for (x = rg->rg_freedi; x--;) {
30867 + GFS_ASSERT_RGRPD(next.no_formal_ino &&
30868 + next.no_addr, rgd,);
30870 + blkfree_internal(sdp, next.no_addr, 1, GFS_BLKST_FREE);
30872 + error = gfs_dread(sdp, next.no_addr, rgd->rd_gl,
30873 + DIO_FORCE | DIO_START | DIO_WAIT, &bh);
30875 + goto fail_end_trans;
30877 + di = (struct gfs_dinode *)bh->b_data;
30878 + flags = di->di_flags;
30879 + flags = gfs32_to_cpu(flags);
30880 + GFS_ASSERT_RGRPD(flags & GFS_DIF_UNUSED, rgd,);
30882 + gfs_inum_in(&next, (char *)&di->di_next_unused);
30888 + stats->rc_inodes++;
30891 + GFS_ASSERT_RGRPD(!next.no_formal_ino && !next.no_addr, rgd,);
30892 + rg->rg_freedi_list = next;
30895 + for (x = rg->rg_freemeta; x--;) {
30896 + goal = blkalloc_internal(rgd, goal,
30897 + GFS_BLKST_FREEMETA, GFS_BLKST_FREE);
30898 + rg->rg_freemeta--;
30900 + stats->rc_metadata++;
30903 + gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[0]);
30904 + gfs_rgrp_out(rg, rgd->rd_bh[0]->b_data);
30906 + gfs_trans_end(sdp);
30908 + gfs_glock_dq_uninit(&t_gh);
30910 + gfs_glock_dq_uninit(&rgd_gh);
30913 + gfs_glock_dq_uninit(&ri_gh);
30915 + gfs_glock_dq_uninit(&ji_gh);
30920 + gfs_trans_end(sdp);
30923 + gfs_glock_dq_uninit(&t_gh);
30926 + gfs_glock_dq_uninit(&rgd_gh);
30928 + fail_rindex_relse:
30929 + gfs_glock_dq_uninit(&ri_gh);
30931 + fail_jindex_relse:
30932 + gfs_glock_dq_uninit(&ji_gh);
30937 diff -urN linux-orig/fs/gfs/rgrp.h linux-patched/fs/gfs/rgrp.h
30938 --- linux-orig/fs/gfs/rgrp.h 1969-12-31 18:00:00.000000000 -0600
30939 +++ linux-patched/fs/gfs/rgrp.h 2004-06-30 13:27:49.358707651 -0500
30941 +/******************************************************************************
30942 +*******************************************************************************
30944 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
30945 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
30947 +** This copyrighted material is made available to anyone wishing to use,
30948 +** modify, copy, or redistribute it subject to the terms and conditions
30949 +** of the GNU General Public License v.2.
30951 +*******************************************************************************
30952 +******************************************************************************/
30954 +#ifndef __RGRP_DOT_H__
30955 +#define __RGRP_DOT_H__
30957 +void gfs_mhc_add(struct gfs_rgrpd *rgd, struct buffer_head **bh,
30958 + unsigned int num);
30959 +int gfs_mhc_fish(struct gfs_sbd *sdp, struct buffer_head *bh);
30960 +void gfs_mhc_zap(struct gfs_rgrpd *rgd);
30962 +void gfs_depend_add(struct gfs_rgrpd *rgd, uint64_t formal_ino);
30963 +void gfs_depend_sync(struct gfs_rgrpd *rgd);
30965 +struct gfs_rgrpd *gfs_blk2rgrpd(struct gfs_sbd *sdp, uint64_t blk);
30966 +struct gfs_rgrpd *gfs_rgrpd_get_first(struct gfs_sbd *sdp);
30967 +struct gfs_rgrpd *gfs_rgrpd_get_next(struct gfs_rgrpd *rgd);
30969 +void gfs_clear_rgrpd(struct gfs_sbd *sdp);
30971 +int gfs_rindex_hold(struct gfs_sbd *sdp, struct gfs_holder *ri_gh);
30973 +int gfs_rgrp_read(struct gfs_rgrpd *rgd);
30974 +void gfs_rgrp_relse(struct gfs_rgrpd *rgd);
30976 +void gfs_rgrp_lvb_fill(struct gfs_rgrpd *rgd);
30977 +int gfs_rgrp_lvb_init(struct gfs_rgrpd *rgd);
30979 +struct gfs_alloc *gfs_alloc_get(struct gfs_inode *ip);
30980 +void gfs_alloc_put(struct gfs_inode *ip);
30982 +int gfs_inplace_reserve_i(struct gfs_inode *ip,
30983 + char *file, unsigned int line);
30984 +#define gfs_inplace_reserve(ip) \
30985 +gfs_inplace_reserve_i((ip), __FILE__, __LINE__)
30987 +void gfs_inplace_release(struct gfs_inode *ip);
30989 +unsigned char gfs_get_block_type(struct gfs_rgrpd *rgd, uint64_t block);
30991 +void gfs_blkalloc(struct gfs_inode *ip, uint64_t *block);
30992 +int gfs_metaalloc(struct gfs_inode *ip, uint64_t *block);
30993 +int gfs_dialloc(struct gfs_inode *dip, uint64_t *block);
30995 +void gfs_blkfree(struct gfs_inode *ip, uint64_t bstart, uint32_t blen);
30996 +void gfs_metafree(struct gfs_inode *ip, uint64_t bstart, uint32_t blen);
30997 +void gfs_difree_uninit(struct gfs_rgrpd *rgd, uint64_t addr);
30998 +void gfs_difree(struct gfs_rgrpd *rgd, struct gfs_inode *ip);
31000 +struct gfs_rgrp_list {
31001 + unsigned int rl_rgrps;
31002 + unsigned int rl_space;
31003 + struct gfs_rgrpd **rl_rgd;
31004 + struct gfs_holder *rl_ghs;
31007 +void gfs_rlist_add(struct gfs_sbd *sdp, struct gfs_rgrp_list *rlist,
31009 +void gfs_rlist_alloc(struct gfs_rgrp_list *rlist, unsigned int state,
31011 +void gfs_rlist_free(struct gfs_rgrp_list *rlist);
31013 +int gfs_reclaim_metadata(struct gfs_sbd *sdp, struct gfs_reclaim_stats *stats);
31015 +#endif /* __RGRP_DOT_H__ */
31016 diff -urN linux-orig/fs/gfs/super.c linux-patched/fs/gfs/super.c
31017 --- linux-orig/fs/gfs/super.c 1969-12-31 18:00:00.000000000 -0600
31018 +++ linux-patched/fs/gfs/super.c 2004-06-30 13:27:49.359707419 -0500
31020 +/******************************************************************************
31021 +*******************************************************************************
31023 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
31024 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
31026 +** This copyrighted material is made available to anyone wishing to use,
31027 +** modify, copy, or redistribute it subject to the terms and conditions
31028 +** of the GNU General Public License v.2.
31030 +*******************************************************************************
31031 +******************************************************************************/
31033 +#include <linux/sched.h>
31034 +#include <linux/slab.h>
31035 +#include <linux/smp_lock.h>
31036 +#include <linux/spinlock.h>
31037 +#include <asm/semaphore.h>
31038 +#include <linux/completion.h>
31039 +#include <linux/buffer_head.h>
31044 +#include "format.h"
31045 +#include "glock.h"
31046 +#include "glops.h"
31047 +#include "inode.h"
31049 +#include "quota.h"
31050 +#include "recovery.h"
31052 +#include "super.h"
31053 +#include "unlinked.h"
31056 + * gfs_init_tune_data - Fill in the struct gfs_tune (sd_tune) in the struct gfs_sbd.
31057 + * @sdp: the filesystem
31062 +gfs_init_tune_data(struct gfs_sbd *sdp)
31064 + struct gfs_tune *gt = &sdp->sd_tune;
31066 + gt->gt_tune_version = GFS_TUNE_VERSION;
31068 + gt->gt_ilimit1 = 100;
31069 + gt->gt_ilimit1_tries = 3;
31070 + gt->gt_ilimit1_min = 1;
31071 + gt->gt_ilimit2 = 500;
31072 + gt->gt_ilimit2_tries = 10;
31073 + gt->gt_ilimit2_min = 3;
31074 + gt->gt_demote_secs = 300;
31075 + gt->gt_incore_log_blocks = 1024;
31076 + gt->gt_jindex_refresh_secs = 60;
31077 + gt->gt_depend_secs = 60;
31078 + gt->gt_scand_secs = 5;
31079 + gt->gt_recoverd_secs = 60;
31080 + gt->gt_logd_secs = 1;
31081 + gt->gt_quotad_secs = 5;
31082 + gt->gt_inoded_secs = 15;
31083 + gt->gt_quota_simul_sync = 64;
31084 + gt->gt_quota_warn_period = 10;
31085 + gt->gt_atime_quantum = 3600;
31086 + gt->gt_quota_quantum = 60;
31087 + gt->gt_quota_scale_num = 1;
31088 + gt->gt_quota_scale_den = 1;
31089 + gt->gt_quota_enforce = 1;
31090 + gt->gt_quota_account = 1;
31091 + gt->gt_new_files_jdata = 0;
31092 + gt->gt_new_files_directio = 0;
31093 + gt->gt_max_atomic_write = 4 << 20;
31094 + gt->gt_max_readahead = 1 << 18;
31095 + gt->gt_lockdump_size = 131072;
31096 + gt->gt_stall_secs = 600;
31097 + gt->gt_complain_secs = 10;
31098 + gt->gt_reclaim_limit = 5000;
31099 + gt->gt_entries_per_readdir = 32;
31100 + gt->gt_prefetch_secs = 10;
31101 + gt->gt_statfs_slots = 64;
31102 + gt->gt_max_mhc = 10000;
31106 + * gfs_check_sb - Check superblock
31107 + * @sdp: the filesystem
31108 + * @sb: The superblock
31109 + * @silent: Don't print a message if the check fails
31111 + * Checks the version code of the FS is one that we understand how to
31112 + * read and that the sizes of the various on-disk structures have not
31117 +gfs_check_sb(struct gfs_sbd *sdp, struct gfs_sb *sb, int silent)
31121 + if (sb->sb_header.mh_magic != GFS_MAGIC ||
31122 + sb->sb_header.mh_type != GFS_METATYPE_SB) {
31124 + printk("GFS: not a GFS filesystem\n");
31128 + /* If format numbers match exactly, we're done. */
31130 + if (sb->sb_fs_format == GFS_FORMAT_FS &&
31131 + sb->sb_multihost_format == GFS_FORMAT_MULTI)
31134 + if (sb->sb_fs_format != GFS_FORMAT_FS) {
31135 + for (x = 0; gfs_old_fs_formats[x]; x++)
31136 + if (gfs_old_fs_formats[x] == sb->sb_fs_format)
31139 + if (!gfs_old_fs_formats[x]) {
31140 + printk("GFS: code version (%u, %u) is incompatible with ondisk format (%u, %u)\n",
31141 + GFS_FORMAT_FS, GFS_FORMAT_MULTI,
31142 + sb->sb_fs_format, sb->sb_multihost_format);
31143 + printk("GFS: I don't know how to upgrade this FS\n");
31148 + if (sb->sb_multihost_format != GFS_FORMAT_MULTI) {
31149 + for (x = 0; gfs_old_multihost_formats[x]; x++)
31150 + if (gfs_old_multihost_formats[x] == sb->sb_multihost_format)
31153 + if (!gfs_old_multihost_formats[x]) {
31154 + printk("GFS: code version (%u, %u) is incompatible with ondisk format (%u, %u)\n",
31155 + GFS_FORMAT_FS, GFS_FORMAT_MULTI,
31156 + sb->sb_fs_format, sb->sb_multihost_format);
31157 + printk("GFS: I don't know how to upgrade this FS\n");
31162 + if (!sdp->sd_args.ar_upgrade) {
31163 + printk("GFS: code version (%u, %u) is incompatible with ondisk format (%u, %u)\n",
31164 + GFS_FORMAT_FS, GFS_FORMAT_MULTI,
31165 + sb->sb_fs_format, sb->sb_multihost_format);
31166 + printk("GFS: Use the \"upgrade\" mount option to upgrade the FS\n");
31167 + printk("GFS: See the manual for more details\n");
31175 + * gfs_read_sb - Read super block
31176 + * @sdp: The GFS superblock
31177 + * @gl: the glock for the superblock (assumed to be held)
31178 + * @silent: Don't print message if mount fails
31183 +gfs_read_sb(struct gfs_sbd *sdp, struct gfs_glock *gl, int silent)
31185 + struct buffer_head *bh;
31186 + uint32_t hash_blocks, ind_blocks, leaf_blocks;
31187 + uint32_t tmp_blocks;
31188 + uint64_t space = 0;
31192 + error = gfs_dread(sdp, GFS_SB_ADDR >> sdp->sd_fsb2bb_shift,
31193 + gl, DIO_FORCE | DIO_START | DIO_WAIT, &bh);
31196 + printk("GFS: fsid=%s: can't read superblock\n",
31201 + GFS_ASSERT_SBD(sizeof(struct gfs_sb) <= bh->b_size, sdp,);
31203 + gfs_sb_in(&sdp->sd_sb, bh->b_data);
31207 + error = gfs_check_sb(sdp, &sdp->sd_sb, silent);
31211 + sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
31212 + GFS_BASIC_BLOCK_SHIFT;
31213 + sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
31214 + sdp->sd_diptrs = (sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode)) /
31215 + sizeof(uint64_t);
31216 + sdp->sd_inptrs = (sdp->sd_sb.sb_bsize - sizeof(struct gfs_indirect)) /
31217 + sizeof(uint64_t);
31218 + sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs_meta_header);
31219 + sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2;
31220 + sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1;
31221 + sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(uint64_t);
31223 + /* Compute maximum reservation required to add an entry to a directory */
31225 + hash_blocks = DIV_RU(sizeof(uint64_t) * (1 << GFS_DIR_MAX_DEPTH),
31229 + for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) {
31230 + tmp_blocks = DIV_RU(tmp_blocks, sdp->sd_inptrs);
31231 + ind_blocks += tmp_blocks;
31234 + leaf_blocks = 2 + GFS_DIR_MAX_DEPTH;
31236 + sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks;
31238 + sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode);
31239 + sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs;
31240 + for (x = 2;; x++) {
31243 + space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs;
31245 + m = do_div(d, sdp->sd_inptrs);
31247 + if (d != sdp->sd_heightsize[x - 1] || m)
31249 + sdp->sd_heightsize[x] = space;
31251 + sdp->sd_max_height = x;
31252 + GFS_ASSERT_SBD(sdp->sd_max_height <= GFS_MAX_META_HEIGHT, sdp,);
31254 + sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode);
31255 + sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs;
31256 + for (x = 2;; x++) {
31259 + space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs;
31261 + m = do_div(d, sdp->sd_inptrs);
31263 + if (d != sdp->sd_jheightsize[x - 1] || m)
31265 + sdp->sd_jheightsize[x] = space;
31267 + sdp->sd_max_jheight = x;
31268 + GFS_ASSERT_SBD(sdp->sd_max_jheight <= GFS_MAX_META_HEIGHT, sdp,);
31274 + * gfs_do_upgrade - upgrade a filesystem
31275 + * @sdp: The GFS superblock
31280 +gfs_do_upgrade(struct gfs_sbd *sdp, struct gfs_glock *sb_gl)
31282 + struct gfs_holder ji_gh, t_gh, j_gh;
31283 + struct gfs_log_header lh;
31284 + struct buffer_head *bh;
31288 + /* If format numbers match exactly, we're done. */
31290 + if (sdp->sd_sb.sb_fs_format == GFS_FORMAT_FS &&
31291 + sdp->sd_sb.sb_multihost_format == GFS_FORMAT_MULTI) {
31292 + printk("GFS: fsid=%s: no upgrade necessary\n",
31294 + sdp->sd_args.ar_upgrade = FALSE;
31298 + error = gfs_jindex_hold(sdp, &ji_gh);
31302 + error = gfs_glock_nq_init(sdp->sd_trans_gl,
31303 + LM_ST_EXCLUSIVE, GL_NOCACHE,
31306 + goto fail_ji_relse;
31308 + if (test_bit(SDF_ROFS, &sdp->sd_flags)) {
31309 + printk("GFS: fsid=%s: can't upgrade: read-only FS\n",
31312 + goto fail_gunlock_tr;
31315 + for (x = 0; x < sdp->sd_journals; x++) {
31316 + error = gfs_glock_nq_num(sdp,
31317 + sdp->sd_jindex[x].ji_addr,
31318 + &gfs_meta_glops, LM_ST_SHARED,
31319 + LM_FLAG_TRY | GL_NOCACHE, &j_gh);
31324 + case GLR_TRYFAILED:
31325 + printk("GFS: fsid=%s: journal %u is busy\n",
31326 + sdp->sd_fsname, x);
31330 + goto fail_gunlock_tr;
31333 + error = gfs_find_jhead(sdp, &sdp->sd_jindex[x],
31334 + j_gh.gh_gl, &lh);
31336 + gfs_glock_dq_uninit(&j_gh);
31339 + goto fail_gunlock_tr;
31341 + if (!(lh.lh_flags & GFS_LOG_HEAD_UNMOUNT) || lh.lh_last_dump) {
31342 + printk("GFS: fsid=%s: journal %u is busy\n",
31343 + sdp->sd_fsname, x);
31345 + goto fail_gunlock_tr;
31349 + /* We don't need to journal this change because we're changing
31350 + only one sector of one block. We definitely don't want to have
31351 + the journaling code running at this point. */
31353 + error = gfs_dread(sdp, GFS_SB_ADDR >> sdp->sd_fsb2bb_shift, sb_gl,
31354 + DIO_START | DIO_WAIT, &bh);
31356 + goto fail_gunlock_tr;
31358 + gfs_sb_in(&sdp->sd_sb, bh->b_data);
31360 + error = gfs_check_sb(sdp, &sdp->sd_sb, FALSE);
31361 + GFS_ASSERT_SBD(!error, sdp,);
31363 + sdp->sd_sb.sb_fs_format = GFS_FORMAT_FS;
31364 + sdp->sd_sb.sb_multihost_format = GFS_FORMAT_MULTI;
31366 + gfs_sb_out(&sdp->sd_sb, bh->b_data);
31368 + set_bit(GLF_DIRTY, &sb_gl->gl_flags);
31369 + error = gfs_dwrite(sdp, bh, DIO_DIRTY | DIO_START | DIO_WAIT);
31373 + gfs_glock_dq_uninit(&t_gh);
31375 + gfs_glock_dq_uninit(&ji_gh);
31378 + printk("GFS: fsid=%s: upgrade successful\n",
31380 + sdp->sd_args.ar_upgrade = FALSE;
31386 + gfs_glock_dq_uninit(&t_gh);
31389 + gfs_glock_dq_uninit(&ji_gh);
31392 + if (error == -EBUSY)
31393 + printk("GFS: fsid=%s: can't upgrade: the FS is still busy or contains dirty journals\n",
31396 + printk("GFS: fsid=%s: can't upgrade: %d\n",
31397 + sdp->sd_fsname, error);
31403 + * clear_journalsi - Clear all the journal index information (without locking)
31404 + * @sdp: The GFS superblock
31409 +clear_journalsi(struct gfs_sbd *sdp)
31411 + if (sdp->sd_jindex) {
31412 + kfree(sdp->sd_jindex);
31413 + sdp->sd_jindex = NULL;
31415 + sdp->sd_journals = 0;
31419 + * gfs_clear_journals - Clear all the journal index information
31420 + * @sdp: The GFS superblock
31425 +gfs_clear_journals(struct gfs_sbd *sdp)
31427 + down(&sdp->sd_jindex_lock);
31428 + clear_journalsi(sdp);
31429 + up(&sdp->sd_jindex_lock);
31433 + * gfs_ji_update - Update the journal index information
31434 + * @ip: The journal index inode
31436 + * Returns: 0 on success, error code otherwise
31440 +gfs_ji_update(struct gfs_inode *ip)
31442 + struct gfs_sbd *sdp = ip->i_sbd;
31443 + char buf[sizeof(struct gfs_jindex)];
31447 + GFS_ASSERT_SBD(!do_mod(ip->i_di.di_size, sizeof(struct gfs_jindex)),
31450 + clear_journalsi(sdp);
31452 + sdp->sd_jindex = gmalloc(ip->i_di.di_size);
31453 + memset(sdp->sd_jindex, 0, ip->i_di.di_size);
31455 + for (j = 0;; j++) {
31456 + error = gfs_internal_read(ip, buf,
31457 + j * sizeof(struct gfs_jindex),
31458 + sizeof(struct gfs_jindex));
31461 + if (error != sizeof(struct gfs_jindex)) {
31467 + gfs_jindex_in(sdp->sd_jindex + j, buf);
31470 + GFS_ASSERT_SBD(j * sizeof(struct gfs_jindex) == ip->i_di.di_size,
31473 + sdp->sd_journals = j;
31474 + sdp->sd_jiinode_vn = ip->i_gl->gl_vn;
31479 + clear_journalsi(sdp);
31484 + * gfs_jindex_hold - Grab a lock on the jindex
31485 + * @sdp: The GFS superblock
31486 + * @ji_gh: the holder for the jindex glock
31488 + * This is very similar to the gfs_rindex_hold() function, except that
31489 + * in general we hold the jindex lock for longer periods of time and
31490 + * we grab it far less frequently (in general) than the rgrp lock.
31492 + * Returns: 0 on success, error code otherwise
31496 +gfs_jindex_hold(struct gfs_sbd *sdp, struct gfs_holder *ji_gh)
31498 + struct gfs_inode *ip = sdp->sd_jiinode;
31499 + struct gfs_glock *gl = ip->i_gl;
31502 + error = gfs_glock_nq_init(gl, LM_ST_SHARED, 0, ji_gh);
31506 + if (sdp->sd_jiinode_vn != gl->gl_vn) {
31507 + down(&sdp->sd_jindex_lock);
31508 + if (sdp->sd_jiinode_vn != gl->gl_vn)
31509 + error = gfs_ji_update(ip);
31510 + up(&sdp->sd_jindex_lock);
31514 + gfs_glock_dq_uninit(ji_gh);
31520 + * gfs_get_jiinode - Read in the jindex inode for the superblock
31521 + * @sdp: The GFS superblock
31523 + * Returns: 0 on success, error code otherwise
31527 +gfs_get_jiinode(struct gfs_sbd *sdp)
31529 + struct gfs_holder ji_gh;
31532 + error = gfs_glock_nq_num(sdp,
31533 + sdp->sd_sb.sb_jindex_di.no_formal_ino,
31534 + &gfs_inode_glops,
31535 + LM_ST_SHARED, GL_LOCAL_EXCL,
31540 + error = gfs_inode_get(ji_gh.gh_gl, &sdp->sd_sb.sb_jindex_di,
31541 + CREATE, &sdp->sd_jiinode);
31543 + sdp->sd_jiinode_vn = ji_gh.gh_gl->gl_vn - 1;
31544 + set_bit(GLF_STICKY, &ji_gh.gh_gl->gl_flags);
31547 + gfs_glock_dq_uninit(&ji_gh);
31553 + * gfs_get_riinode - Read in the rindex inode for the superblock
31554 + * @sdp: The GFS superblock
31556 + * Returns: 0 on success, error code otherwise
31560 +gfs_get_riinode(struct gfs_sbd *sdp)
31562 + struct gfs_holder ri_gh;
31565 + error = gfs_glock_nq_num(sdp,
31566 + sdp->sd_sb.sb_rindex_di.no_formal_ino,
31567 + &gfs_inode_glops,
31568 + LM_ST_SHARED, GL_LOCAL_EXCL,
31573 + error = gfs_inode_get(ri_gh.gh_gl, &sdp->sd_sb.sb_rindex_di,
31574 + CREATE, &sdp->sd_riinode);
31576 + sdp->sd_riinode_vn = ri_gh.gh_gl->gl_vn - 1;
31577 + set_bit(GLF_STICKY, &ri_gh.gh_gl->gl_flags);
31580 + gfs_glock_dq_uninit(&ri_gh);
31586 + * gfs_get_rootinode - Read in the root inode
31587 + * @sdp: The GFS superblock
31589 + * Returns: 0 on success, error code otherwise
31593 +gfs_get_rootinode(struct gfs_sbd *sdp)
31595 + struct gfs_holder i_gh;
31598 + error = gfs_glock_nq_num(sdp,
31599 + sdp->sd_sb.sb_root_di.no_formal_ino,
31600 + &gfs_inode_glops,
31601 + LM_ST_SHARED, GL_LOCAL_EXCL,
31606 + error = gfs_inode_get(i_gh.gh_gl, &sdp->sd_sb.sb_root_di,
31607 + CREATE, &sdp->sd_rooti);
31609 + gfs_glock_dq_uninit(&i_gh);
31615 + * gfs_get_qinode - Read in the quota inode
31616 + * @sdp: The GFS superblock
31618 + * Returns: 0 on success, error code otherwise
31622 +gfs_get_qinode(struct gfs_sbd *sdp)
31624 + struct gfs_holder i_gh;
31627 + if (!sdp->sd_sb.sb_quota_di.no_formal_ino) {
31628 + error = gfs_alloc_qinode(sdp);
31633 + error = gfs_glock_nq_num(sdp,
31634 + sdp->sd_sb.sb_quota_di.no_formal_ino,
31635 + &gfs_inode_glops,
31636 + LM_ST_SHARED, GL_LOCAL_EXCL,
31641 + error = gfs_inode_get(i_gh.gh_gl, &sdp->sd_sb.sb_quota_di,
31642 + CREATE, &sdp->sd_qinode);
31644 + gfs_glock_dq_uninit(&i_gh);
31650 + * gfs_get_linode - Read in the license inode
31651 + * @sdp: The GFS superblock
31653 + * Returns: 0 on success, error code otherwise
31657 +gfs_get_linode(struct gfs_sbd *sdp)
31659 + struct gfs_holder i_gh;
31662 + if (!sdp->sd_sb.sb_license_di.no_formal_ino) {
31663 + error = gfs_alloc_linode(sdp);
31668 + error = gfs_glock_nq_num(sdp,
31669 + sdp->sd_sb.sb_license_di.no_formal_ino,
31670 + &gfs_inode_glops,
31671 + LM_ST_SHARED, GL_LOCAL_EXCL,
31676 + error = gfs_inode_get(i_gh.gh_gl, &sdp->sd_sb.sb_license_di,
31677 + CREATE, &sdp->sd_linode);
31679 + gfs_glock_dq_uninit(&i_gh);
31685 + * gfs_make_fs_rw - Turn a RO FS into a RW one
31686 + * @sdp: the filesystem
31688 + * Returns: 0 on success, -EXXX on failure
31692 +gfs_make_fs_rw(struct gfs_sbd *sdp)
31694 + struct gfs_glock *j_gl = sdp->sd_journal_gh.gh_gl;
31695 + struct gfs_holder t_gh;
31696 + struct gfs_log_header head;
31699 + error = gfs_glock_nq_init(sdp->sd_trans_gl,
31701 + GL_LOCAL_EXCL | GL_EXACT,
31706 + j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA);
31708 + error = gfs_find_jhead(sdp, &sdp->sd_jdesc, j_gl, &head);
31712 + GFS_ASSERT_SBD(head.lh_flags & GFS_LOG_HEAD_UNMOUNT, sdp,);
31714 + /* Initialize some head of the log stuff */
31715 + sdp->sd_sequence = head.lh_sequence;
31716 + sdp->sd_log_head = head.lh_first + 1;
31718 + error = gfs_recover_dump(sdp);
31722 + set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
31723 + clear_bit(SDF_ROFS, &sdp->sd_flags);
31725 + set_bit(GLF_DIRTY, &j_gl->gl_flags);
31726 + gfs_log_dump(sdp, TRUE);
31728 + gfs_glock_dq_uninit(&t_gh);
31733 + t_gh.gh_flags |= GL_NOCACHE;
31734 + gfs_glock_dq_uninit(&t_gh);
31740 + * gfs_make_fs_ro - Turn a RW FS into a RO one
31741 + * @sdp: the filesystem
31743 + * Returns: 0 on success, -EXXX on failure
31747 +gfs_make_fs_ro(struct gfs_sbd *sdp)
31749 + struct gfs_holder t_gh;
31752 + error = gfs_glock_nq_init(sdp->sd_trans_gl,
31754 + GL_LOCAL_EXCL | GL_EXACT | GL_NOCACHE,
31759 + gfs_sync_meta(sdp);
31760 + gfs_log_dump(sdp, TRUE);
31762 + error = gfs_log_shutdown(sdp);
31764 + gfs_io_error(sdp);
31766 + set_bit(SDF_ROFS, &sdp->sd_flags);
31767 + clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
31769 + gfs_glock_dq_uninit(&t_gh);
31771 + gfs_unlinked_cleanup(sdp);
31772 + gfs_quota_cleanup(sdp);
31778 + * stat_gfs_async - Stat a filesystem using asynchronous locking
31779 + * @sdp: the filesystem
31780 + * @usage: the usage info that will be returned
31781 + * @interruptible: TRUE if we should look for signals.
31783 + * Any error (other than a signal) will cause this routine to fall back
31784 + * to the synchronous version.
31786 + * This really shouldn't busy wait like this.
31788 + * Returns: 0 on success, -EXXX on failure
31792 +stat_gfs_async(struct gfs_sbd *sdp, struct gfs_usage *usage, int interruptible)
31794 + struct gfs_rgrpd *rgd_next = gfs_rgrpd_get_first(sdp), *rgd;
31795 + struct gfs_holder *gha, *gh;
31796 + struct gfs_rgrp_lvb *rb;
31797 + unsigned int slots = sdp->sd_tune.gt_statfs_slots;
31800 + int error = 0, err;
31802 + gha = gmalloc(slots * sizeof(struct gfs_holder));
31803 + memset(gha, 0, slots * sizeof(struct gfs_holder));
31808 + for (x = 0; x < slots; x++) {
31811 + if (gh->gh_gl && gfs_glock_poll(gh)) {
31812 + err = gfs_glock_wait(gh);
31814 + gfs_holder_uninit(gh);
31817 + rgd = gl2rgd(gh->gh_gl);
31819 + rb = (struct gfs_rgrp_lvb *)rgd->rd_gl->gl_lvb;
31820 + if (gfs32_to_cpu(rb->rb_magic) == GFS_MAGIC &&
31821 + !test_bit(GLF_LVB_INVALID, &rgd->rd_gl->gl_flags)) {
31822 + usage->gu_total_blocks += rgd->rd_ri.ri_data;
31823 + usage->gu_free += gfs32_to_cpu(rb->rb_free);
31824 + usage->gu_used_dinode += gfs32_to_cpu(rb->rb_useddi);
31825 + usage->gu_free_dinode += gfs32_to_cpu(rb->rb_freedi);
31826 + usage->gu_used_meta += gfs32_to_cpu(rb->rb_usedmeta);
31827 + usage->gu_free_meta += gfs32_to_cpu(rb->rb_freemeta);
31831 + gfs_glock_dq_uninit(gh);
31837 + else if (rgd_next && !error) {
31838 + gfs_glock_nq_init(rgd_next->rd_gl,
31840 + GL_LOCAL_EXCL | GL_SKIP | GL_ASYNC,
31842 + rgd_next = gfs_rgrpd_get_next(rgd_next);
31846 + if (interruptible && signal_pending(current))
31847 + error = -ERESTARTSYS;
31862 + * gfs_stat_gfs - Do a statfs
31863 + * @sdp: the filesystem
31864 + * @usage: the usage structure
31865 + * @interruptible: Stop if there is a signal pending
31867 + * Returns: 0 on success, -EXXX on failure
31871 +gfs_stat_gfs(struct gfs_sbd *sdp, struct gfs_usage *usage, int interruptible)
31873 + struct gfs_holder ri_gh, rgd_gh;
31874 + struct gfs_rgrpd *rgd;
31875 + struct gfs_rgrp_lvb *rb;
31878 + memset(usage, 0, sizeof(struct gfs_usage));
31879 + usage->gu_block_size = sdp->sd_sb.sb_bsize;
31881 + error = gfs_rindex_hold(sdp, &ri_gh);
31885 + if (GFS_ASYNC_LM(sdp)) {
31886 + error = stat_gfs_async(sdp, usage, interruptible);
31887 + if (!error || error == -ERESTARTSYS)
31890 + memset(usage, 0, sizeof(struct gfs_usage));
31891 + usage->gu_block_size = sdp->sd_sb.sb_bsize;
31894 + for (rgd = gfs_rgrpd_get_first(sdp);
31896 + rgd = gfs_rgrpd_get_next(rgd)) {
31898 + error = gfs_glock_nq_init(rgd->rd_gl,
31900 + GL_LOCAL_EXCL | GL_SKIP,
31905 + rb = (struct gfs_rgrp_lvb *)rgd->rd_gl->gl_lvb;
31906 + if (gfs32_to_cpu(rb->rb_magic) == GFS_MAGIC &&
31907 + !test_bit(GLF_LVB_INVALID, &rgd->rd_gl->gl_flags)) {
31908 + usage->gu_total_blocks += rgd->rd_ri.ri_data;
31909 + usage->gu_free += gfs32_to_cpu(rb->rb_free);
31910 + usage->gu_used_dinode += gfs32_to_cpu(rb->rb_useddi);
31911 + usage->gu_free_dinode += gfs32_to_cpu(rb->rb_freedi);
31912 + usage->gu_used_meta += gfs32_to_cpu(rb->rb_usedmeta);
31913 + usage->gu_free_meta += gfs32_to_cpu(rb->rb_freemeta);
31915 + gfs_glock_dq_uninit(&rgd_gh);
31919 + gfs_glock_dq_uninit(&rgd_gh);
31921 + error = gfs_rgrp_lvb_init(rgd);
31927 + if (interruptible && signal_pending(current)) {
31928 + error = -ERESTARTSYS;
31934 + gfs_glock_dq_uninit(&ri_gh);
31940 + * gfs_lock_fs_check_clean - Stop all writes to the FS and check that all journals are clean
31941 + * @sdp: the file system
31942 + * @state: the state to put the transaction lock into
31943 + * @t_gh: the hold on the transaction lock
31945 + * Returns: 0 on success, -EXXX on error
31949 +gfs_lock_fs_check_clean(struct gfs_sbd *sdp, unsigned int state,
31950 + struct gfs_holder *t_gh)
31952 + struct gfs_holder ji_gh, cl_gh;
31953 + struct gfs_log_header lh;
31957 + error = gfs_jindex_hold(sdp, &ji_gh);
31961 + error = gfs_glock_nq_num(sdp,
31962 + GFS_CRAP_LOCK, &gfs_meta_glops,
31963 + LM_ST_SHARED, GL_NOCACHE,
31968 + error = gfs_glock_nq_init(sdp->sd_trans_gl, state,
31969 + LM_FLAG_PRIORITY | GL_EXACT | GL_NOCACHE,
31972 + goto fail_gunlock_craplock;
31974 + for (x = 0; x < sdp->sd_journals; x++) {
31975 + error = gfs_find_jhead(sdp, &sdp->sd_jindex[x],
31976 + cl_gh.gh_gl, &lh);
31978 + goto fail_gunlock_trans;
31980 + if (!(lh.lh_flags & GFS_LOG_HEAD_UNMOUNT)) {
31982 + goto fail_gunlock_trans;
31986 + gfs_glock_dq_uninit(&cl_gh);
31987 + gfs_glock_dq_uninit(&ji_gh);
31991 + fail_gunlock_trans:
31992 + gfs_glock_dq_uninit(t_gh);
31994 + fail_gunlock_craplock:
31995 + gfs_glock_dq_uninit(&cl_gh);
31998 + gfs_glock_dq_uninit(&ji_gh);
32004 + * gfs_freeze_fs - freezes the file system
32005 + * @sdp: the file system
32007 + * This function flushes data and meta data for all machines by
32008 + * acquiring the transaction log exclusively. All journals are
32009 + * ensured to be in a clean state as well.
32011 + * Returns: 0 on success, -EXXX on error
32015 +gfs_freeze_fs(struct gfs_sbd *sdp)
32019 + down(&sdp->sd_freeze_lock);
32021 + if (!sdp->sd_freeze_count++) {
32022 + error = gfs_lock_fs_check_clean(sdp, LM_ST_DEFERRED,
32023 + &sdp->sd_freeze_gh);
32025 + sdp->sd_freeze_count--;
32027 + sdp->sd_freeze_gh.gh_owner = NULL;
32030 + up(&sdp->sd_freeze_lock);
32036 + * gfs_unfreeze_fs - unfreezes the file system
32037 + * @sdp: the file system
32039 + * This function allows the file system to proceed by unlocking
32040 + * the exclusively held transaction lock. Other GFS nodes are
32041 + * now free to acquire the lock shared and go on with their lives.
32046 +gfs_unfreeze_fs(struct gfs_sbd *sdp)
32048 + down(&sdp->sd_freeze_lock);
32050 + if (sdp->sd_freeze_count && !--sdp->sd_freeze_count)
32051 + gfs_glock_dq_uninit(&sdp->sd_freeze_gh);
32053 + up(&sdp->sd_freeze_lock);
32055 diff -urN linux-orig/fs/gfs/super.h linux-patched/fs/gfs/super.h
32056 --- linux-orig/fs/gfs/super.h 1969-12-31 18:00:00.000000000 -0600
32057 +++ linux-patched/fs/gfs/super.h 2004-06-30 13:27:49.359707419 -0500
32059 +/******************************************************************************
32060 +*******************************************************************************
32062 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
32063 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
32065 +** This copyrighted material is made available to anyone wishing to use,
32066 +** modify, copy, or redistribute it subject to the terms and conditions
32067 +** of the GNU General Public License v.2.
32069 +*******************************************************************************
32070 +******************************************************************************/
32072 +#ifndef __SUPER_DOT_H__
32073 +#define __SUPER_DOT_H__
32075 +void gfs_init_tune_data(struct gfs_sbd *sdp);
32077 +int gfs_check_sb(struct gfs_sbd *sdp, struct gfs_sb *sb, int silent);
32078 +int gfs_read_sb(struct gfs_sbd *sdp, struct gfs_glock *gl, int silent);
32079 +int gfs_do_upgrade(struct gfs_sbd *sdp, struct gfs_glock *gl_sb);
32081 +static __inline__ unsigned int
32082 +gfs_num_journals(struct gfs_sbd *sdp)
32084 + unsigned int num;
32085 + down(&sdp->sd_jindex_lock);
32086 + num = sdp->sd_journals;
32087 + up(&sdp->sd_jindex_lock);
32091 +int gfs_jindex_hold(struct gfs_sbd *sdp, struct gfs_holder *ji_gh);
32092 +void gfs_clear_journals(struct gfs_sbd *sdp);
32094 +int gfs_get_jiinode(struct gfs_sbd *sdp);
32095 +int gfs_get_riinode(struct gfs_sbd *sdp);
32096 +int gfs_get_rootinode(struct gfs_sbd *sdp);
32097 +int gfs_get_qinode(struct gfs_sbd *sdp);
32098 +int gfs_get_linode(struct gfs_sbd *sdp);
32100 +int gfs_make_fs_rw(struct gfs_sbd *sdp);
32101 +int gfs_make_fs_ro(struct gfs_sbd *sdp);
32103 +int gfs_stat_gfs(struct gfs_sbd *sdp, struct gfs_usage *usage,
32104 + int interruptible);
32106 +int gfs_lock_fs_check_clean(struct gfs_sbd *sdp, unsigned int state,
32107 + struct gfs_holder *t_gh);
32108 +int gfs_freeze_fs(struct gfs_sbd *sdp);
32109 +void gfs_unfreeze_fs(struct gfs_sbd *sdp);
32111 +#endif /* __SUPER_DOT_H__ */
32112 diff -urN linux-orig/fs/gfs/trans.c linux-patched/fs/gfs/trans.c
32113 --- linux-orig/fs/gfs/trans.c 1969-12-31 18:00:00.000000000 -0600
32114 +++ linux-patched/fs/gfs/trans.c 2004-06-30 13:27:49.359707419 -0500
32116 +/******************************************************************************
32117 +*******************************************************************************
32119 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
32120 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
32122 +** This copyrighted material is made available to anyone wishing to use,
32123 +** modify, copy, or redistribute it subject to the terms and conditions
32124 +** of the GNU General Public License v.2.
32126 +*******************************************************************************
32127 +******************************************************************************/
32129 +#include <linux/sched.h>
32130 +#include <linux/slab.h>
32131 +#include <linux/smp_lock.h>
32132 +#include <linux/spinlock.h>
32133 +#include <asm/semaphore.h>
32134 +#include <linux/completion.h>
32135 +#include <linux/buffer_head.h>
32139 +#include "glock.h"
32142 +#include "quota.h"
32143 +#include "trans.h"
32144 +#include "unlinked.h"
32147 + * gfs_trans_print - Print a transaction to the console
32148 + * @sdp: the filesystem
32149 + * @tr: The GFS transaction
32150 + * @where: Situation of transaction
32155 +gfs_trans_print(struct gfs_sbd *sdp, struct gfs_trans *tr, unsigned int where)
32157 + struct gfs_log_element *le;
32158 + struct list_head *tmp, *head;
32159 + unsigned int mblks = 0, eblks = 0;
32161 + LO_TRANS_SIZE(sdp, tr, &mblks, &eblks, NULL, NULL);
32163 + printk("Transaction: (%s, %u)\n", tr->tr_file, tr->tr_line);
32164 + printk(" tr_mblks_asked = %u, tr_eblks_asked = %u, tr_seg_reserved = %u\n",
32165 + tr->tr_mblks_asked, tr->tr_eblks_asked, tr->tr_seg_reserved);
32166 + printk(" mblks = %u, eblks = %u\n", mblks, eblks);
32167 + printk(" tr_flags = 0x%.8X\n", tr->tr_flags);
32169 + for (head = &tr->tr_elements, tmp = head->next;
32171 + tmp = tmp->next) {
32172 + le = list_entry(tmp, struct gfs_log_element, le_list);
32173 + LO_PRINT(sdp, le, where);
32176 + printk("End Trans\n");
32180 + * gfs_trans_begin_i - Prepare to start a transaction
32181 + * @sdp: The GFS superblock
32182 + * @meta_blocks: Reserve this many metadata blocks in the log
32183 + * @extra_blocks: Number of non-metadata blocks to reserve
32185 + * Allocate the struct gfs_trans struct. Do in-place and
32186 + * log reservations.
32188 + * Returns: 0 on success, -EXXX on failure
32192 +gfs_trans_begin_i(struct gfs_sbd *sdp,
32193 + unsigned int meta_blocks, unsigned int extra_blocks,
32194 + char *file, unsigned int line)
32196 + struct gfs_trans *tr;
32197 + unsigned int blocks;
32200 + tr = gmalloc(sizeof(struct gfs_trans));
32201 + memset(tr, 0, sizeof(struct gfs_trans));
32203 + INIT_LIST_HEAD(&tr->tr_elements);
32204 + INIT_LIST_HEAD(&tr->tr_free_bufs);
32205 + INIT_LIST_HEAD(&tr->tr_free_bmem);
32206 + INIT_LIST_HEAD(&tr->tr_bufs);
32207 + INIT_LIST_HEAD(&tr->tr_ail_bufs);
32209 + tr->tr_file = file;
32210 + tr->tr_line = line;
32211 + tr->tr_t_gh = gfs_holder_get(sdp->sd_trans_gl, LM_ST_SHARED, 0);
32213 + error = gfs_glock_nq(tr->tr_t_gh);
32217 + if (test_bit(SDF_ROFS, &sdp->sd_flags)) {
32218 + tr->tr_t_gh->gh_flags |= GL_NOCACHE;
32220 + goto fail_gunlock;
32223 + /* Do log reservation */
32225 + tr->tr_mblks_asked = meta_blocks;
32226 + tr->tr_eblks_asked = extra_blocks;
32230 + blocks += gfs_struct2blk(sdp, meta_blocks,
32231 + sizeof(struct gfs_block_tag)) +
32233 + blocks += extra_blocks;
32234 + tr->tr_seg_reserved = gfs_blk2seg(sdp, blocks);
32236 + error = gfs_log_reserve(sdp, tr->tr_seg_reserved, FALSE);
32238 + goto fail_gunlock;
32240 + GFS_ASSERT_SBD(!current_transaction, sdp,);
32241 + current_transaction = tr;
32246 + gfs_glock_dq(tr->tr_t_gh);
32249 + gfs_holder_put(tr->tr_t_gh);
32256 + * gfs_trans_end - End a transaction
32257 + * @sdp: The GFS superblock
32259 + * If buffers were actually added to the transaction,
32264 +gfs_trans_end(struct gfs_sbd *sdp)
32266 + struct gfs_trans *tr;
32267 + struct gfs_holder *t_gh;
32268 + struct list_head *tmp, *head;
32269 + struct gfs_log_element *le;
32271 + tr = current_transaction;
32272 + GFS_ASSERT_SBD(tr, sdp,);
32273 + current_transaction = NULL;
32275 + t_gh = tr->tr_t_gh;
32276 + tr->tr_t_gh = NULL;
32278 + if (list_empty(&tr->tr_elements)) {
32279 + gfs_log_release(sdp, tr->tr_seg_reserved);
32282 + gfs_glock_dq(t_gh);
32283 + gfs_holder_put(t_gh);
32288 + for (head = &tr->tr_elements, tmp = head->next;
32290 + tmp = tmp->next) {
32291 + le = list_entry(tmp, struct gfs_log_element, le_list);
32292 + LO_TRANS_END(sdp, le);
32295 + gfs_log_commit(sdp, tr);
32297 + gfs_glock_dq(t_gh);
32298 + gfs_holder_put(t_gh);
32300 + if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS)
32301 + gfs_log_flush(sdp);
32305 + * gfs_trans_add_gl - Add a glock to a transaction
32308 + * Add the given glock to this process's transaction
32312 +gfs_trans_add_gl(struct gfs_glock *gl)
32314 + if (!gl->gl_new_le.le_trans) {
32315 + GFS_ASSERT_GLOCK(gfs_glock_is_locked_by_me(gl) &&
32316 + gfs_glock_is_held_excl(gl), gl,);
32317 + gfs_glock_hold(gl); /* Released in glock_trans_end() */
32319 + set_bit(GLF_DIRTY, &gl->gl_flags);
32321 + LO_ADD(gl->gl_sbd, &gl->gl_new_le);
32322 + gl->gl_new_le.le_trans->tr_num_gl++;
32327 + * gfs_trans_add_bh - Add a buffer to the current transaction
32328 + * @gl: the glock the buffer belongs to
32329 + * @bh: The buffer to add
32331 + * Add a buffer to the current transaction. The glock for the buffer
32332 + * should be held. This pins the buffer as well.
32334 + * Call this as many times as you want during transaction formation.
32335 + * It only does its work once.
32340 +gfs_trans_add_bh(struct gfs_glock *gl, struct buffer_head *bh)
32342 + struct gfs_sbd *sdp = gl->gl_sbd;
32343 + struct gfs_bufdata *bd;
32347 + gfs_attach_bufdata(bh, gl);
32351 + if (bd->bd_new_le.le_trans)
32354 + gfs_meta_check(sdp, bh);
32356 + GFS_ASSERT_GLOCK(bd->bd_gl == gl, gl,);
32358 + if (!gl->gl_new_le.le_trans)
32359 + gfs_trans_add_gl(gl);
32361 + gfs_dpin(sdp, bh);
32363 + LO_ADD(sdp, &bd->bd_new_le);
32364 + bd->bd_new_le.le_trans->tr_num_buf++;
32368 + * gfs_trans_add_unlinked - Add an unlinked/dealloced tag to the current transaction
32369 + * @sdp: the filesystem
32370 + * @type: the type of entry
32371 + * @inum: the inode number
32373 + * Returns: the unlinked structure
32376 +struct gfs_unlinked *
32377 +gfs_trans_add_unlinked(struct gfs_sbd *sdp, unsigned int type,
32378 + struct gfs_inum *inum)
32380 + struct gfs_unlinked *ul;
32382 + ul = gfs_unlinked_get(sdp, inum, CREATE);
32384 + LO_ADD(sdp, &ul->ul_new_le);
32387 + case GFS_LOG_DESC_IUL:
32388 + set_bit(ULF_NEW_UL, &ul->ul_flags);
32389 + ul->ul_new_le.le_trans->tr_num_iul++;
32391 + case GFS_LOG_DESC_IDA:
32392 + clear_bit(ULF_NEW_UL, &ul->ul_flags);
32393 + ul->ul_new_le.le_trans->tr_num_ida++;
32396 + GFS_ASSERT_SBD(FALSE, sdp,);
32404 + * gfs_trans_add_quota - Add quota changes to a transaction
32405 + * @sdp: the filesystem
32406 + * @change: The number of blocks allocated (positive) or freed (negative)
32407 + * @uid: the user ID doing the change
32408 + * @gid: the group ID doing the change
32413 +gfs_trans_add_quota(struct gfs_sbd *sdp, int64_t change,
32414 + uint32_t uid, uint32_t gid)
32416 + struct gfs_trans *tr;
32417 + struct list_head *tmp, *head, *next;
32418 + struct gfs_log_element *le;
32419 + struct gfs_quota_le *ql;
32420 + int found_uid, found_gid;
32423 + if (!sdp->sd_tune.gt_quota_account)
32426 + GFS_ASSERT_SBD(change, sdp,);
32428 + found_uid = (uid == NO_QUOTA_CHANGE);
32429 + found_gid = (gid == NO_QUOTA_CHANGE);
32431 + GFS_ASSERT_SBD(!found_uid || !found_gid, sdp,);
32433 + tr = current_transaction;
32434 + GFS_ASSERT_SBD(tr, sdp,);
32436 + for (head = &tr->tr_elements, tmp = head->next, next = tmp->next;
32438 + tmp = next, next = next->next) {
32439 + le = list_entry(tmp, struct gfs_log_element, le_list);
32440 + if (le->le_ops != &gfs_quota_lops)
32443 + ql = container_of(le, struct gfs_quota_le, ql_le);
32445 + if (test_bit(QDF_USER, &ql->ql_data->qd_flags)) {
32446 + if (ql->ql_data->qd_id == uid) {
32447 + ql->ql_change += change;
32449 + spin_lock(&sdp->sd_quota_lock);
32450 + ql->ql_data->qd_change_new += change;
32451 + spin_unlock(&sdp->sd_quota_lock);
32453 + list_del(&le->le_list);
32455 + if (ql->ql_change)
32456 + list_add(&le->le_list,
32457 + &tr->tr_elements);
32459 + gfs_quota_put(sdp, ql->ql_data);
32464 + GFS_ASSERT_SBD(!found_uid, sdp,);
32465 + found_uid = TRUE;
32470 + if (ql->ql_data->qd_id == gid) {
32471 + ql->ql_change += change;
32473 + spin_lock(&sdp->sd_quota_lock);
32474 + ql->ql_data->qd_change_new += change;
32475 + spin_unlock(&sdp->sd_quota_lock);
32477 + list_del(&le->le_list);
32479 + if (ql->ql_change)
32480 + list_add(&le->le_list,
32481 + &tr->tr_elements);
32483 + gfs_quota_put(sdp, ql->ql_data);
32488 + GFS_ASSERT_SBD(!found_gid, sdp,);
32489 + found_gid = TRUE;
32496 + while (!found_uid || !found_gid) {
32497 + ql = gmalloc(sizeof(struct gfs_quota_le));
32498 + memset(ql, 0, sizeof(struct gfs_quota_le));
32500 + INIT_LE(&ql->ql_le, &gfs_quota_lops);
32503 + error = gfs_quota_get(sdp, FALSE, gid,
32506 + found_gid = TRUE;
32508 + error = gfs_quota_get(sdp, TRUE, uid,
32511 + found_uid = TRUE;
32514 + GFS_ASSERT_SBD(!error && ql->ql_data, sdp,);
32516 + ql->ql_change = change;
32518 + spin_lock(&sdp->sd_quota_lock);
32519 + ql->ql_data->qd_change_new += change;
32520 + spin_unlock(&sdp->sd_quota_lock);
32522 + LO_ADD(sdp, &ql->ql_le);
32526 diff -urN linux-orig/fs/gfs/trans.h linux-patched/fs/gfs/trans.h
32527 --- linux-orig/fs/gfs/trans.h 1969-12-31 18:00:00.000000000 -0600
32528 +++ linux-patched/fs/gfs/trans.h 2004-06-30 13:27:49.359707419 -0500
32530 +/******************************************************************************
32531 +*******************************************************************************
32533 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
32534 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
32536 +** This copyrighted material is made available to anyone wishing to use,
32537 +** modify, copy, or redistribute it subject to the terms and conditions
32538 +** of the GNU General Public License v.2.
32540 +*******************************************************************************
32541 +******************************************************************************/
32543 +#ifndef __TRANS_DOT_H__
32544 +#define __TRANS_DOT_H__
32546 +#define TRANS_IS_NEW (53)
32547 +#define TRANS_IS_INCORE (54)
32548 +void gfs_trans_print(struct gfs_sbd *sdp, struct gfs_trans *tr,
32549 + unsigned int where);
32551 +int gfs_trans_begin_i(struct gfs_sbd *sdp,
32552 + unsigned int meta_blocks, unsigned int extra_blocks,
32553 + char *file, unsigned int line);
32554 +#define gfs_trans_begin(sdp, mb, eb) \
32555 +gfs_trans_begin_i((sdp), (mb), (eb), __FILE__, __LINE__)
32557 +void gfs_trans_end(struct gfs_sbd *sdp);
32559 +void gfs_trans_add_gl(struct gfs_glock *gl);
32560 +void gfs_trans_add_bh(struct gfs_glock *gl, struct buffer_head *bh);
32561 +struct gfs_unlinked *gfs_trans_add_unlinked(struct gfs_sbd *sdp, unsigned int type,
32562 + struct gfs_inum *inum);
32563 +void gfs_trans_add_quota(struct gfs_sbd *sdp, int64_t change, uint32_t uid,
32566 +#endif /* __TRANS_DOT_H__ */
32567 diff -urN linux-orig/fs/gfs/unlinked.c linux-patched/fs/gfs/unlinked.c
32568 --- linux-orig/fs/gfs/unlinked.c 1969-12-31 18:00:00.000000000 -0600
32569 +++ linux-patched/fs/gfs/unlinked.c 2004-06-30 13:27:49.360707187 -0500
32571 +/******************************************************************************
32572 +*******************************************************************************
32574 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
32575 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
32577 +** This copyrighted material is made available to anyone wishing to use,
32578 +** modify, copy, or redistribute it subject to the terms and conditions
32579 +** of the GNU General Public License v.2.
32581 +*******************************************************************************
32582 +******************************************************************************/
32584 +#include <linux/sched.h>
32585 +#include <linux/slab.h>
32586 +#include <linux/smp_lock.h>
32587 +#include <linux/spinlock.h>
32588 +#include <asm/semaphore.h>
32589 +#include <linux/completion.h>
32590 +#include <linux/buffer_head.h>
32593 +#include "inode.h"
32596 +#include "unlinked.h"
32599 + * gfs_unlinked_get - Get a structure to represent an unlinked inode
32600 + * @sdp: the filesystem
32601 + * @inum: the inode that's unlinked
32602 + * @create: if TRUE, create the structure, otherwise return NULL
32604 + * Returns: the structure, or NULL
32607 +struct gfs_unlinked *
32608 +gfs_unlinked_get(struct gfs_sbd *sdp, struct gfs_inum *inum, int create)
32610 + struct gfs_unlinked *ul = NULL, *new_ul = NULL;
32611 + struct list_head *tmp, *head;
32614 + spin_lock(&sdp->sd_unlinked_lock);
32616 + for (head = &sdp->sd_unlinked_list, tmp = head->next;
32618 + tmp = tmp->next) {
32619 + ul = list_entry(tmp, struct gfs_unlinked, ul_list);
32620 + if (gfs_inum_equal(&ul->ul_inum, inum)) {
32629 + if (!ul && new_ul) {
32631 + list_add(&ul->ul_list, &sdp->sd_unlinked_list);
32635 + spin_unlock(&sdp->sd_unlinked_lock);
32637 + if (ul || !create) {
32643 + new_ul = gmalloc(sizeof(struct gfs_unlinked));
32644 + memset(new_ul, 0, sizeof(struct gfs_unlinked));
32646 + new_ul->ul_count = 1;
32647 + new_ul->ul_inum = *inum;
32649 + INIT_LE(&new_ul->ul_new_le, &gfs_unlinked_lops);
32650 + INIT_LE(&new_ul->ul_incore_le, &gfs_unlinked_lops);
32651 + INIT_LE(&new_ul->ul_ondisk_le, &gfs_unlinked_lops);
32656 + * gfs_unlinked_hold - increment the usage count on a struct gfs_unlinked
32657 + * @sdp: the filesystem
32658 + * @ul: the structure
32663 +gfs_unlinked_hold(struct gfs_sbd *sdp, struct gfs_unlinked *ul)
32665 + spin_lock(&sdp->sd_unlinked_lock);
32667 + spin_unlock(&sdp->sd_unlinked_lock);
32671 + * gfs_unlinked_put - decrement the usage count on a struct gfs_unlinked
32672 + * @sdp: the filesystem
32673 + * @ul: the structure
32675 + * Free the structure if its reference count hits zero.
32680 +gfs_unlinked_put(struct gfs_sbd *sdp, struct gfs_unlinked *ul)
32682 + spin_lock(&sdp->sd_unlinked_lock);
32684 + GFS_ASSERT_SBD(ul->ul_count, sdp,);
32687 + if (!ul->ul_count) {
32688 + GFS_ASSERT_SBD(!test_bit(ULF_IC_LIST, &ul->ul_flags) &&
32689 + !test_bit(ULF_OD_LIST, &ul->ul_flags) &&
32690 + !test_bit(ULF_LOCK, &ul->ul_flags),
32692 + list_del(&ul->ul_list);
32693 + spin_unlock(&sdp->sd_unlinked_lock);
32696 + spin_unlock(&sdp->sd_unlinked_lock);
32700 + * unlinked_find - Find an inode to try to deallocate
32701 + * @sdp: the filesystem
32703 + * The returned structure is locked and needs to be unlocked
32704 + * with gfs_unlinked_unlock().
32706 + * Returns: An unlinked structure, or NULL
32709 +struct gfs_unlinked *
32710 +unlinked_find(struct gfs_sbd *sdp)
32712 + struct list_head *tmp, *head;
32713 + struct gfs_unlinked *ul = NULL;
32715 + if (test_bit(SDF_ROFS, &sdp->sd_flags))
32718 + gfs_log_lock(sdp);
32719 + spin_lock(&sdp->sd_unlinked_lock);
32721 + if (!atomic_read(&sdp->sd_unlinked_ic_count))
32724 + for (head = &sdp->sd_unlinked_list, tmp = head->next;
32726 + tmp = tmp->next) {
32727 + ul = list_entry(tmp, struct gfs_unlinked, ul_list);
32729 + if (test_bit(ULF_LOCK, &ul->ul_flags))
32731 + if (!test_bit(ULF_IC_LIST, &ul->ul_flags))
32734 + list_move_tail(&ul->ul_list, &sdp->sd_unlinked_list);
32736 + set_bit(ULF_LOCK, &ul->ul_flags);
32745 + spin_unlock(&sdp->sd_unlinked_lock);
32746 + gfs_log_unlock(sdp);
32752 + * gfs_unlinked_lock - lock an unlinked structure
32753 + * @sdp: the filesystem
32754 + * @ul: the unlinked inode structure
32759 +gfs_unlinked_lock(struct gfs_sbd *sdp, struct gfs_unlinked *ul)
32761 + spin_lock(&sdp->sd_unlinked_lock);
32763 + GFS_ASSERT_SBD(!test_bit(ULF_LOCK, &ul->ul_flags), sdp,);
32764 + set_bit(ULF_LOCK, &ul->ul_flags);
32768 + spin_unlock(&sdp->sd_unlinked_lock);
32772 + * gfs_unlinked_unlock - drop a reference on an unlinked structure
32773 + * @sdp: the filesystem
32774 + * @ul: the unlinked inode structure
32779 +gfs_unlinked_unlock(struct gfs_sbd *sdp, struct gfs_unlinked *ul)
32781 + spin_lock(&sdp->sd_unlinked_lock);
32783 + GFS_ASSERT_SBD(test_bit(ULF_LOCK, &ul->ul_flags), sdp,);
32784 + clear_bit(ULF_LOCK, &ul->ul_flags);
32786 + GFS_ASSERT_SBD(ul->ul_count, sdp,);
32789 + if (!ul->ul_count) {
32790 + GFS_ASSERT_SBD(!test_bit(ULF_IC_LIST, &ul->ul_flags) &&
32791 + !test_bit(ULF_OD_LIST, &ul->ul_flags), sdp,);
32792 + list_del(&ul->ul_list);
32793 + spin_unlock(&sdp->sd_unlinked_lock);
32796 + spin_unlock(&sdp->sd_unlinked_lock);
32800 + * gfs_unlinked_merge - add/remove an unlinked inode from the in-memory list
32801 + * @sdp: the filesystem
32802 + * @type: is this a unlink tag or a dealloc tag
32803 + * @inum: the inode number
32808 +gfs_unlinked_merge(struct gfs_sbd *sdp, unsigned int type,
32809 + struct gfs_inum *inum)
32811 + struct gfs_unlinked *ul;
32813 + GFS_ASSERT_SBD(atomic_read(&sdp->sd_unlinked_ic_count) ==
32814 + atomic_read(&sdp->sd_unlinked_od_count), sdp,);
32816 + ul = gfs_unlinked_get(sdp, inum, CREATE);
32818 + gfs_log_lock(sdp);
32821 + case GFS_LOG_DESC_IUL:
32822 + gfs_unlinked_hold(sdp, ul);
32823 + gfs_unlinked_hold(sdp, ul);
32824 + GFS_ASSERT_SBD(!test_bit(ULF_IC_LIST, &ul->ul_flags) &&
32825 + !test_bit(ULF_OD_LIST, &ul->ul_flags), sdp,);
32826 + set_bit(ULF_IC_LIST, &ul->ul_flags);
32827 + set_bit(ULF_OD_LIST, &ul->ul_flags);
32828 + atomic_inc(&sdp->sd_unlinked_ic_count);
32829 + atomic_inc(&sdp->sd_unlinked_od_count);
32833 + case GFS_LOG_DESC_IDA:
32834 + GFS_ASSERT_SBD(test_bit(ULF_IC_LIST, &ul->ul_flags) &&
32835 + test_bit(ULF_OD_LIST, &ul->ul_flags), sdp,);
32836 + clear_bit(ULF_IC_LIST, &ul->ul_flags);
32837 + clear_bit(ULF_OD_LIST, &ul->ul_flags);
32838 + gfs_unlinked_put(sdp, ul);
32839 + gfs_unlinked_put(sdp, ul);
32840 + GFS_ASSERT_SBD(atomic_read(&sdp->sd_unlinked_ic_count), sdp,);
32841 + atomic_dec(&sdp->sd_unlinked_ic_count);
32842 + GFS_ASSERT_SBD(atomic_read(&sdp->sd_unlinked_od_count), sdp,);
32843 + atomic_dec(&sdp->sd_unlinked_od_count);
32848 + gfs_log_unlock(sdp);
32850 + gfs_unlinked_put(sdp, ul);
32854 + * gfs_unlinked_cleanup - get rid of any extra struct gfs_unlinked structures
32855 + * @sdp: the filesystem
32860 +gfs_unlinked_cleanup(struct gfs_sbd *sdp)
32862 + struct gfs_unlinked *ul;
32865 + gfs_log_lock(sdp);
32867 + GFS_ASSERT_SBD(atomic_read(&sdp->sd_unlinked_ic_count) ==
32868 + atomic_read(&sdp->sd_unlinked_od_count), sdp,);
32870 + spin_lock(&sdp->sd_unlinked_lock);
32872 + while (!list_empty(&sdp->sd_unlinked_list)) {
32873 + ul = list_entry(sdp->sd_unlinked_list.next,
32874 + struct gfs_unlinked, ul_list);
32876 + if (ul->ul_count > 2) {
32877 + spin_unlock(&sdp->sd_unlinked_lock);
32878 + gfs_log_unlock(sdp);
32879 + current->state = TASK_UNINTERRUPTIBLE;
32880 + schedule_timeout(HZ);
32883 + GFS_ASSERT_SBD(ul->ul_count == 2, sdp,);
32885 + GFS_ASSERT_SBD(test_bit(ULF_IC_LIST, &ul->ul_flags) &&
32886 + test_bit(ULF_OD_LIST, &ul->ul_flags) &&
32887 + !test_bit(ULF_LOCK, &ul->ul_flags), sdp,);
32889 + list_del(&ul->ul_list);
32891 + atomic_dec(&sdp->sd_unlinked_ic_count);
32892 + atomic_dec(&sdp->sd_unlinked_od_count);
32894 + spin_unlock(&sdp->sd_unlinked_lock);
32896 + spin_lock(&sdp->sd_unlinked_lock);
32899 + spin_unlock(&sdp->sd_unlinked_lock);
32901 + GFS_ASSERT_SBD(!atomic_read(&sdp->sd_unlinked_ic_count) &&
32902 + !atomic_read(&sdp->sd_unlinked_od_count), sdp,);
32904 + gfs_log_unlock(sdp);
32908 + * gfs_unlinked_limit - limit the number of inodes waiting to be deallocated
32909 + * @sdp: the filesystem
32911 + * Returns: 0 on success, -EXXX on failure
32915 +gfs_unlinked_limit(struct gfs_sbd *sdp)
32917 + unsigned int tries = 0, min = 0;
32920 + if (atomic_read(&sdp->sd_unlinked_ic_count) >=
32921 + sdp->sd_tune.gt_ilimit2) {
32922 + tries = sdp->sd_tune.gt_ilimit2_tries;
32923 + min = sdp->sd_tune.gt_ilimit2_min;
32924 + } else if (atomic_read(&sdp->sd_unlinked_ic_count) >=
32925 + sdp->sd_tune.gt_ilimit1) {
32926 + tries = sdp->sd_tune.gt_ilimit1_tries;
32927 + min = sdp->sd_tune.gt_ilimit1_min;
32930 + while (tries--) {
32931 + struct gfs_unlinked *ul = unlinked_find(sdp);
32935 + error = gfs_inode_dealloc(sdp, &ul->ul_inum);
32937 + gfs_unlinked_unlock(sdp, ul);
32942 + } else if (error != 1)
32948 + * gfs_unlinked_dealloc - Go through the list of inodes to be deallocated
32949 + * @sdp: the filesystem
32951 + * Returns: 0 on success, -EXXX on failure
32955 +gfs_unlinked_dealloc(struct gfs_sbd *sdp)
32957 + unsigned int hits, strikes;
32965 + struct gfs_unlinked *ul = unlinked_find(sdp);
32969 + error = gfs_inode_dealloc(sdp, &ul->ul_inum);
32971 + gfs_unlinked_unlock(sdp, ul);
32977 + } else if (error == 1) {
32979 + if (strikes >= atomic_read(&sdp->sd_unlinked_ic_count)) {
32987 + if (!hits || !test_bit(SDF_INODED_RUN, &sdp->sd_flags))
32994 + if (error && error != -EROFS)
32995 + printk("GFS: fsid=%s: error deallocating inodes: %d\n",
32996 + sdp->sd_fsname, error);
32998 diff -urN linux-orig/fs/gfs/unlinked.h linux-patched/fs/gfs/unlinked.h
32999 --- linux-orig/fs/gfs/unlinked.h 1969-12-31 18:00:00.000000000 -0600
33000 +++ linux-patched/fs/gfs/unlinked.h 2004-06-30 13:27:49.360707187 -0500
33002 +/******************************************************************************
33003 +*******************************************************************************
33005 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
33006 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
33008 +** This copyrighted material is made available to anyone wishing to use,
33009 +** modify, copy, or redistribute it subject to the terms and conditions
33010 +** of the GNU General Public License v.2.
33012 +*******************************************************************************
33013 +******************************************************************************/
33015 +#ifndef __UNLINKED_DOT_H__
33016 +#define __UNLINKED_DOT_H__
33018 +struct gfs_unlinked *gfs_unlinked_get(struct gfs_sbd *sdp,
33019 + struct gfs_inum *inum, int create);
33020 +void gfs_unlinked_hold(struct gfs_sbd *sdp, struct gfs_unlinked *ul);
33021 +void gfs_unlinked_put(struct gfs_sbd *sdp, struct gfs_unlinked *ul);
33023 +void gfs_unlinked_lock(struct gfs_sbd *sdp, struct gfs_unlinked *ul);
33024 +void gfs_unlinked_unlock(struct gfs_sbd *sdp, struct gfs_unlinked *ul);
33026 +void gfs_unlinked_merge(struct gfs_sbd *sdp, unsigned int type,
33027 + struct gfs_inum *inum);
33028 +void gfs_unlinked_cleanup(struct gfs_sbd *sdp);
33030 +void gfs_unlinked_limit(struct gfs_sbd *sdp);
33031 +void gfs_unlinked_dealloc(struct gfs_sbd *sdp);
33033 +#endif /* __UNLINKED_DOT_H__ */
33034 diff -urN linux-orig/fs/gfs/util.c linux-patched/fs/gfs/util.c
33035 --- linux-orig/fs/gfs/util.c 1969-12-31 18:00:00.000000000 -0600
33036 +++ linux-patched/fs/gfs/util.c 2004-06-30 13:27:49.360707187 -0500
33038 +/******************************************************************************
33039 +*******************************************************************************
33041 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
33042 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
33044 +** This copyrighted material is made available to anyone wishing to use,
33045 +** modify, copy, or redistribute it subject to the terms and conditions
33046 +** of the GNU General Public License v.2.
33048 +*******************************************************************************
33049 +******************************************************************************/
33051 +#include <linux/sched.h>
33052 +#include <linux/slab.h>
33053 +#include <linux/smp_lock.h>
33054 +#include <linux/spinlock.h>
33055 +#include <asm/semaphore.h>
33056 +#include <linux/completion.h>
33057 +#include <linux/buffer_head.h>
33060 +#include "glock.h"
33062 +uint32_t gfs_random_number;
33064 +volatile int gfs_in_panic = FALSE;
33066 +kmem_cache_t *gfs_glock_cachep = NULL;
33067 +kmem_cache_t *gfs_inode_cachep = NULL;
33068 +kmem_cache_t *gfs_bufdata_cachep = NULL;
33069 +kmem_cache_t *gfs_mhc_cachep = NULL;
33072 + * gfs_random - Generate a random 32-bit number
33074 + * Generate a semi-crappy 32-bit pseudo-random number without using
33075 + * floating point.
33077 + * The PRNG is from "Numerical Recipes in C" (second edition), page 284.
33079 + * Returns: a 32-bit random number
33085 + gfs_random_number = 0x0019660D * gfs_random_number + 0x3C6EF35F;
33086 + return gfs_random_number;
33090 + * hash_more_internal - hash an array of data
33091 + * @data: the data to be hashed
33092 + * @len: the length of data to be hashed
33093 + * @hash: the hash from a previous call
33095 + * Take some data and convert it to a 32-bit hash.
33097 + * This is the 32-bit FNV-1a hash from:
33098 + * http://www.isthe.com/chongo/tech/comp/fnv/
33102 + * Returns: the hash
33105 +static __inline__ uint32_t
33106 +hash_more_internal(const void *data, unsigned int len, uint32_t hash)
33108 + unsigned char *p = (unsigned char *)data;
33109 + unsigned char *e = p + len;
33110 + uint32_t h = hash;
33113 + h ^= (uint32_t)(*p++);
33121 + * gfs_hash - hash an array of data
33122 + * @data: the data to be hashed
33123 + * @len: the length of data to be hashed
33125 + * Take some data and convert it to a 32-bit hash.
33127 + * This is the 32-bit FNV-1a hash from:
33128 + * http://www.isthe.com/chongo/tech/comp/fnv/
33130 + * Returns: the hash
33134 +gfs_hash(const void *data, unsigned int len)
33136 + uint32_t h = 0x811C9DC5;
33137 + h = hash_more_internal(data, len, h);
33142 + * gfs_hash_more - hash an array of data
33143 + * @data: the data to be hashed
33144 + * @len: the length of data to be hashed
33145 + * @hash: the hash from a previous call
33147 + * Take some data and convert it to a 32-bit hash.
33149 + * This is the 32-bit FNV-1a hash from:
33150 + * http://www.isthe.com/chongo/tech/comp/fnv/
33152 + * This version lets you hash together discontinuous regions.
33153 + * For example, to compute the combined hash of the memory in
33154 + * (data1, len1), (data2, len2), and (data3, len3) you:
33156 + * h = gfs_hash(data1, len1);
33157 + * h = gfs_hash_more(data2, len2, h);
33158 + * h = gfs_hash_more(data3, len3, h);
33160 + * Returns: the hash
33164 +gfs_hash_more(const void *data, unsigned int len, uint32_t hash)
33167 + h = hash_more_internal(data, len, hash);
33171 +/* Byte-wise swap two items of size SIZE. */
33173 +#define SWAP(a, b, size) \
33175 + register size_t __size = (size); \
33176 + register char *__a = (a), *__b = (b); \
33178 + char __tmp = *__a; \
33180 + *__b++ = __tmp; \
33181 + } while (__size-- > 1); \
33185 + * gfs_sort - Sort base array using shell sort algorithm
33186 + * @base: the input array
33187 + * @num_elem: number of elements in array
33188 + * @size: size of each element in array
33189 + * @compar: fxn to compare array elements (returns negative
33190 + * for lt, 0 for eq, and positive for gt
33192 + * Sorts the array passed in using the compar fxn to compare elements using
33193 + * the shell sort algorithm
33197 +gfs_sort(void *base, unsigned int num_elem, unsigned int size,
33198 + int (*compar) (const void *, const void *))
33200 + register char *pbase = (char *)base;
33202 + int cols[16] = {1391376, 463792, 198768, 86961, 33936, 13776, 4592,
33203 + 1968, 861, 336, 112, 48, 21, 7, 3, 1};
33205 + for (k = 0; k < 16; k++) {
33207 + for (i = h; i < num_elem; i++) {
33210 + (*compar)((void *)(pbase + size * (j - h)),
33211 + (void *)(pbase + size * j)) > 0) {
33212 + SWAP(pbase + size * j,
33213 + pbase + size * (j - h),
33223 + * @sdp: the filesystem
33224 + * @last: the last time we bitched
33230 +bitch_about(struct gfs_sbd *sdp, unsigned long *last, char *about)
33232 + if (time_after_eq(jiffies, *last + sdp->sd_tune.gt_complain_secs * HZ)) {
33233 + printk("GFS: fsid=%s: %s by program \"%s\"\n",
33234 + sdp->sd_fsname, about, current->comm);
33240 + * gfs_assert_i - Stop the machine
33241 + * @assertion: the assertion that failed
33242 + * @file: the file that called us
33243 + * @line: the line number of the file that called us
33245 + * Don't do ENTER() and EXIT() here.
33250 +gfs_assert_i(char *assertion,
33251 + unsigned int type, void *ptr,
33252 + char *file, unsigned int line)
33254 + gfs_in_panic = TRUE;
33256 + printk("\nGFS: Assertion failed on line %d of file %s\n"
33257 + "GFS: assertion: \"%s\"\n"
33258 + "GFS: time = %lu\n",
33259 + line, file, assertion, get_seconds());
33262 + case GFS_ASSERT_TYPE_SBD:
33264 + struct gfs_sbd *sdp = (struct gfs_sbd *)ptr;
33265 + printk("GFS: fsid=%s\n", sdp->sd_fsname);
33269 + case GFS_ASSERT_TYPE_GLOCK:
33271 + struct gfs_glock *gl = (struct gfs_glock *)ptr;
33272 + struct gfs_sbd *sdp = gl->gl_sbd;
33273 + printk("GFS: fsid=%s: glock = (%u, %"PRIu64")\n",
33275 + gl->gl_name.ln_type,
33276 + gl->gl_name.ln_number);
33280 + case GFS_ASSERT_TYPE_INODE:
33282 + struct gfs_inode *ip = (struct gfs_inode *)ptr;
33283 + struct gfs_sbd *sdp = ip->i_sbd;
33284 + printk("GFS: fsid=%s: inode = %"PRIu64"/%"PRIu64"\n",
33286 + ip->i_num.no_formal_ino, ip->i_num.no_addr);
33290 + case GFS_ASSERT_TYPE_RGRPD:
33292 + struct gfs_rgrpd *rgd = (struct gfs_rgrpd *)ptr;
33293 + struct gfs_sbd *sdp = rgd->rd_sbd;
33294 + printk("GFS: fsid=%s: rgroup = %"PRIu64"\n",
33295 + sdp->sd_fsname, rgd->rd_ri.ri_addr);
33302 + printk("GFS: Record message above and reboot.\n");
33305 + panic("GFS: Record message above and reboot.\n");
33309 + * gfs_io_error_i - handle an I/O error
33310 + * @sdp: the filesystem
33311 + * @ptr: the object the error happened on, e.g. a buffer head (can be NULL)
33313 + * This will do something other than panic, eventually.
33317 +void gfs_io_error_i(struct gfs_sbd *sdp,
33318 + unsigned int type, void *ptr,
33319 + char *file, unsigned int line)
33322 + case GFS_IO_ERROR_TYPE_BH:
33324 + struct buffer_head *bh = (struct buffer_head *)ptr;
33325 + printk("GFS: fsid=%s: I/O error on block %"PRIu64"\n",
33326 + sdp->sd_fsname, (uint64_t)bh->b_blocknr);
33330 + case GFS_IO_ERROR_TYPE_INODE:
33332 + struct gfs_inode *ip = (struct gfs_inode *)ptr;
33333 + printk("GFS: fsid=%s: I/O error in inode %"PRIu64"/%"PRIu64"\n",
33335 + ip->i_num.no_formal_ino, ip->i_num.no_addr);
33340 + printk("GFS: fsid=%s: I/O error\n", sdp->sd_fsname);
33344 + GFS_ASSERT_SBD(FALSE, sdp,);
33348 + * gmalloc - malloc a small amount of memory
33349 + * @size: the number of bytes to malloc
33351 + * Returns: the memory
33355 +gmalloc(unsigned int size)
33358 + RETRY_MALLOC(p = kmalloc(size, GFP_KERNEL), p);
33362 diff -urN linux-orig/fs/gfs/util.h linux-patched/fs/gfs/util.h
33363 --- linux-orig/fs/gfs/util.h 1969-12-31 18:00:00.000000000 -0600
33364 +++ linux-patched/fs/gfs/util.h 2004-06-30 13:27:49.360707187 -0500
33366 +/******************************************************************************
33367 +*******************************************************************************
33369 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
33370 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
33372 +** This copyrighted material is made available to anyone wishing to use,
33373 +** modify, copy, or redistribute it subject to the terms and conditions
33374 +** of the GNU General Public License v.2.
33376 +*******************************************************************************
33377 +******************************************************************************/
33379 +#ifndef __UTIL_DOT_H__
33380 +#define __UTIL_DOT_H__
33383 +/* Utility functions */
33385 +extern uint32_t gfs_random_number;
33386 +uint32_t gfs_random(void);
33388 +uint32_t gfs_hash(const void *data, unsigned int len);
33389 +uint32_t gfs_hash_more(const void *data, unsigned int len, uint32_t hash);
33391 +void gfs_sort(void *base, unsigned int num_elem, unsigned int size,
33392 + int (*compar) (const void *, const void *));
33394 +void bitch_about(struct gfs_sbd *sdp, unsigned long *last, char *about);
33398 +/* Assertion stuff */
33400 +#define GFS_ASSERT_TYPE_NONE (18)
33401 +#define GFS_ASSERT_TYPE_SBD (19)
33402 +#define GFS_ASSERT_TYPE_GLOCK (20)
33403 +#define GFS_ASSERT_TYPE_INODE (21)
33404 +#define GFS_ASSERT_TYPE_RGRPD (22)
33406 +#define GFS_ASSERT(x, todo) \
33412 + gfs_assert_i(#x, GFS_ASSERT_TYPE_NONE, NULL, __FILE__, __LINE__); \
33417 +#define GFS_ASSERT_SBD(x, sdp, todo) \
33422 + struct gfs_sbd *gfs_assert_sbd = (sdp); \
33424 + gfs_assert_i(#x, GFS_ASSERT_TYPE_SBD, gfs_assert_sbd, __FILE__, __LINE__); \
33429 +#define GFS_ASSERT_GLOCK(x, gl, todo) \
33434 + struct gfs_glock *gfs_assert_glock = (gl); \
33436 + gfs_assert_i(#x, GFS_ASSERT_TYPE_GLOCK, gfs_assert_glock, __FILE__, __LINE__); \
33441 +#define GFS_ASSERT_INODE(x, ip, todo) \
33446 + struct gfs_inode *gfs_assert_inode = (ip); \
33448 + gfs_assert_i(#x, GFS_ASSERT_TYPE_INODE, gfs_assert_inode, __FILE__, __LINE__); \
33453 +#define GFS_ASSERT_RGRPD(x, rgd, todo) \
33458 + struct gfs_rgrpd *gfs_assert_rgrpd = (rgd); \
33460 + gfs_assert_i(#x, GFS_ASSERT_TYPE_RGRPD, gfs_assert_rgrpd, __FILE__, __LINE__); \
33465 +extern volatile int gfs_in_panic;
33466 +void gfs_assert_i(char *assertion,
33467 + unsigned int type, void *ptr,
33468 + char *file, unsigned int line) __attribute__ ((noreturn));
33471 +/* I/O error stuff */
33473 +#define GFS_IO_ERROR_TYPE_NONE (118)
33474 +#define GFS_IO_ERROR_TYPE_BH (119)
33475 +#define GFS_IO_ERROR_TYPE_INODE (120)
33477 +#define gfs_io_error(sdp) \
33478 +gfs_io_error_i((sdp), GFS_IO_ERROR_TYPE_NONE, NULL, __FILE__, __LINE__);
33480 +#define gfs_io_error_bh(sdp, bh) \
33483 + struct buffer_head *gfs_io_error_bh = (bh); \
33484 + gfs_io_error_i((sdp), GFS_IO_ERROR_TYPE_BH, gfs_io_error_bh, __FILE__, __LINE__); \
33488 +#define gfs_io_error_inode(ip) \
33491 + struct gfs_inode *gfs_io_error_inode = (ip); \
33492 + gfs_io_error_i((ip)->i_sbd, GFS_IO_ERROR_TYPE_INODE, gfs_io_error_inode, __FILE__, __LINE__); \
33496 +void gfs_io_error_i(struct gfs_sbd *sdp,
33497 + unsigned int type, void *ptr,
33498 + char *file, unsigned int line);
33501 +/* Memory stuff */
33503 +#define RETRY_MALLOC(do_this, until_this) \
33506 + do { do_this; } while (0); \
33507 + if (until_this) \
33509 + printk("GFS: out of memory: %s, %u\n", __FILE__, __LINE__); \
33513 +extern kmem_cache_t *gfs_glock_cachep;
33514 +extern kmem_cache_t *gfs_inode_cachep;
33515 +extern kmem_cache_t *gfs_bufdata_cachep;
33516 +extern kmem_cache_t *gfs_mhc_cachep;
33518 +void *gmalloc(unsigned int size);
33521 +#endif /* __UTIL_DOT_H__ */
33522 diff -urN linux-orig/include/linux/gfs_ioctl.h linux-patched/include/linux/gfs_ioctl.h
33523 --- linux-orig/include/linux/gfs_ioctl.h 1969-12-31 18:00:00.000000000 -0600
33524 +++ linux-patched/include/linux/gfs_ioctl.h 2004-06-30 13:27:49.340711826 -0500
33526 +/******************************************************************************
33527 +*******************************************************************************
33529 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
33530 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
33532 +** This copyrighted material is made available to anyone wishing to use,
33533 +** modify, copy, or redistribute it subject to the terms and conditions
33534 +** of the GNU General Public License v.2.
33536 +*******************************************************************************
33537 +******************************************************************************/
33539 +#ifndef __GFS_IOCTL_DOT_H__
33540 +#define __GFS_IOCTL_DOT_H__
33542 +#define GFS_IOCTL_VERSION (0)
33544 +#define _GFSC_(x) (('G' << 8) | (x))
33547 + Ioctls implemented
33549 + Reserved Ioctls: 3, 7, 8, 9, 10, 4, 13
33553 +#define GFS_STACK_PRINT _GFSC_(40)
33555 +#define GFS_GET_META _GFSC_(31)
33556 +#define GFS_FILE_STAT _GFSC_(30)
33558 +#define GFS_SHRINK _GFSC_(5)
33560 +#define GFS_GET_ARGS _GFSC_(29)
33561 +#define GFS_GET_LOCKSTRUCT _GFSC_(39)
33562 +#define GFS_GET_SUPER _GFSC_(19)
33563 +#define GFS_JREAD _GFSC_(23)
33564 +#define GFS_JWRITE _GFSC_(24)
33565 +#define GFS_JSTAT _GFSC_(20)
33566 +#define GFS_JTRUNC _GFSC_(33)
33568 +#define GFS_LOCK_DUMP _GFSC_(11)
33570 +#define GFS_STATGFS _GFSC_(12)
33572 +#define GFS_FREEZE _GFSC_(14)
33573 +#define GFS_UNFREEZE _GFSC_(15)
33575 +#define GFS_RECLAIM_METADATA _GFSC_(16)
33577 +#define GFS_QUOTA_SYNC _GFSC_(17)
33578 +#define GFS_QUOTA_REFRESH _GFSC_(18)
33579 +#define GFS_QUOTA_READ _GFSC_(32)
33581 +#define GFS_GET_TUNE _GFSC_(21)
33582 +#define GFS_SET_TUNE _GFSC_(22)
33584 +#define GFS_EATTR_GET _GFSC_(26)
33585 +#define GFS_EATTR_SET _GFSC_(27)
33587 +#define GFS_WHERE_ARE_YOU _GFSC_(35)
33589 +#define GFS_SET_FLAG _GFSC_(36)
33590 +#define GFS_CLEAR_FLAG _GFSC_(37)
33592 +#define GFS_GET_COUNTERS _GFSC_(43)
33594 +#define GFS_FILE_FLUSH _GFSC_(42)
33596 +struct gfs_user_buffer {
33598 + unsigned int ub_size;
33599 + unsigned int ub_count;
33602 +/* Structure for jread/jwrite */
33604 +#define GFS_HIDDEN_JINDEX (0x10342345)
33605 +#define GFS_HIDDEN_RINDEX (0x10342346)
33606 +#define GFS_HIDDEN_QUOTA (0x10342347)
33607 +#define GFS_HIDDEN_LICENSE (0x10342348)
33610 + unsigned int jio_file;
33612 + uint32_t jio_size;
33613 + uint64_t jio_offset;
33616 + uint32_t jio_count;
33619 +/* Structure for better GFS-specific df */
33621 +struct gfs_usage {
33622 + unsigned int gu_block_size;
33623 + uint64_t gu_total_blocks;
33624 + uint64_t gu_free;
33625 + uint64_t gu_used_dinode;
33626 + uint64_t gu_free_dinode;
33627 + uint64_t gu_used_meta;
33628 + uint64_t gu_free_meta;
33631 +struct gfs_reclaim_stats {
33632 + uint64_t rc_inodes;
33633 + uint64_t rc_metadata;
33636 +struct gfs_quota_name {
33642 + * You can tune a filesystem, but you can't tune a yak.
33645 +#define GFS_TUNE_VERSION ((GFS_IOCTL_VERSION << 16) | (138))
33648 + unsigned int gt_tune_version;
33650 + unsigned int gt_ilimit1;
33651 + unsigned int gt_ilimit1_tries;
33652 + unsigned int gt_ilimit1_min;
33653 + unsigned int gt_ilimit2;
33654 + unsigned int gt_ilimit2_tries;
33655 + unsigned int gt_ilimit2_min;
33656 + unsigned int gt_demote_secs;
33657 + unsigned int gt_incore_log_blocks;
33658 + unsigned int gt_jindex_refresh_secs;
33659 + unsigned int gt_depend_secs;
33660 + unsigned int gt_scand_secs;
33661 + unsigned int gt_recoverd_secs;
33662 + unsigned int gt_logd_secs;
33663 + unsigned int gt_quotad_secs;
33664 + unsigned int gt_inoded_secs;
33665 + unsigned int gt_quota_simul_sync;
33666 + unsigned int gt_quota_warn_period;
33667 + unsigned int gt_atime_quantum;
33668 + unsigned int gt_quota_quantum;
33669 + unsigned int gt_quota_scale_num;
33670 + unsigned int gt_quota_scale_den;
33671 + unsigned int gt_quota_enforce;
33672 + unsigned int gt_quota_account;
33673 + unsigned int gt_new_files_jdata;
33674 + unsigned int gt_new_files_directio;
33675 + unsigned int gt_max_atomic_write;
33676 + unsigned int gt_max_readahead;
33677 + unsigned int gt_lockdump_size;
33678 + unsigned int gt_stall_secs;
33679 + unsigned int gt_complain_secs;
33680 + unsigned int gt_reclaim_limit;
33681 + unsigned int gt_entries_per_readdir;
33682 + unsigned int gt_prefetch_secs;
33683 + unsigned int gt_statfs_slots;
33684 + unsigned int gt_max_mhc;
33688 + * Extended Attribute Ioctl structures
33690 + * Note: The name_len does not include a null character.
33692 + * Getting and setting EAs return the following errors that aren't
33695 + * ENODATA - No such extended attribute
33696 + * ERANGE - Extended attribute data is too large for the buffer
33697 + * ENOSPC - No space left for extended attributes
33698 + * EEXIST - Extended attribute already exists
33701 +#define GFS_EACMD_SET (0)
33702 +#define GFS_EACMD_CREATE (1)
33703 +#define GFS_EACMD_REPLACE (2)
33704 +#define GFS_EACMD_REMOVE (3)
33706 +struct gfs_eaget_io {
33710 + uint32_t eg_data_len;
33711 + uint8_t eg_name_len;
33712 + uint8_t eg_type; /* GFS_EATYPE_... */
33715 +struct gfs_easet_io {
33716 + const char *es_data;
33718 + uint16_t es_data_len;
33719 + uint8_t es_name_len; /* not counting the NULL */
33720 + uint8_t es_cmd; /* GFS_EACMD_... */
33721 + uint8_t es_type; /* GFS_EATYPE_... */
33724 +#define GFS_GLOCKD_DEFAULT (1)
33725 +#define GFS_GLOCKD_MAX (32)
33728 + char ar_lockproto[256]; /* The name of the Lock Protocol */
33729 + char ar_locktable[256]; /* The name of the Lock Table */
33730 + char ar_hostdata[256]; /* The host specific data */
33732 + int ar_ignore_local_fs; /* Ignore the local_fs field in the struct lm_lockops */
33733 + int ar_localflocks; /* let the VFS do flock|fcntl locks for us */
33734 + int ar_localcaching; /* Local-style caching (dangerous on mulithost) */
33736 + int ar_upgrade; /* Upgrade ondisk/multihost format */
33738 + unsigned int ar_num_glockd;
33740 + int ar_posixacls; /* Enable posix acls */
33743 +#endif /* __GFS_IOCTL_DOT_H__ */
33744 diff -urN linux-orig/include/linux/gfs_ondisk.h linux-patched/include/linux/gfs_ondisk.h
33745 --- linux-orig/include/linux/gfs_ondisk.h 1969-12-31 18:00:00.000000000 -0600
33746 +++ linux-patched/include/linux/gfs_ondisk.h 2004-06-30 13:27:49.341711594 -0500
33748 +/******************************************************************************
33749 +*******************************************************************************
33751 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
33752 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
33754 +** This copyrighted material is made available to anyone wishing to use,
33755 +** modify, copy, or redistribute it subject to the terms and conditions
33756 +** of the GNU General Public License v.2.
33758 +*******************************************************************************
33759 +******************************************************************************/
33763 +* If you add 8 byte fields to these structures, they must be 8 byte
33764 +* aligned. 4 byte field must be 4 byte aligned, etc...
33766 +* All structures must be a multiple of 8 bytes long.
33769 +* We should have forgotten about supporting 512B FS block sizes
33770 +* and made the di_reserved field in the struct gfs_dinode structure
33773 +* de_rec_len in struct gfs_dirent should really have been a 32-bit value
33774 +* as it now limits us to a 64k FS block size (with the current code
33778 +#ifndef __GFS_ONDISK_DOT_H__
33779 +#define __GFS_ONDISK_DOT_H__
33781 +#define GFS_MAGIC (0x01161970)
33782 +#define GFS_BASIC_BLOCK (512)
33783 +#define GFS_BASIC_BLOCK_SHIFT (9)
33784 +#define GFS_DUMPS_PER_LOG (4)
33786 +/* Lock numbers of the LM_TYPE_NONDISK type */
33788 +#define GFS_MOUNT_LOCK (0)
33789 +#define GFS_LIVE_LOCK (1)
33790 +#define GFS_TRANS_LOCK (2)
33791 +#define GFS_RENAME_LOCK (3)
33793 +/* Format numbers for various metadata types */
33795 +#define GFS_FORMAT_SB (100)
33796 +#define GFS_FORMAT_RG (200)
33797 +#define GFS_FORMAT_RB (300)
33798 +#define GFS_FORMAT_DI (400)
33799 +#define GFS_FORMAT_IN (500)
33800 +#define GFS_FORMAT_LF (600)
33801 +#define GFS_FORMAT_JD (700)
33802 +#define GFS_FORMAT_LH (800)
33803 +#define GFS_FORMAT_LD (900)
33804 +/* These don't have actual struct gfs_meta_header structures to go with them */
33805 +#define GFS_FORMAT_JI (1000)
33806 +#define GFS_FORMAT_RI (1100)
33807 +#define GFS_FORMAT_DE (1200)
33808 +#define GFS_FORMAT_QU (1500)
33809 +#define GFS_FORMAT_EA (1600)
33810 +/* These are part of the superblock */
33811 +#define GFS_FORMAT_FS (1309)
33812 +#define GFS_FORMAT_MULTI (1401)
33815 + * An on-disk inode number
33818 +#define gfs_inum_equal(ino1, ino2) \
33819 +(((ino1)->no_formal_ino == (ino2)->no_formal_ino) && \
33820 + ((ino1)->no_addr == (ino2)->no_addr))
33823 + uint64_t no_formal_ino;
33824 + uint64_t no_addr;
33828 + * Generic metadata head structure
33830 + * Every inplace buffer logged in the journal must start with this.
33833 +#define GFS_METATYPE_NONE (0)
33834 +#define GFS_METATYPE_SB (1)
33835 +#define GFS_METATYPE_RG (2)
33836 +#define GFS_METATYPE_RB (3)
33837 +#define GFS_METATYPE_DI (4)
33838 +#define GFS_METATYPE_IN (5)
33839 +#define GFS_METATYPE_LF (6)
33840 +#define GFS_METATYPE_JD (7)
33841 +#define GFS_METATYPE_LH (8)
33842 +#define GFS_METATYPE_LD (9)
33843 +#define GFS_METATYPE_EA (10)
33845 +#define GFS_META_CLUMP (64)
33847 +struct gfs_meta_header {
33848 + uint32_t mh_magic; /* Magic number */
33849 + uint32_t mh_type; /* GFS_METATYPE_XX */
33850 + uint64_t mh_generation; /* Generation number */
33851 + uint32_t mh_format; /* GFS_FORMAT_XX */
33852 + uint32_t mh_incarn;
33856 + * super-block structure
33858 + * It's probably good if SIZEOF_SB <= GFS_BASIC_BLOCK
33861 +/* Address of SuperBlock in GFS basic blocks */
33862 +#define GFS_SB_ADDR (128)
33863 +/* The lock number for the superblock (must be zero) */
33864 +#define GFS_SB_LOCK (0)
33865 +#define GFS_CRAP_LOCK (1)
33867 +/* Requirement: GFS_LOCKNAME_LEN % 8 == 0
33868 + Includes: the fencing zero at the end */
33869 +#define GFS_LOCKNAME_LEN (64)
33872 + /* Order is important */
33873 + struct gfs_meta_header sb_header;
33875 + uint32_t sb_fs_format;
33876 + uint32_t sb_multihost_format;
33877 + uint32_t sb_flags;
33879 + /* Important information */
33880 + uint32_t sb_bsize; /* fundamental fs block size in bytes */
33881 + uint32_t sb_bsize_shift; /* log2(sb_bsize) */
33882 + uint32_t sb_seg_size; /* Journal segment size in FS blocks */
33884 + struct gfs_inum sb_jindex_di; /* journal index inode number (GFS_SB_LOCK) */
33885 + struct gfs_inum sb_rindex_di; /* resource index inode number (GFS_SB_LOCK) */
33886 + struct gfs_inum sb_root_di; /* root directory inode number (GFS_ROOT_LOCK) */
33888 + char sb_lockproto[GFS_LOCKNAME_LEN]; /* Type of locking this FS uses */
33889 + char sb_locktable[GFS_LOCKNAME_LEN]; /* Name of lock table for this FS */
33891 + struct gfs_inum sb_quota_di;
33892 + struct gfs_inum sb_license_di;
33894 + char sb_reserved[96];
33898 + * journal index structure
33901 +struct gfs_jindex {
33902 + uint64_t ji_addr; /* starting block of the journal */
33903 + uint32_t ji_nsegment; /* number of segments in journal */
33906 + char ji_reserved[64];
33910 + * resource index structure
33913 +struct gfs_rindex {
33914 + uint64_t ri_addr; /* rgrp block disk address */
33915 + uint32_t ri_length; /* length of rgrp header in fs blocks */
33918 + uint64_t ri_data1; /* first data location */
33919 + uint32_t ri_data; /* num of data blocks in rgrp */
33921 + uint32_t ri_bitbytes; /* number of bytes in data bitmaps */
33923 + char ri_reserved[64];
33927 + * resource group header structure
33931 +/* Number of blocks per byte in rgrp */
33932 +#define GFS_NBBY (4)
33933 +#define GFS_BIT_SIZE (2)
33934 +#define GFS_BIT_MASK (0x00000003)
33936 +#define GFS_BLKST_FREE (0)
33937 +#define GFS_BLKST_USED (1)
33938 +#define GFS_BLKST_FREEMETA (2)
33939 +#define GFS_BLKST_USEDMETA (3)
33942 + struct gfs_meta_header rg_header;
33944 + uint32_t rg_flags; /* flags */
33946 + uint32_t rg_free; /* number of free data blocks */
33948 + uint32_t rg_useddi; /* number of dinodes */
33949 + uint32_t rg_freedi; /* number of unused dinodes */
33950 + struct gfs_inum rg_freedi_list; /* list of free dinodes */
33952 + uint32_t rg_usedmeta; /* number of used metadata blocks (not including dinodes) */
33953 + uint32_t rg_freemeta; /* number of unused metadata blocks */
33955 + char rg_reserved[64];
33959 + * Quota Structures
33962 +struct gfs_quota {
33963 + uint64_t qu_limit;
33964 + uint64_t qu_warn;
33965 + int64_t qu_value;
33967 + char qu_reserved[64];
33971 + * dinode structure
33974 +#define GFS_MAX_META_HEIGHT (10)
33975 +#define GFS_DIR_MAX_DEPTH (17)
33977 +/* Dinode types */
33978 +#define GFS_FILE_NON (0)
33979 +#define GFS_FILE_REG (1)
33980 +#define GFS_FILE_DIR (2)
33981 +#define GFS_FILE_LNK (5)
33982 +#define GFS_FILE_BLK (7)
33983 +#define GFS_FILE_CHR (8)
33984 +#define GFS_FILE_FIFO (101)
33985 +#define GFS_FILE_SOCK (102)
33987 +/* Dinode flags */
33988 +#define GFS_DIF_JDATA (0x00000001)
33989 +#define GFS_DIF_EXHASH (0x00000002)
33990 +#define GFS_DIF_UNUSED (0x00000004)
33991 +#define GFS_DIF_EA_INDIRECT (0x00000008)
33992 +#define GFS_DIF_DIRECTIO (0x00000010)
33993 +#define GFS_DIF_IMMUTABLE (0x00000020)
33994 +#define GFS_DIF_APPENDONLY (0x00000040)
33995 +#define GFS_DIF_NOATIME (0x00000080)
33996 +#define GFS_DIF_SYNC (0x00000100)
33997 +#define GFS_DIF_INHERIT_DIRECTIO (0x40000000)
33998 +#define GFS_DIF_INHERIT_JDATA (0x80000000)
34000 +struct gfs_dinode {
34001 + struct gfs_meta_header di_header;
34003 + struct gfs_inum di_num;
34005 + uint32_t di_mode; /* mode of file */
34006 + uint32_t di_uid; /* owner's user id */
34007 + uint32_t di_gid; /* owner's group id */
34008 + uint32_t di_nlink; /* number of links to this file */
34009 + uint64_t di_size; /* number of bytes in file */
34010 + uint64_t di_blocks; /* number of blocks in file */
34011 + int64_t di_atime; /* time last accessed */
34012 + int64_t di_mtime; /* time last modified */
34013 + int64_t di_ctime; /* time last changed */
34014 + uint32_t di_major; /* device major number */
34015 + uint32_t di_minor; /* device minor number */
34017 + uint64_t di_rgrp; /* dinode rgrp block number */
34018 + uint64_t di_goal_rgrp; /* rgrp to alloc from next */
34019 + uint32_t di_goal_dblk; /* data block goal */
34020 + uint32_t di_goal_mblk; /* metadata block goal */
34021 + uint32_t di_flags; /* flags */
34022 + uint32_t di_payload_format; /* struct gfs_rindex, struct gfs_jindex, or struct gfs_dirent */
34023 + uint16_t di_type; /* type of file */
34024 + uint16_t di_height; /* height of metadata */
34025 + uint32_t di_incarn; /* incarnation number */
34028 + /* These only apply to directories */
34029 + uint16_t di_depth; /* Number of bits in the table */
34030 + uint32_t di_entries; /* The number of entries in the directory */
34032 + /* This only applies to unused inodes */
34033 + struct gfs_inum di_next_unused;
34035 + uint64_t di_eattr; /* extended attribute block number */
34037 + char di_reserved[56];
34041 + * indirect block header
34044 +struct gfs_indirect {
34045 + struct gfs_meta_header in_header;
34047 + char in_reserved[64];
34051 + * directory structure - many of these per directory file
34054 +#define GFS_FNAMESIZE (255)
34055 +#define GFS_DIRENT_SIZE(name_len) ((sizeof(struct gfs_dirent) + (name_len) + 7) & ~7)
34057 +struct gfs_dirent {
34058 + struct gfs_inum de_inum; /* Inode number */
34059 + uint32_t de_hash; /* hash of the filename */
34060 + uint16_t de_rec_len; /* the length of the dirent */
34061 + uint16_t de_name_len; /* the length of the name */
34062 + uint16_t de_type; /* type of dinode this points to */
34064 + char de_reserved[14];
34068 + * Header of leaf directory nodes
34072 + struct gfs_meta_header lf_header;
34074 + uint16_t lf_depth; /* Depth of leaf */
34075 + uint16_t lf_entries; /* Number of dirents in leaf */
34076 + uint32_t lf_dirent_format; /* Format of the dirents */
34077 + uint64_t lf_next; /* Next leaf, if overflow */
34079 + char lf_reserved[64];
34083 + * Log header structure
34086 +#define GFS_LOG_HEAD_UNMOUNT (0x00000001)
34088 +struct gfs_log_header {
34089 + struct gfs_meta_header lh_header;
34091 + uint32_t lh_flags; /* Flags */
34094 + uint64_t lh_first; /* Block number of first header in this trans */
34095 + uint64_t lh_sequence; /* Sequence number of this transaction */
34097 + uint64_t lh_tail; /* Block number of log tail */
34098 + uint64_t lh_last_dump; /* block number of last dump */
34100 + char lh_reserved[64];
34104 + * Log type descriptor
34107 +#define GFS_LOG_DESC_METADATA (300)
34108 +/* ld_data1 is the number of metadata blocks in the descriptor.
34109 + ld_data2 is unused.
34112 +#define GFS_LOG_DESC_IUL (400)
34113 +/* ld_data1 is TRUE if this is a dump.
34114 + ld_data2 is unused.
34115 + FixMe!!! ld_data1 should be the number of entries.
34116 + ld_data2 should be "TRUE if this is a dump".
34119 +#define GFS_LOG_DESC_IDA (401)
34120 +/* ld_data1 is unused.
34121 + ld_data2 is unused.
34122 + FixMe!!! ld_data1 should be the number of entries.
34125 +#define GFS_LOG_DESC_Q (402)
34126 +/* ld_data1 is the number of quota changes in the descriptor.
34127 + ld_data2 is TRUE if this is a dump.
34130 +#define GFS_LOG_DESC_LAST (500)
34131 +/* ld_data1 is unused.
34132 + ld_data2 is unused.
34135 +struct gfs_log_descriptor {
34136 + struct gfs_meta_header ld_header;
34138 + uint32_t ld_type; /* Type of data in this log chunk */
34139 + uint32_t ld_length; /* Number of buffers in this chunk */
34140 + uint32_t ld_data1; /* descriptor specific field */
34141 + uint32_t ld_data2; /* descriptor specific field */
34143 + char ld_reserved[64];
34147 + * Metadata block tags
34150 +struct gfs_block_tag {
34151 + uint64_t bt_blkno; /* inplace block number */
34152 + uint32_t bt_flags; /* flags */
34157 + * Quota Journal Tag
34160 +#define GFS_QTF_USER (0x00000001)
34162 +struct gfs_quota_tag {
34163 + int64_t qt_change;
34164 + uint32_t qt_flags;
34169 + * Extended attribute header format
34172 +#define GFS_EA_MAX_NAME_LEN (255)
34173 +#define GFS_EA_MAX_DATA_LEN (65535)
34175 +#define GFS_EATYPE_LAST (2)
34177 +#define GFS_EATYPE_UNUSED (0)
34178 +#define GFS_EATYPE_USR (1)
34179 +#define GFS_EATYPE_SYS (2)
34180 +#define GFS_EATYPE_VALID(x) ((x) && (x) <= GFS_EATYPE_LAST) /* this is only
34183 +#define GFS_EAFLAG_LAST (0x01) /* last ea in block */
34185 +struct gfs_ea_header {
34186 + uint32_t ea_rec_len;
34187 + uint32_t ea_data_len;
34188 + uint8_t ea_name_len; /* no NULL pointer after the string */
34189 + uint8_t ea_type; /* GFS_EATYPE_... */
34190 + uint8_t ea_flags;
34191 + uint8_t ea_num_ptrs;
34195 +/* Endian functions */
34197 +#define GFS_ENDIAN_BIG
34199 +#ifdef GFS_ENDIAN_BIG
34201 +#define gfs16_to_cpu be16_to_cpu
34202 +#define gfs32_to_cpu be32_to_cpu
34203 +#define gfs64_to_cpu be64_to_cpu
34205 +#define cpu_to_gfs16 cpu_to_be16
34206 +#define cpu_to_gfs32 cpu_to_be32
34207 +#define cpu_to_gfs64 cpu_to_be64
34209 +#else /* GFS_ENDIAN_BIG */
34211 +#define gfs16_to_cpu le16_to_cpu
34212 +#define gfs32_to_cpu le32_to_cpu
34213 +#define gfs64_to_cpu le64_to_cpu
34215 +#define cpu_to_gfs16 cpu_to_le16
34216 +#define cpu_to_gfs32 cpu_to_le32
34217 +#define cpu_to_gfs64 cpu_to_le64
34219 +#endif /* GFS_ENDIAN_BIG */
34221 +/* Translation functions */
34223 +void gfs_inum_in(struct gfs_inum *no, char *buf);
34224 +void gfs_inum_out(struct gfs_inum *no, char *buf);
34225 +void gfs_meta_header_in(struct gfs_meta_header *mh, char *buf);
34226 +void gfs_meta_header_out(struct gfs_meta_header *mh, char *buf);
34227 +void gfs_sb_in(struct gfs_sb *sb, char *buf);
34228 +void gfs_sb_out(struct gfs_sb *sb, char *buf);
34229 +void gfs_jindex_in(struct gfs_jindex *jindex, char *buf);
34230 +void gfs_jindex_out(struct gfs_jindex *jindex, char *buf);
34231 +void gfs_rindex_in(struct gfs_rindex *rindex, char *buf);
34232 +void gfs_rindex_out(struct gfs_rindex *rindex, char *buf);
34233 +void gfs_rgrp_in(struct gfs_rgrp *rgrp, char *buf);
34234 +void gfs_rgrp_out(struct gfs_rgrp *rgrp, char *buf);
34235 +void gfs_quota_in(struct gfs_quota *quota, char *buf);
34236 +void gfs_quota_out(struct gfs_quota *quota, char *buf);
34237 +void gfs_dinode_in(struct gfs_dinode *dinode, char *buf);
34238 +void gfs_dinode_out(struct gfs_dinode *dinode, char *buf);
34239 +void gfs_indirect_in(struct gfs_indirect *indirect, char *buf);
34240 +void gfs_indirect_out(struct gfs_indirect *indirect, char *buf);
34241 +void gfs_dirent_in(struct gfs_dirent *dirent, char *buf);
34242 +void gfs_dirent_out(struct gfs_dirent *dirent, char *buf);
34243 +void gfs_leaf_in(struct gfs_leaf *leaf, char *buf);
34244 +void gfs_leaf_out(struct gfs_leaf *leaf, char *buf);
34245 +void gfs_log_header_in(struct gfs_log_header *head, char *buf);
34246 +void gfs_log_header_out(struct gfs_log_header *head, char *buf);
34247 +void gfs_desc_in(struct gfs_log_descriptor *desc, char *buf);
34248 +void gfs_desc_out(struct gfs_log_descriptor *desc, char *buf);
34249 +void gfs_block_tag_in(struct gfs_block_tag *btag, char *buf);
34250 +void gfs_block_tag_out(struct gfs_block_tag *btag, char *buf);
34251 +void gfs_quota_tag_in(struct gfs_quota_tag *qtag, char *buf);
34252 +void gfs_quota_tag_out(struct gfs_quota_tag *qtag, char *buf);
34253 +void gfs_ea_header_in(struct gfs_ea_header *qtag, char *buf);
34254 +void gfs_ea_header_out(struct gfs_ea_header *qtag, char *buf);
34256 +/* Printing functions */
34258 +void gfs_inum_print(struct gfs_inum *no);
34259 +void gfs_meta_header_print(struct gfs_meta_header *mh);
34260 +void gfs_sb_print(struct gfs_sb *sb);
34261 +void gfs_jindex_print(struct gfs_jindex *jindex);
34262 +void gfs_rindex_print(struct gfs_rindex *rindex);
34263 +void gfs_rgrp_print(struct gfs_rgrp *rgrp);
34264 +void gfs_quota_print(struct gfs_quota *quota);
34265 +void gfs_dinode_print(struct gfs_dinode *dinode);
34266 +void gfs_indirect_print(struct gfs_indirect *indirect);
34267 +void gfs_dirent_print(struct gfs_dirent *dirent, char *name);
34268 +void gfs_leaf_print(struct gfs_leaf *leaf);
34269 +void gfs_log_header_print(struct gfs_log_header *head);
34270 +void gfs_desc_print(struct gfs_log_descriptor *desc);
34271 +void gfs_block_tag_print(struct gfs_block_tag *tag);
34272 +void gfs_quota_tag_print(struct gfs_quota_tag *tag);
34273 +void gfs_ea_header_print(struct gfs_ea_header *tag);
34275 +/* The hash function for ExHash directories */
34277 +uint32_t gfs_dir_hash(const char *data, int len);
34279 +#endif /* __GFS_ONDISK_DOT_H__ */
34283 +#ifdef WANT_GFS_CONVERSION_FUNCTIONS
34285 +#define CPIN_08(s1, s2, member, count) {memcpy((s1->member), (s2->member), (count));}
34286 +#define CPOUT_08(s1, s2, member, count) {memcpy((s2->member), (s1->member), (count));}
34287 +#define CPIN_16(s1, s2, member) {(s1->member) = gfs16_to_cpu((s2->member));}
34288 +#define CPOUT_16(s1, s2, member) {(s2->member) = cpu_to_gfs16((s1->member));}
34289 +#define CPIN_32(s1, s2, member) {(s1->member) = gfs32_to_cpu((s2->member));}
34290 +#define CPOUT_32(s1, s2, member) {(s2->member) = cpu_to_gfs32((s1->member));}
34291 +#define CPIN_64(s1, s2, member) {(s1->member) = gfs64_to_cpu((s2->member));}
34292 +#define CPOUT_64(s1, s2, member) {(s2->member) = cpu_to_gfs64((s1->member));}
34294 +#define pa(struct, member, count) print_array(#member, struct->member, count);
34297 + * print_array - Print out an array of bytes
34298 + * @title: what to print before the array
34299 + * @buf: the array
34300 + * @count: the number of bytes
34305 +print_array(char *title, char *buf, int count)
34309 + printk(" %s =\n", title);
34310 + for (x = 0; x < count; x++) {
34311 + printk("%.2X ", (unsigned char)buf[x]);
34312 + if (x % 16 == 15)
34320 + * gfs_inum_in - Read in an inode number
34321 + * @no: the cpu-order structure
34322 + * @buf: the disk-order buffer
34327 +gfs_inum_in(struct gfs_inum *no, char *buf)
34329 + struct gfs_inum *str = (struct gfs_inum *)buf;
34331 + CPIN_64(no, str, no_formal_ino);
34332 + CPIN_64(no, str, no_addr);
34336 + * gfs_inum_out - Write out an inode number
34337 + * @no: the cpu-order structure
34338 + * @buf: the disk-order buffer
34343 +gfs_inum_out(struct gfs_inum *no, char *buf)
34345 + struct gfs_inum *str = (struct gfs_inum *)buf;
34347 + CPOUT_64(no, str, no_formal_ino);
34348 + CPOUT_64(no, str, no_addr);
34352 + * gfs_inum_print - Print out an inode number
34353 + * @no: the cpu-order buffer
34358 +gfs_inum_print(struct gfs_inum *no)
34360 + pv(no, no_formal_ino, "%"PRIu64);
34361 + pv(no, no_addr, "%"PRIu64);
34365 + * gfs_meta_header_in - Read in a metadata header
34366 + * @mh: the cpu-order structure
34367 + * @buf: the disk-order buffer
34372 +gfs_meta_header_in(struct gfs_meta_header *mh, char *buf)
34374 + struct gfs_meta_header *str = (struct gfs_meta_header *)buf;
34376 + CPIN_32(mh, str, mh_magic);
34377 + CPIN_32(mh, str, mh_type);
34378 + CPIN_64(mh, str, mh_generation);
34379 + CPIN_32(mh, str, mh_format);
34380 + CPIN_32(mh, str, mh_incarn);
34384 + * gfs_meta_header_out - Write out a metadata header
34385 + * @mh: the cpu-order structure
34386 + * @buf: the disk-order buffer
34388 + * Don't ever change the generation number in this routine.
34389 + * It's done manually in increment_generation().
34393 +gfs_meta_header_out(struct gfs_meta_header *mh, char *buf)
34395 + struct gfs_meta_header *str = (struct gfs_meta_header *)buf;
34397 + CPOUT_32(mh, str, mh_magic);
34398 + CPOUT_32(mh, str, mh_type);
34400 + /* Don't do this!
34401 + mh_generation should only be changed manually. */
34402 + CPOUT_64(mh, str, mh_generation);
34404 + CPOUT_32(mh, str, mh_format);
34405 + CPOUT_32(mh, str, mh_incarn);
34409 + * gfs_meta_header_print - Print out a metadata header
34410 + * @mh: the cpu-order buffer
34415 +gfs_meta_header_print(struct gfs_meta_header *mh)
34417 + pv(mh, mh_magic, "0x%.8X");
34418 + pv(mh, mh_type, "%u");
34419 + pv(mh, mh_generation, "%"PRIu64);
34420 + pv(mh, mh_format, "%u");
34421 + pv(mh, mh_incarn, "%u");
34425 + * gfs_sb_in - Read in a superblock
34426 + * @sb: the cpu-order structure
34427 + * @buf: the disk-order buffer
34432 +gfs_sb_in(struct gfs_sb *sb, char *buf)
34434 + struct gfs_sb *str = (struct gfs_sb *)buf;
34436 + gfs_meta_header_in(&sb->sb_header, buf);
34438 + CPIN_32(sb, str, sb_fs_format);
34439 + CPIN_32(sb, str, sb_multihost_format);
34440 + CPIN_32(sb, str, sb_flags);
34442 + CPIN_32(sb, str, sb_bsize);
34443 + CPIN_32(sb, str, sb_bsize_shift);
34444 + CPIN_32(sb, str, sb_seg_size);
34446 + gfs_inum_in(&sb->sb_jindex_di, (char *)&str->sb_jindex_di);
34447 + gfs_inum_in(&sb->sb_rindex_di, (char *)&str->sb_rindex_di);
34448 + gfs_inum_in(&sb->sb_root_di, (char *)&str->sb_root_di);
34450 + CPIN_08(sb, str, sb_lockproto, GFS_LOCKNAME_LEN);
34451 + CPIN_08(sb, str, sb_locktable, GFS_LOCKNAME_LEN);
34453 + gfs_inum_in(&sb->sb_quota_di, (char *)&str->sb_quota_di);
34454 + gfs_inum_in(&sb->sb_license_di, (char *)&str->sb_license_di);
34456 + CPIN_08(sb, str, sb_reserved, 96);
34460 + * gfs_sb_out - Write out a superblock
34461 + * @sb: the cpu-order structure
34462 + * @buf: the disk-order buffer
34467 +gfs_sb_out(struct gfs_sb *sb, char *buf)
34469 + struct gfs_sb *str = (struct gfs_sb *)buf;
34471 + gfs_meta_header_out(&sb->sb_header, buf);
34473 + CPOUT_32(sb, str, sb_fs_format);
34474 + CPOUT_32(sb, str, sb_multihost_format);
34475 + CPOUT_32(sb, str, sb_flags);
34477 + CPOUT_32(sb, str, sb_bsize);
34478 + CPOUT_32(sb, str, sb_bsize_shift);
34479 + CPOUT_32(sb, str, sb_seg_size);
34481 + gfs_inum_out(&sb->sb_jindex_di, (char *)&str->sb_jindex_di);
34482 + gfs_inum_out(&sb->sb_rindex_di, (char *)&str->sb_rindex_di);
34483 + gfs_inum_out(&sb->sb_root_di, (char *)&str->sb_root_di);
34485 + CPOUT_08(sb, str, sb_lockproto, GFS_LOCKNAME_LEN);
34486 + CPOUT_08(sb, str, sb_locktable, GFS_LOCKNAME_LEN);
34488 + gfs_inum_out(&sb->sb_quota_di, (char *)&str->sb_quota_di);
34489 + gfs_inum_out(&sb->sb_license_di, (char *)&str->sb_license_di);
34491 + CPOUT_08(sb, str, sb_reserved, 96);
34495 + * gfs_sb_print - Print out a superblock
34496 + * @sb: the cpu-order buffer
34501 +gfs_sb_print(struct gfs_sb *sb)
34503 + gfs_meta_header_print(&sb->sb_header);
34505 + pv(sb, sb_fs_format, "%u");
34506 + pv(sb, sb_multihost_format, "%u");
34507 + pv(sb, sb_flags, "%u");
34509 + pv(sb, sb_bsize, "%u");
34510 + pv(sb, sb_bsize_shift, "%u");
34511 + pv(sb, sb_seg_size, "%u");
34513 + gfs_inum_print(&sb->sb_jindex_di);
34514 + gfs_inum_print(&sb->sb_rindex_di);
34515 + gfs_inum_print(&sb->sb_root_di);
34517 + pv(sb, sb_lockproto, "%s");
34518 + pv(sb, sb_locktable, "%s");
34520 + gfs_inum_print(&sb->sb_quota_di);
34521 + gfs_inum_print(&sb->sb_license_di);
34523 + pa(sb, sb_reserved, 96);
34527 + * gfs_jindex_in - Read in a journal index structure
34528 + * @jindex: the cpu-order structure
34529 + * @buf: the disk-order buffer
34534 +gfs_jindex_in(struct gfs_jindex *jindex, char *buf)
34536 + struct gfs_jindex *str = (struct gfs_jindex *)buf;
34538 + CPIN_64(jindex, str, ji_addr);
34539 + CPIN_32(jindex, str, ji_nsegment);
34540 + CPIN_32(jindex, str, ji_pad);
34542 + CPIN_08(jindex, str, ji_reserved, 64);
34546 + * gfs_jindex_out - Write out a journal index structure
34547 + * @jindex: the cpu-order structure
34548 + * @buf: the disk-order buffer
34553 +gfs_jindex_out(struct gfs_jindex *jindex, char *buf)
34555 + struct gfs_jindex *str = (struct gfs_jindex *)buf;
34557 + CPOUT_64(jindex, str, ji_addr);
34558 + CPOUT_32(jindex, str, ji_nsegment);
34559 + CPOUT_32(jindex, str, ji_pad);
34561 + CPOUT_08(jindex, str, ji_reserved, 64);
34565 + * gfs_jindex_print - Print out a journal index structure
34566 + * @ji: the cpu-order buffer
34571 +gfs_jindex_print(struct gfs_jindex *ji)
34573 + pv(ji, ji_addr, "%"PRIu64);
34574 + pv(ji, ji_nsegment, "%u");
34575 + pv(ji, ji_pad, "%u");
34577 + pa(ji, ji_reserved, 64);
34581 + * gfs_rindex_in - Read in a resource index structure
34582 + * @rindex: the cpu-order structure
34583 + * @buf: the disk-order buffer
34588 +gfs_rindex_in(struct gfs_rindex *rindex, char *buf)
34590 + struct gfs_rindex *str = (struct gfs_rindex *)buf;
34592 + CPIN_64(rindex, str, ri_addr);
34593 + CPIN_32(rindex, str, ri_length);
34594 + CPIN_32(rindex, str, ri_pad);
34596 + CPIN_64(rindex, str, ri_data1);
34597 + CPIN_32(rindex, str, ri_data);
34599 + CPIN_32(rindex, str, ri_bitbytes);
34601 + CPIN_08(rindex, str, ri_reserved, 64);
34605 + * gfs_rindex_out - Write out a resource index structure
34606 + * @rindex: the cpu-order structure
34607 + * @buf: the disk-order buffer
34612 +gfs_rindex_out(struct gfs_rindex *rindex, char *buf)
34614 + struct gfs_rindex *str = (struct gfs_rindex *)buf;
34616 + CPOUT_64(rindex, str, ri_addr);
34617 + CPOUT_32(rindex, str, ri_length);
34618 + CPOUT_32(rindex, str, ri_pad);
34620 + CPOUT_64(rindex, str, ri_data1);
34621 + CPOUT_32(rindex, str, ri_data);
34623 + CPOUT_32(rindex, str, ri_bitbytes);
34625 + CPOUT_08(rindex, str, ri_reserved, 64);
34629 + * gfs_rindex_print - Print out a resource index structure
34630 + * @ri: the cpu-order buffer
34635 +gfs_rindex_print(struct gfs_rindex *ri)
34637 + pv(ri, ri_addr, "%"PRIu64);
34638 + pv(ri, ri_length, "%u");
34639 + pv(ri, ri_pad, "%u");
34641 + pv(ri, ri_data1, "%"PRIu64);
34642 + pv(ri, ri_data, "%u");
34644 + pv(ri, ri_bitbytes, "%u");
34646 + pa(ri, ri_reserved, 64);
34650 + * gfs_rgrp_in - Read in a resource group header
34651 + * @rgrp: the cpu-order structure
34652 + * @buf: the disk-order buffer
34657 +gfs_rgrp_in(struct gfs_rgrp *rgrp, char *buf)
34659 + struct gfs_rgrp *str = (struct gfs_rgrp *)buf;
34661 + gfs_meta_header_in(&rgrp->rg_header, buf);
34663 + CPIN_32(rgrp, str, rg_flags);
34665 + CPIN_32(rgrp, str, rg_free);
34667 + CPIN_32(rgrp, str, rg_useddi);
34668 + CPIN_32(rgrp, str, rg_freedi);
34669 + gfs_inum_in(&rgrp->rg_freedi_list, (char *)&str->rg_freedi_list);
34671 + CPIN_32(rgrp, str, rg_usedmeta);
34672 + CPIN_32(rgrp, str, rg_freemeta);
34674 + CPIN_08(rgrp, str, rg_reserved, 64);
34678 + * gfs_rgrp_out - Write out a resource group header
34679 + * @rgrp: the cpu-order structure
34680 + * @buf: the disk-order buffer
34685 +gfs_rgrp_out(struct gfs_rgrp *rgrp, char *buf)
34687 + struct gfs_rgrp *str = (struct gfs_rgrp *)buf;
34689 + gfs_meta_header_out(&rgrp->rg_header, buf);
34691 + CPOUT_32(rgrp, str, rg_flags);
34693 + CPOUT_32(rgrp, str, rg_free);
34695 + CPOUT_32(rgrp, str, rg_useddi);
34696 + CPOUT_32(rgrp, str, rg_freedi);
34697 + gfs_inum_out(&rgrp->rg_freedi_list, (char *)&str->rg_freedi_list);
34699 + CPOUT_32(rgrp, str, rg_usedmeta);
34700 + CPOUT_32(rgrp, str, rg_freemeta);
34702 + CPOUT_08(rgrp, str, rg_reserved, 64);
34706 + * gfs_rgrp_print - Print out a resource group header
34707 + * @rg: the cpu-order buffer
34712 +gfs_rgrp_print(struct gfs_rgrp *rg)
34714 + gfs_meta_header_print(&rg->rg_header);
34716 + pv(rg, rg_flags, "%u");
34718 + pv(rg, rg_free, "%u");
34720 + pv(rg, rg_useddi, "%u");
34721 + pv(rg, rg_freedi, "%u");
34722 + gfs_inum_print(&rg->rg_freedi_list);
34724 + pv(rg, rg_usedmeta, "%u");
34725 + pv(rg, rg_freemeta, "%u");
34727 + pa(rg, rg_reserved, 64);
34731 + * gfs_quota_in - Read in a quota structure
34732 + * @quota: the cpu-order structure
34733 + * @buf: the disk-order buffer
34738 +gfs_quota_in(struct gfs_quota *quota, char *buf)
34740 + struct gfs_quota *str = (struct gfs_quota *)buf;
34742 + CPIN_64(quota, str, qu_limit);
34743 + CPIN_64(quota, str, qu_warn);
34744 + CPIN_64(quota, str, qu_value);
34746 + CPIN_08(quota, str, qu_reserved, 64);
34750 + * gfs_quota_out - Write out a quota structure
34751 + * @quota: the cpu-order structure
34752 + * @buf: the disk-order buffer
34757 +gfs_quota_out(struct gfs_quota *quota, char *buf)
34759 + struct gfs_quota *str = (struct gfs_quota *)buf;
34761 + CPOUT_64(quota, str, qu_limit);
34762 + CPOUT_64(quota, str, qu_warn);
34763 + CPOUT_64(quota, str, qu_value);
34765 + CPOUT_08(quota, str, qu_reserved, 64);
34769 + * gfs_quota_print - Print out a quota structure
34770 + * @quota: the cpu-order buffer
34775 +gfs_quota_print(struct gfs_quota *quota)
34777 + pv(quota, qu_limit, "%"PRIu64);
34778 + pv(quota, qu_warn, "%"PRIu64);
34779 + pv(quota, qu_value, "%"PRId64);
34781 + pa(quota, qu_reserved, 64);
34785 + * gfs_dinode_in - Read in a dinode
34786 + * @dinode: the cpu-order structure
34787 + * @buf: the disk-order buffer
34792 +gfs_dinode_in(struct gfs_dinode *dinode, char *buf)
34794 + struct gfs_dinode *str = (struct gfs_dinode *)buf;
34796 + gfs_meta_header_in(&dinode->di_header, buf);
34798 + gfs_inum_in(&dinode->di_num, (char *)&str->di_num);
34800 + CPIN_32(dinode, str, di_mode);
34801 + CPIN_32(dinode, str, di_uid);
34802 + CPIN_32(dinode, str, di_gid);
34803 + CPIN_32(dinode, str, di_nlink);
34804 + CPIN_64(dinode, str, di_size);
34805 + CPIN_64(dinode, str, di_blocks);
34806 + CPIN_64(dinode, str, di_atime);
34807 + CPIN_64(dinode, str, di_mtime);
34808 + CPIN_64(dinode, str, di_ctime);
34809 + CPIN_32(dinode, str, di_major);
34810 + CPIN_32(dinode, str, di_minor);
34812 + CPIN_64(dinode, str, di_rgrp);
34813 + CPIN_64(dinode, str, di_goal_rgrp);
34814 + CPIN_32(dinode, str, di_goal_dblk);
34815 + CPIN_32(dinode, str, di_goal_mblk);
34816 + CPIN_32(dinode, str, di_flags);
34817 + CPIN_32(dinode, str, di_payload_format);
34818 + CPIN_16(dinode, str, di_type);
34819 + CPIN_16(dinode, str, di_height);
34820 + CPIN_32(dinode, str, di_incarn);
34821 + CPIN_16(dinode, str, di_pad);
34823 + CPIN_16(dinode, str, di_depth);
34824 + CPIN_32(dinode, str, di_entries);
34826 + gfs_inum_in(&dinode->di_next_unused, (char *)&str->di_next_unused);
34828 + CPIN_64(dinode, str, di_eattr);
34830 + CPIN_08(dinode, str, di_reserved, 56);
34834 + * gfs_dinode_out - Write out a dinode
34835 + * @dinode: the cpu-order structure
34836 + * @buf: the disk-order buffer
34841 +gfs_dinode_out(struct gfs_dinode *dinode, char *buf)
34843 + struct gfs_dinode *str = (struct gfs_dinode *)buf;
34845 + gfs_meta_header_out(&dinode->di_header, buf);
34847 + gfs_inum_out(&dinode->di_num, (char *)&str->di_num);
34849 + CPOUT_32(dinode, str, di_mode);
34850 + CPOUT_32(dinode, str, di_uid);
34851 + CPOUT_32(dinode, str, di_gid);
34852 + CPOUT_32(dinode, str, di_nlink);
34853 + CPOUT_64(dinode, str, di_size);
34854 + CPOUT_64(dinode, str, di_blocks);
34855 + CPOUT_64(dinode, str, di_atime);
34856 + CPOUT_64(dinode, str, di_mtime);
34857 + CPOUT_64(dinode, str, di_ctime);
34858 + CPOUT_32(dinode, str, di_major);
34859 + CPOUT_32(dinode, str, di_minor);
34861 + CPOUT_64(dinode, str, di_rgrp);
34862 + CPOUT_64(dinode, str, di_goal_rgrp);
34863 + CPOUT_32(dinode, str, di_goal_dblk);
34864 + CPOUT_32(dinode, str, di_goal_mblk);
34865 + CPOUT_32(dinode, str, di_flags);
34866 + CPOUT_32(dinode, str, di_payload_format);
34867 + CPOUT_16(dinode, str, di_type);
34868 + CPOUT_16(dinode, str, di_height);
34869 + CPOUT_32(dinode, str, di_incarn);
34870 + CPOUT_16(dinode, str, di_pad);
34872 + CPOUT_16(dinode, str, di_depth);
34873 + CPOUT_32(dinode, str, di_entries);
34875 + gfs_inum_out(&dinode->di_next_unused, (char *)&str->di_next_unused);
34877 + CPOUT_64(dinode, str, di_eattr);
34879 + CPOUT_08(dinode, str, di_reserved, 56);
34883 + * gfs_dinode_print - Print out a dinode
34884 + * @di: the cpu-order buffer
34889 +gfs_dinode_print(struct gfs_dinode *di)
34891 + gfs_meta_header_print(&di->di_header);
34893 + gfs_inum_print(&di->di_num);
34895 + pv(di, di_mode, "0%o");
34896 + pv(di, di_uid, "%u");
34897 + pv(di, di_gid, "%u");
34898 + pv(di, di_nlink, "%u");
34899 + pv(di, di_size, "%"PRIu64);
34900 + pv(di, di_blocks, "%"PRIu64);
34901 + pv(di, di_atime, "%"PRId64);
34902 + pv(di, di_mtime, "%"PRId64);
34903 + pv(di, di_ctime, "%"PRId64);
34904 + pv(di, di_major, "%u");
34905 + pv(di, di_minor, "%u");
34907 + pv(di, di_rgrp, "%"PRIu64);
34908 + pv(di, di_goal_rgrp, "%"PRIu64);
34909 + pv(di, di_goal_dblk, "%u");
34910 + pv(di, di_goal_mblk, "%u");
34911 + pv(di, di_flags, "0x%.8X");
34912 + pv(di, di_payload_format, "%u");
34913 + pv(di, di_type, "%u");
34914 + pv(di, di_height, "%u");
34915 + pv(di, di_incarn, "%u");
34916 + pv(di, di_pad, "%u");
34918 + pv(di, di_depth, "%u");
34919 + pv(di, di_entries, "%u");
34921 + gfs_inum_print(&di->di_next_unused);
34923 + pv(di, di_eattr, "%"PRIu64);
34925 + pa(di, di_reserved, 56);
34929 + * gfs_indirect_in - copy in the header of an indirect block
34930 + * @indirect: the in memory copy
34931 + * @buf: the buffer copy
34936 +gfs_indirect_in(struct gfs_indirect *indirect, char *buf)
34938 + struct gfs_indirect *str = (struct gfs_indirect *)buf;
34940 + gfs_meta_header_in(&indirect->in_header, buf);
34942 + CPIN_08(indirect, str, in_reserved, 64);
34946 + * gfs_indirect_out - copy out the header of an indirect block
34947 + * @indirect: the in memory copy
34948 + * @buf: the buffer copy
34953 +gfs_indirect_out(struct gfs_indirect *indirect, char *buf)
34955 + struct gfs_indirect *str = (struct gfs_indirect *)buf;
34957 + gfs_meta_header_out(&indirect->in_header, buf);
34959 + CPOUT_08(indirect, str, in_reserved, 64);
34963 + * gfs_indirect_print - Print out an indirect block header
34964 + * @indirect: the cpu-order buffer
34969 +gfs_indirect_print(struct gfs_indirect *indirect)
34971 + gfs_meta_header_print(&indirect->in_header);
34973 + pa(indirect, in_reserved, 64);
34977 + * gfs_dirent_in - Read in a directory entry
34978 + * @dirent: the cpu-order structure
34979 + * @buf: the disk-order buffer
34984 +gfs_dirent_in(struct gfs_dirent *dirent, char *buf)
34986 + struct gfs_dirent *str = (struct gfs_dirent *)buf;
34988 + gfs_inum_in(&dirent->de_inum, (char *)&str->de_inum);
34989 + CPIN_32(dirent, str, de_hash);
34990 + CPIN_16(dirent, str, de_rec_len);
34991 + CPIN_16(dirent, str, de_name_len);
34992 + CPIN_16(dirent, str, de_type);
34994 + CPIN_08(dirent, str, de_reserved, 14);
34998 + * gfs_dirent_out - Write out a directory entry
34999 + * @dirent: the cpu-order structure
35000 + * @buf: the disk-order buffer
35005 +gfs_dirent_out(struct gfs_dirent *dirent, char *buf)
35007 + struct gfs_dirent *str = (struct gfs_dirent *)buf;
35009 + gfs_inum_out(&dirent->de_inum, (char *)&str->de_inum);
35010 + CPOUT_32(dirent, str, de_hash);
35011 + CPOUT_16(dirent, str, de_rec_len);
35012 + CPOUT_16(dirent, str, de_name_len);
35013 + CPOUT_16(dirent, str, de_type);
35015 + CPOUT_08(dirent, str, de_reserved, 14);
35019 + * gfs_dirent_print - Print out a directory entry
35020 + * @de: the cpu-order buffer
35021 + * @name: the filename
35026 +gfs_dirent_print(struct gfs_dirent *de, char *name)
35028 + char buf[GFS_FNAMESIZE + 1];
35030 + gfs_inum_print(&de->de_inum);
35031 + pv(de, de_hash, "0x%.8X");
35032 + pv(de, de_rec_len, "%u");
35033 + pv(de, de_name_len, "%u");
35034 + pv(de, de_type, "%u");
35036 + pa(de, de_reserved, 14);
35038 + memset(buf, 0, GFS_FNAMESIZE + 1);
35039 + memcpy(buf, name, de->de_name_len);
35040 + printk(" name = %s\n", buf);
35044 + * gfs_leaf_in - Read in a directory leaf header
35045 + * @leaf: the cpu-order structure
35046 + * @buf: the disk-order buffer
35051 +gfs_leaf_in(struct gfs_leaf *leaf, char *buf)
35053 + struct gfs_leaf *str = (struct gfs_leaf *)buf;
35055 + gfs_meta_header_in(&leaf->lf_header, buf);
35057 + CPIN_16(leaf, str, lf_depth);
35058 + CPIN_16(leaf, str, lf_entries);
35059 + CPIN_32(leaf, str, lf_dirent_format);
35060 + CPIN_64(leaf, str, lf_next);
35062 + CPIN_08(leaf, str, lf_reserved, 64);
35066 + * gfs_leaf_out - Write out a directory leaf header
35067 + * @leaf: the cpu-order structure
35068 + * @buf: the disk-order buffer
35073 +gfs_leaf_out(struct gfs_leaf *leaf, char *buf)
35075 + struct gfs_leaf *str = (struct gfs_leaf *)buf;
35077 + gfs_meta_header_out(&leaf->lf_header, buf);
35079 + CPOUT_16(leaf, str, lf_depth);
35080 + CPOUT_16(leaf, str, lf_entries);
35081 + CPOUT_32(leaf, str, lf_dirent_format);
35082 + CPOUT_64(leaf, str, lf_next);
35084 + CPOUT_08(leaf, str, lf_reserved, 64);
35088 + * gfs_leaf_print - Print out a directory leaf header
35089 + * @lf: the cpu-order buffer
35094 +gfs_leaf_print(struct gfs_leaf *lf)
35096 + gfs_meta_header_print(&lf->lf_header);
35098 + pv(lf, lf_depth, "%u");
35099 + pv(lf, lf_entries, "%u");
35100 + pv(lf, lf_dirent_format, "%u");
35101 + pv(lf, lf_next, "%"PRIu64);
35103 + pa(lf, lf_reserved, 64);
35107 + * gfs_log_header_in - Read in a log header
35108 + * @head: the cpu-order structure
35109 + * @buf: the disk-order buffer
35114 +gfs_log_header_in(struct gfs_log_header *head, char *buf)
35116 + struct gfs_log_header *str = (struct gfs_log_header *)buf;
35118 + gfs_meta_header_in(&head->lh_header, buf);
35120 + CPIN_32(head, str, lh_flags);
35121 + CPIN_32(head, str, lh_pad);
35123 + CPIN_64(head, str, lh_first);
35124 + CPIN_64(head, str, lh_sequence);
35126 + CPIN_64(head, str, lh_tail);
35127 + CPIN_64(head, str, lh_last_dump);
35129 + CPIN_08(head, str, lh_reserved, 64);
35133 + * gfs_log_header_out - Write out a log header
35134 + * @head: the cpu-order structure
35135 + * @buf: the disk-order buffer
35140 +gfs_log_header_out(struct gfs_log_header *head, char *buf)
35142 + struct gfs_log_header *str = (struct gfs_log_header *)buf;
35144 + gfs_meta_header_out(&head->lh_header, buf);
35146 + CPOUT_32(head, str, lh_flags);
35147 + CPOUT_32(head, str, lh_pad);
35149 + CPOUT_64(head, str, lh_first);
35150 + CPOUT_64(head, str, lh_sequence);
35152 + CPOUT_64(head, str, lh_tail);
35153 + CPOUT_64(head, str, lh_last_dump);
35155 + CPOUT_08(head, str, lh_reserved, 64);
35159 + * gfs_log_header_print - Print out a log header
35160 + * @lh: the cpu-order buffer
35165 +gfs_log_header_print(struct gfs_log_header *lh)
35167 + gfs_meta_header_print(&lh->lh_header);
35169 + pv(lh, lh_flags, "0x%.8X");
35170 + pv(lh, lh_pad, "%u");
35172 + pv(lh, lh_first, "%"PRIu64);
35173 + pv(lh, lh_sequence, "%"PRIu64);
35175 + pv(lh, lh_tail, "%"PRIu64);
35176 + pv(lh, lh_last_dump, "%"PRIu64);
35178 + pa(lh, lh_reserved, 64);
35182 + * gfs_desc_in - Read in a log descriptor
35183 + * @desc: the cpu-order structure
35184 + * @buf: the disk-order buffer
35189 +gfs_desc_in(struct gfs_log_descriptor *desc, char *buf)
35191 + struct gfs_log_descriptor *str = (struct gfs_log_descriptor *)buf;
35193 + gfs_meta_header_in(&desc->ld_header, buf);
35195 + CPIN_32(desc, str, ld_type);
35196 + CPIN_32(desc, str, ld_length);
35197 + CPIN_32(desc, str, ld_data1);
35198 + CPIN_32(desc, str, ld_data2);
35200 + CPIN_08(desc, str, ld_reserved, 64);
35204 + * gfs_desc_out - Write out a log descriptor
35205 + * @desc: the cpu-order structure
35206 + * @buf: the disk-order buffer
35211 +gfs_desc_out(struct gfs_log_descriptor *desc, char *buf)
35213 + struct gfs_log_descriptor *str = (struct gfs_log_descriptor *)buf;
35215 + gfs_meta_header_out(&desc->ld_header, buf);
35217 + CPOUT_32(desc, str, ld_type);
35218 + CPOUT_32(desc, str, ld_length);
35219 + CPOUT_32(desc, str, ld_data1);
35220 + CPOUT_32(desc, str, ld_data2);
35222 + CPOUT_08(desc, str, ld_reserved, 64);
35226 + * gfs_desc_print - Print out a log descriptor
35227 + * @ld: the cpu-order buffer
35232 +gfs_desc_print(struct gfs_log_descriptor *ld)
35234 + gfs_meta_header_print(&ld->ld_header);
35236 + pv(ld, ld_type, "%u");
35237 + pv(ld, ld_length, "%u");
35238 + pv(ld, ld_data1, "%u");
35239 + pv(ld, ld_data2, "%u");
35241 + pa(ld, ld_reserved, 64);
35245 + * gfs_block_tag_in - Read in a block tag
35246 + * @tag: the cpu-order structure
35247 + * @buf: the disk-order buffer
35252 +gfs_block_tag_in(struct gfs_block_tag *tag, char *buf)
35254 + struct gfs_block_tag *str = (struct gfs_block_tag *)buf;
35256 + CPIN_64(tag, str, bt_blkno);
35257 + CPIN_32(tag, str, bt_flags);
35258 + CPIN_32(tag, str, bt_pad);
35262 + * gfs_block_tag_out - Write out a block tag
35263 + * @tag: the cpu-order structure
35264 + * @buf: the disk-order buffer
35269 +gfs_block_tag_out(struct gfs_block_tag *tag, char *buf)
35271 + struct gfs_block_tag *str = (struct gfs_block_tag *)buf;
35273 + CPOUT_64(tag, str, bt_blkno);
35274 + CPOUT_32(tag, str, bt_flags);
35275 + CPOUT_32(tag, str, bt_pad);
35279 + * gfs_block_tag_print - Print out a block tag
35280 + * @tag: the cpu-order buffer
35285 +gfs_block_tag_print(struct gfs_block_tag *tag)
35287 + pv(tag, bt_blkno, "%"PRIu64);
35288 + pv(tag, bt_flags, "%u");
35289 + pv(tag, bt_pad, "%u");
35293 + * gfs_quota_tag_in - Read in a quota tag
35294 + * @tag: the cpu-order structure
35295 + * @buf: the disk-order buffer
35300 +gfs_quota_tag_in(struct gfs_quota_tag *tag, char *buf)
35302 + struct gfs_quota_tag *str = (struct gfs_quota_tag *)buf;
35304 + CPIN_64(tag, str, qt_change);
35305 + CPIN_32(tag, str, qt_flags);
35306 + CPIN_32(tag, str, qt_id);
35310 + * gfs_quota_tag_out - Write out a quota tag
35311 + * @tag: the cpu-order structure
35312 + * @buf: the disk-order buffer
35317 +gfs_quota_tag_out(struct gfs_quota_tag *tag, char *buf)
35319 + struct gfs_quota_tag *str = (struct gfs_quota_tag *)buf;
35321 + CPOUT_64(tag, str, qt_change);
35322 + CPOUT_32(tag, str, qt_flags);
35323 + CPOUT_32(tag, str, qt_id);
35327 + * gfs_quota_tag_print - Print out a quota tag
35328 + * @tag: the cpu-order buffer
35333 +gfs_quota_tag_print(struct gfs_quota_tag *tag)
35335 + pv(tag, qt_change, "%"PRId64);
35336 + pv(tag, qt_flags, "0x%.8X");
35337 + pv(tag, qt_id, "%u");
35341 + * gfs_ea_header_in - Read in an Extended Attribute header
35342 + * @ea: the cpu-order structure
35343 + * @buf: the disk-order buffer
35348 +gfs_ea_header_in(struct gfs_ea_header *ea, char *buf)
35350 + struct gfs_ea_header *str = (struct gfs_ea_header *)buf;
35352 + CPIN_32(ea, str, ea_rec_len);
35353 + CPIN_32(ea, str, ea_data_len);
35354 + ea->ea_name_len = str->ea_name_len;
35355 + ea->ea_type = str->ea_type;
35356 + ea->ea_flags = str->ea_flags;
35357 + ea->ea_num_ptrs = str->ea_num_ptrs;
35358 + CPIN_32(ea, str, ea_pad);
35362 + * gfs_ea_header_out - Write out an Extended Attribute header
35363 + * @ea: the cpu-order structure
35364 + * @buf: the disk-order buffer
35369 +gfs_ea_header_out(struct gfs_ea_header *ea, char *buf)
35371 + struct gfs_ea_header *str = (struct gfs_ea_header *)buf;
35373 + CPOUT_32(ea, str, ea_rec_len);
35374 + CPOUT_32(ea, str, ea_data_len);
35375 + str->ea_name_len = ea->ea_name_len;
35376 + str->ea_type = ea->ea_type;
35377 + str->ea_flags = ea->ea_flags;
35378 + str->ea_num_ptrs = ea->ea_num_ptrs;
35379 + CPOUT_32(ea, str, ea_pad);
35383 + * gfs_ea_header_print - Print out an Extended Attribute header
35384 + * @ea: the cpu-order buffer
35389 +gfs_ea_header_print(struct gfs_ea_header *ea)
35391 + pv(ea, ea_rec_len, "%u");
35392 + pv(ea, ea_data_len, "%u");
35393 + pv(ea, ea_name_len, "%u");
35394 + pv(ea, ea_type, "%u");
35395 + pv(ea, ea_flags, "%u");
35396 + pv(ea, ea_num_ptrs, "%u");
35397 + pv(ea, ea_pad, "%u");
35400 +static const uint32_t crc_32_tab[] =
35402 + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
35403 + 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
35404 + 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
35405 + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
35406 + 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
35407 + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
35408 + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
35409 + 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
35410 + 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
35411 + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
35412 + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
35413 + 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
35414 + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
35415 + 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
35416 + 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
35417 + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
35418 + 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
35419 + 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
35420 + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
35421 + 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
35422 + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
35423 + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
35424 + 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
35425 + 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
35426 + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
35427 + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
35428 + 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
35429 + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
35430 + 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
35431 + 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
35432 + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
35433 + 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
35437 + * gfs_dir_hash - hash an array of data
35438 + * @data: the data to be hashed
35439 + * @len: the length of data to be hashed
35441 + * Take some data and convert it to a 32-bit hash.
35443 + * The hash function is a 32-bit CRC of the data. The algorithm uses
35444 + * the crc_32_tab table above.
35446 + * This may not be the fastest hash function, but it does a fair bit better
35447 + * at providing uniform results than the others I've looked at. That's
35448 + * really important for efficient directories.
35450 + * Returns: the hash
35454 +gfs_dir_hash(const char *data, int len)
35456 + uint32_t hash = 0xFFFFFFFF;
35458 + for (; len--; data++)
35459 + hash = crc_32_tab[(hash ^ *data) & 0xFF] ^ (hash >> 8);
35466 +#endif /* WANT_GFS_CONVERSION_FUNCTIONS */
35468 diff -urN linux-orig/fs/gfs_locking/lock_dlm/group.c linux-patched/fs/gfs_locking/lock_dlm/group.c
35469 --- linux-orig/fs/gfs_locking/lock_dlm/group.c 1969-12-31 18:00:00.000000000 -0600
35470 +++ linux-patched/fs/gfs_locking/lock_dlm/group.c 2004-06-16 12:03:17.967822065 -0500
35472 +/******************************************************************************
35473 +*******************************************************************************
35475 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
35476 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
35478 +** This copyrighted material is made available to anyone wishing to use,
35479 +** modify, copy, or redistribute it subject to the terms and conditions
35480 +** of the GNU General Public License v.2.
35482 +*******************************************************************************
35483 +******************************************************************************/
35485 +#include <linux/socket.h>
35486 +#include <net/sock.h>
35488 +#include "lock_dlm.h"
35489 +#include <cluster/cnxman.h>
35490 +#include <cluster/service.h>
35493 +struct kcl_service_ops mg_ops;
35496 + * Get the node struct for a given nodeid.
35499 +static dlm_node_t *find_node_by_nodeid(dlm_t *dlm, uint32_t nodeid)
35501 + dlm_node_t *node;
35503 + list_for_each_entry(node, &dlm->mg_nodes, list) {
35504 + if (node->nodeid == nodeid)
35511 + * Get the node struct for a given journalid.
35514 +static dlm_node_t *find_node_by_jid(dlm_t *dlm, uint32_t jid)
35516 + dlm_node_t *node;
35518 + list_for_each_entry(node, &dlm->mg_nodes, list) {
35519 + if (node->jid == jid)
35526 + * If the given ID is clear, get it, setting to the given VALUE. The ID is a
35527 + * journalid, the VALUE is our nodeid. When successful, the held ID-lock is
35528 + * returned (in shared mode). As long as this ID-lock is held, the journalid
35532 +static int id_test_and_set(dlm_t *dlm, uint32_t id, uint32_t val,
35533 + dlm_lock_t **lp_set)
35535 + dlm_lock_t *lp = NULL;
35536 + struct lm_lockname name;
35539 + uint32_t exist_val, beval;
35542 + name.ln_type = LM_TYPE_JID;
35543 + name.ln_number = id;
35545 + error = lm_dlm_get_lock(dlm, &name, &lock);
35549 + error = lm_dlm_hold_lvb(lock, &lvb);
35553 + lp = (dlm_lock_t *) lock;
35554 + set_bit(LFL_IDLOCK, &lp->flags);
35558 + error = lm_dlm_lock_sync(lock, LM_ST_UNLOCKED, LM_ST_SHARED,
35559 + LM_FLAG_TRY | LM_FLAG_NOEXP);
35560 + if (error == -EAGAIN) {
35561 + current->state = TASK_UNINTERRUPTIBLE;
35562 + schedule_timeout(HZ);
35566 + goto fail_unhold;
35568 + memcpy(&beval, lvb, sizeof(beval));
35569 + exist_val = be32_to_cpu(beval);
35571 + if (!exist_val) {
35573 + * This id is unused. Attempt to claim it by getting EX mode
35574 + * and writing our nodeid into the lvb.
35576 + error = lm_dlm_lock_sync(lock, LM_ST_SHARED, LM_ST_EXCLUSIVE,
35577 + LM_FLAG_TRY | LM_FLAG_NOEXP);
35578 + if (error == -EAGAIN) {
35579 + lm_dlm_unlock_sync(lock, LM_ST_SHARED);
35580 + current->state = TASK_UNINTERRUPTIBLE;
35581 + schedule_timeout(HZ);
35585 + goto fail_unlock;
35587 + beval = cpu_to_be32(val);
35588 + memcpy(lvb, &beval, sizeof(beval));
35590 + error = lm_dlm_lock_sync(lock, LM_ST_EXCLUSIVE, LM_ST_SHARED,
35592 + DLM_ASSERT(!error,);
35598 + * This id is already used. It has a non-zero nodeid in the lvb
35600 + lm_dlm_unlock_sync(lock, LM_ST_SHARED);
35601 + lm_dlm_unhold_lvb(lock, lvb);
35602 + lm_dlm_put_lock(lock);
35603 + error = exist_val;
35609 + lm_dlm_unlock_sync(lock, LM_ST_SHARED);
35612 + lm_dlm_unhold_lvb(lock, lvb);
35615 + lm_dlm_put_lock(lock);
35622 + * Release a held ID-lock clearing its VALUE. We have to acquire the lock in
35623 + * EX again so we can write out a zeroed lvb.
35626 +static void id_clear(dlm_t *dlm, dlm_lock_t *lp)
35628 + lm_lock_t *lock = (lm_lock_t *) lp;
35632 + * This flag means that DLM_LKF_CONVDEADLK should not be used.
35634 + set_bit(LFL_FORCE_PROMOTE, &lp->flags);
35638 + error = lm_dlm_lock_sync(lock, LM_ST_SHARED, LM_ST_EXCLUSIVE,
35639 + LM_FLAG_TRY | LM_FLAG_NOEXP);
35640 + if (error == -EAGAIN) {
35647 + memset(lp->lvb, 0, DLM_LVB_LEN);
35648 + lm_dlm_unlock_sync(lock, LM_ST_EXCLUSIVE);
35651 + lm_dlm_unhold_lvb(lock, lp->lvb);
35652 + lm_dlm_put_lock(lock);
35656 + * Get the VALUE for a given ID. The ID is a journalid, the VALUE is a nodeid.
35659 +static int id_value(dlm_t *dlm, uint32_t id, uint32_t *val)
35661 + dlm_lock_t *lp = NULL;
35662 + struct lm_lockname name;
35668 + name.ln_type = LM_TYPE_JID;
35669 + name.ln_number = id;
35671 + error = lm_dlm_get_lock(dlm, &name, &lock);
35675 + error = lm_dlm_hold_lvb(lock, &lvb);
35679 + lp = (dlm_lock_t *) lock;
35680 + set_bit(LFL_IDLOCK, &lp->flags);
35684 + error = lm_dlm_lock_sync(lock, LM_ST_UNLOCKED, LM_ST_SHARED,
35685 + LM_FLAG_TRY | LM_FLAG_NOEXP);
35686 + if (error == -EAGAIN) {
35687 + current->state = TASK_UNINTERRUPTIBLE;
35688 + schedule_timeout(HZ);
35694 + memcpy(&beval, lvb, sizeof(beval));
35695 + *val = be32_to_cpu(beval);
35697 + lm_dlm_unlock_sync(lock, LM_ST_SHARED);
35702 + lm_dlm_unhold_lvb(lock, lvb);
35705 + lm_dlm_put_lock(lock);
35712 + * Find an ID with a given VALUE. The ID is a journalid, the VALUE is a
35716 +static int id_find(dlm_t *dlm, uint32_t value, uint32_t *id_out)
35718 + uint32_t val, id;
35719 + int error = 0, found = FALSE;
35721 + for (id = 0; id < dlm->max_nodes; id++) {
35722 + error = id_value(dlm, id, &val);
35726 + if (val == value) {
35734 + if (!error && !found)
35741 + * Get a journalid to use. The journalid must be owned exclusively as long as
35742 + * this fs is mounted. Other nodes must be able to discover our nodeid as the
35743 + * owner of the journalid. The journalid we claim should have the lowest value
35744 + * of all unused journalids.
35747 +static int claim_jid(dlm_t *dlm)
35749 + dlm_node_t *node;
35753 + DLM_ASSERT(dlm->our_nodeid,);
35756 + * Search an arbitrary number (8) past max nodes so we're sure to find
35757 + * one so we can let the GFS handle the "too big jid" error and fail
35761 + for (id = 0; id < dlm->max_nodes + 8; id++) {
35762 + error = id_test_and_set(dlm, id, dlm->our_nodeid, &dlm->jid_lock);
35769 + node = find_node_by_nodeid(dlm, dlm->our_nodeid);
35771 + set_bit(NFL_HAVE_JID, &node->flags);
35776 + * If we have a problem getting a jid, pick a bogus one which should
35777 + * cause GFS to complain and fail to mount.
35781 + printk("lock_dlm: %s: no journal id available (%d)\n",
35782 + dlm->fsname, error);
35783 + dlm->jid = dlm->max_nodes + dlm->our_nodeid;
35786 + log_debug("claim_jid %u", dlm->jid);
35791 + * Release our journalid, allowing it to be used by a node subsequently
35792 + * mounting the fs.
35795 +static void release_jid(dlm_t *dlm)
35797 + id_clear(dlm, dlm->jid_lock);
35798 + dlm->jid_lock = NULL;
35802 + * For all nodes in the mountgroup, find the journalid being used by each.
35805 +static int discover_jids(dlm_t *dlm)
35807 + dlm_node_t *node;
35809 + int error, notfound = 0;
35811 + list_for_each_entry(node, &dlm->mg_nodes, list) {
35812 + if (test_bit(NFL_HAVE_JID, &node->flags))
35815 + error = id_find(dlm, node->nodeid, &id);
35817 + log_debug("jid for node %d not found", node->nodeid);
35823 + set_bit(NFL_HAVE_JID, &node->flags);
35830 + * Discover the nodeid that we've been assigned by the cluster manager.
35833 +static int get_our_nodeid(dlm_t *dlm)
35835 + LIST_HEAD(cur_memb);
35836 + struct kcl_cluster_node *cur_node;
35838 + kcl_get_members(&cur_memb);
35840 + list_for_each_entry(cur_node, &cur_memb, list) {
35841 + if (cur_node->us) {
35842 + dlm->our_nodeid = cur_node->node_id;
35847 + while (!list_empty(&cur_memb)) {
35848 + cur_node = list_entry(cur_memb.next, struct kcl_cluster_node,
35850 + list_del(&cur_node->list);
35858 + * Run in dlm_async thread
35861 +void process_start(dlm_t *dlm, dlm_start_t *ds)
35863 + dlm_node_t *node;
35865 + int last_stop, last_start, error, i, new = FALSE, found;
35868 + log_debug("start c %d type %d e %d", ds->count, ds->type, ds->event_id);
35871 + * gfs won't do journal recoveries once it's sent us an unmount
35874 + if (test_bit(DFL_UMOUNT, &dlm->flags)) {
35875 + log_debug("process_start %d skip for umount", ds->event_id);
35876 + kcl_start_done(dlm->mg_local_id, ds->event_id);
35881 + * check if first start
35884 + if (!test_and_set_bit(DFL_GOT_NODEID, &dlm->flags)) {
35885 + get_our_nodeid(dlm);
35886 + if (ds->count == 1)
35887 + set_bit(DFL_FIRST_MOUNT, &dlm->flags);
35890 + down(&dlm->mg_nodes_lock);
35893 + * find nodes which are gone
35896 + list_for_each_entry(node, &dlm->mg_nodes, list) {
35898 + for (i = 0; i < ds->count; i++) {
35899 + if (node->nodeid != ds->nodeids[i])
35905 + /* node is still a member */
35909 + set_bit(NFL_NOT_MEMBER, &node->flags);
35911 + /* no gfs recovery needed for nodes that left cleanly */
35912 + if (ds->type != SERVICE_NODE_FAILED)
35915 + /* callbacks sent only for nodes in last completed MG */
35916 + if (!test_bit(NFL_LAST_FINISH, &node->flags))
35919 + /* only send a single callback per node */
35920 + if (test_and_set_bit(NFL_SENT_CB, &node->flags))
35923 + dlm->fscb(dlm->fsdata, LM_CB_NEED_RECOVERY, &node->jid);
35924 + set_bit(DFL_NEED_STARTDONE, &dlm->flags);
35925 + log_debug("cb_need_recovery jid %u", node->jid);
35932 + for (i = 0; i < ds->count; i++) {
35933 + nodeid = ds->nodeids[i];
35935 + node = find_node_by_nodeid(dlm, nodeid);
35939 + DLM_RETRY(node = kmalloc(sizeof(dlm_node_t), GFP_KERNEL), node);
35941 + memset(node, 0, sizeof(dlm_node_t));
35943 + node->nodeid = nodeid;
35944 + list_add(&node->list, &dlm->mg_nodes);
35948 + up(&dlm->mg_nodes_lock);
35951 + * get a jid for ourself when started for first time
35954 + if (!test_and_set_bit(DFL_HAVE_JID, &dlm->flags))
35957 + /* give new nodes a little time to claim a jid */
35958 + current->state = TASK_INTERRUPTIBLE;
35959 + schedule_timeout(HZ);
35963 + * find jid's of new nodes
35967 + /* we don't need to do these jid lookups if this start has been
35968 + followed by a stop event (and thus cancelled) */
35970 + spin_lock(&dlm->async_lock);
35971 + last_stop = dlm->mg_last_stop;
35972 + last_start = dlm->mg_last_start;
35973 + spin_unlock(&dlm->async_lock);
35975 + if (last_stop >= ds->event_id)
35978 + error = discover_jids(dlm);
35980 + /* Not all jids were found. Wait for a time to let all
35981 + new nodes claim_jid, then try to scan for jids
35983 + current->state = TASK_INTERRUPTIBLE;
35984 + schedule_timeout(HZ);
35991 + * tell SM we're done if there are no GFS recoveries to wait for
35994 + if (last_start > last_stop) {
35996 + down(&dlm->mg_nodes_lock);
35998 + list_for_each_entry(node, &dlm->mg_nodes, list) {
35999 + if (!test_bit(NFL_SENT_CB, &node->flags))
36004 + up(&dlm->mg_nodes_lock);
36007 + kcl_start_done(dlm->mg_local_id, ds->event_id);
36011 + kfree(ds->nodeids);
36015 +void process_finish(dlm_t *dlm)
36017 + struct list_head *tmp, *tmpsafe;
36018 + dlm_node_t *node;
36021 + spin_lock(&dlm->async_lock);
36022 + clear_bit(DFL_BLOCK_LOCKS, &dlm->flags);
36024 + list_for_each_safe(tmp, tmpsafe, &dlm->delayed) {
36025 + lp = list_entry(tmp, dlm_lock_t, dlist);
36027 + if (lp->type != QUEUE_LOCKS_BLOCKED)
36031 + list_del(&lp->dlist);
36032 + list_add_tail(&lp->slist, &dlm->submit);
36034 + clear_bit(LFL_DLIST, &lp->flags);
36035 + set_bit(LFL_SLIST, &lp->flags);
36037 + spin_unlock(&dlm->async_lock);
36039 + down(&dlm->mg_nodes_lock);
36041 + list_for_each_safe(tmp, tmpsafe, &dlm->mg_nodes) {
36042 + node = list_entry(tmp, dlm_node_t, list);
36044 + if (test_bit(NFL_NOT_MEMBER, &node->flags)) {
36045 + list_del(&node->list);
36048 + set_bit(NFL_LAST_FINISH, &node->flags);
36050 + up(&dlm->mg_nodes_lock);
36052 + wake_up(&dlm->wait);
36056 + * Run in user process
36059 +int init_mountgroup(dlm_t *dlm)
36064 + error = kcl_register_service(dlm->fsname, dlm->fnlen, SERVICE_LEVEL_GFS,
36065 + &mg_ops, TRUE, (void *) dlm, &id);
36069 + dlm->mg_local_id = id;
36071 + /* BLOCK_LOCKS is cleared when the join is finished */
36072 + set_bit(DFL_BLOCK_LOCKS, &dlm->flags);
36074 + error = kcl_join_service(id);
36078 + if (test_bit(DFL_START_ERROR, &dlm->flags))
36084 + kcl_leave_service(dlm->mg_local_id);
36087 + kcl_unregister_service(id);
36090 + printk("lock_dlm: service error %d\n", error);
36094 +void release_mountgroup(dlm_t *dlm)
36096 + int last_start, last_stop;
36098 + /* this flag causes a kcl_start_done() to be sent right away for
36099 + any start callbacks we get from SM */
36101 + log_debug("umount flags %lx", dlm->flags);
36102 + set_bit(DFL_UMOUNT, &dlm->flags);
36104 + /* gfs has done an unmount and will not call jid_recovery_done()
36105 + any longer so make necessary kcl_start_done() calls so
36106 + kcl_leave_service() will complete */
36108 + spin_lock(&dlm->async_lock);
36109 + last_start = dlm->mg_last_start;
36110 + last_stop = dlm->mg_last_stop;
36111 + spin_unlock(&dlm->async_lock);
36113 + if ((last_start > last_stop) &&
36114 + test_and_clear_bit(DFL_NEED_STARTDONE, &dlm->flags)) {
36115 + log_debug("umount doing start_done %d", last_start);
36116 + kcl_start_done(dlm->mg_local_id, last_start);
36119 + kcl_leave_service(dlm->mg_local_id);
36120 + kcl_unregister_service(dlm->mg_local_id);
36121 + release_jid(dlm);
36125 + * Run in GFS thread
36128 +void jid_recovery_done(dlm_t *dlm, unsigned int jid, unsigned int message)
36130 + dlm_node_t *node;
36131 + int last_start, last_stop;
36134 + log_debug("recovery_done jid %u msg %u", jid, message);
36136 + node = find_node_by_jid(dlm, jid);
36140 + log_debug("recovery_done %u,%u f %lx", jid, node->nodeid, node->flags);
36142 + if (!test_bit(NFL_SENT_CB, &node->flags))
36145 + if (!test_bit(NFL_NOT_MEMBER, &node->flags))
36148 + set_bit(NFL_RECOVERY_DONE, &node->flags);
36151 + * when recovery is done for all nodes, we're done with the start
36154 + down(&dlm->mg_nodes_lock);
36156 + list_for_each_entry(node, &dlm->mg_nodes, list) {
36157 + if (test_bit(NFL_SENT_CB, &node->flags) &&
36158 + !test_bit(NFL_RECOVERY_DONE, &node->flags))
36161 + up(&dlm->mg_nodes_lock);
36164 + /* don't send a start_done if there's since been a stop which
36165 + * cancels this start */
36167 + spin_lock(&dlm->async_lock);
36168 + last_start = dlm->mg_last_start;
36169 + last_stop = dlm->mg_last_stop;
36170 + spin_unlock(&dlm->async_lock);
36172 + if (last_start > last_stop) {
36173 + log_debug("recovery_done start_done %d", last_start);
36174 + kcl_start_done(dlm->mg_local_id, last_start);
36175 + clear_bit(DFL_NEED_STARTDONE, &dlm->flags);
36184 + * Run in CMAN SM thread
36187 +static void queue_start(dlm_t *dlm, uint32_t *nodeids, int count,
36188 + int event_id, int type)
36192 + DLM_RETRY(ds = kmalloc(sizeof(dlm_start_t), GFP_KERNEL), ds);
36194 + memset(ds, 0, sizeof(dlm_start_t));
36196 + ds->nodeids = nodeids;
36197 + ds->count = count;
36198 + ds->event_id = event_id;
36201 + spin_lock(&dlm->async_lock);
36202 + dlm->mg_last_start = event_id;
36203 + list_add_tail(&ds->list, &dlm->starts);
36204 + spin_unlock(&dlm->async_lock);
36206 + wake_up(&dlm->wait);
36209 +static int mg_stop(void *data)
36211 + dlm_t *dlm = (dlm_t *) data;
36213 + spin_lock(&dlm->async_lock);
36214 + set_bit(DFL_BLOCK_LOCKS, &dlm->flags);
36215 + dlm->mg_last_stop = dlm->mg_last_start;
36216 + spin_unlock(&dlm->async_lock);
36221 +static int mg_start(void *data, uint32_t *nodeids, int count, int event_id,
36224 + dlm_t *dlm = (dlm_t *) data;
36226 + queue_start(dlm, nodeids, count, event_id, type);
36231 +static void mg_finish(void *data, int event_id)
36233 + dlm_t *dlm = (dlm_t *) data;
36235 + spin_lock(&dlm->async_lock);
36236 + dlm->mg_last_finish = event_id;
36237 + set_bit(DFL_MG_FINISH, &dlm->flags);
36238 + spin_unlock(&dlm->async_lock);
36240 + wake_up(&dlm->wait);
36243 +struct kcl_service_ops mg_ops = {
36245 + .start = mg_start,
36246 + .finish = mg_finish
36248 diff -urN linux-orig/fs/gfs_locking/lock_dlm/lock.c linux-patched/fs/gfs_locking/lock_dlm/lock.c
36249 --- linux-orig/fs/gfs_locking/lock_dlm/lock.c 1969-12-31 18:00:00.000000000 -0600
36250 +++ linux-patched/fs/gfs_locking/lock_dlm/lock.c 2004-06-16 12:03:17.967822065 -0500
36252 +/******************************************************************************
36253 +*******************************************************************************
36255 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
36256 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
36258 +** This copyrighted material is made available to anyone wishing to use,
36259 +** modify, copy, or redistribute it subject to the terms and conditions
36260 +** of the GNU General Public License v.2.
36262 +*******************************************************************************
36263 +******************************************************************************/
36265 +#include "lock_dlm.h"
36268 + * Run in DLM thread
36271 +static void queue_complete(dlm_lock_t *lp)
36273 + dlm_t *dlm = lp->dlm;
36275 + clear_bit(LFL_WAIT_COMPLETE, &lp->flags);
36277 + spin_lock(&dlm->async_lock);
36278 + list_add_tail(&lp->clist, &dlm->complete);
36279 + set_bit(LFL_CLIST, &lp->flags);
36280 + spin_unlock(&dlm->async_lock);
36281 + wake_up(&dlm->wait);
36284 +static void queue_blocking(dlm_lock_t *lp, int mode)
36286 + dlm_t *dlm = lp->dlm;
36288 + if (test_bit(LFL_WAIT_COMPLETE, &lp->flags)) {
36289 + /* We often receive basts for EX while we're promoting
36290 + from SH to EX. */
36291 + /* printk("lock_dlm: bast before complete %x,%"PRIx64" "
36292 + "gr=%d rq=%d bast=%d\n", lp->lockname.ln_type,
36293 + lp->lockname.ln_number, lp->cur, lp->req, mode); */
36297 + spin_lock(&dlm->async_lock);
36299 + if (!lp->bast_mode) {
36300 + list_add_tail(&lp->blist, &dlm->blocking);
36301 + set_bit(LFL_BLIST, &lp->flags);
36302 + lp->bast_mode = mode;
36303 + } else if (lp->bast_mode < mode)
36304 + lp->bast_mode = mode;
36306 + spin_unlock(&dlm->async_lock);
36307 + wake_up(&dlm->wait);
36310 +static __inline__ void lock_ast(void *astargs)
36312 + dlm_lock_t *lp = (dlm_lock_t *) astargs;
36313 + queue_complete(lp);
36316 +static __inline__ void lock_bast(void *astargs, int mode)
36318 + dlm_lock_t *lp = (dlm_lock_t *) astargs;
36319 + queue_blocking(lp, mode);
36323 + * Run in GFS or user thread
36327 + * queue_delayed - add request to queue to be submitted later
36329 + * @type: the reason the lock is blocked
36331 + * Queue of locks which need submitting sometime later. Locks here
36332 + * due to BLOCKED_LOCKS are moved to request queue when recovery is
36333 + * done. Locks here due to an ERROR are moved to request queue after
36334 + * some delay. This could also be called from dlm_async thread.
36337 +void queue_delayed(dlm_lock_t *lp, int type)
36339 + dlm_t *dlm = lp->dlm;
36343 + spin_lock(&dlm->async_lock);
36344 + list_add_tail(&lp->dlist, &dlm->delayed);
36345 + set_bit(LFL_DLIST, &lp->flags);
36346 + spin_unlock(&dlm->async_lock);
36350 + * make_mode - convert to DLM_LOCK_
36351 + * @lmstate: GFS lock state
36353 + * Returns: DLM lock mode
36356 +static int16_t make_mode(int16_t lmstate)
36358 + switch (lmstate) {
36359 + case LM_ST_UNLOCKED:
36360 + return DLM_LOCK_NL;
36361 + case LM_ST_EXCLUSIVE:
36362 + return DLM_LOCK_EX;
36363 + case LM_ST_DEFERRED:
36364 + return DLM_LOCK_CW;
36365 + case LM_ST_SHARED:
36366 + return DLM_LOCK_PR;
36368 + DLM_ASSERT(0, printk("unknown LM state %d\n", lmstate););
36373 + * make_lmstate - convert to LM_ST_
36374 + * @dlmmode: DLM lock mode
36376 + * Returns: GFS lock state
36379 +int16_t make_lmstate(int16_t dlmmode)
36381 + switch (dlmmode) {
36382 + case DLM_LOCK_IV:
36383 + case DLM_LOCK_NL:
36384 + return LM_ST_UNLOCKED;
36385 + case DLM_LOCK_EX:
36386 + return LM_ST_EXCLUSIVE;
36387 + case DLM_LOCK_CW:
36388 + return LM_ST_DEFERRED;
36389 + case DLM_LOCK_PR:
36390 + return LM_ST_SHARED;
36392 + DLM_ASSERT(0, printk("unknown DLM mode %d\n", dlmmode););
36397 + * check_cur_state - verify agreement with GFS on the current lock state
36398 + * @lp: the DLM lock
36399 + * @cur_state: the current lock state from GFS
36401 + * NB: DLM_LOCK_NL and DLM_LOCK_IV are both considered
36402 + * LM_ST_UNLOCKED by GFS.
36406 +static void check_cur_state(dlm_lock_t *lp, unsigned int cur_state)
36408 + int16_t cur = make_mode(cur_state);
36409 + if (lp->cur != DLM_LOCK_IV)
36410 + DLM_ASSERT(lp->cur == cur, printk("%d, %d\n", lp->cur, cur););
36414 + * make_flags - put together necessary DLM flags
36416 + * @gfs_flags: GFS flags
36417 + * @cur: current DLM lock mode
36418 + * @req: requested DLM lock mode
36420 + * Returns: DLM flags
36423 +static unsigned int make_flags(dlm_lock_t *lp, unsigned int gfs_flags,
36424 + int16_t cur, int16_t req)
36426 + unsigned int lkf = 0;
36428 + if (gfs_flags & LM_FLAG_TRY)
36429 + lkf |= DLM_LKF_NOQUEUE;
36431 + if (gfs_flags & LM_FLAG_TRY_1CB) {
36432 + lkf |= DLM_LKF_NOQUEUE;
36433 + lkf |= DLM_LKF_NOQUEUEBAST;
36436 + if (lp->lksb.sb_lkid != 0) {
36437 + lkf |= DLM_LKF_CONVERT;
36439 + if (gfs_flags & LM_FLAG_PRIORITY)
36440 + lkf |= DLM_LKF_EXPEDITE;
36441 + else if (req > cur)
36442 + lkf |= DLM_LKF_QUECVT;
36444 + /* Conversion deadlock avoidance by DLM */
36446 + if (!test_bit(LFL_FORCE_PROMOTE, &lp->flags) &&
36447 + cur > DLM_LOCK_NL && req > DLM_LOCK_NL && cur != req)
36448 + lkf |= DLM_LKF_CONVDEADLK;
36452 + lkf |= DLM_LKF_VALBLK;
36458 + * make_strname - convert GFS lock numbers to string
36459 + * @lockname: the lock type/number
36460 + * @str: the lock string/length
36464 +static __inline__ void make_strname(struct lm_lockname *lockname,
36467 + sprintf(str->name, "%8x%16"PRIx64, lockname->ln_type,
36468 + lockname->ln_number);
36469 + str->namelen = LOCK_DLM_STRNAME_BYTES;
36472 +int create_lp(dlm_t *dlm, struct lm_lockname *name, dlm_lock_t **lpp)
36476 + lp = kmalloc(sizeof(dlm_lock_t), GFP_KERNEL);
36480 + memset(lp, 0, sizeof(dlm_lock_t));
36481 + lp->lockname = *name;
36483 + lp->cur = DLM_LOCK_IV;
36484 + init_completion(&lp->uast_wait);
36490 + * dlm_get_lock - get a lm_lock_t given a description of the lock
36491 + * @lockspace: the lockspace the lock lives in
36492 + * @name: the name of the lock
36493 + * @lockp: return the lm_lock_t here
36495 + * Returns: 0 on success, -EXXX on failure
36498 +int lm_dlm_get_lock(lm_lockspace_t *lockspace, struct lm_lockname *name,
36499 + lm_lock_t **lockp)
36504 + error = create_lp((dlm_t *) lockspace, name, &lp);
36506 + *lockp = (lm_lock_t *) lp;
36510 +int do_unlock(dlm_lock_t *lp)
36514 + init_completion(&lp->uast_wait);
36516 + set_bit(LFL_DLM_UNLOCK, &lp->flags);
36518 + error = dlm_unlock(lp->dlm->gdlm_lsp, lp->lksb.sb_lkid, 0, &lp->lksb,
36521 + DLM_ASSERT(!error, printk("%s: error=%d num=%x,%"PRIx64"\n",
36522 + lp->dlm->fsname, error, lp->lockname.ln_type,
36523 + lp->lockname.ln_number););
36525 + wait_for_completion(&lp->uast_wait);
36527 + spin_lock(&lp->dlm->async_lock);
36528 + if (test_bit(LFL_CLIST, &lp->flags)) {
36529 + printk("lock_dlm: dlm_put_lock lp on clist num=%x,%"PRIx64"\n", lp->lockname.ln_type, lp->lockname.ln_number);
36530 + list_del(&lp->clist);
36532 + if (test_bit(LFL_BLIST, &lp->flags)) {
36533 + printk("lock_dlm: dlm_put_lock lp on blist num=%x,%"PRIx64"\n",
36534 + lp->lockname.ln_type, lp->lockname.ln_number);
36535 + list_del(&lp->blist);
36537 + if (test_bit(LFL_DLIST, &lp->flags)) {
36538 + printk("lock_dlm: dlm_put_lock lp on dlist num=%x,%"PRIx64"\n",
36539 + lp->lockname.ln_type, lp->lockname.ln_number);
36540 + list_del(&lp->dlist);
36542 + if (test_bit(LFL_SLIST, &lp->flags)) {
36543 + printk("lock_dlm: dlm_put_lock lp on slist num=%x,%"PRIx64"\n",
36544 + lp->lockname.ln_type, lp->lockname.ln_number);
36545 + list_del(&lp->slist);
36547 + spin_unlock(&lp->dlm->async_lock);
36553 + * dlm_put_lock - get rid of a lock structure
36554 + * @lock: the lock to throw away
36558 +void lm_dlm_put_lock(lm_lock_t *lock)
36560 + dlm_lock_t *lp = (dlm_lock_t *) lock;
36562 + if (lp->cur != DLM_LOCK_IV) {
36569 + * do_lock - acquire a lock
36570 + * @lp: the DLM lock
36571 + * @range: optional range
36574 +void do_lock(dlm_lock_t *lp, struct dlm_range *range)
36576 + dlm_t *dlm = lp->dlm;
36581 + * When recovery is in progress, delay lock requests for submission
36582 + * once recovery is done. Requests for recovery (NOEXP) and unlocks
36586 + if (test_bit(DFL_BLOCK_LOCKS, &dlm->flags) &&
36587 + !test_bit(LFL_NOBLOCK, &lp->flags) && lp->req != DLM_LOCK_NL) {
36588 + queue_delayed(lp, QUEUE_LOCKS_BLOCKED);
36593 + * Submit the actual lock request.
36596 + make_strname(&lp->lockname, &str);
36598 + set_bit(LFL_WAIT_COMPLETE, &lp->flags);
36600 + error = dlm_lock(dlm->gdlm_lsp, lp->req, &lp->lksb, lp->lkf, str.name,
36601 + str.namelen, 0, lock_ast, (void *) lp,
36602 + lp->posix ? NULL : lock_bast, range);
36604 + if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) {
36605 + lp->lksb.sb_status = -EAGAIN;
36606 + queue_complete(lp);
36610 + DLM_ASSERT(!error,
36611 + printk("%s: num=%x,%"PRIx64" err=%d cur=%d req=%d lkf=%x\n",
36612 + dlm->fsname, lp->lockname.ln_type,
36613 + lp->lockname.ln_number, error, lp->cur, lp->req,
36618 + * lm_dlm_lock - acquire a lock
36619 + * @lock: the lock to manipulate
36620 + * @cur_state: the current state
36621 + * @req_state: the requested state
36622 + * @flags: modifier flags
36624 + * Returns: A bitmap of LM_OUT_* on success, -EXXX on failure
36627 +unsigned int lm_dlm_lock(lm_lock_t *lock, unsigned int cur_state,
36628 + unsigned int req_state, unsigned int flags)
36630 + dlm_lock_t *lp = (dlm_lock_t *) lock;
36632 + if (flags & LM_FLAG_NOEXP)
36633 + set_bit(LFL_NOBLOCK, &lp->flags);
36635 + check_cur_state(lp, cur_state);
36636 + lp->req = make_mode(req_state);
36637 + lp->lkf = make_flags(lp, flags, lp->cur, lp->req);
36639 + do_lock(lp, NULL);
36640 + return LM_OUT_ASYNC;
36643 +int lm_dlm_lock_sync(lm_lock_t *lock, unsigned int cur_state,
36644 + unsigned int req_state, unsigned int flags)
36646 + dlm_lock_t *lp = (dlm_lock_t *) lock;
36648 + init_completion(&lp->uast_wait);
36649 + lm_dlm_lock(lock, cur_state, req_state, flags);
36650 + wait_for_completion(&lp->uast_wait);
36652 + return lp->lksb.sb_status;
36656 + * lm_dlm_unlock - unlock a lock
36657 + * @lock: the lock to manipulate
36658 + * @cur_state: the current state
36660 + * Returns: 0 on success, -EXXX on failure
36663 +unsigned int lm_dlm_unlock(lm_lock_t *lock, unsigned int cur_state)
36665 + dlm_lock_t *lp = (dlm_lock_t *) lock;
36667 + check_cur_state(lp, cur_state);
36668 + lp->req = DLM_LOCK_NL;
36669 + lp->lkf = make_flags(lp, 0, lp->cur, lp->req);
36671 + do_lock(lp, NULL);
36673 + return LM_OUT_ASYNC;
36676 +void lm_dlm_unlock_sync(lm_lock_t *lock, unsigned int cur_state)
36678 + dlm_lock_t *lp = (dlm_lock_t *) lock;
36680 + init_completion(&lp->uast_wait);
36681 + lm_dlm_unlock(lock, cur_state);
36682 + wait_for_completion(&lp->uast_wait);
36686 + * dlm_cancel - cancel a request that is blocked due to DFL_BLOCK_LOCKS
36687 + * @lock: the lock to cancel request for
36691 +void lm_dlm_cancel(lm_lock_t *lock)
36693 + dlm_lock_t *lp = (dlm_lock_t *) lock;
36694 + int dlist = FALSE;
36696 + printk("lock_dlm: cancel num=%x,%"PRIx64"\n",
36697 + lp->lockname.ln_type, lp->lockname.ln_number);
36699 + spin_lock(&lp->dlm->async_lock);
36700 + if (test_and_clear_bit(LFL_DLIST, &lp->flags)) {
36701 + list_del(&lp->dlist);
36705 + spin_unlock(&lp->dlm->async_lock);
36708 + set_bit(LFL_CANCEL, &lp->flags);
36709 + queue_complete(lp);
36714 + * dlm_hold_lvb - hold on to a lock value block
36715 + * @lock: the lock the LVB is associated with
36716 + * @lvbp: return the lvb memory here
36718 + * Returns: 0 on success, -EXXX on failure
36721 +int lm_dlm_hold_lvb(lm_lock_t *lock, char **lvbp)
36723 + dlm_lock_t *lp = (dlm_lock_t *) lock;
36726 + lvb = kmalloc(DLM_LVB_SIZE, GFP_KERNEL);
36730 + memset(lvb, 0, DLM_LVB_SIZE);
36732 + lp->lksb.sb_lvbptr = lvb;
36740 + * dlm_unhold_lvb - release a LVB
36741 + * @lock: the lock the LVB is associated with
36742 + * @lvb: the lock value block
36746 +void lm_dlm_unhold_lvb(lm_lock_t *lock, char *lvb)
36748 + dlm_lock_t *lp = (dlm_lock_t *) lock;
36751 + lp->lksb.sb_lvbptr = NULL;
36755 + * dlm_sync_lvb - sync out the value of a lvb
36756 + * @lock: the lock the LVB is associated with
36757 + * @lvb: the lock value block
36761 +void lm_dlm_sync_lvb(lm_lock_t *lock, char *lvb)
36763 + dlm_lock_t *lp = (dlm_lock_t *) lock;
36765 + if (lp->cur != DLM_LOCK_EX)
36768 + init_completion(&lp->uast_wait);
36769 + set_bit(LFL_SYNC_LVB, &lp->flags);
36771 + lp->req = DLM_LOCK_EX;
36772 + lp->lkf = make_flags(lp, 0, lp->cur, lp->req);
36774 + do_lock(lp, NULL);
36775 + wait_for_completion(&lp->uast_wait);
36779 + * dlm_recovery_done - reset the expired locks for a given jid
36780 + * @lockspace: the lockspace
36785 +void lm_dlm_recovery_done(lm_lockspace_t *lockspace, unsigned int jid,
36786 + unsigned int message)
36788 + jid_recovery_done((dlm_t *) lockspace, jid, message);
36792 + * Run in dlm_async
36796 + * process_submit - make DLM lock requests from dlm_async thread
36801 +void process_submit(dlm_lock_t *lp)
36803 + struct dlm_range range, *r = NULL;
36806 + range.ra_start = lp->posix->start;
36807 + range.ra_end = lp->posix->end;
36813 diff -urN linux-orig/fs/gfs_locking/lock_dlm/lock_dlm.h linux-patched/fs/gfs_locking/lock_dlm/lock_dlm.h
36814 --- linux-orig/fs/gfs_locking/lock_dlm/lock_dlm.h 1969-12-31 18:00:00.000000000 -0600
36815 +++ linux-patched/fs/gfs_locking/lock_dlm/lock_dlm.h 2004-06-16 12:03:17.967822065 -0500
36817 +/******************************************************************************
36818 +*******************************************************************************
36820 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
36821 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
36823 +** This copyrighted material is made available to anyone wishing to use,
36824 +** modify, copy, or redistribute it subject to the terms and conditions
36825 +** of the GNU General Public License v.2.
36827 +*******************************************************************************
36828 +******************************************************************************/
36830 +#ifndef LOCK_DLM_DOT_H
36831 +#define LOCK_DLM_DOT_H
36833 +#include <linux/module.h>
36834 +#include <linux/slab.h>
36835 +#include <linux/spinlock.h>
36836 +#include <linux/module.h>
36837 +#include <linux/types.h>
36838 +#include <linux/string.h>
36839 +#include <linux/list.h>
36840 +#include <linux/lm_interface.h>
36841 +#include <cluster/dlm.h>
36843 +/* We take a shortcut and use lm_lockname structs for internal locks. This
36844 + means we must be careful to keep these types different from those used in
36845 + lm_interface.h. */
36847 +#define LM_TYPE_JID (0x10)
36848 +#define LM_TYPE_PLOCK_UPDATE (0x11)
36850 +#define DLM_LVB_SIZE (DLM_LVB_LEN)
36852 +/* GFS uses 12 bytes to identify a resource (32 bit type + 64 bit number).
36853 + We sprintf these numbers into a 24 byte string of hex values to make them
36854 + human-readable (to make debugging simpler.) */
36856 +#define LOCK_DLM_STRNAME_BYTES (24)
36858 +#define LOCK_DLM_MAX_NODES (128)
36866 +typedef struct dlm dlm_t;
36867 +typedef struct dlm_lock dlm_lock_t;
36868 +typedef struct dlm_node dlm_node_t;
36869 +typedef struct dlm_start dlm_start_t;
36870 +typedef struct strname strname_t;
36872 +#define DFL_FIRST_MOUNT 0
36873 +#define DFL_THREAD_STOP 1
36874 +#define DFL_GOT_NODEID 2
36875 +#define DFL_MG_FINISH 3
36876 +#define DFL_HAVE_JID 4
36877 +#define DFL_BLOCK_LOCKS 5
36878 +#define DFL_START_ERROR 6
36879 +#define DFL_UMOUNT 7
36880 +#define DFL_NEED_STARTDONE 8
36884 + uint32_t our_nodeid;
36885 + unsigned long flags;
36888 + char * clustername;
36893 + dlm_lockspace_t * gdlm_lsp;
36895 + lm_callback_t fscb;
36896 + lm_fsdata_t * fsdata;
36897 + dlm_lock_t * jid_lock;
36899 + spinlock_t async_lock;
36900 + struct list_head complete;
36901 + struct list_head blocking;
36902 + struct list_head delayed;
36903 + struct list_head submit;
36904 + struct list_head starts;
36906 + wait_queue_head_t wait;
36907 + atomic_t threads;
36910 + int mg_last_start;
36911 + int mg_last_stop;
36912 + int mg_last_finish;
36913 + struct list_head mg_nodes;
36914 + struct semaphore mg_nodes_lock;
36916 + struct list_head resources;
36917 + struct semaphore res_lock;
36920 +struct dlm_resource {
36922 + struct list_head list; /* list of resources */
36923 + struct lm_lockname name; /* the resource name */
36924 + struct semaphore sema;
36925 + struct list_head locks; /* one lock for each range */
36927 + dlm_lock_t * update;
36928 + struct list_head async_locks;
36929 + spinlock_t async_spin;
36932 +struct posix_lock {
36933 + struct list_head list; /* resource locks list */
36934 + struct list_head async_list; /* resource async_locks list */
36935 + struct dlm_resource * resource;
36937 + unsigned long owner;
36944 +#define LFL_NOBLOCK 0
36945 +#define LFL_NOCACHE 1
36946 +#define LFL_UNLOCK_RECOVERY 2
36947 +#define LFL_DLM_UNLOCK 3
36948 +#define LFL_TRYFAILED 4
36949 +#define LFL_SYNC_LVB 5
36950 +#define LFL_FORCE_PROMOTE 6
36951 +#define LFL_REREQUEST 7
36952 +#define LFL_WAIT_COMPLETE 8
36953 +#define LFL_CLIST 9
36954 +#define LFL_BLIST 10
36955 +#define LFL_DLIST 11
36956 +#define LFL_SLIST 12
36957 +#define LFL_IDLOCK 13
36958 +#define LFL_CANCEL 14
36962 + struct lm_lockname lockname;
36964 + struct dlm_lksb lksb;
36968 + int16_t prev_req;
36969 + unsigned int lkf;
36970 + unsigned int type;
36971 + unsigned long flags;
36973 + int bast_mode; /* protected by async_lock */
36974 + struct completion uast_wait;
36976 + struct list_head clist; /* complete */
36977 + struct list_head blist; /* blocking */
36978 + struct list_head dlist; /* delayed */
36979 + struct list_head slist; /* submit */
36981 + struct posix_lock * posix;
36984 +#define NFL_SENT_CB 0
36985 +#define NFL_NOT_MEMBER 1
36986 +#define NFL_RECOVERY_DONE 2
36987 +#define NFL_LAST_FINISH 3
36988 +#define NFL_HAVE_JID 4
36993 + unsigned long flags;
36994 + struct list_head list;
36997 +#define QUEUE_LOCKS_BLOCKED 1
36998 +#define QUEUE_ERROR_UNLOCK 2
36999 +#define QUEUE_ERROR_LOCK 3
37000 +#define QUEUE_ERROR_RETRY 4
37003 + unsigned char name[LOCK_DLM_STRNAME_BYTES];
37004 + unsigned short namelen;
37007 +struct dlm_start {
37008 + uint32_t * nodeids;
37012 + struct list_head list;
37023 +#if (BITS_PER_LONG == 64)
37024 +#define PRIu64 "lu"
37025 +#define PRId64 "ld"
37026 +#define PRIo64 "lo"
37027 +#define PRIx64 "lx"
37028 +#define PRIX64 "lX"
37029 +#define SCNu64 "lu"
37030 +#define SCNd64 "ld"
37031 +#define SCNo64 "lo"
37032 +#define SCNx64 "lx"
37033 +#define SCNX64 "lX"
37035 +#define PRIu64 "Lu"
37036 +#define PRId64 "Ld"
37037 +#define PRIo64 "Lo"
37038 +#define PRIx64 "Lx"
37039 +#define PRIX64 "LX"
37040 +#define SCNu64 "Lu"
37041 +#define SCNd64 "Ld"
37042 +#define SCNo64 "Lo"
37043 +#define SCNx64 "Lx"
37044 +#define SCNX64 "LX"
37047 +extern struct lm_lockops lock_dlm_ops;
37051 +int init_mountgroup(dlm_t * dlm);
37052 +void release_mountgroup(dlm_t * dlm);
37053 +void process_start(dlm_t * dlm, dlm_start_t * ds);
37054 +void process_finish(dlm_t * dlm);
37055 +void jid_recovery_done(dlm_t * dlm, unsigned int jid, unsigned int message);
37059 +int init_async_thread(dlm_t * dlm);
37060 +void release_async_thread(dlm_t * dlm);
37064 +int16_t make_lmstate(int16_t dlmmode);
37065 +void queue_delayed(dlm_lock_t * lp, int type);
37066 +void process_submit(dlm_lock_t * lp);
37067 +int create_lp(dlm_t *dlm, struct lm_lockname *name, dlm_lock_t **lpp);
37068 +void do_lock(dlm_lock_t *lp, struct dlm_range *range);
37069 +int do_unlock(dlm_lock_t *lp);
37071 +int lm_dlm_get_lock(lm_lockspace_t * lockspace, struct lm_lockname * name,
37072 + lm_lock_t ** lockp);
37073 +void lm_dlm_put_lock(lm_lock_t * lock);
37074 +unsigned int lm_dlm_lock(lm_lock_t * lock, unsigned int cur_state,
37075 + unsigned int req_state, unsigned int flags);
37076 +int lm_dlm_lock_sync(lm_lock_t * lock, unsigned int cur_state,
37077 + unsigned int req_state, unsigned int flags);
37078 +unsigned int lm_dlm_unlock(lm_lock_t * lock, unsigned int cur_state);
37079 +void lm_dlm_unlock_sync(lm_lock_t * lock, unsigned int cur_state);
37080 +void lm_dlm_cancel(lm_lock_t * lock);
37081 +int lm_dlm_hold_lvb(lm_lock_t * lock, char **lvbp);
37082 +void lm_dlm_unhold_lvb(lm_lock_t * lock, char *lvb);
37083 +void lm_dlm_sync_lvb(lm_lock_t * lock, char *lvb);
37084 +void lm_dlm_recovery_done(lm_lockspace_t * lockspace, unsigned int jid,
37085 + unsigned int message);
37089 +int lm_dlm_plock(lm_lockspace_t *lockspace, struct lm_lockname *name,
37090 + unsigned long owner, int wait, int ex, uint64_t start,
37093 +int lm_dlm_punlock(lm_lockspace_t *lockspace, struct lm_lockname *name,
37094 + unsigned long owner, uint64_t start, uint64_t end);
37096 +int lm_dlm_plock_get(lm_lockspace_t *lockspace, struct lm_lockname *name,
37097 + unsigned long owner, uint64_t *start, uint64_t *end,
37098 + int *ex, unsigned long *rowner);
37102 +void lock_dlm_debug_log(const char *fmt, ...);
37103 +void lock_dlm_debug_dump(void);
37106 +#define LOCK_DLM_DEBUG
37108 +#ifdef LOCK_DLM_DEBUG
37109 +#define log_debug(fmt, args...) lock_dlm_debug_log(fmt, ##args)
37111 +#define log_debug(fmt, args...)
37114 +#define DLM_ASSERT(x, do) \
37118 + lock_dlm_debug_dump(); \
37119 + printk("\nlock_dlm: Assertion failed on line %d of file %s\n" \
37120 + "lock_dlm: assertion: \"%s\"\n" \
37121 + "lock_dlm: time = %lu\n", \
37122 + __LINE__, __FILE__, #x, jiffies); \
37125 + panic("lock_dlm: Record message above and reboot.\n"); \
37129 +#define DLM_RETRY(do_this, until_this) \
37132 + do { do_this; } while (0); \
37133 + if (until_this) \
37135 + printk("lock_dlm: out of memory: %s, %u\n", __FILE__, __LINE__); \
37140 diff -urN linux-orig/fs/gfs_locking/lock_dlm/main.c linux-patched/fs/gfs_locking/lock_dlm/main.c
37141 --- linux-orig/fs/gfs_locking/lock_dlm/main.c 1969-12-31 18:00:00.000000000 -0600
37142 +++ linux-patched/fs/gfs_locking/lock_dlm/main.c 2004-06-16 12:03:17.967822065 -0500
37144 +/******************************************************************************
37145 +*******************************************************************************
37147 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
37148 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
37150 +** This copyrighted material is made available to anyone wishing to use,
37151 +** modify, copy, or redistribute it subject to the terms and conditions
37152 +** of the GNU General Public License v.2.
37154 +*******************************************************************************
37155 +******************************************************************************/
37157 +#include "lock_dlm.h"
37158 +#include <linux/init.h>
37159 +#include <linux/proc_fs.h>
37161 +#if defined(LOCK_DLM_DEBUG)
37162 +#define LOCK_DLM_DEBUG_SIZE (1024)
37163 +#define MAX_DEBUG_MSG_LEN (64)
37165 +#define LOCK_DLM_DEBUG_SIZE (0)
37166 +#define MAX_DEBUG_MSG_LEN (0)
37169 +static char * debug_buf;
37170 +static unsigned int debug_size;
37171 +static unsigned int debug_point;
37172 +static int debug_wrap;
37173 +static spinlock_t debug_lock;
37174 +static struct proc_dir_entry * debug_proc_entry = NULL;
37177 +void lock_dlm_debug_log(const char *fmt, ...)
37180 + int i, n, size, len;
37181 + char buf[MAX_DEBUG_MSG_LEN+1];
37183 + spin_lock(&debug_lock);
37188 + size = MAX_DEBUG_MSG_LEN;
37189 + memset(buf, 0, size+1);
37192 + /* n = snprintf(buf, size, "%s ", dlm->fsname); */
37195 + va_start(va, fmt);
37196 + vsnprintf(buf+n, size, fmt, va);
37199 + len = strlen(buf);
37200 + if (len > MAX_DEBUG_MSG_LEN-1)
37201 + len = MAX_DEBUG_MSG_LEN-1;
37203 + buf[len+1] = '\0';
37205 + for (i = 0; i < strlen(buf); i++) {
37206 + debug_buf[debug_point++] = buf[i];
37208 + if (debug_point == debug_size) {
37214 + spin_unlock(&debug_lock);
37217 +static void debug_setup(int size)
37221 + if (size > PAGE_SIZE)
37222 + size = PAGE_SIZE;
37224 + b = kmalloc(size, GFP_KERNEL);
37226 + spin_lock(&debug_lock);
37228 + kfree(debug_buf);
37231 + debug_size = size;
37235 + memset(debug_buf, 0, debug_size);
37237 + spin_unlock(&debug_lock);
37240 +static void debug_init(void)
37242 + debug_buf = NULL;
37246 + spin_lock_init(&debug_lock);
37247 + debug_setup(LOCK_DLM_DEBUG_SIZE);
37250 +void lock_dlm_debug_dump(void)
37254 + spin_lock(&debug_lock);
37256 + if (debug_wrap) {
37257 + for (i = debug_point; i < debug_size; i++)
37258 + printk("%c", debug_buf[i]);
37260 + for (i = 0; i < debug_point; i++)
37261 + printk("%c", debug_buf[i]);
37263 + spin_unlock(&debug_lock);
37266 +#ifdef CONFIG_PROC_FS
37267 +int lock_dlm_debug_info(char *b, char **start, off_t offset, int length)
37271 + spin_lock(&debug_lock);
37273 + if (debug_wrap) {
37274 + for (i = debug_point; i < debug_size; i++)
37275 + n += sprintf(b + n, "%c", debug_buf[i]);
37277 + for (i = 0; i < debug_point; i++)
37278 + n += sprintf(b + n, "%c", debug_buf[i]);
37280 + spin_unlock(&debug_lock);
37287 + * init_lock_dlm - Initialize the lock_dlm module
37289 + * Returns: 0 on success, -EXXX on failure
37292 +int __init init_lock_dlm(void)
37296 + error = lm_register_proto(&lock_dlm_ops);
37298 + printk("lock_dlm: can't register protocol: (%d)\n", error);
37302 +#ifdef CONFIG_PROC_FS
37303 + debug_proc_entry = create_proc_entry("cluster/lock_dlm_debug", S_IRUGO,
37305 + if (debug_proc_entry)
37306 + debug_proc_entry->get_info = &lock_dlm_debug_info;
37310 + printk("Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__);
37315 + * exit_lock_dlm - cleanup the lock_dlm module
37319 +void __exit exit_lock_dlm(void)
37321 + lm_unregister_proto(&lock_dlm_ops);
37323 +#ifdef CONFIG_PROC_FS
37324 + if (debug_proc_entry)
37325 + remove_proc_entry("cluster/lock_dlm_debug", NULL);
37330 +module_init(init_lock_dlm);
37331 +module_exit(exit_lock_dlm);
37333 +MODULE_DESCRIPTION("GFS DLM Locking Module");
37334 +MODULE_AUTHOR("Red Hat, Inc.");
37335 +MODULE_LICENSE("GPL");
37336 diff -urN linux-orig/fs/gfs_locking/lock_dlm/mount.c linux-patched/fs/gfs_locking/lock_dlm/mount.c
37337 --- linux-orig/fs/gfs_locking/lock_dlm/mount.c 1969-12-31 18:00:00.000000000 -0600
37338 +++ linux-patched/fs/gfs_locking/lock_dlm/mount.c 2004-06-16 12:03:17.967822065 -0500
37340 +/******************************************************************************
37341 +*******************************************************************************
37343 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
37344 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
37346 +** This copyrighted material is made available to anyone wishing to use,
37347 +** modify, copy, or redistribute it subject to the terms and conditions
37348 +** of the GNU General Public License v.2.
37350 +*******************************************************************************
37351 +******************************************************************************/
37353 +#include <linux/socket.h>
37354 +#include <net/sock.h>
37356 +#include "lock_dlm.h"
37357 +#include <cluster/cnxman.h>
37358 +#include <cluster/service.h>
37360 +static int init_cman(dlm_t *dlm)
37363 + char *name = NULL;
37365 + if (!dlm->clustername)
37368 + error = kcl_addref_cluster();
37370 + printk("lock_dlm: cannot get cman reference %d\n", error);
37374 + error = kcl_cluster_name(&name);
37376 + printk("lock_dlm: cannot get cman cluster name %d\n", error);
37380 + if (strcmp(name, dlm->clustername)) {
37382 + printk("lock_dlm: cman cluster name \"%s\" does not match "
37383 + "file system cluster name \"%s\"\n",
37384 + name, dlm->clustername);
37392 + kcl_releaseref_cluster();
37399 +static int release_cman(dlm_t *dlm)
37401 + return kcl_releaseref_cluster();
37404 +static int init_cluster(dlm_t *dlm, char *table_name)
37406 + char *buf, *c, *clname, *fsname;
37407 + int len, error = -1;
37410 + * Parse superblock lock table <clustername>:<fsname>
37413 + len = strlen(table_name) + 1;
37414 + buf = kmalloc(len, GFP_KERNEL);
37417 + memset(buf, 0, len);
37418 + memcpy(buf, table_name, strlen(table_name));
37420 + c = strstr(buf, ":");
37428 + dlm->max_nodes = LOCK_DLM_MAX_NODES;
37430 + len = strlen(clname) + 1;
37431 + c = kmalloc(len, GFP_KERNEL);
37434 + memset(c, 0, len);
37435 + memcpy(c, clname, len-1);
37436 + dlm->cnlen = len-1;
37437 + dlm->clustername = c;
37439 + len = strlen(fsname) + 1;
37440 + c = kmalloc(len, GFP_KERNEL);
37443 + memset(c, 0, len);
37444 + memcpy(c, fsname, len-1);
37445 + dlm->fnlen = len-1;
37448 + error = init_cman(dlm);
37456 + kfree(dlm->fsname);
37458 + kfree(dlm->clustername);
37462 + printk("lock_dlm: init_cluster error %d\n", error);
37466 +static int release_cluster(dlm_t *dlm)
37468 + release_cman(dlm);
37469 + kfree(dlm->clustername);
37470 + kfree(dlm->fsname);
37474 +static int init_fence(dlm_t *dlm)
37477 + struct kcl_service *s, *safe;
37478 + int error, found = FALSE;
37480 + error = kcl_get_services(&head, SERVICE_LEVEL_FENCE);
37484 + list_for_each_entry_safe(s, safe, &head, list) {
37485 + list_del(&s->list);
37486 + if (!found && !strcmp(s->name, "default"))
37496 + printk("lock_dlm: init_fence error %d\n", error);
37500 +static int release_fence(dlm_t *dlm)
37505 +static int init_gdlm(dlm_t *dlm)
37509 + error = dlm_new_lockspace(dlm->fsname, dlm->fnlen, &dlm->gdlm_lsp,
37510 + DLM_LSF_NOTIMERS);
37512 + printk("lock_dlm: new lockspace error %d\n", error);
37517 +static int release_gdlm(dlm_t *dlm)
37519 + dlm_release_lockspace(dlm->gdlm_lsp, 1);
37523 +static dlm_t *init_dlm(lm_callback_t cb, lm_fsdata_t *fsdata)
37527 + dlm = kmalloc(sizeof(dlm_t), GFP_KERNEL);
37531 + memset(dlm, 0, sizeof(dlm_t));
37534 + dlm->fsdata = fsdata;
37536 + spin_lock_init(&dlm->async_lock);
37538 + INIT_LIST_HEAD(&dlm->complete);
37539 + INIT_LIST_HEAD(&dlm->blocking);
37540 + INIT_LIST_HEAD(&dlm->delayed);
37541 + INIT_LIST_HEAD(&dlm->submit);
37542 + INIT_LIST_HEAD(&dlm->starts);
37543 + INIT_LIST_HEAD(&dlm->resources);
37545 + init_waitqueue_head(&dlm->wait);
37547 + INIT_LIST_HEAD(&dlm->mg_nodes);
37548 + init_MUTEX(&dlm->mg_nodes_lock);
37549 + init_MUTEX(&dlm->res_lock);
37555 + * dlm_mount - mount a dlm lockspace
37556 + * @table_name: the name of the space to mount
37557 + * @host_data: host specific data
37558 + * @cb: the callback
37559 + * @lockstruct: the lock structure to fill in
37561 + * Returns: 0 on success, -EXXX on failure
37564 +static int lm_dlm_mount(char *table_name, char *host_data,
37565 + lm_callback_t cb, lm_fsdata_t *fsdata,
37566 + unsigned int min_lvb_size,
37567 + struct lm_lockstruct *lockstruct)
37570 + int error = -ENOMEM;
37572 + if (min_lvb_size > DLM_LVB_SIZE)
37575 + dlm = init_dlm(cb, fsdata);
37579 + error = init_cluster(dlm, table_name);
37583 + error = init_fence(dlm);
37585 + goto out_cluster;
37587 + error = init_gdlm(dlm);
37591 + error = init_async_thread(dlm);
37595 + error = init_mountgroup(dlm);
37599 + lockstruct->ls_jid = dlm->jid;
37600 + lockstruct->ls_first = test_bit(DFL_FIRST_MOUNT, &dlm->flags);
37601 + lockstruct->ls_lockspace = dlm;
37602 + lockstruct->ls_ops = &lock_dlm_ops;
37603 + lockstruct->ls_flags = LM_LSFLAG_ASYNC;
37604 + lockstruct->ls_lvb_size = DLM_LVB_SIZE;
37608 + release_async_thread(dlm);
37611 + release_gdlm(dlm);
37614 + release_fence(dlm);
37617 + release_cluster(dlm);
37627 + * dlm_others_may_mount
37628 + * @lockspace: the lockspace that other nodes may now mount
37632 +static void lm_dlm_others_may_mount(lm_lockspace_t *lockspace)
37634 + /* Do nothing. The first node to join the Mount Group will complete
37635 + * before Service Manager allows another node to join. */
37639 + * dlm_unmount - unmount a lock space
37640 + * @lockspace: the lockspace to unmount
37644 +static void lm_dlm_unmount(lm_lockspace_t *lockspace)
37646 + dlm_t *dlm = (dlm_t *) lockspace;
37648 + release_mountgroup(dlm);
37649 + release_async_thread(dlm);
37650 + release_gdlm(dlm);
37651 + release_fence(dlm);
37652 + release_cluster(dlm);
37656 +struct lm_lockops lock_dlm_ops = {
37657 + lm_proto_name:"lock_dlm",
37658 + lm_mount:lm_dlm_mount,
37659 + lm_others_may_mount:lm_dlm_others_may_mount,
37660 + lm_unmount:lm_dlm_unmount,
37661 + lm_get_lock:lm_dlm_get_lock,
37662 + lm_put_lock:lm_dlm_put_lock,
37663 + lm_lock:lm_dlm_lock,
37664 + lm_unlock:lm_dlm_unlock,
37665 + lm_plock:lm_dlm_plock,
37666 + lm_punlock:lm_dlm_punlock,
37667 + lm_plock_get:lm_dlm_plock_get,
37668 + lm_cancel:lm_dlm_cancel,
37669 + lm_hold_lvb:lm_dlm_hold_lvb,
37670 + lm_unhold_lvb:lm_dlm_unhold_lvb,
37671 + lm_sync_lvb:lm_dlm_sync_lvb,
37672 + lm_recovery_done:lm_dlm_recovery_done,
37673 + lm_owner:THIS_MODULE,
37675 diff -urN linux-orig/fs/gfs_locking/lock_dlm/plock.c linux-patched/fs/gfs_locking/lock_dlm/plock.c
37676 --- linux-orig/fs/gfs_locking/lock_dlm/plock.c 1969-12-31 18:00:00.000000000 -0600
37677 +++ linux-patched/fs/gfs_locking/lock_dlm/plock.c 2004-06-16 12:03:17.967822065 -0500
37679 +/******************************************************************************
37680 +*******************************************************************************
37682 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
37683 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
37685 +** This copyrighted material is made available to anyone wishing to use,
37686 +** modify, copy, or redistribute it subject to the terms and conditions
37687 +** of the GNU General Public License v.2.
37689 +*******************************************************************************
37690 +******************************************************************************/
37692 +#include "lock_dlm.h"
37694 +#define MIN(a,b) (((a) <= (b)) ? (a) : (b))
37695 +#define MAX(a,b) (((a) >= (b)) ? (a) : (b))
37698 +#define NO_CREATE 0
37709 +static int check_conflict(dlm_t *dlm, struct dlm_resource *r,
37710 + struct lm_lockname *name, unsigned long owner,
37711 + uint64_t start, uint64_t end, int ex);
37714 +static int lock_resource(struct dlm_resource *r)
37717 + struct lm_lockname name;
37720 + name.ln_type = LM_TYPE_PLOCK_UPDATE;
37721 + name.ln_number = r->name.ln_number;
37723 + error = create_lp(r->dlm, &name, &lp);
37727 + set_bit(LFL_IDLOCK, &lp->flags);
37728 + lp->req = DLM_LOCK_EX;
37729 + do_lock(lp, NULL);
37730 + wait_for_completion(&lp->uast_wait);
37732 + error = lp->lksb.sb_status;
37742 +static void unlock_resource(struct dlm_resource *r)
37744 + do_unlock(r->update);
37745 + kfree(r->update);
37748 +static struct dlm_resource *search_resource(dlm_t *dlm, struct lm_lockname *name)
37750 + struct dlm_resource *r;
37752 + list_for_each_entry(r, &dlm->resources, list) {
37753 + if (lm_name_equal(&r->name, name))
37759 +static int get_resource(dlm_t *dlm, struct lm_lockname *name, int create,
37760 + struct dlm_resource **res)
37762 + struct dlm_resource *r, *r2;
37763 + int error = -ENOMEM;
37765 + down(&dlm->res_lock);
37766 + r = search_resource(dlm, name);
37769 + up(&dlm->res_lock);
37774 + if (create == NO_CREATE) {
37779 + r = kmalloc(sizeof(struct dlm_resource), GFP_KERNEL);
37783 + memset(r, 0, sizeof(struct dlm_resource));
37787 + INIT_LIST_HEAD(&r->locks);
37788 + INIT_LIST_HEAD(&r->async_locks);
37789 + init_MUTEX(&r->sema);
37790 + spin_lock_init(&r->async_spin);
37792 + down(&dlm->res_lock);
37793 + r2 = search_resource(dlm, name);
37796 + up(&dlm->res_lock);
37802 + list_add_tail(&r->list, &dlm->resources);
37803 + up(&dlm->res_lock);
37812 +static void put_resource(struct dlm_resource *r)
37814 + dlm_t *dlm = r->dlm;
37816 + down(&dlm->res_lock);
37818 + if (r->count == 0) {
37819 + DLM_ASSERT(list_empty(&r->locks), );
37820 + DLM_ASSERT(list_empty(&r->async_locks), );
37821 + list_del(&r->list);
37824 + up(&dlm->res_lock);
37827 +static inline void hold_resource(struct dlm_resource *r)
37829 + down(&r->dlm->res_lock);
37831 + up(&r->dlm->res_lock);
37834 +static inline int ranges_overlap(uint64_t start1, uint64_t end1,
37835 + uint64_t start2, uint64_t end2)
37837 + if (end1 < start2 || start1 > end2)
37843 + * overlap_type - returns a value based on the type of overlap
37844 + * @s1 - start of new lock range
37845 + * @e1 - end of new lock range
37846 + * @s2 - start of existing lock range
37847 + * @e2 - end of existing lock range
37851 +static int overlap_type(uint64_t s1, uint64_t e1, uint64_t s2, uint64_t e2)
37860 + if (s1 == s2 && e1 == e2)
37868 + else if (s1 == s2 && e1 < e2)
37876 + else if (s1 > s2 && e1 == e2)
37884 + else if (s1 > s2 && e1 < e2)
37888 + * ---r1--- or ---r1--- or ---r1---
37889 + * --r2-- --r2-- --r2--
37892 + else if (s1 <= s2 && e1 >= e2)
37900 + else if (s1 > s2 && e1 > e2)
37908 + else if (s1 < s2 && e1 < e2)
37917 +/* shrink the range start2:end2 by the partially overlapping start:end */
37919 +static int shrink_range2(uint64_t *start2, uint64_t *end2,
37920 + uint64_t start, uint64_t end)
37924 + if (*start2 < start)
37925 + *end2 = start - 1;
37926 + else if (*end2 > end)
37927 + *start2 = end + 1;
37933 +static int shrink_range(struct posix_lock *po, uint64_t start, uint64_t end)
37935 + return shrink_range2(&po->start, &po->end, start, end);
37938 +static void put_lock(dlm_lock_t *lp)
37940 + struct posix_lock *po = lp->posix;
37943 + if (po->count == 0) {
37949 +static int create_lock(struct dlm_resource *r, unsigned long owner, int ex,
37950 + uint64_t start, uint64_t end, dlm_lock_t **lpp)
37953 + struct posix_lock *po;
37956 + error = create_lp(r->dlm, &r->name, &lp);
37960 + po = kmalloc(sizeof(struct posix_lock), GFP_KERNEL);
37965 + memset(po, 0, sizeof(struct posix_lock));
37969 + po->resource = r;
37971 + po->start = start;
37973 + po->owner = owner;
37975 + list_add_tail(&po->list, &r->locks);
37981 +static unsigned int make_flags_posix(dlm_lock_t *lp, int wait)
37983 + unsigned int lkf = 0;
37985 + if (wait == NO_WAIT || wait == X_WAIT)
37986 + lkf |= DLM_LKF_NOQUEUE;
37988 + if (lp->lksb.sb_lkid != 0) {
37989 + lkf |= DLM_LKF_CONVERT;
37990 + if (wait == WAIT)
37991 + lkf |= DLM_LKF_EXPEDITE;
37996 +static void do_range_lock(dlm_lock_t *lp)
37998 + struct dlm_range range = { lp->posix->start, lp->posix->end };
37999 + do_lock(lp, &range);
38002 +static void request_lock(dlm_lock_t *lp, int wait)
38004 + log_debug("req %x,%"PRIx64" %s %"PRIx64"-%"PRIx64" %u w %u",
38005 + lp->lockname.ln_type, lp->lockname.ln_number,
38006 + lp->posix->ex ? "ex" : "sh", lp->posix->start,
38007 + lp->posix->end, current->pid, wait);
38009 + set_bit(LFL_IDLOCK, &lp->flags);
38010 + lp->req = lp->posix->ex ? DLM_LOCK_EX : DLM_LOCK_PR;
38011 + lp->lkf = make_flags_posix(lp, wait);
38013 + do_range_lock(lp);
38016 +static void add_async(struct posix_lock *po, struct dlm_resource *r)
38018 + spin_lock(&r->async_spin);
38019 + list_add_tail(&po->async_list, &r->async_locks);
38020 + spin_unlock(&r->async_spin);
38023 +static void del_async(struct posix_lock *po, struct dlm_resource *r)
38025 + spin_lock(&r->async_spin);
38026 + list_del(&po->async_list);
38027 + spin_unlock(&r->async_spin);
38030 +static int wait_async(dlm_lock_t *lp)
38032 + wait_for_completion(&lp->uast_wait);
38033 + del_async(lp->posix, lp->posix->resource);
38034 + return lp->lksb.sb_status;
38037 +static void wait_async_list(struct dlm_resource *r, unsigned long owner)
38039 + struct posix_lock *po;
38040 + int error, found;
38044 + spin_lock(&r->async_spin);
38045 + list_for_each_entry(po, &r->async_locks, async_list) {
38046 + if (po->owner != owner)
38051 + spin_unlock(&r->async_spin);
38054 + DLM_ASSERT(po->lp, );
38055 + error = wait_async(po->lp);
38056 + DLM_ASSERT(!error, );
38061 +static void update_lock(dlm_lock_t *lp, int wait)
38063 + request_lock(lp, wait);
38064 + add_async(lp->posix, lp->posix->resource);
38066 + if (wait == NO_WAIT || wait == X_WAIT) {
38067 + int error = wait_async(lp);
38068 + DLM_ASSERT(!error, printk("error=%d\n", error););
38072 +static void add_lock(struct dlm_resource *r, unsigned long owner, int wait,
38073 + int ex, uint64_t start, uint64_t end)
38078 + error = create_lock(r, owner, ex, start, end, &lp);
38079 + DLM_ASSERT(!error, );
38081 + hold_resource(r);
38082 + update_lock(lp, wait);
38085 +static int remove_lock(dlm_lock_t *lp)
38087 + struct dlm_resource *r = lp->posix->resource;
38089 + log_debug("remove %x,%"PRIx64" %u",
38090 + r->name.ln_type, r->name.ln_number, current->pid);
38098 +/* RN within RE (and starts or ends on RE boundary)
38099 + 1. add new lock for non-overlap area of RE, orig mode
38100 + 2. convert RE to RN range and mode */
38102 +static int lock_case1(struct posix_lock *po, struct dlm_resource *r,
38103 + unsigned long owner, int wait, int ex, uint64_t start,
38106 + uint64_t start2, end2;
38108 + /* non-overlapping area start2:end2 */
38109 + start2 = po->start;
38111 + shrink_range2(&start2, &end2, start, end);
38113 + po->start = start;
38118 + add_lock(r, owner, X_WAIT, SH, start2, end2);
38119 + update_lock(po->lp, wait);
38121 + add_lock(r, owner, WAIT, EX, start2, end2);
38122 + update_lock(po->lp, X_WAIT);
38127 +/* RN within RE (RE overlaps RN on both sides)
38128 + 1. add new lock for front fragment, orig mode
38129 + 2. add new lock for back fragment, orig mode
38130 + 3. convert RE to RN range and mode */
38132 +static int lock_case2(struct posix_lock *po, struct dlm_resource *r,
38133 + unsigned long owner, int wait, int ex, uint64_t start,
38137 + add_lock(r, owner, X_WAIT, SH, po->start, start-1);
38138 + add_lock(r, owner, X_WAIT, SH, end+1, po->end);
38140 + po->start = start;
38144 + update_lock(po->lp, wait);
38146 + add_lock(r, owner, WAIT, EX, po->start, start-1);
38147 + add_lock(r, owner, WAIT, EX, end+1, po->end);
38149 + po->start = start;
38153 + update_lock(po->lp, X_WAIT);
38158 +/* returns ranges from exist list in order of their start values */
38160 +static int next_exist(struct list_head *exist, uint64_t *start, uint64_t *end)
38162 + struct posix_lock *po;
38163 + int first = TRUE, first_call = FALSE;
38165 + if (!*start && !*end)
38166 + first_call = TRUE;
38168 + list_for_each_entry(po, exist, list) {
38169 + if (!first_call && (po->start <= *start))
38173 + *start = po->start;
38176 + } else if (po->start < *start) {
38177 + *start = po->start;
38182 + return (first ? -1 : 0);
38185 +/* adds locks in gaps between existing locks from start to end */
38187 +static int fill_gaps(struct list_head *exist, struct dlm_resource *r,
38188 + unsigned long owner, int wait, int ex, uint64_t start,
38191 + uint64_t exist_start = 0, exist_end = 0;
38193 + /* cover gaps in front of each existing lock */
38195 + if (next_exist(exist, &exist_start, &exist_end))
38197 + if (start < exist_start)
38198 + add_lock(r, owner, wait, ex, start, exist_start-1);
38199 + start = exist_end + 1;
38202 + /* cover gap after last existing lock */
38203 + if (exist_end < end)
38204 + add_lock(r, owner, wait, ex, exist_end+1, end);
38209 +/* RE within RN (possibly more than one RE lock, all within RN) */
38211 +static int lock_case3(struct list_head *exist, struct dlm_resource *r,
38212 + unsigned long owner, int wait, int ex, uint64_t start,
38215 + struct posix_lock *po, *safe;
38217 + fill_gaps(exist, r, owner, wait, ex, start, end);
38222 + /* update existing locks to new mode and put back in locks list */
38223 + list_for_each_entry_safe(po, safe, exist, list) {
38224 + list_move_tail(&po->list, &r->locks);
38225 + if (po->ex == ex)
38228 + update_lock(po->lp, wait);
38234 +/* RE within RN (possibly more than one RE lock, one RE partially overlaps RN)
38235 + 1. add new locks with new mode for RN gaps not covered by RE's
38236 + 2. convert RE locks' mode to new mode
38237 + other steps deal with the partial-overlap fragment and depend on whether
38238 + the request is sh->ex or ex->sh */
38240 +static int lock_case4(struct posix_lock *opo, struct list_head *exist,
38241 + struct dlm_resource *r, unsigned long owner, int wait,
38242 + int ex, uint64_t start, uint64_t end)
38244 + struct posix_lock *po, *safe;
38245 + uint64_t over_start = 0, over_end = 0;
38246 + uint64_t frag_start = 0, frag_end = 0;
38248 + /* fragment (non-overlap) range of opo */
38249 + if (opo->start < start) {
38250 + frag_start = opo->start;
38251 + frag_end = start - 1;
38253 + frag_start = end + 1;
38254 + frag_end = opo->end;
38257 + /* overlap range of opo */
38258 + if (opo->start < start) {
38259 + over_start = start;
38260 + over_end = opo->end;
38262 + over_start = opo->start;
38266 + /* cut off the non-overlap portion of opo so fill_gaps will work */
38267 + opo->start = over_start;
38268 + opo->end = over_end;
38270 + fill_gaps(exist, r, owner, wait, ex, start, end);
38272 + /* update existing locks to new mode and put back in locks list */
38273 + list_for_each_entry_safe(po, safe, exist, list) {
38274 + list_move_tail(&po->list, &r->locks);
38277 + if (po->ex == ex)
38280 + update_lock(po->lp, wait);
38283 + /* deal with the RE that partially overlaps the requested range */
38285 + if (ex == opo->ex)
38289 + /* 1. add a shared lock in the non-overlap range
38290 + 2. convert RE to overlap range and requested mode */
38292 + add_lock(r, owner, X_WAIT, SH, frag_start, frag_end);
38294 + opo->start = over_start;
38295 + opo->end = over_end;
38298 + update_lock(opo->lp, wait);
38300 + /* 1. request a shared lock in the overlap range
38301 + 2. convert RE to non-overlap range
38302 + 3. wait for shared lock to complete */
38304 + add_lock(r, owner, WAIT, SH, over_start, over_end);
38306 + opo->start = frag_start;
38307 + opo->end = frag_end;
38309 + update_lock(opo->lp, X_WAIT);
38315 +/* go through r->locks to find what needs to be done to extend,
38316 + shrink, shift, split, etc existing locks (this often involves adding new
38317 + locks in addition to modifying existing locks. */
38319 +static int plock_internal(struct dlm_resource *r, unsigned long owner,
38320 + int wait, int ex, uint64_t start, uint64_t end)
38322 + LIST_HEAD(exist);
38323 + struct posix_lock *po, *safe, *case4_po = NULL;
38326 + list_for_each_entry_safe(po, safe, &r->locks, list) {
38327 + if (po->owner != owner)
38329 + if (!ranges_overlap(po->start, po->end, start, end))
38332 + /* existing range (RE) overlaps new range (RN) */
38334 + switch(overlap_type(start, end, po->start, po->end)) {
38337 + if (po->ex == ex)
38340 + /* ranges the same - just update the existing lock */
38342 + update_lock(po->lp, wait);
38346 + if (po->ex == ex)
38349 + error = lock_case1(po, r, owner, wait, ex, start, end);
38353 + if (po->ex == ex)
38356 + error = lock_case2(po, r, owner, wait, ex, start, end);
38360 + list_move_tail(&po->list, &exist);
38364 + DLM_ASSERT(!case4_po, );
38366 + list_move_tail(&po->list, &exist);
38376 + error = lock_case4(case4_po, &exist, r, owner, wait, ex,
38378 + else if (!list_empty(&exist))
38379 + error = lock_case3(&exist, r, owner, wait, ex, start, end);
38381 + add_lock(r, owner, wait, ex, start, end);
38387 +static int punlock_internal(struct dlm_resource *r, unsigned long owner,
38388 + uint64_t start, uint64_t end)
38390 + struct posix_lock *po, *safe;
38393 + list_for_each_entry_safe(po, safe, &r->locks, list) {
38394 + if (po->owner != owner)
38396 + if (!ranges_overlap(po->start, po->end, start, end))
38399 + /* existing range (RE) overlaps new range (RN) */
38401 + switch(overlap_type(start, end, po->start, po->end)) {
38404 + /* ranges the same - just remove the existing lock */
38406 + list_del(&po->list);
38407 + remove_lock(po->lp);
38411 + /* RN within RE and starts or ends on RE boundary -
38412 + * shrink and update RE */
38414 + shrink_range(po, start, end);
38415 + update_lock(po->lp, X_WAIT);
38419 + /* RN within RE - shrink and update RE to be front
38420 + * fragment, and add a new lock for back fragment */
38422 + add_lock(r, owner, po->ex ? WAIT : X_WAIT, po->ex,
38425 + po->end = start - 1;
38426 + update_lock(po->lp, X_WAIT);
38430 + /* RE within RN - remove RE, then continue checking
38431 + * because RN could cover other locks */
38433 + list_del(&po->list);
38434 + remove_lock(po->lp);
38438 + /* front of RE in RN, or end of RE in RN - shrink and
38439 + * update RE, then continue because RN could cover
38442 + shrink_range(po, start, end);
38443 + update_lock(po->lp, X_WAIT);
38456 +int lm_dlm_plock(lm_lockspace_t *lockspace, struct lm_lockname *name,
38457 + unsigned long owner, int wait, int ex, uint64_t start,
38460 + dlm_t *dlm = (dlm_t *) lockspace;
38461 + struct dlm_resource *r;
38464 + log_debug("en plock %u %x,%"PRIx64"", current->pid,
38465 + name->ln_type, name->ln_number);
38467 + error = get_resource(dlm, name, CREATE, &r);
38472 + /* Wait, without holding any locks, until this plock request is not
38473 + blocked by plocks of *other* *local* processes. Then, none of the
38474 + dlm requests below will wait on a lock from a local process.
38476 + This should not be necessary since we wait for completion after
38477 + up(). This means a local process p1 can unlock lkb X while local p2
38478 + is waiting for X (in wait_async_list). */
38479 + error = wait_local(r, owner, wait, ex, start, end);
38485 + error = lock_resource(r);
38489 + /* check_conflict() checks for conflicts with plocks from other local
38490 + processes and other nodes. */
38492 + if (!wait && check_conflict(dlm, r, name, owner, start, end, ex)) {
38494 + unlock_resource(r);
38498 + /* If NO_WAIT all requests should return immediately.
38499 + If WAIT all requests go on r->async_locks which we wait on in
38500 + wait_async_locks(). This means DLM should not return -EAGAIN and we
38501 + should never block waiting for a plock to be released (by a local or
38502 + remote process) until we call wait_async_list(). */
38504 + error = plock_internal(r, owner, wait, ex, start, end);
38505 + unlock_resource(r);
38507 + /* wait_async_list() must follow the up() because we must be able
38508 + to punlock a range on this resource while there's a blocked plock
38509 + request to prevent deadlock between nodes (and processes). */
38513 + wait_async_list(r, owner);
38516 + log_debug("ex plock %u error %d", current->pid, error);
38520 +int lm_dlm_punlock(lm_lockspace_t *lockspace, struct lm_lockname *name,
38521 + unsigned long owner, uint64_t start, uint64_t end)
38523 + dlm_t *dlm = (dlm_t *) lockspace;
38524 + struct dlm_resource *r;
38527 + log_debug("en punlock %u %x,%"PRIx64"", current->pid,
38528 + name->ln_type, name->ln_number);
38530 + error = get_resource(dlm, name, NO_CREATE, &r);
38535 + error = lock_resource(r);
38539 + error = punlock_internal(r, owner, start, end);
38540 + unlock_resource(r);
38544 + wait_async_list(r, owner);
38547 + log_debug("ex punlock %u error %d", current->pid, error);
38551 +static void query_ast(void *astargs)
38553 + dlm_lock_t *lp = (dlm_lock_t *) astargs;;
38554 + complete(&lp->uast_wait);
38557 +static int get_conflict_global(dlm_t *dlm, struct lm_lockname *name,
38558 + unsigned long owner, uint64_t *start,
38559 + uint64_t *end, int *ex, unsigned long *rowner)
38562 + struct dlm_queryinfo qinfo;
38563 + struct dlm_lockinfo *lki;
38564 + int query = 0, s, error;
38566 + /* acquire a null lock on which base the query */
38568 + error = create_lp(dlm, name, &lp);
38572 + lp->req = DLM_LOCK_NL;
38573 + set_bit(LFL_IDLOCK, &lp->flags);
38574 + do_lock(lp, NULL);
38575 + wait_for_completion(&lp->uast_wait);
38577 + /* do query, repeating if insufficient space */
38579 + query = DLM_LOCK_THIS | DLM_QUERY_QUEUE_GRANTED |
38580 + DLM_QUERY_LOCKS_HIGHER;
38582 + for (s = 16; s < dlm->max_nodes + 1; s += 16) {
38584 + lki = kmalloc(s * sizeof(struct dlm_lockinfo), GFP_KERNEL);
38589 + memset(lki, 0, s * sizeof(struct dlm_lockinfo));
38590 + memset(&qinfo, 0, sizeof(qinfo));
38591 + qinfo.gqi_locksize = s;
38592 + qinfo.gqi_lockinfo = lki;
38594 + init_completion(&lp->uast_wait);
38595 + error = dlm_query(dlm->gdlm_lsp, &lp->lksb, query, &qinfo,
38596 + query_ast, (void *) lp);
38601 + wait_for_completion(&lp->uast_wait);
38602 + error = lp->lksb.sb_status;
38607 + if (error != -E2BIG)
38611 + /* check query results for blocking locks */
38613 + for (s = 0; s < qinfo.gqi_lockcount; s++) {
38615 + lki = &qinfo.gqi_lockinfo[s];
38617 + if (!ranges_overlap(*start, *end, lki->lki_grrange.ra_start,
38618 + lki->lki_grrange.ra_end))
38621 + if (lki->lki_node == dlm->our_nodeid)
38624 + if (lki->lki_grmode == DLM_LOCK_EX || *ex) {
38625 + *start = lki->lki_grrange.ra_start;
38626 + *end = lki->lki_grrange.ra_end;
38627 + *ex = (lki->lki_grmode == DLM_LOCK_EX) ? 1 : 0;
38628 + *rowner = lki->lki_node;
38634 + kfree(qinfo.gqi_lockinfo);
38643 +static int get_conflict_local(dlm_t *dlm, struct dlm_resource *r,
38644 + struct lm_lockname *name, unsigned long owner,
38645 + uint64_t *start, uint64_t *end, int *ex,
38646 + unsigned long *rowner)
38648 + struct posix_lock *po;
38649 + int found = FALSE;
38651 + list_for_each_entry(po, &r->locks, list) {
38652 + if (po->owner == owner)
38654 + if (!ranges_overlap(po->start, po->end, *start, *end))
38657 + if (*ex || po->ex) {
38658 + *start = po->start;
38661 + *rowner = po->owner;
38669 +int lm_dlm_plock_get(lm_lockspace_t *lockspace, struct lm_lockname *name,
38670 + unsigned long owner, uint64_t *start, uint64_t *end,
38671 + int *ex, unsigned long *rowner)
38673 + dlm_t *dlm = (dlm_t *) lockspace;
38674 + struct dlm_resource *r;
38675 + int error, found;
38677 + error = get_resource(dlm, name, NO_CREATE, &r);
38680 + found = get_conflict_local(dlm, r, name, owner, start, end, ex,
38688 + error = get_conflict_global(dlm, name, owner, start, end, ex, rowner);
38693 +static int check_conflict(dlm_t *dlm, struct dlm_resource *r,
38694 + struct lm_lockname *name, unsigned long owner,
38695 + uint64_t start, uint64_t end, int ex)
38697 + uint64_t get_start = start, get_end = end;
38698 + unsigned long get_owner = 0;
38699 + int get_ex = ex, error;
38701 + error = get_conflict_local(dlm, r, name, owner,
38702 + &get_start, &get_end, &get_ex, &get_owner);
38706 + error = get_conflict_global(dlm, name, owner,
38707 + &get_start, &get_end, &get_ex, &get_owner);
38709 + log_debug("check_conflict %d %"PRIx64"-%"PRIx64" %"PRIx64"-%"PRIx64" "
38710 + "ex %d %d own %lu %lu pid %u", error, start, end,
38711 + get_start, get_end, ex, get_ex, owner, get_owner,
38716 diff -urN linux-orig/fs/gfs_locking/lock_dlm/thread.c linux-patched/fs/gfs_locking/lock_dlm/thread.c
38717 --- linux-orig/fs/gfs_locking/lock_dlm/thread.c 1969-12-31 18:00:00.000000000 -0600
38718 +++ linux-patched/fs/gfs_locking/lock_dlm/thread.c 2004-06-16 12:03:17.967822065 -0500
38720 +/******************************************************************************
38721 +*******************************************************************************
38723 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
38724 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
38726 +** This copyrighted material is made available to anyone wishing to use,
38727 +** modify, copy, or redistribute it subject to the terms and conditions
38728 +** of the GNU General Public License v.2.
38730 +*******************************************************************************
38731 +******************************************************************************/
38733 +#include "lock_dlm.h"
38736 + * Run in dlm_async thread
38740 + * queue_submit - add lock request to queue for dlm_async thread
38743 + * A lock placed on this queue is re-submitted to DLM as soon as
38744 + * dlm_async thread gets to it.
38747 +static void queue_submit(dlm_lock_t *lp)
38749 + dlm_t *dlm = lp->dlm;
38751 + spin_lock(&dlm->async_lock);
38752 + list_add_tail(&lp->slist, &dlm->submit);
38753 + set_bit(LFL_SLIST, &lp->flags);
38754 + spin_unlock(&dlm->async_lock);
38755 + wake_up(&dlm->wait);
38759 + * process_blocking - processing of blocking callback
38764 +static void process_blocking(dlm_lock_t *lp, int bast_mode)
38766 + dlm_t *dlm = lp->dlm;
38769 + switch (make_lmstate(bast_mode)) {
38770 + case LM_ST_EXCLUSIVE:
38771 + cb = LM_CB_NEED_E;
38773 + case LM_ST_DEFERRED:
38774 + cb = LM_CB_NEED_D;
38776 + case LM_ST_SHARED:
38777 + cb = LM_CB_NEED_S;
38780 + DLM_ASSERT(0, printk("unknown bast mode %u\n", lp->bast_mode););
38783 + dlm->fscb(dlm->fsdata, cb, &lp->lockname);
38787 + * process_complete - processing of completion callback for a lock request
38792 +static void process_complete(dlm_lock_t *lp)
38794 + dlm_t *dlm = lp->dlm;
38795 + struct lm_async_cb acb;
38796 + int16_t prev_mode = lp->cur;
38798 + memset(&acb, 0, sizeof(acb));
38801 + * This is an AST for an unlock.
38804 + if (test_and_clear_bit(LFL_DLM_UNLOCK, &lp->flags)) {
38806 + /* FIXME: Add an assertion to catch NOFAIL promotions from
38807 + * non-NL modes? */
38809 + if (lp->lksb.sb_status == -DLM_ECANCEL) {
38811 + /* lp->cur remains the same, is there anything to clear
38812 + * or reset to put this lp into an "ordinary" state? */
38814 + printk("lock_dlm: -DLM_ECANCEL num=%x,%"PRIx64"\n",
38815 + lp->lockname.ln_type, lp->lockname.ln_number);
38817 + DLM_ASSERT(lp->lksb.sb_status == -DLM_EUNLOCK,
38818 + printk("num=%x,%"PRIx64" status=%d\n",
38819 + lp->lockname.ln_type,
38820 + lp->lockname.ln_number,
38821 + lp->lksb.sb_status););
38822 + lp->cur = DLM_LOCK_IV;
38825 + complete(&lp->uast_wait);
38830 + * A canceled lock request. The lock was just taken off the delayed
38831 + * list and was never even submitted to dlm.
38834 + if (test_and_clear_bit(LFL_CANCEL, &lp->flags)) {
38835 + lp->req = lp->cur;
38836 + acb.lc_ret |= LM_OUT_CANCELED;
38841 + * An error occurred.
38844 + if (lp->lksb.sb_status) {
38845 + lp->req = lp->cur;
38846 + if (lp->cur == DLM_LOCK_IV)
38847 + lp->lksb.sb_lkid = 0;
38849 + if ((lp->lksb.sb_status == -EAGAIN) &&
38850 + (lp->lkf & DLM_LKF_NOQUEUE)) {
38851 + /* a "normal" error */
38853 + printk("lock_dlm: process_complete error id=%x "
38854 + "status=%d\n", lp->lksb.sb_lkid,
38855 + lp->lksb.sb_status);
38860 + * This is an AST for an EX->EX conversion for sync_lvb from GFS.
38863 + if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
38864 + complete(&lp->uast_wait);
38869 + * A lock has been demoted to NL because it initially completed during
38870 + * BLOCK_LOCKS. Now it must be requested in the originally requested
38874 + if (test_and_clear_bit(LFL_REREQUEST, &lp->flags)) {
38876 + DLM_ASSERT(lp->req == DLM_LOCK_NL,);
38877 + DLM_ASSERT(lp->prev_req > DLM_LOCK_NL,);
38879 + lp->cur = DLM_LOCK_NL;
38880 + lp->req = lp->prev_req;
38881 + lp->prev_req = DLM_LOCK_IV;
38882 + lp->lkf &= ~DLM_LKF_CONVDEADLK;
38883 + lp->lkf |= DLM_LKF_QUECVT;
38885 + set_bit(LFL_NOCACHE, &lp->flags);
38887 + if (test_bit(DFL_BLOCK_LOCKS, &dlm->flags) &&
38888 + !test_bit(LFL_NOBLOCK, &lp->flags))
38889 + queue_delayed(lp, QUEUE_LOCKS_BLOCKED);
38891 + queue_submit(lp);
38896 + * A request is granted during dlm recovery. It may be granted
38897 + * because the locks of a failed node were cleared. In that case,
38898 + * there may be inconsistent data beneath this lock and we must wait
38899 + * for recovery to complete to use it. When gfs recovery is done this
38900 + * granted lock will be converted to NL and then reacquired in this
38904 + if (test_bit(DFL_BLOCK_LOCKS, &dlm->flags) &&
38905 + !test_bit(LFL_NOBLOCK, &lp->flags) &&
38906 + lp->req != DLM_LOCK_NL) {
38908 + lp->cur = lp->req;
38909 + lp->prev_req = lp->req;
38910 + lp->req = DLM_LOCK_NL;
38911 + lp->lkf |= DLM_LKF_CONVERT;
38912 + lp->lkf &= ~DLM_LKF_CONVDEADLK;
38913 + lp->lkf &= ~DLM_LKF_QUECVT;
38915 + set_bit(LFL_REREQUEST, &lp->flags);
38916 + queue_submit(lp);
38921 + * DLM demoted the lock to NL before it was granted so GFS must be
38922 + * told it cannot cache data for this lock.
38925 + if (lp->lksb.sb_flags == DLM_SBF_DEMOTED)
38926 + set_bit(LFL_NOCACHE, &lp->flags);
38931 + * This is an internal lock_dlm lock used for managing JIDs.
38934 + if (test_bit(LFL_IDLOCK, &lp->flags)) {
38935 + clear_bit(LFL_NOBLOCK, &lp->flags);
38936 + lp->cur = lp->req;
38937 + complete(&lp->uast_wait);
38942 + * Normal completion of a lock request. Tell GFS it now has the lock.
38945 + clear_bit(LFL_NOBLOCK, &lp->flags);
38946 + lp->cur = lp->req;
38948 + acb.lc_name = lp->lockname;
38949 + acb.lc_ret |= make_lmstate(lp->cur);
38951 + if (!test_and_clear_bit(LFL_NOCACHE, &lp->flags) &&
38952 + (lp->cur > DLM_LOCK_NL) && (prev_mode > DLM_LOCK_NL))
38953 + acb.lc_ret |= LM_OUT_CACHEABLE;
38955 + dlm->fscb(dlm->fsdata, LM_CB_ASYNC, &acb);
38959 + * no_work - determine if there's work for the dlm_async thread
38962 + * Returns: 1 if no work, 0 otherwise
38965 +static __inline__ int no_work(dlm_t * dlm)
38969 + spin_lock(&dlm->async_lock);
38971 + ret = list_empty(&dlm->complete) &&
38972 + list_empty(&dlm->blocking) &&
38973 + list_empty(&dlm->submit) &&
38974 + list_empty(&dlm->starts) && !test_bit(DFL_MG_FINISH, &dlm->flags);
38976 + spin_unlock(&dlm->async_lock);
38982 + * dlm_async - thread for a variety of asynchronous processing
38985 + * Returns: 0 on success, -EXXX on failure
38988 +static int dlm_async(void *data)
38990 + dlm_t *dlm = (dlm_t *) data;
38991 + dlm_lock_t *lp = NULL;
38992 + dlm_start_t *ds = NULL;
38993 + uint8_t complete, blocking, submit, start, finish;
38994 + DECLARE_WAITQUEUE(wait, current);
38996 + daemonize("lock_dlm");
38997 + atomic_inc(&dlm->threads);
39000 + current->state = TASK_INTERRUPTIBLE;
39001 + add_wait_queue(&dlm->wait, &wait);
39002 + if (no_work(dlm))
39004 + remove_wait_queue(&dlm->wait, &wait);
39005 + current->state = TASK_RUNNING;
39007 + complete = blocking = submit = start = finish = 0;
39009 + spin_lock(&dlm->async_lock);
39011 + if (!list_empty(&dlm->complete)) {
39012 + lp = list_entry(dlm->complete.next, dlm_lock_t, clist);
39013 + list_del(&lp->clist);
39014 + clear_bit(LFL_CLIST, &lp->flags);
39016 + } else if (!list_empty(&dlm->blocking)) {
39017 + lp = list_entry(dlm->blocking.next, dlm_lock_t, blist);
39018 + list_del(&lp->blist);
39019 + clear_bit(LFL_BLIST, &lp->flags);
39020 + blocking = lp->bast_mode;
39021 + lp->bast_mode = 0;
39022 + } else if (!list_empty(&dlm->submit)) {
39023 + lp = list_entry(dlm->submit.next, dlm_lock_t, slist);
39024 + list_del(&lp->slist);
39025 + clear_bit(LFL_SLIST, &lp->flags);
39027 + } else if (!list_empty(&dlm->starts)) {
39028 + ds = list_entry(dlm->starts.next, dlm_start_t, list);
39029 + list_del(&ds->list);
39031 + } else if (test_and_clear_bit(DFL_MG_FINISH, &dlm->flags)) {
39035 + spin_unlock(&dlm->async_lock);
39038 + process_complete(lp);
39040 + else if (blocking)
39041 + process_blocking(lp, blocking);
39044 + process_submit(lp);
39047 + process_start(dlm, ds);
39050 + process_finish(dlm);
39054 + while (!test_bit(DFL_THREAD_STOP, &dlm->flags));
39056 + atomic_dec(&dlm->threads);
39061 + * init_async_thread
39064 + * Returns: 0 on success, -EXXX on failure
39067 +int init_async_thread(dlm_t * dlm)
39071 + clear_bit(DFL_THREAD_STOP, &dlm->flags);
39072 + atomic_set(&dlm->threads, 0);
39074 + error = kernel_thread(dlm_async, dlm, 0);
39078 + error = kernel_thread(dlm_async, dlm, 0);
39080 + release_async_thread(dlm);
39084 + while (atomic_read(&dlm->threads) != 2)
39090 + printk("lock_dlm: can't start async thread %d\n", error);
39095 + * release_async_thread
39100 +void release_async_thread(dlm_t * dlm)
39102 + set_bit(DFL_THREAD_STOP, &dlm->flags);
39103 + while (atomic_read(&dlm->threads)) {
39104 + wake_up(&dlm->wait);
39108 diff -urN linux-orig/fs/gfs_locking/lock_gulm/gio_wiretypes.h linux-patched/fs/gfs_locking/lock_gulm/gio_wiretypes.h
39109 --- linux-orig/fs/gfs_locking/lock_gulm/gio_wiretypes.h 1969-12-31 18:00:00.000000000 -0600
39110 +++ linux-patched/fs/gfs_locking/lock_gulm/gio_wiretypes.h 2004-06-16 12:03:21.956895230 -0500
39112 +/******************************************************************************
39113 +*******************************************************************************
39115 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
39116 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
39118 +** This copyrighted material is made available to anyone wishing to use,
39119 +** modify, copy, or redistribute it subject to the terms and conditions
39120 +** of the GNU General Public License v.2.
39122 +*******************************************************************************
39123 +******************************************************************************/
39124 +#ifndef __gio_wiretypes_h__
39125 +#define __gio_wiretypes_h__
39127 +/* an attempt to do something about tracking changes to the protocol over
39129 + * If I was really cute, this would be effectively a checksum of this file.
39131 +#define GIO_WIREPROT_VERS (0x67000010)
39133 +/*****************Error codes.
39134 + * everyone uses these same error codes.
39136 +#define gio_Err_Ok (0)
39137 +#define gio_Err_BadLogin (1001)
39138 +#define gio_Err_BadCluster (1003)
39139 +#define gio_Err_BadConfig (1004)
39140 +#define gio_Err_BadGeneration (1005)
39141 +#define gio_Err_BadWireProto (1019)
39143 +#define gio_Err_NotAllowed (1006)
39144 +#define gio_Err_Unknown_Cs (1007)
39145 +#define gio_Err_BadStateChg (1008)
39146 +#define gio_Err_MemoryIssues (1009)
39148 +#define gio_Err_PushQu (1010) /* client should never see this one */
39149 +#define gio_Err_TryFailed (1011)
39150 +#define gio_Err_AlreadyPend (1013)
39151 +#define gio_Err_Canceled (1015)
39153 +#define gio_Err_NoSuchFS (1016)
39154 +#define gio_Err_NoSuchJID (1017)
39155 +#define gio_Err_NoSuchName (1018)
39157 +/* next free error code: 1002 1012 1014 1020 */
39160 + * Error: just sort of a generic error code thing.
39162 + * uint32: opcode that this is in reply to. (can be zeros)
39163 + * uint32: error code
39165 +#define gulm_err_reply (0x67455252) /* gERR */
39167 +#define gulm_nop (0x674e4f50) /* gNOP */
39169 +/********************* Core *****************/
39173 + * uint32: proto version
39174 + * string: cluster ID
39175 + * string: My Name
39176 + * uint64: generation number
39177 + * uint32: config CRC
39181 + * uint64: generation number
39182 + * uint32: error code
39185 + * If I am the Master or Arbitrating and there are no errors, A
39186 + * serialization of the current nodelist follows. And a client or slave
39187 + * is connecting (not resources).
39189 + * logout request:
39191 + * string: node name
39192 + * uint8: S/P/A/M/R
39193 + * logout reply: Don't seem to use this....
39195 + * uint32: error code
39197 + * resource login request:
39199 + * uint32: proto version
39200 + * string: cluster ID
39201 + * string: resource name
39202 + * uint32: options
39203 + * login reply (gCL1) is sent in return.
39207 + * string: My Name
39210 + * uint32: error code
39212 + * Membership Request
39214 + * string: node name
39216 + * Membership update
39218 + * string: node name
39220 + * uint8: Current State
39222 + * Membership list request info.
39225 + * Membership list info.
39227 + * list_start_marker
39228 + * string: node name
39231 + * uint8: laststate
39232 + * uint8: mode (S/P/A/M/C)
39233 + * uint32: missed beats
39234 + * uint64: last beat
39235 + * uint64: delay avg
39236 + * uint64: max delay
39237 + * list_stop_marker
39239 + * Request Resource info
39242 + * Resource list info
39244 + * list_start_marker
39246 + * list_stop_marker
39248 + * Force node into Expired:
39250 + * string: node name
39252 + * Core state request:
39255 + * Core state changes:
39257 + * uint8: state (slave, pending, arbitrating, master)
39258 + * If state == Slave, then the next two will follow.
39260 + * string: MasterName
39262 + * Core shutdown req:
39265 + * Switch core from current state into Pending:
39269 +#define gulm_core_login_req (0x67434c00) /* gCL0 */
39270 +#define gulm_core_login_rpl (0x67434c01) /* gCL1 */
39271 +#define gulm_core_logout_req (0x67434c02) /* gCL2 */
39272 +#define gulm_core_logout_rpl (0x67434c03) /* gCL3 */
39273 +#define gulm_core_reslgn_req (0x67434c04) /* gCL4 */
39274 +#define gulm_core_beat_req (0x67434200) /* gCB0 */
39275 +#define gulm_core_beat_rpl (0x67434201) /* gCB1 */
39276 +#define gulm_core_mbr_req (0x67434d41) /* gCMA */
39277 +#define gulm_core_mbr_updt (0x67434d55) /* gCMU */
39278 +#define gulm_core_mbr_lstreq (0x67434d6c) /* gCMl */
39279 +#define gulm_core_mbr_lstrpl (0x67434d4c) /* gCML */
39280 +#define gulm_core_mbr_force (0x67434645) /* gCFE */
39281 +#define gulm_core_res_req (0x67435200) /* gCR0 */
39282 +#define gulm_core_res_list (0x67435201) /* gCR1 */
39283 +#define gulm_core_state_req (0x67435352) /* gCSR */
39284 +#define gulm_core_state_chgs (0x67435343) /* gCSC */
39285 +#define gulm_core_shutdown (0x67435344) /* gCSD */
39286 +#define gulm_core_forcepend (0x67435350) /* gCSP */
39288 +/* in the st field */
39289 +#define gio_Mbr_Logged_in (0x05)
39290 +#define gio_Mbr_Logged_out (0x06)
39291 +#define gio_Mbr_Expired (0x07)
39292 +#define gio_Mbr_Killed (0x08)
39293 +#define gio_Mbr_OM_lgin (0x09)
39295 +/* in the ama field */
39296 +#define gio_Mbr_ama_Slave (0x01)
39297 +#define gio_Mbr_ama_Master (0x02)
39298 +#define gio_Mbr_ama_Pending (0x03)
39299 +#define gio_Mbr_ama_Arbitrating (0x04)
39300 +#define gio_Mbr_ama_Resource (0x05)
39301 +#define gio_Mbr_ama_Client (0x06)
39302 +/* the Client entry is ONLY for mode tracking.
39303 + * nodelist reply is the only place it is used.
39306 +/* options that affect behaviors on services. (resources) */
39307 +#define gulm_svc_opt_important (0x00000001)
39309 +/********************* Info Traffic *****************
39311 + * Note that for many of these, they can be sent to all of the servers and
39312 + * will get sane replies. Some of these can only be sent to specific
39324 + * The stats reply is a set of string pairs. This way the server can send
39325 + * whatever things it wants, and the same client code will work for
39330 + * string: verb flags (with -/+) to [un]set
39332 + * We don't bother with a reply for this. If the server got it, it works.
39333 + * If it didn't, it cannot send an error back anyways.
39338 + * Tells the server to close this connection cleanly. We're done with
39339 + * it. This is *not* the same as logging out. You must login before you
39340 + * can logout. And many commands sent from gulm_tool happen without
39341 + * logging in. These commands would be useful for clients in many cases,
39342 + * so I don't want to put a close at the end of them, but if I don't,
39343 + * there will be error messages printed on the console when gulm_tool
39345 + * So we need a way to close a connection cleanly that has not been
39348 + * request slave list:
39350 + * slave list reply:
39354 + * uint32: poller idx
39357 +#define gulm_info_stats_req (0x67495300) /* gIS0 */
39358 +#define gulm_info_stats_rpl (0x67495301) /* gIS1 */
39359 +#define gulm_info_set_verbosity (0x67495600) /* gIV0 */
39360 +#define gulm_socket_close (0x67534300) /* gSC0 */
39361 +#define gulm_info_slave_list_req (0x67494c00) /* gIL0 */
39362 +#define gulm_info_slave_list_rpl (0x67494c01) /* gIL1 */
39364 +/********************* Lock Traffic *****************
39365 + * All lock traffic.
39369 + * uint32: proto version
39370 + * string: node name
39371 + * uint8: Client/Slave
39374 + * uint32: error code
39375 + * uint8: Slave/Master
39376 + * xdr of current lock state if no errors and master sending reply
39377 + * and you're a slave.
39384 + * select lockspace:
39386 + * raw: usually just four bytes for lockspace name.
39387 + * but can be most anything.
39394 + * raw: lvb -- Only exists if hasLVB flag is true.
39400 + * uint32: error code
39401 + * raw: lvb -- Only exists if hasLVB flag is true.
39403 + * lock state update:
39405 + * string: node name
39409 + * raw: lvb -- Only exists if hasLVB flag is true.
39415 + * raw: lvb -- Only exists if action is SyncLVB
39420 + * uint32: error code
39424 + * string: node name
39427 + * raw: lvb -- Only exists if action is SyncLVB
39429 + * Slave Update Rply: -- for both actions and requests.
39433 + * Drop lock Callback:
39438 + * Drop all locks callback: This is the highwater locks thing
39441 + * Drop expired locks:
39443 + * string: node name if NULL, then drop all exp for mask.
39444 + * raw: keymask if keymask & key == key, then dropexp on this lock.
39450 + * list start mark
39451 + * uint8: key length
39454 + * uint8: lvb length
39455 + * if lvb length > 0, raw: LVB
39456 + * uint32: Holder count
39457 + * list start mark
39458 + * string: holders
39460 + * uint32: LVB holder count
39461 + * list start mark
39462 + * string: LVB Holders
39464 + * uint32: Expired holder count
39465 + * list start mark
39466 + * string: ExpHolders
39471 +#define gulm_lock_login_req (0x674C4C00) /* gLL0 */
39472 +#define gulm_lock_login_rpl (0x674C4C01) /* gLL1 */
39473 +#define gulm_lock_logout_req (0x674C4C02) /* gLL2 */
39474 +#define gulm_lock_logout_rpl (0x674C4C03) /* gLL3 */
39475 +#define gulm_lock_sel_lckspc (0x674C5300) /* gLS0 */
39476 +#define gulm_lock_state_req (0x674C5200) /* gLR0 */
39477 +#define gulm_lock_state_rpl (0x674C5201) /* gLR1 */
39478 +#define gulm_lock_state_updt (0x674C5255) /* gLRU */
39479 +#define gulm_lock_action_req (0x674C4100) /* gLA0 */
39480 +#define gulm_lock_action_rpl (0x674C4101) /* gLA1 */
39481 +#define gulm_lock_action_updt (0x674C4155) /* gLAU */
39482 +#define gulm_lock_update_rpl (0x674c5552) /* gLUR */
39483 +#define gulm_lock_cb_state (0x674C4300) /* gLC0 */
39484 +#define gulm_lock_cb_dropall (0x674C4302) /* gLC2 */
39485 +#define gulm_lock_drop_exp (0x674C454F) /* gLEO */
39486 +#define gulm_lock_dump_req (0x674c4400) /* gLD0 */
39487 +#define gulm_lock_dump_rpl (0x674c4401) /* gLD1 */
39488 +#define gulm_lock_rerunqueues (0x674c5152) /* gLQR */
39490 +/* marks for the login */
39491 +#define gio_lck_st_Slave (0x00)
39492 +#define gio_lck_st_Client (0x01)
39494 +/* state change requests */
39495 +#define gio_lck_st_Unlock (0x00)
39496 +#define gio_lck_st_Exclusive (0x01)
39497 +#define gio_lck_st_Deferred (0x02)
39498 +#define gio_lck_st_Shared (0x03)
39500 +#define gio_lck_st_Cancel (0x09)
39501 +#define gio_lck_st_HoldLVB (0x0b)
39502 +#define gio_lck_st_UnHoldLVB (0x0c)
39503 +#define gio_lck_st_SyncLVB (0x0d)
39506 +#define gio_lck_fg_Do_CB (0x00000001)
39507 +#define gio_lck_fg_Try (0x00000002)
39508 +#define gio_lck_fg_Any (0x00000004)
39509 +#define gio_lck_fg_NoExp (0x00000008)
39510 +#define gio_lck_fg_hasLVB (0x00000010)
39511 +#define gio_lck_fg_Cachable (0x00000020)
39512 +#define gio_lck_fg_Piority (0x00000040)
39514 +#endif /*__gio_wiretypes_h__*/
39515 +/* vim: set ai cin et sw=3 ts=3 : */
39516 diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm.h linux-patched/fs/gfs_locking/lock_gulm/gulm.h
39517 --- linux-orig/fs/gfs_locking/lock_gulm/gulm.h 1969-12-31 18:00:00.000000000 -0600
39518 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm.h 2004-06-16 12:03:21.957894998 -0500
39520 +/******************************************************************************
39521 +*******************************************************************************
39523 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
39524 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
39526 +** This copyrighted material is made available to anyone wishing to use,
39527 +** modify, copy, or redistribute it subject to the terms and conditions
39528 +** of the GNU General Public License v.2.
39530 +*******************************************************************************
39531 +******************************************************************************/
39533 +#ifndef GULM_DOT_H
39534 +#define GULM_DOT_H
39536 +#define GULM_RELEASE_NAME "v6.0.0"
39538 +#ifdef MODVERSIONS
39539 +#include <linux/modversions.h>
39540 +#endif /* MODVERSIONS */
39541 +#include <linux/module.h>
39542 +#include <linux/slab.h>
39543 +#include <linux/vmalloc.h>
39544 +#include <asm/uaccess.h>
39545 +#include <linux/spinlock.h>
39546 +#include <asm/atomic.h>
39547 +#include <linux/config.h>
39548 +#include <linux/version.h>
39549 +#include <linux/smp_lock.h>
39550 +#include <linux/ctype.h>
39551 +#include <linux/string.h>
39561 +#if (BITS_PER_LONG == 64)
39562 +#define PRIu64 "lu"
39563 +#define PRId64 "ld"
39564 +#define PRIo64 "lo"
39565 +#define PRIx64 "lx"
39566 +#define PRIX64 "lX"
39567 +#define SCNu64 "lu"
39568 +#define SCNd64 "ld"
39569 +#define SCNo64 "lo"
39570 +#define SCNx64 "lx"
39571 +#define SCNX64 "lX"
39573 +#define PRIu64 "Lu"
39574 +#define PRId64 "Ld"
39575 +#define PRIo64 "Lo"
39576 +#define PRIx64 "Lx"
39577 +#define PRIX64 "LX"
39578 +#define SCNu64 "Lu"
39579 +#define SCNd64 "Ld"
39580 +#define SCNo64 "Lo"
39581 +#define SCNx64 "Lx"
39582 +#define SCNX64 "LX"
39585 +#include <linux/list.h>
39588 +#define MAX(a,b) ((a>b)?a:b)
39591 +#define MIN(a,b) ((a<b)?a:b)
39593 +/* Extern Macro */
39596 +#define EXTERN extern
39601 +#define INIT(X) =X
39604 +/* Static Macro */
39605 +#ifndef DEBUG_SYMBOLS
39606 +#define STATIC static
39611 +/* Divide x by y. Round up if there is a remainder. */
39612 +#define DIV_RU(x, y) (((x) + (y) - 1) / (y))
39614 +#include <linux/lm_interface.h>
39616 +#include "gulm_prints.h"
39618 +#include "libgulm.h"
39620 +#include "handler.h"
39622 +/* Some fixed length constants.
39623 + * Some of these should be made dynamic in size in the future.
39625 +#define GIO_KEY_SIZE (46)
39626 +#define GIO_LVB_SIZE (32)
39627 +#define GIO_NAME_SIZE (32)
39628 +#define GIO_NAME_LEN (GIO_NAME_SIZE-1)
39630 +/* What we know about this filesystem */
39631 +struct gulm_fs_s {
39632 + struct list_head fs_list;
39633 + char fs_name[GIO_NAME_SIZE]; /* lock table name */
39635 + lm_callback_t cb; /* file system callback function */
39636 + lm_fsdata_t *fsdata; /* private file system data */
39638 + callback_qu_t cq;
39641 + uint32_t lvb_size;
39643 + struct semaphore get_lock; /* I am not 100% sure this is needed.
39644 + * But it only hurts performance,
39645 + * not correctness if it is
39646 + * useless. Sometime post52, need
39647 + * to investigate.
39650 + /* Stuff for the first mounter lock and state */
39651 + int firstmounting;
39652 + /* the recovery done func needs to behave slightly differently when we are
39653 + * the first node in an fs.
39656 + void *mountlock; /* this lock holds the Firstmounter state of the FS */
39657 + /* this is because all lock traffic is async, and really at this point
39658 + * in time we want a sync behavior, so I'm left with doing something to
39661 + * this works, but it is crufty, but I don't want to build a huge
39662 + * queuing system for one lock that we touch twice at the beginning and
39663 + * once on the end.
39665 + * I should change the firstmounter lock to work like the journal locks
39666 + * and the node locks do. Things are a lot cleaner now with the libgulm
39667 + * interface than before. (when the firstmounter lock code was written)
39669 + struct completion sleep;
39671 + /* Stuff for JID mapping locks */
39672 + uint32_t JIDcount; /* how many JID locks are there. */
39674 +typedef struct gulm_fs_s gulm_fs_t;
39676 +/* What we know about each locktable.
39677 + * only one now-a-days. (the LTPX)
39679 +typedef struct lock_table_s {
39680 + uint32_t magic_one;
39683 + struct task_struct *recver_task;
39684 + struct completion startup;
39685 + struct semaphore sender;
39687 + struct task_struct *sender_task;
39688 + wait_queue_head_t send_wchan;
39689 + spinlock_t queue_sender;
39690 + struct list_head to_be_sent;
39693 + spinlock_t *hshlk;
39694 + struct list_head *lkhsh;
39697 + * it may be wise to make some of these into atomic numbers.
39698 + * or something. or not.
39700 + uint32_t locks_total;
39701 + uint32_t locks_unl;
39702 + uint32_t locks_exl;
39703 + uint32_t locks_shd;
39704 + uint32_t locks_dfr;
39705 + uint32_t locks_lvbs;
39706 + atomic_t locks_pending;
39707 + /* cannot count expired here. clients don't know this */
39709 + uint32_t lops; /* just incr on each op */
39713 +typedef struct gulm_cm_s {
39714 + uint8_t myName[64];
39715 + uint8_t clusterID[256]; /* doesn't need to be 256. */
39716 + uint8_t loaded; /* True|False whether we grabbed the config data */
39719 + uint32_t handler_threads; /* how many to have */
39720 + uint32_t verbosity;
39722 + uint64_t GenerationID;
39724 + lock_table_t ltpx;
39726 + gulm_interface_p hookup;
39730 +/* things about each lock. */
39731 +typedef struct gulm_lock_s {
39732 + struct list_head gl_list;
39735 + uint32_t magic_one;
39736 + gulm_fs_t *fs; /* which filesystem we belong to. */
39737 + uint8_t key[GIO_KEY_SIZE];
39739 + uint8_t last_suc_state; /* last state we successfully got. */
39742 + /* this is true when there is a lock request sent out for this lock.
39743 + * All it really means is that if we've lost the master, and reconnect
39744 + * to another, this lock needs to have its request resent.
39746 + * This now has two stages. Since a lock could be pending, but still in
39747 + * the send queue. So we don't want to resend requests that haven't
39750 + * we don't handle the master losses here any more. LTPX does that for
39751 + * us. Should consider removing the duplicated code then.
39753 + int actuallypending; /* may need to be atomic */
39754 + int in_to_be_sent;
39756 + enum { glck_nothing, glck_action, glck_state } req_type;
39757 + /* these three for the lock req. We save them here so we can rebuild
39758 + * the lock request if there was a server failover. (?still needed?)
39760 + unsigned int cur_state;
39761 + unsigned int req_state;
39762 + unsigned int flags;
39764 + /* these three for actions. First is the action, next is result, last is
39765 + * what threads wait on for the reply.
39768 + int result; /* ok, both are using this. */
39769 + struct completion actsleep;
39773 +/*****************************************************************************/
39774 +/* cross pollenate prototypes */
39776 +/* from gulm_lt.c */
39777 +void lt_logout (void);
39778 +int lt_login (void);
39779 +int get_mount_lock (gulm_fs_t * fs, int *first);
39780 +int downgrade_mount_lock (gulm_fs_t * fs);
39781 +int drop_mount_lock (gulm_fs_t * fs);
39782 +int send_drop_all_exp (lock_table_t * lt);
39783 +int send_drop_exp (gulm_fs_t * fs, lock_table_t * lt, char *name);
39785 +/*from gulm_core.c */
39786 +void cm_logout (void);
39787 +int cm_login (void);
39788 +void delete_ipnames (struct list_head *namelist);
39790 +/* from gulm_fs.c */
39791 +void init_gulm_fs (void);
39792 +void request_journal_replay (uint8_t * name);
39793 +void passup_droplocks (void);
39794 +gulm_fs_t *get_fs_by_name (uint8_t * name);
39795 +void dump_internal_lists (void);
39796 +void gulm_recovery_done (lm_lockspace_t * lockspace,
39797 + unsigned int jid, unsigned int message);
39798 +void gulm_unmount (lm_lockspace_t * lockspace);
39799 +void gulm_others_may_mount (lm_lockspace_t * lockspace);
39800 +int gulm_mount (char *table_name, char *host_data,
39801 + lm_callback_t cb, lm_fsdata_t * fsdata,
39802 + unsigned int min_lvb_size, struct lm_lockstruct *lockstruct);
39804 +extern struct lm_lockops gulm_ops;
39806 +#endif /* GULM_DOT_H */
39807 +/* vim: set ai cin noet sw=8 ts=8 : */
39808 diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_core.c linux-patched/fs/gfs_locking/lock_gulm/gulm_core.c
39809 --- linux-orig/fs/gfs_locking/lock_gulm/gulm_core.c 1969-12-31 18:00:00.000000000 -0600
39810 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm_core.c 2004-06-16 12:03:21.957894998 -0500
39812 +/******************************************************************************
39813 +*******************************************************************************
39815 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
39816 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
39818 +** This copyrighted material is made available to anyone wishing to use,
39819 +** modify, copy, or redistribute it subject to the terms and conditions
39820 +** of the GNU General Public License v.2.
39822 +*******************************************************************************
39823 +******************************************************************************/
39827 +#include <linux/kernel.h>
39828 +#include <linux/fs.h>
39829 +#include <linux/slab.h>
39830 +#include <linux/file.h>
39831 +#define __KERNEL_SYSCALLS__
39832 +#include <linux/unistd.h>
39835 +#include "utils_tostr.h"
39837 +extern gulm_cm_t gulm_cm;
39839 +/* private vars. */
39840 +int cm_thd_running;
39841 +struct completion cm_thd_startup;
39842 +struct task_struct *cm_thd_task;
39847 +gulm_core_login_reply (void *misc, uint64_t gen, uint32_t error,
39848 + uint32_t rank, uint8_t corestate)
39850 + if (error != 0) {
39851 + log_err ("Core returned error %d:%s.\n", error,
39852 + gio_Err_to_str (error));
39853 + cm_thd_running = FALSE;
39857 + if( gulm_cm.GenerationID != 0 ) {
39858 + GULM_ASSERT(gulm_cm.GenerationID == gen,
39859 + printk("us: %"PRIu64" them: %"PRIu64"\n",
39860 + gulm_cm.GenerationID,gen);
39863 + gulm_cm.GenerationID = gen;
39865 + error = lt_login ();
39866 + if (error != 0) {
39867 + log_err ("lt_login failed. %d\n", error);
39868 + lg_core_logout (gulm_cm.hookup); /* XXX is this safe? */
39872 + log_msg (lgm_Network2, "Logged into local core.\n");
39878 + * gulm_core_logout_reply -
39885 +gulm_core_logout_reply (void *misc)
39887 + log_msg (lgm_Network2, "Logged out of local core.\n");
39894 +gulm_core_nodechange (void *misc, char *nodename,
39895 + struct in6_addr *nodeip, uint8_t nodestate)
39897 + if (nodestate == lg_core_Fenced) {
39898 + request_journal_replay (nodename);
39900 + /* if me and state is logout, Need to close out things if we can.
39902 + if (gulm_cm.starts && nodestate == lg_core_Logged_out &&
39903 + strcmp(gulm_cm.myName, nodename) == 0 ) {
39905 + cm_thd_running = FALSE;
39906 + lg_core_logout (gulm_cm.hookup);
39912 +int gulm_core_statechange (void *misc, uint8_t corestate,
39913 + struct in6_addr *masterip, char *mastername)
39915 + int *cst = (int *)misc;
39916 + if( misc != NULL ) {
39917 + if( corestate != lg_core_Slave &&
39918 + corestate != lg_core_Master ) {
39930 +gulm_core_error (void *misc, uint32_t err)
39932 + log_err ("Got error code %d %#x back fome some reason!\n", err, err);
39936 +static lg_core_callbacks_t core_cb = {
39937 + login_reply:gulm_core_login_reply,
39938 + logout_reply:gulm_core_logout_reply,
39939 + nodechange:gulm_core_nodechange,
39940 + statechange:gulm_core_statechange,
39941 + error:gulm_core_error
39945 + * cm_io_recving_thread -
39952 +cm_io_recving_thread (void *data)
39956 + daemonize ("gulm_res_recvd");
39957 + cm_thd_task = current;
39958 + complete (&cm_thd_startup);
39960 + while (cm_thd_running) {
39961 + err = lg_core_handle_messages (gulm_cm.hookup, &core_cb, NULL);
39964 + ("Got an error in gulm_res_recvd err: %d\n", err);
39965 + if (!cm_thd_running)
39968 + * Pause a bit, then try to log back into the local
39969 + * lock_gulmd. Keep doing this until an outside force
39970 + * stops us. (which I don't think there is any at this
39971 + * point. forceunmount would be one, if we ever do
39974 + * If we are still in the gulm_mount() function, we
39975 + * should not retry. We should just exit.
39977 + current->state = TASK_INTERRUPTIBLE;
39978 + schedule_timeout (3 * HZ);
39981 + lg_core_login (gulm_cm.hookup, TRUE)) != 0) {
39983 + ("Got a %d trying to login to lock_gulmd. Is it running?\n",
39985 + current->state = TASK_INTERRUPTIBLE;
39986 + schedule_timeout (3 * HZ);
39989 + } /* while( gulm_cm.cm_thd_running ) */
39991 + complete (&cm_thd_startup);
40002 + if (cm_thd_running) {
40003 + cm_thd_running = FALSE;
40004 + lg_core_logout (gulm_cm.hookup);
40006 + /* wait for thread to finish */
40007 + wait_for_completion (&cm_thd_startup);
40023 + cm_thd_running = FALSE;
40024 + init_completion (&cm_thd_startup);
40026 + err = lg_core_login (gulm_cm.hookup, TRUE);
40029 + ("Got a %d trying to login to lock_gulmd. Is it running?\n",
40033 + /* handle login reply. which will start the lt thread. */
40034 + err = lg_core_handle_messages (gulm_cm.hookup, &core_cb, NULL);
40039 + /* do not pass go until Slave(client) or Master */
40041 + lg_core_corestate(gulm_cm.hookup);
40042 + err = lg_core_handle_messages (gulm_cm.hookup, &core_cb, &cst);
40047 + current->state = TASK_INTERRUPTIBLE;
40048 + schedule_timeout (3 * HZ);
40049 + /* if interrupted, exit */
40053 + /* start recver thread. */
40054 + cm_thd_running = TRUE;
40055 + err = kernel_thread (cm_io_recving_thread, NULL, 0);
40057 + log_err ("Failed to start gulm_res_recvd. (%d)\n", err);
40060 + wait_for_completion (&cm_thd_startup);
40066 +/* vim: set ai cin noet sw=8 ts=8 : */
40067 diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_fs.c linux-patched/fs/gfs_locking/lock_gulm/gulm_fs.c
40068 --- linux-orig/fs/gfs_locking/lock_gulm/gulm_fs.c 1969-12-31 18:00:00.000000000 -0600
40069 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm_fs.c 2004-06-16 12:03:21.957894998 -0500
40071 +/******************************************************************************
40072 +*******************************************************************************
40074 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
40075 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
40077 +** This copyrighted material is made available to anyone wishing to use,
40078 +** modify, copy, or redistribute it subject to the terms and conditions
40079 +** of the GNU General Public License v.2.
40081 +*******************************************************************************
40082 +******************************************************************************/
40086 +#include <linux/kernel.h>
40087 +#include <linux/fs.h>
40088 +#include <linux/slab.h>
40089 +#include <linux/file.h>
40090 +#define __KERNEL_SYSCALLS__
40091 +#include <linux/unistd.h>
40094 +#include "load_info.h"
40095 +#include "handler.h"
40096 +#include "gulm_procinfo.h"
40097 +#include "gulm_jid.h"
40099 +/* things about myself */
40100 +extern gulm_cm_t gulm_cm;
40102 +/* globals for this file.*/
40103 +uint32_t filesystems_count = 0;
40104 +LIST_HEAD (filesystems_list);
40105 +struct semaphore filesystem_lck; /* we use a sema instead of a spin here because
40106 + * all of the interruptible things we do inside
40108 + * If i stop doing nasty things within this it doesn't need
40111 +struct semaphore start_stop_lock;
40112 +atomic_t start_stop_cnt;
40118 +init_gulm_fs (void)
40120 + init_MUTEX (&filesystem_lck);
40121 + init_MUTEX (&start_stop_lock);
40122 + atomic_set (&start_stop_cnt, 0);
40125 +/*****************************************************************************/
40132 +request_journal_replay_per_fs (void *d)
40134 + struct rjrpf_s *rf = (struct rjrpf_s *) d;
40136 + unsigned int ujid;
40138 + /* lookup jid <=> name mapping */
40139 + if (find_jid_by_name_and_mark_replay (rf->fs, rf->name, &jid) != 0) {
40140 + log_msg (lgm_JIDMap,
40141 + "In fs (%s), no jid for name (%s) was found.\n",
40142 + rf->fs->fs_name, rf->name);
40144 + log_msg (lgm_JIDMap,
40145 + "In fs (%s), jid %d was found for name (%s).\n",
40146 + rf->fs->fs_name, jid, rf->name);
40148 + /* all that the replay journal call back into gfs does is malloc
40149 + * some memory and add it to a list. So we really don't need to
40150 + * queue that action. Since that is what gfs is doing.
40152 + * This will need to change if gfs changes.
40154 + * Basically, we assume that the callback is non-blocking.
40157 + rf->fs->cb (rf->fs->fsdata, LM_CB_NEED_RECOVERY, &ujid);
40160 + kfree (rf->name);
40166 + * request_journal_replay - give a journal replay request to mounted filesystems
40167 + * @name: < the name of the node that died.
40173 +request_journal_replay (uint8_t * name)
40175 + struct list_head *tmp;
40177 + struct rjrpf_s *rf;
40179 + log_msg (lgm_Always, "Checking for journals for node \"%s\"\n",
40182 + down (&filesystem_lck);
40184 + list_for_each (tmp, &filesystems_list) {
40185 + fs = list_entry (tmp, gulm_fs_t, fs_list);
40187 + /* we don't want to process replay requests when we are
40188 + * still in the first mounter state. All the journals are
40189 + * getting replayed anyways, and there could be some issue
40190 + * with stuff happening twice.
40192 + if (fs->firstmounting)
40195 + /* due to the way the new jid mapping code works, we had to
40196 + * move it out of here.
40199 + rf = kmalloc (sizeof (struct rjrpf_s), GFP_KERNEL);
40200 + GULM_ASSERT (rf != NULL,);
40203 + rf->name = kmalloc (strlen (name) + 1, GFP_KERNEL);
40204 + GULM_ASSERT (rf->name != NULL,);
40205 + memcpy (rf->name, name, strlen (name) + 1);
40207 + qu_function_call (&fs->cq, request_journal_replay_per_fs, rf);
40210 + up (&filesystem_lck);
40214 + * passup_droplocks -
40217 +passup_droplocks (void)
40219 + struct list_head *tmp;
40221 + down (&filesystem_lck);
40222 + list_for_each (tmp, &filesystems_list) {
40223 + fs = list_entry (tmp, gulm_fs_t, fs_list);
40224 + qu_drop_req (&fs->cq, fs->cb, fs->fsdata, LM_CB_DROPLOCKS, 0,
40226 + /* If this decides to block someday, we need to change this function.
40229 + up (&filesystem_lck);
40233 + * dump_internal_lists -
40237 +dump_internal_lists (void)
40239 + struct list_head *tmp;
40241 + down (&filesystem_lck);
40242 + list_for_each (tmp, &filesystems_list) {
40243 + fs = list_entry (tmp, gulm_fs_t, fs_list);
40244 + log_msg (lgm_Always, "Handler queue for %s\n", fs->fs_name);
40245 + display_handler_queue (&fs->cq);
40246 + /* other lists? */
40248 + up (&filesystem_lck);
40252 + * get_fs_by_name -
40256 + * Returns: gulm_fs_t
40259 +get_fs_by_name (uint8_t * name)
40261 + struct list_head *tmp;
40262 + gulm_fs_t *fs = NULL;
40263 + down (&filesystem_lck);
40264 + list_for_each (tmp, &filesystems_list) {
40265 + fs = list_entry (tmp, gulm_fs_t, fs_list);
40266 + if (strcmp (name, fs->fs_name) == 0) {
40267 + up (&filesystem_lck);
40271 + up (&filesystem_lck);
40275 +/*****************************************************************************/
40280 + * quick check to see if there was leaking
40281 + * should I panic on these? or just complain?
40286 +clear_locks (void)
40289 + lock_table_t *lt = &gulm_cm.ltpx;
40291 + for (i = 0; i < lt->hashbuckets; i++) {
40292 + struct list_head *lcktmp, *lckfoo;
40293 + spin_lock (<->hshlk[i]);
40294 + list_for_each_safe (lcktmp, lckfoo, <->lkhsh[i]) {
40295 + gulm_lock_t *lck = NULL;
40296 + lck = list_entry (lcktmp, gulm_lock_t, gl_list);
40297 + /* need to release it. umm, should any even exist? */
40298 + log_err ("AH! Rogue lock buffer! refcount:%d\n",
40299 + atomic_read (&lck->count));
40302 + log_err ("AH! Rogue lock buffer with LVB!\n");
40303 + kfree (lck->lvb);
40306 + list_del (lcktmp);
40310 + spin_unlock (<->hshlk[i]);
40312 + kfree (lt->hshlk);
40313 + lt->hshlk = NULL;
40314 + kfree (lt->lkhsh);
40315 + lt->lkhsh = NULL;
40318 +/*****************************************************************************/
40320 + * start_gulm_threads -
40327 +start_gulm_threads (char *csnm, char *host_data)
40331 + down (&start_stop_lock);
40332 + atomic_inc (&start_stop_cnt);
40333 + if (atomic_read (&start_stop_cnt) == 1) {
40334 + /* first one. get stuff going */
40335 + strncpy (gulm_cm.clusterID, csnm, 255);
40336 + gulm_cm.clusterID[255] = '\0';
40338 + error = lg_initialize (&gulm_cm.hookup, gulm_cm.clusterID,
40339 + "GFS Kernel Interface");
40340 + if (error != 0) {
40341 + log_err ("lg_initialize failed, %d\n", error);
40344 + gulm_cm.starts = TRUE;
40346 + error = load_info (host_data);
40347 + if (error != 0) {
40348 + log_err ("load_info failed. %d\n", error);
40354 + error = cm_login ();
40355 + if (error != 0) {
40356 + log_err ("cm_login failed. %d\n", error);
40360 + /* lt_login() is called after the success packet for cm_login()
40365 + up (&start_stop_lock);
40370 + * stop_gulm_threads -
40373 +stop_gulm_threads (void)
40375 + down (&start_stop_lock);
40376 + atomic_dec (&start_stop_cnt);
40377 + if (atomic_read (&start_stop_cnt) == 0) {
40378 + /* last one, put it all away. */
40382 + lg_release (gulm_cm.hookup);
40383 + gulm_cm.hookup = NULL;
40384 + gulm_cm.loaded = FALSE;
40385 + gulm_cm.GenerationID = 0;
40387 + up (&start_stop_lock);
40390 +/*****************************************************************************/
40394 + * @table_name: clusterID:FS_Name
40396 + * @cb: GFS callback function
40397 + * @fsdata: opaque GFS handle
40398 + * @lockstruct: the structure of crap to fill in
40400 + * Returns: 0 on success, -EXXX on failure
40403 +gulm_mount (char *table_name, char *host_data,
40404 + lm_callback_t cb, lm_fsdata_t * fsdata,
40405 + unsigned int min_lvb_size, struct lm_lockstruct *lockstruct)
40408 + char work[256], *tbln;
40411 + struct list_head *lltmp;
40413 + strncpy (work, table_name, 256);
40415 + tbln = strstr (work, ":");
40416 + if (tbln == NULL) {
40418 + ("Malformed table name. Couldn't find separator ':' between "
40419 + "clusterID and lockspace name.\n");
40425 + /* make sure that the cluster name exists. */
40426 + if (strlen (work) <= 0) {
40427 + log_err ("Cluster name \"%s\" is too short.\n", work);
40431 + if (strlen (work) > 16) {
40432 + log_err ("Cluster name \"%s\" is too long.\n", work);
40437 + /* the second one is an artifact of the way I use the name.
40438 + * A better fix to this will happen when I actually get dynamic key
40439 + * lengths working.
40441 + if (strlen (tbln) > MIN (GIO_NAME_LEN, (GIO_KEY_SIZE - 13))) {
40443 + ("Warning! lockspace name (%s) is longer than %d chars!\n",
40444 + tbln, MIN (GIO_NAME_LEN, (GIO_KEY_SIZE - 13)));
40448 + if (strlen (tbln) <= 0) {
40449 + log_err ("Table name \"%s\" is too short.\n", tbln);
40454 + /* Check to make sure this lock table isn't already being used */
40455 + down (&filesystem_lck);
40456 + list_for_each (lltmp, &filesystems_list) {
40457 + gulm = list_entry (lltmp, gulm_fs_t, fs_list);
40458 + if (!strncmp (gulm->fs_name, tbln, GIO_NAME_LEN)) {
40459 + log_err ("\"%s\" is already in use\n", tbln);
40461 + up (&filesystem_lck);
40465 + up (&filesystem_lck);
40467 + /* Set up our main structure */
40469 + gulm = kmalloc (sizeof (gulm_fs_t), GFP_KERNEL);
40471 + log_err ("out of memory\n");
40475 + memset (gulm, 0, sizeof (gulm_fs_t));
40477 + INIT_LIST_HEAD (&gulm->fs_list);
40479 + strncpy (gulm->fs_name, tbln, GIO_NAME_LEN);
40481 + gulm->fsdata = fsdata;
40482 + gulm->lvb_size = min_lvb_size;
40483 + init_completion (&gulm->sleep);
40484 + init_MUTEX (&gulm->get_lock);
40486 + if ((error = start_gulm_threads (work, host_data)) != 0) {
40487 + log_err ("Got a %d trying to start the threads.\n", error);
40488 + goto fail_free_gulm;
40492 + start_callback_qu (&gulm->cq, gulm_cm.handler_threads)) < 0) {
40493 + log_err ("fsid=%s: Failed to start the callback handler.\n",
40495 + goto fail_free_gulm;
40498 + /* the mount lock HAS to be the first thing done in the LTs for this fs. */
40499 + error = get_mount_lock (gulm, &first);
40500 + if (error != 0) {
40502 + ("fsid=%s: Error %d while trying to get the mount lock\n",
40503 + gulm->fs_name, error);
40504 + goto fail_callback;
40507 + jid_lockstate_reserve (gulm, first);
40508 + jid_fs_init (gulm);
40509 + get_journalID (gulm);
40511 + /* things act a bit different until the first mounter is finished.
40514 + gulm->firstmounting = TRUE;
40517 + down (&filesystem_lck);
40518 + list_add (&gulm->fs_list, &filesystems_list);
40519 + filesystems_count++;
40520 + up (&filesystem_lck);
40522 + log_msg (lgm_JIDMap, "fsid=%s: We will be using jid %d\n",
40523 + gulm->fs_name, gulm->fsJID);
40525 + if (add_to_proc (gulm) != 0) {
40526 + /* ignored for now */
40529 + lockstruct->ls_jid = gulm->fsJID;
40530 + lockstruct->ls_first = first;
40531 + lockstruct->ls_lvb_size = gulm->lvb_size;
40532 + lockstruct->ls_lockspace = gulm;
40533 + lockstruct->ls_ops = &gulm_ops;
40534 +#ifdef USE_SYNC_LOCKING
40535 + lockstruct->ls_flags = 0;
40537 + log_msg (lgm_Network2, "Done: %s, sync mode\n", table_name);
40539 + lockstruct->ls_flags = LM_LSFLAG_ASYNC;
40541 + log_msg (lgm_Network2, "Done: %s, async mode\n", table_name);
40544 + gulm_cm.starts = FALSE;
40548 + stop_callback_qu (&gulm->cq);
40552 + stop_gulm_threads ();
40556 + gulm_cm.starts = FALSE;
40557 + log_msg (lgm_Always, "fsid=%s: Exiting gulm_mount with errors %d\n",
40558 + table_name, error);
40563 + * gulm_others_may_mount
40564 + * @lockspace: handle to specific lock space
40566 + * GFS calls this function if it was the first mounter after it's done
40567 + * checking all the journals.
40571 +gulm_others_may_mount (lm_lockspace_t * lockspace)
40573 + gulm_fs_t *fs = (gulm_fs_t *) lockspace;
40575 + lock_table_t *lt = &gulm_cm.ltpx;
40577 + /* first send the drop all exp message.
40579 + err = send_drop_exp (fs, lt, NULL);
40582 + ("fsid=%s: Problems sending DropExp request to LTPX: %d\n",
40583 + fs->fs_name, err);
40585 + /* then move the FirstMountLock to shared so others can mount. */
40586 + err = downgrade_mount_lock (fs);
40589 + log_err ("fsid=%s: error sending Fs_FinMount_Req.(%d)\n",
40590 + fs->fs_name, err);
40593 + /* first mounter is all done. let the gulm_recovery_done function
40594 + * behave as normal now.
40596 + fs->firstmounting = FALSE;
40601 + * @lockspace: handle to specific lock space
40605 +gulm_unmount (lm_lockspace_t * lockspace)
40607 + gulm_fs_t *gulm_fs = (gulm_fs_t *) lockspace;
40609 + down (&filesystem_lck);
40610 + list_del (&gulm_fs->fs_list);
40611 + --filesystems_count;
40612 + up (&filesystem_lck);
40614 + /* close and release stuff */
40615 + drop_mount_lock (gulm_fs);
40616 + put_journalID (gulm_fs);
40617 + jid_fs_release (gulm_fs);
40618 + jid_lockstate_release (gulm_fs);
40620 + stop_callback_qu (&gulm_fs->cq);
40622 + remove_from_proc (gulm_fs);
40626 + stop_gulm_threads ();
40631 + * gulm_recovery_done -
40638 +gulm_recovery_done (lm_lockspace_t * lockspace, unsigned int jid,
40639 + unsigned int message)
40641 + gulm_fs_t *fs = (gulm_fs_t *) lockspace;
40643 + uint8_t name[256];
40645 + if (message != LM_RD_SUCCESS) {
40646 + /* Need to start thinking about how I want to use this... */
40650 + if (jid == fs->fsJID) { /* this may be drifting crud through. */
40651 + /* hey! its me! */
40652 + strncpy (name, gulm_cm.myName, 256);
40653 + } else if (lookup_name_by_jid (fs, jid, name) != 0) {
40654 + log_msg (lgm_JIDMap,
40655 + "fsid=%s: Could not find a client for jid %d\n",
40656 + fs->fs_name, jid);
40659 + if (strlen (name) == 0) {
40660 + log_msg (lgm_JIDMap, "fsid=%s: No one mapped to jid %d\n",
40661 + fs->fs_name, jid);
40664 + log_msg (lgm_JIDMap, "fsid=%s: Found %s for jid %d\n",
40665 + fs->fs_name, name, jid);
40667 + err = send_drop_exp (fs, &gulm_cm.ltpx, name);
40669 + if (jid != fs->fsJID) {
40670 + /* rather dumb to do this to ourselves right after we mount... */
40671 + log_msg (lgm_JIDMap,
40672 + "fsid=%s: Clearing JID %d for use by others\n",
40673 + fs->fs_name, jid);
40674 + release_JID (fs, jid, FALSE);
40677 + /* If someone died while replaying someoneelse's journal, there will be
40678 + * stale expired jids.
40680 + check_for_stale_expires (fs);
40683 +/* vim: set ai cin noet sw=8 ts=8 : */
40684 diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_jid.c linux-patched/fs/gfs_locking/lock_gulm/gulm_jid.c
40685 --- linux-orig/fs/gfs_locking/lock_gulm/gulm_jid.c 1969-12-31 18:00:00.000000000 -0600
40686 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm_jid.c 2004-06-16 12:03:21.957894998 -0500
40688 +/******************************************************************************
40689 +*******************************************************************************
40691 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
40692 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
40694 +** This copyrighted material is made available to anyone wishing to use,
40695 +** modify, copy, or redistribute it subject to the terms and conditions
40696 +** of the GNU General Public License v.2.
40698 +*******************************************************************************
40699 +******************************************************************************/
40703 +#include <linux/kernel.h>
40704 +#include <linux/fs.h>
40705 +#include <linux/slab.h>
40706 +#include <linux/file.h>
40707 +#define __KERNEL_SYSCALLS__
40708 +#include <linux/unistd.h>
40712 +extern gulm_cm_t gulm_cm;
40714 +/****************************************************************************/
40718 + * Header lock: "JHeader" + \0\0\0 + fsname
40719 + * lvb: <uint32> :number of JIDs
40720 + * Mappinglock: "JM" + <uint32> + \0\0\0\0 + fsname
40721 + * lvb: [012] + <node name>
40723 + * 1: replaying journal
40725 + * list lock : "JL" + "listlock" + fsname
40726 + * Node Locks : "JN" + <nodename[8]> + fsname
40729 +#define jid_header_lvb_size (8)
40731 +struct jid_lookup_item_s {
40732 + struct list_head jp_list;
40737 + struct completion waitforit;
40739 +typedef struct jid_lookup_item_s jid_lookup_item_t;
40741 +LIST_HEAD (jid_pending_locks);
40742 +spinlock_t jid_pending;
40743 +struct semaphore jid_listlock;
40751 + spin_lock_init (&jid_pending);
40752 + init_MUTEX (&jid_listlock);
40756 + * jid_get_header_name -
40761 + * key is buffer to write to, keylen is size of buffer on input, and real
40762 + * length on output.
40767 +jid_get_header_name (uint8_t * fsname, uint8_t * key, uint16_t * keylen)
40770 + len = strlen (fsname);
40771 + if ((len + 11) > *keylen)
40773 + memcpy (key, "JHeader\0\0\0", 10);
40774 + memcpy (&key[10], fsname, len + 1);
40775 + *keylen = len + 11;
40780 +jid_get_listlock_name (uint8_t * fsname, uint8_t * key, uint16_t * keylen)
40783 + len = strlen (fsname);
40784 + if ((len + 11) > *keylen)
40786 + memcpy (key, "JLlistlock", 10);
40787 + memcpy (&key[10], fsname, len + 1);
40788 + *keylen = len + 11;
40793 + * jid_get_lock_name -
40799 + * key is buffer to write to, keylen is size of buffer on input, and real
40800 + * length on output.
40805 +jid_get_lock_name (uint8_t * fsname, uint32_t jid, uint8_t * key,
40806 + uint16_t * keylen)
40809 + len = strlen (fsname);
40810 + if ((len + 11) > *keylen)
40814 + key[5] = (jid >> 24) & 0xff;
40815 + key[4] = (jid >> 16) & 0xff;
40816 + key[3] = (jid >> 8) & 0xff;
40817 + key[2] = (jid >> 0) & 0xff;
40822 + memcpy (&key[10], fsname, len + 1);
40823 + *keylen = len + 11;
40835 +jid_hold_lvb (uint8_t * key, uint16_t keylen)
40837 + jid_lookup_item_t jp;
40838 + GULM_ASSERT (keylen > 6,);
40840 + jp.keylen = keylen;
40843 + INIT_LIST_HEAD (&jp.jp_list);
40844 + init_completion (&jp.waitforit);
40846 + spin_lock (&jid_pending);
40847 + list_add (&jp.jp_list, &jid_pending_locks);
40848 + spin_unlock (&jid_pending);
40850 + lg_lock_action_req (gulm_cm.hookup, key, keylen, lg_lock_act_HoldLVB,
40853 + wait_for_completion (&jp.waitforit);
40857 +jid_unhold_lvb (uint8_t * key, uint16_t keylen)
40859 + jid_lookup_item_t jp;
40860 + GULM_ASSERT (keylen > 6,);
40862 + jp.keylen = keylen;
40865 + INIT_LIST_HEAD (&jp.jp_list);
40866 + init_completion (&jp.waitforit);
40868 + spin_lock (&jid_pending);
40869 + list_add (&jp.jp_list, &jid_pending_locks);
40870 + spin_unlock (&jid_pending);
40872 + lg_lock_action_req (gulm_cm.hookup, key, keylen, lg_lock_act_UnHoldLVB,
40875 + wait_for_completion (&jp.waitforit);
40879 +jid_sync_lvb (uint8_t * key, uint16_t keylen, uint8_t * lvb, uint16_t lvblen)
40881 + jid_lookup_item_t jp;
40882 + GULM_ASSERT (keylen > 6,);
40884 + jp.keylen = keylen;
40887 + INIT_LIST_HEAD (&jp.jp_list);
40888 + init_completion (&jp.waitforit);
40890 + spin_lock (&jid_pending);
40891 + list_add (&jp.jp_list, &jid_pending_locks);
40892 + spin_unlock (&jid_pending);
40894 + lg_lock_action_req (gulm_cm.hookup, key, keylen, lg_lock_act_SyncLVB,
40897 + wait_for_completion (&jp.waitforit);
40901 + * jid_action_reply -
40905 + * called from the lock handler callback.
40910 +jid_action_reply (uint8_t * key, uint16_t keylen)
40912 + struct list_head *tmp, *nxt;
40913 + jid_lookup_item_t *jp, *fnd = NULL;
40914 + spin_lock (&jid_pending);
40915 + list_for_each_safe (tmp, nxt, &jid_pending_locks) {
40916 + jp = list_entry (tmp, jid_lookup_item_t, jp_list);
40917 + if (memcmp (key, jp->key, MIN (keylen, jp->keylen)) == 0) {
40923 + spin_unlock (&jid_pending);
40926 + complete (&fnd->waitforit);
40930 + * jid_get_lock_state_inr -
40941 +jid_get_lock_state_inr (uint8_t * key, uint16_t keylen, uint8_t state,
40942 + uint32_t flags, uint8_t * lvb, uint16_t lvblen)
40944 + jid_lookup_item_t jp;
40945 + GULM_ASSERT (keylen > 6,);
40947 + jp.keylen = keylen;
40949 + jp.lvblen = lvblen;
40950 + INIT_LIST_HEAD (&jp.jp_list);
40951 + init_completion (&jp.waitforit);
40953 + spin_lock (&jid_pending);
40954 + list_add (&jp.jp_list, &jid_pending_locks);
40955 + spin_unlock (&jid_pending);
40957 + lg_lock_state_req (gulm_cm.hookup, key, keylen, state, flags, lvb, lvblen);
40959 + wait_for_completion (&jp.waitforit);
40963 + * jid_get_lock_state_lvb -
40973 +jid_get_lock_state_lvb (uint8_t * key, uint16_t keylen, uint8_t state,
40974 + uint8_t * lvb, uint16_t lvblen)
40976 + jid_get_lock_state_inr (key, keylen, state, 0, lvb, lvblen);
40979 + * jid_get_lock_state -
40987 +jid_get_lock_state (uint8_t * key, uint16_t keylen, uint8_t state)
40989 + jid_get_lock_state_inr (key, keylen, state, 0, NULL, 0);
40993 + * jid_state_reply -
41002 +jid_state_reply (uint8_t * key, uint16_t keylen, uint8_t * lvb, uint16_t lvblen)
41004 + struct list_head *tmp, *nxt;
41005 + jid_lookup_item_t *jp, *fnd = NULL;
41006 + spin_lock (&jid_pending);
41007 + list_for_each_safe (tmp, nxt, &jid_pending_locks) {
41008 + jp = list_entry (tmp, jid_lookup_item_t, jp_list);
41009 + if (memcmp (key, jp->key, MIN (keylen, jp->keylen)) == 0) {
41015 + spin_unlock (&jid_pending);
41017 + if (fnd != NULL) {
41018 + if (lvb != NULL && fnd->lvb != NULL)
41019 + memcpy (fnd->lvb, lvb, MIN (fnd->lvblen, lvblen));
41020 + complete (&fnd->waitforit);
41024 +/****************************************************************************/
41027 + * jid_hold_list_lock -
41030 + * only make one call to this per node.
41035 +jid_hold_list_lock (gulm_fs_t * fs)
41037 + uint8_t key[GIO_KEY_SIZE];
41040 + down (&jid_listlock);
41042 + keylen = sizeof (key);
41043 + jid_get_listlock_name (fs->fs_name, key, &keylen);
41044 + jid_get_lock_state (key, keylen, lg_lock_state_Exclusive);
41049 + * jid_release_list_lock -
41056 +jid_release_list_lock (gulm_fs_t * fs)
41058 + uint8_t key[GIO_KEY_SIZE];
41061 + keylen = sizeof (key);
41062 + jid_get_listlock_name (fs->fs_name, key, &keylen);
41063 + jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
41065 + up (&jid_listlock);
41069 + * jid_rehold_lvbs -
41075 +jid_rehold_lvbs (gulm_fs_t * fs)
41078 + uint32_t oldjcnt;
41079 + uint8_t key[GIO_KEY_SIZE], lvb[jid_header_lvb_size];
41080 + uint16_t keylen = GIO_KEY_SIZE;
41082 + oldjcnt = fs->JIDcount;
41084 + jid_get_header_name (fs->fs_name, key, &keylen);
41085 + jid_get_lock_state_lvb (key, keylen, lg_lock_state_Shared, lvb,
41086 + jid_header_lvb_size);
41087 + fs->JIDcount = (uint32_t) (lvb[0]) << 0;
41088 + fs->JIDcount |= (uint32_t) (lvb[1]) << 8;
41089 + fs->JIDcount |= (uint32_t) (lvb[2]) << 16;
41090 + fs->JIDcount |= (uint32_t) (lvb[3]) << 24;
41092 + for (i = oldjcnt; i < fs->JIDcount; i++) {
41093 + keylen = sizeof (key);
41094 + jid_get_lock_name (fs->fs_name, i, key, &keylen);
41095 + jid_hold_lvb (key, keylen);
41101 +jid_grow_space (gulm_fs_t * fs)
41103 + uint8_t key[GIO_KEY_SIZE], lvb[jid_header_lvb_size];
41104 + uint16_t keylen = GIO_KEY_SIZE;
41107 + keylen = sizeof (key);
41108 + jid_get_header_name (fs->fs_name, key, &keylen);
41109 + jid_get_lock_state_lvb (key, keylen, lg_lock_state_Exclusive, lvb,
41110 + jid_header_lvb_size);
41111 + jidc = (uint32_t) (lvb[0]) << 0;
41112 + jidc |= (uint32_t) (lvb[1]) << 8;
41113 + jidc |= (uint32_t) (lvb[2]) << 16;
41114 + jidc |= (uint32_t) (lvb[3]) << 24;
41116 + lvb[3] = (jidc >> 24) & 0xff;
41117 + lvb[2] = (jidc >> 16) & 0xff;
41118 + lvb[1] = (jidc >> 8) & 0xff;
41119 + lvb[0] = (jidc >> 0) & 0xff;
41120 + jid_sync_lvb (key, keylen, lvb, jid_header_lvb_size);
41121 + jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
41122 + /* do an unlock here, so that when rehold grabs it shared, there is no
41126 + jid_rehold_lvbs (fs);
41130 + * lookup_name_by_jid -
41139 +lookup_name_by_jid (gulm_fs_t * fs, uint32_t jid, uint8_t * name)
41141 + uint8_t key[GIO_KEY_SIZE], lvb[64];
41142 + uint16_t keylen = 64;
41145 + if (jid >= fs->JIDcount) {
41150 + jid_hold_list_lock (fs);
41152 + jid_get_lock_name (fs->fs_name, jid, key, &keylen);
41153 + jid_get_lock_state_lvb (key, keylen, lg_lock_state_Shared, lvb, 64);
41155 + if (lvb[0] != 0) {
41156 + memcpy (name, &lvb[1], strlen (&lvb[1]) + 1);
41161 + jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
41163 + jid_release_list_lock (fs);
41174 + * actually may only need to set first byte to zero
41179 +release_JID (gulm_fs_t * fs, uint32_t jid, int nop)
41181 + uint8_t key[GIO_KEY_SIZE], lvb[64];
41182 + uint16_t keylen = 64;
41184 + /* there is no such, so this becomes a nop. */
41185 + if (jid >= fs->JIDcount)
41188 + jid_hold_list_lock (fs);
41190 + jid_get_lock_name (fs->fs_name, jid, key, &keylen);
41191 + jid_get_lock_state_lvb (key, keylen, lg_lock_state_Exclusive, lvb, 64);
41193 + jid_sync_lvb (key, keylen, lvb, strlen (&lvb[1]) + 2);
41194 + jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
41196 + jid_release_list_lock (fs);
41203 +put_journalID (gulm_fs_t * fs)
41205 + release_JID (fs, fs->fsJID, TRUE);
41209 + * get_journalID -
41213 + * This is broken.
41218 +get_journalID (gulm_fs_t * fs)
41221 + uint8_t key[GIO_KEY_SIZE], lvb[64];
41223 + int first_clear = -1;
41226 + jid_hold_list_lock (fs);
41228 + /* find an empty space, or ourselves again */
41229 + for (i = 0; i < fs->JIDcount; i++) {
41230 + keylen = sizeof (key);
41231 + jid_get_lock_name (fs->fs_name, i, key, &keylen);
41232 + jid_get_lock_state_lvb (key, keylen, lg_lock_state_Exclusive,
41234 + jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
41235 + if (first_clear == -1 && lvb[0] == 0 ) {
41237 + } else if (strcmp (gulm_cm.myName, &lvb[1]) == 0) {
41242 + if (first_clear >= 0) {
41243 + /* take the jid we have found */
41244 + keylen = sizeof (key);
41245 + jid_get_lock_name (fs->fs_name, first_clear, key, &keylen);
41246 + jid_get_lock_state_lvb (key, keylen, lg_lock_state_Exclusive,
41249 + memcpy (&lvb[1], gulm_cm.myName, strlen (gulm_cm.myName) + 1);
41250 + jid_sync_lvb (key, keylen, lvb, strlen (gulm_cm.myName) + 2);
41251 + jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
41253 + fs->fsJID = first_clear;
41256 + /* unlock the header lock */
41257 + jid_release_list_lock (fs);
41259 + if (first_clear < 0) {
41260 + /* nothing found, grow and try again. */
41261 + jid_grow_space (fs);
41268 + * find_jid_by_name_and_mark_replay -
41277 +find_jid_by_name_and_mark_replay (gulm_fs_t * fs, uint8_t * name,
41280 + uint32_t i, found = -1;
41281 + uint8_t key[GIO_KEY_SIZE], lvb[64];
41284 + /* grab list lock */
41285 + jid_hold_list_lock (fs);
41287 + for (i = 0; i < fs->JIDcount; i++) {
41288 + keylen = sizeof (key);
41289 + jid_get_lock_name (fs->fs_name, i, key, &keylen);
41290 + jid_get_lock_state_lvb (key, keylen, lg_lock_state_Exclusive,
41292 + if (strcmp (name, &lvb[1]) == 0) {
41296 + jid_sync_lvb (key, keylen, lvb, strlen (&lvb[1]) + 2);
41297 + jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
41300 + jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
41303 + /* unlock the list lock */
41304 + jid_release_list_lock (fs);
41310 + * check_for_stale_expires -
41317 +check_for_stale_expires (gulm_fs_t * fs)
41320 + uint8_t key[GIO_KEY_SIZE], lvb[64];
41322 + unsigned int ujid;
41324 + /* grab list lock */
41325 + jid_hold_list_lock (fs);
41327 + for (i = 0; i < fs->JIDcount; i++) {
41328 + keylen = sizeof (key);
41329 + jid_get_lock_name (fs->fs_name, i, key, &keylen);
41330 + jid_get_lock_state_lvb (key, keylen, lg_lock_state_Shared, lvb,
41332 + jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
41334 + if (lvb[0] == 1) {
41335 + log_msg (lgm_JIDMap,
41336 + "fsid=%s: stale JID %d found\n",
41339 + fs->cb (fs->fsdata, LM_CB_NEED_RECOVERY, &ujid);
41343 + /* unlock the list lock */
41344 + jid_release_list_lock (fs);
41353 +jid_fs_init (gulm_fs_t * fs)
41355 + uint8_t key[GIO_KEY_SIZE];
41356 + uint16_t keylen = GIO_KEY_SIZE;
41358 + fs->JIDcount = 0;
41360 + jid_get_header_name (fs->fs_name, key, &keylen);
41361 + jid_hold_lvb (key, keylen);
41362 + jid_rehold_lvbs (fs);
41366 + * jid_fs_release -
41371 +jid_fs_release (gulm_fs_t * fs)
41374 + uint8_t key[GIO_KEY_SIZE];
41376 + for (i = 0; i < fs->JIDcount; i++) {
41377 + keylen = sizeof (key);
41378 + jid_get_lock_name (fs->fs_name, i, key, &keylen);
41379 + jid_unhold_lvb (key, keylen);
41381 + keylen = sizeof (key);
41382 + jid_get_header_name (fs->fs_name, key, &keylen);
41383 + jid_unhold_lvb (key, keylen);
41384 + jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
41388 + * jid_unlock_callback -
41391 + * *MUST* be called from a Handler thread.
41396 +jid_unlock_callback (void *d)
41398 + gulm_fs_t *fs = (gulm_fs_t *) d;
41399 + jid_rehold_lvbs (fs);
41403 + * jid_header_lock_drop -
41410 +jid_header_lock_drop (uint8_t * key, uint16_t keylen)
41413 + /* make sure this is the header lock.... */
41414 + if (key[1] == 'H' && (fs = get_fs_by_name (&key[10])) != NULL) {
41415 + qu_function_call (&fs->cq, jid_unlock_callback, fs);
41419 +/****************************************************************************/
41421 + * jid_get_lsresv_name -
41430 +jid_get_lsresv_name (char *fsname, uint8_t * key, uint16_t * keylen)
41436 + len = strlen (gulm_cm.myName) + 1;
41437 + memset (&key[2], 0, 8);
41438 + memcpy ((&key[2]), gulm_cm.myName, MIN (len, 8));
41439 + /* fsname starts at byte 10 so the dropexp pattern will find it. */
41440 + memcpy ((&key[10]), fsname, strlen (fsname) + 1);
41442 + *keylen = 10 + strlen (fsname) + 1;
41448 + * jid_lockstate_reserve -
41455 +jid_lockstate_reserve (gulm_fs_t * fs, int first)
41457 + uint8_t key[GIO_KEY_SIZE];
41460 + jid_get_lsresv_name (fs->fs_name, key, &keylen);
41462 + /* if we are expired, this will block until someone else has cleaned our
41465 + * We may very well need to put in some kind of timeout, otherwise this
41466 + * may do a forever lockup much like the FirstMounter lock had.
41468 + jid_get_lock_state_inr (key, keylen, lg_lock_state_Exclusive,
41469 + first?lg_lock_flag_IgnoreExp:0, NULL, 0);
41474 + * jid_lockstate_release -
41481 +jid_lockstate_release (gulm_fs_t * fs)
41483 + uint8_t key[GIO_KEY_SIZE];
41486 + jid_get_lsresv_name (fs->fs_name, key, &keylen);
41488 + jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
41493 +/* vim: set ai cin noet sw=8 ts=8 : */
41494 diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_jid.h linux-patched/fs/gfs_locking/lock_gulm/gulm_jid.h
41495 --- linux-orig/fs/gfs_locking/lock_gulm/gulm_jid.h 1969-12-31 18:00:00.000000000 -0600
41496 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm_jid.h 2004-06-16 12:03:21.957894998 -0500
41498 +/******************************************************************************
41499 +*******************************************************************************
41501 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
41502 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
41504 +** This copyrighted material is made available to anyone wishing to use,
41505 +** modify, copy, or redistribute it subject to the terms and conditions
41506 +** of the GNU General Public License v.2.
41508 +*******************************************************************************
41509 +******************************************************************************/
41511 +#ifndef __GULM_JID_H__
41512 +#define __GULM_JID_H__
41514 +void jid_init (void);
41515 +void jid_fs_init (gulm_fs_t * fs);
41516 +void jid_fs_release (gulm_fs_t * fs);
41517 +int get_journalID (gulm_fs_t * fs);
41518 +int lookup_jid_by_name (gulm_fs_t * fs, uint8_t * name, uint32_t * injid);
41519 +int lookup_name_by_jid (gulm_fs_t * fs, uint32_t jid, uint8_t * name);
41520 +void release_JID (gulm_fs_t * fs, uint32_t jid, int owner);
41521 +void put_journalID (gulm_fs_t * fs);
41522 +void check_for_stale_expires (gulm_fs_t * fs);
41525 + find_jid_by_name_and_mark_replay (gulm_fs_t * fs, uint8_t * name, uint32_t * jid);
41527 +void jid_start_journal_reply (gulm_fs_t * fs, uint32_t jid);
41528 +void jid_finish_journal_reply (gulm_fs_t * fs, uint32_t jid);
41530 +void jid_lockstate_reserve (gulm_fs_t * fs, int first);
41531 +void jid_lockstate_release (gulm_fs_t * fs);
41533 +/* to be called from the lg_lock callbacks. */
41534 +void jid_state_reply (uint8_t * key, uint16_t keylen, uint8_t * lvb,
41535 + uint16_t lvblen);
41536 +void jid_action_reply (uint8_t * key, uint16_t keylen);
41537 +void jid_header_lock_drop (uint8_t * key, uint16_t keylen);
41538 +#endif /*__GULM_JID_H__*/
41539 diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_log_msg_bits.h linux-patched/fs/gfs_locking/lock_gulm/gulm_log_msg_bits.h
41540 --- linux-orig/fs/gfs_locking/lock_gulm/gulm_log_msg_bits.h 1969-12-31 18:00:00.000000000 -0600
41541 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm_log_msg_bits.h 2004-06-16 12:03:21.957894998 -0500
41543 +/******************************************************************************
41544 +*******************************************************************************
41546 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
41547 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
41549 +** This copyrighted material is made available to anyone wishing to use,
41550 +** modify, copy, or redistribute it subject to the terms and conditions
41551 +** of the GNU General Public License v.2.
41553 +*******************************************************************************
41554 +******************************************************************************/
41556 +#ifndef __gulm_log_msg_bits_h__
41557 +#define __gulm_log_msg_bits_h__
41558 +/* log_msg bit flags
41559 + * These got their own file so I can easily include them in both user and
41562 +#define lgm_Always (0x00000000) /*Print Message no matter what */
41563 +#define lgm_Network (0x00000001)
41564 +#define lgm_Network2 (0x00000002)
41565 +#define lgm_Stomith (0x00000004)
41566 +#define lgm_Heartbeat (0x00000008)
41567 +#define lgm_locking (0x00000010)
41568 +#define lgm_FuncDebug (0x00000020)
41569 +#define lgm_Forking (0x00000040)
41570 +#define lgm_JIDMap (0x00000080)
41571 +#define lgm_Subscribers (0x00000100)
41572 +#define lgm_LockUpdates (0x00000200)
41573 +#define lgm_LoginLoops (0x00000400)
41574 +#define lgm_Network3 (0x00000800)
41575 +#define lgm_JIDUpdates (0x00001000)
41576 +#define lgm_ServerState (0x00002000)
41578 +#define lgm_ReallyAll (0xffffffff)
41580 +#define lgm_BitFieldSize (32)
41582 +#endif /*__gulm_log_msg_bits_h__*/
41583 diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_lt.c linux-patched/fs/gfs_locking/lock_gulm/gulm_lt.c
41584 --- linux-orig/fs/gfs_locking/lock_gulm/gulm_lt.c 1969-12-31 18:00:00.000000000 -0600
41585 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm_lt.c 2004-06-16 12:03:21.957894998 -0500
41587 +/******************************************************************************
41588 +*******************************************************************************
41590 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
41591 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
41593 +** This copyrighted material is made available to anyone wishing to use,
41594 +** modify, copy, or redistribute it subject to the terms and conditions
41595 +** of the GNU General Public License v.2.
41597 +*******************************************************************************
41598 +******************************************************************************/
41602 +#include <linux/kernel.h>
41603 +#include <linux/fs.h>
41604 +#include <linux/slab.h>
41605 +#include <linux/file.h>
41606 +#define __KERNEL_SYSCALLS__
41607 +#include <linux/unistd.h>
41610 +#include "handler.h"
41611 +#include "utils_tostr.h"
41612 +#include "gulm_jid.h"
41614 +extern gulm_cm_t gulm_cm;
41616 +/****************************************************************************/
41617 +/* A bunch of prints that hopefully contain more information that is also
41620 + * these are a mess.
41624 + * lck_key_to_hex -
41627 + * @workspace: <> place to put string. !! better be 2x len !!
41633 +lck_key_to_hex (uint8_t * key, uint16_t len, char *workspace)
41636 + for (i = 0; i < len; i++)
41637 + sprintf (&workspace[i * 2], "%02x", (key[i] & 0xff));
41638 + return workspace;
41641 +static void __inline__
41642 +db_lck_entered (gulm_lock_t * lck)
41644 + char bb[GIO_KEY_SIZE * 2 + 3];
41645 + lck_key_to_hex (lck->key, lck->keylen, bb);
41646 + printk ("Started lock 0x%s cur:%#x req:%#x flags:%#x\n", bb,
41647 + lck->cur_state, lck->req_state, lck->flags);
41649 +static void __inline__
41650 +db_lck_exited (gulm_lock_t * lck)
41652 + char bb[GIO_KEY_SIZE * 2 + 3];
41653 + lck_key_to_hex (lck->key, lck->keylen, bb);
41654 + printk ("Finished lock 0x%s result:%#x\n", bb, lck->result);
41657 +static void __inline__
41658 +dump_gulm_lock_t (gulm_lock_t * lck)
41660 + char bb[GIO_KEY_SIZE * 2 + 3];
41662 + lck_key_to_hex (lck->key, lck->keylen, bb);
41663 + log_msg (lgm_Always, " key = 0x%s\n", bb);
41664 + log_msg (lgm_Always, " req_type = %#x\n", lck->req_type);
41665 + log_msg (lgm_Always, " last_suc_state = %#x\n", lck->last_suc_state);
41666 + log_msg (lgm_Always, " actuallypending = %d\n", lck->actuallypending);
41667 + log_msg (lgm_Always, " in_to_be_sent = %d\n", lck->in_to_be_sent);
41668 + log_msg (lgm_Always, " cur_state = %d\n", lck->cur_state);
41669 + log_msg (lgm_Always, " req_state = %d\n", lck->req_state);
41670 + log_msg (lgm_Always, " flags = %#x\n", lck->flags);
41671 + log_msg (lgm_Always, " action = %d\n", lck->action);
41672 + log_msg (lgm_Always, " result = %d\n", lck->result);
41675 +/* DEBUG_BY_LOCK is gone. I may later add something back if needed.
41677 + * I love the idea of being able to log only certain locks, I just cannot
41678 + * think of an easy way to do it. The best I can come up with is some
41679 + * pattern (or set of) that are used to decide which locks get logged. But
41680 + * that could be expensive if the pattern is checked every time, and won't
41681 + * behave as expected if only applied in get_lock.
41684 +/* The old log functions.
41685 + * These need their own sort of clean up someday as well.
41687 +#define log_msg_lk(key, keylen, fmt, args...) {\
41688 + uint8_t bb[GIO_KEY_SIZE*2 +3]; \
41689 + lck_key_to_hex( key, keylen, bb); \
41690 + printk(PROTO_NAME ": On lock 0x%s " fmt , bb , ## args ); \
41693 +#define log_err_lk(key, keylen, fmt, args...) {\
41694 + uint8_t bb[GIO_KEY_SIZE*2 +3]; \
41695 + lck_key_to_hex( key, keylen, bb); \
41696 + printk(KERN_ERR PROTO_NAME ": ERROR On lock 0x%s " fmt , bb , ## args ); \
41699 +#define log_msg_lck(lck, fmt, args...) {\
41700 + uint8_t bb[GIO_KEY_SIZE*2 +3]; \
41701 + lck_key_to_hex( (lck)->key, (lck)->keylen, bb); \
41702 + printk(PROTO_NAME ": On lock 0x%s " fmt , bb , ## args ); \
41705 +#define log_err_lck(lck, fmt, args...) {\
41706 + uint8_t bb[GIO_KEY_SIZE*2 +3]; \
41707 + lck_key_to_hex( (lck)->key, (lck)->keylen, bb); \
41708 + printk(KERN_ERR PROTO_NAME ": ERROR On lock 0x%s " fmt , bb , ## args ); \
41712 +static void __inline__
41713 +print_lk_lvb (uint8_t * key, uint8_t * lvb, uint8_t st, uint8_t * dir)
41715 + uint8_t bk[GIO_KEY_SIZE * 2 + 3];
41716 + uint8_t bl[GIO_LVB_SIZE * 2 + 3];
41718 + for (i = 0; i < GIO_KEY_SIZE; i++)
41719 + sprintf (&bk[(i * 2)], "%02x", (key[i]) & 0xff);
41720 + for (i = 0; i < GIO_LVB_SIZE; i++)
41721 + sprintf (&bl[(i * 2)], "%02x", (lvb[i]) & 0xff);
41722 + printk (PROTO_NAME ": On lock 0x%s with state %d\n\t%s LVB 0x%s\n",
41723 + bk, st, dir, bl);
41726 +#define lvb_log_msg_lk(k, fmt, args...) log_msg_lk( k , fmt , ## args )
41727 +#define lvb_log_msg(fmt, args...) log_msg(lgm_Always , fmt , ## args )
41728 +#else /*DEBUG_LVB */
41729 +#define print_lk_lvb(k,l,s,d)
41730 +#define lvb_log_msg_lk(k, fmt, args...)
41731 +#define lvb_log_msg(fmt, args...)
41732 +#endif /*DEBUG_LVB */
41734 +/****************************************************************************/
41736 + * find_and_mark_lock -
41741 + * looks for a lock struct of key. If found, marks it.
41743 + * Returns: TRUE or FALSE
41746 +find_and_mark_lock (uint8_t * key, uint8_t keylen, gulm_lock_t ** lockp)
41748 + int found = FALSE;
41750 + gulm_lock_t *lck = NULL;
41751 + struct list_head *tmp;
41753 + /* now find the lock */
41754 + bkt = hash_lock_key (key, keylen);
41755 + bkt %= gulm_cm.ltpx.hashbuckets;
41757 + spin_lock (&gulm_cm.ltpx.hshlk[bkt]);
41758 + list_for_each (tmp, &gulm_cm.ltpx.lkhsh[bkt]) {
41759 + lck = list_entry (tmp, gulm_lock_t, gl_list);
41760 + if (memcmp (lck->key, key, keylen) == 0) {
41762 + atomic_inc (&lck->count);
41766 + spin_unlock (&gulm_cm.ltpx.hshlk[bkt]);
41778 + * like above, but since we have the lock, don't search for it.
41783 +mark_lock (gulm_lock_t * lck)
41785 + atomic_inc (&lck->count);
41789 + * unmark_and_release_lock -
41792 + * decrement the counter on a lock, freeing it if it reaches 0.
41793 + * (also removes it from the hash table)
41795 + * TRUE if lock was freed.
41797 + * Returns: TRUE or FALSE
41800 +unmark_and_release_lock (gulm_lock_t * lck)
41803 + int deld = FALSE;
41805 + bkt = hash_lock_key (lck->key, lck->keylen);
41806 + bkt %= gulm_cm.ltpx.hashbuckets;
41807 + spin_lock (&gulm_cm.ltpx.hshlk[bkt]);
41808 + if (atomic_dec_and_test (&lck->count)) {
41809 + list_del (&lck->gl_list);
41812 + spin_unlock (&gulm_cm.ltpx.hshlk[bkt]);
41814 + gulm_cm.ltpx.locks_total--;
41815 + gulm_cm.ltpx.locks_unl--;
41816 + if (lck->lvb != NULL) {
41817 + kfree (lck->lvb);
41825 +/****************************************************************************/
41828 +gulm_key_to_lm_lockname (uint8_t * key, struct lm_lockname *lockname)
41830 + (*lockname).ln_number = (u64) (key[9]) << 0;
41831 + (*lockname).ln_number |= (u64) (key[8]) << 8;
41832 + (*lockname).ln_number |= (u64) (key[7]) << 16;
41833 + (*lockname).ln_number |= (u64) (key[6]) << 24;
41834 + (*lockname).ln_number |= (u64) (key[5]) << 32;
41835 + (*lockname).ln_number |= (u64) (key[4]) << 40;
41836 + (*lockname).ln_number |= (u64) (key[3]) << 48;
41837 + (*lockname).ln_number |= (u64) (key[2]) << 56;
41838 + (*lockname).ln_type = key[1];
41842 +do_drop_lock_req (gulm_fs_t * fs, uint8_t state, uint8_t key[GIO_KEY_SIZE])
41844 + unsigned int type;
41845 + struct lm_lockname lockname;
41846 + /* i might want to shove most of this function into the new lockcallback
41851 + /* don't do callbacks on the gulm mount lock.
41852 + * I need to someday come up with a cleaner way of separating the
41853 + * firstmounter lock and the rest of gfs's locks.
41854 + * i duno, this first byte is pretty clean.
41856 + if (key[0] != 'G') {
41861 + case lg_lock_state_Unlock:
41862 + type = LM_CB_DROPLOCKS;
41864 + case lg_lock_state_Exclusive:
41865 + type = LM_CB_NEED_E;
41867 + case lg_lock_state_Shared:
41868 + type = LM_CB_NEED_S;
41870 + case lg_lock_state_Deferred:
41871 + type = LM_CB_NEED_D;
41874 + type = LM_CB_DROPLOCKS;
41877 + gulm_key_to_lm_lockname (key, &lockname);
41879 + qu_drop_req (&fs->cq, fs->cb, fs->fsdata, type,
41880 + lockname.ln_type, lockname.ln_number);
41884 + * send_async_reply -
41891 +send_async_reply (gulm_lock_t * lck)
41893 + gulm_fs_t *fs = lck->fs;
41894 + struct lm_lockname lockname;
41896 + if (lck->key[0] == 'F') {
41897 + /* whee! it is the first mounter lock. two things:
41898 + * A: gfs could care less about this.
41899 + * B: we need to up the sleeper in the fs. (hack)
41901 + complete (&fs->sleep);
41905 + gulm_key_to_lm_lockname (lck->key, &lockname);
41907 + qu_async_rpl (&fs->cq, fs->cb, fs->fsdata, &lockname, lck->result);
41911 + * send_drop_exp_inter -
41919 +send_drop_exp_inter (gulm_fs_t * fs, lock_table_t * lt, char *name)
41922 + uint8_t mask[GIO_KEY_SIZE];
41924 + memset (mask, 0, GIO_KEY_SIZE);
41925 + /* pack key mask */
41926 + mask[0] = 0xff; /* minor lock type. 'G', 'F', 'J'. */
41927 + mask[1] = 0xff; /* GFS lock type. */
41928 + mask[2] = 0xff; /* next 8 are lock number */
41936 + /* Now stick the fsname into the remaining space. */
41937 + len = strlen (fs->fs_name);
41938 + strncpy (&mask[10], fs->fs_name, GIO_KEY_SIZE - 16);
41939 + len += 11; /* 10 for the encoded buf, 1 for the '\0' after the fs name */
41941 + err = lg_lock_drop_exp (gulm_cm.hookup, name, mask, len);
41947 + * send_lock_action -
41954 +send_lock_action (gulm_lock_t * lck, uint8_t action)
41958 + GULM_ASSERT (lck->req_type == glck_action, dump_gulm_lock_t (lck););
41960 + err = lg_lock_action_req (gulm_cm.hookup, lck->key, lck->keylen, action,
41961 + lck->lvb, lck->fs->lvb_size);
41963 + log_err ("Issues sending action request. %d\n", err);
41969 + * send_lock_req -
41976 +send_lock_req (gulm_lock_t * lck)
41978 + gulm_fs_t *fs = lck->fs;
41980 + uint32_t flags = 0;
41983 + GULM_ASSERT (lck->req_type == glck_state, dump_gulm_lock_t (lck););
41985 + switch (lck->req_state) {
41986 + case LM_ST_EXCLUSIVE:
41987 + state = lg_lock_state_Exclusive;
41989 + case LM_ST_DEFERRED:
41990 + state = lg_lock_state_Deferred;
41992 + case LM_ST_SHARED:
41993 + state = lg_lock_state_Shared;
41995 + case LM_ST_UNLOCKED:
41996 + state = lg_lock_state_Unlock;
41999 + GULM_ASSERT (0, log_err ("fsid=%s: Anit no lock state %d.\n",
42000 + fs->fs_name, lck->req_state););
42003 + if (lck->flags & LM_FLAG_TRY) {
42004 + flags |= lg_lock_flag_Try;
42006 + if (lck->flags & LM_FLAG_TRY_1CB) {
42007 + flags |= lg_lock_flag_Try | lg_lock_flag_DoCB;
42009 + if (lck->flags & LM_FLAG_NOEXP) {
42010 + flags |= lg_lock_flag_IgnoreExp;
42012 + if (lck->flags & LM_FLAG_ANY) {
42013 + flags |= lg_lock_flag_Any;
42015 + if (lck->flags & LM_FLAG_PRIORITY) {
42016 + flags |= lg_lock_flag_Piority;
42018 + if (lck->lvb != NULL) {
42019 + print_lk_lvb (lck->key, lck->lvb, lck->req_state, "Sending");
42022 + err = lg_lock_state_req (gulm_cm.hookup, lck->key, lck->keylen,
42023 + state, flags, lck->lvb, lck->fs->lvb_size);
42025 + log_err ("Issues sending state request. %d\n", err);
42031 + * toggle_lock_counters -
42033 + * called after a succesful request to change lock state. Decrements
42034 + * counts for what the lock was, and increments for what it is now.
42037 +toggle_lock_counters (lock_table_t * lt, int old, int new)
42039 + /* what we had it in */
42041 + case LM_ST_EXCLUSIVE:
42044 + case LM_ST_DEFERRED:
42047 + case LM_ST_SHARED:
42050 + case LM_ST_UNLOCKED:
42054 + /* what we have it in */
42056 + case LM_ST_EXCLUSIVE:
42059 + case LM_ST_DEFERRED:
42062 + case LM_ST_SHARED:
42065 + case LM_ST_UNLOCKED:
42072 + * calc_lock_result -
42078 + * This calculates the correct result to return for gfs lock requests.
42083 +calc_lock_result (gulm_lock_t * lck,
42084 + uint8_t state, uint32_t error, uint32_t flags)
42086 + gulm_fs_t *fs = lck->fs;
42087 + lock_table_t *lt = &gulm_cm.ltpx;
42088 + int result = -69;
42090 + /* adjust result based on success status. */
42093 + /* set result to current lock state. */
42094 + if (!(lck->flags & LM_FLAG_ANY)) {
42095 + /* simple case, we got what we asked for. */
42096 + result = lck->req_state;
42098 + /* complex case, we got something else, but we said that was ok */
42100 + case lg_lock_state_Shared:
42101 + result = LM_ST_SHARED;
42103 + case lg_lock_state_Deferred:
42104 + result = LM_ST_DEFERRED;
42107 + case lg_lock_state_Exclusive:
42108 + case lg_lock_state_Unlock:
42110 + dump_gulm_lock_t (lck);
42112 + ("fsid=%s: lock state %d is invalid on "
42113 + "ANY flag return\n", fs->fs_name,
42120 + dump_gulm_lock_t (lck);
42121 + log_err_lck (lck,
42122 + "fsid=%s: Anit no lock state %d.\n",
42123 + fs->fs_name, state);
42129 + /* toggle counters.
42130 + * due to ANY flag, new state may not be req_state.
42132 + toggle_lock_counters (lt, lck->cur_state, result);
42134 + /* if no internal unlocks, it is cachable. */
42135 + if (result != LM_ST_UNLOCKED && (flags & lg_lock_flag_Cachable))
42136 + result |= LM_OUT_CACHEABLE;
42138 + /* record and move on
42140 + lck->last_suc_state = result & LM_OUT_ST_MASK;
42142 + case lg_err_Canceled:
42143 + result = LM_OUT_CANCELED | lck->cur_state;
42145 + case lg_err_TryFailed:
42146 + result = lck->cur_state; /* if we didn't get it. */
42164 +my_strdup (char *s)
42168 + len = strlen (s) + 1;
42169 + tmp = kmalloc (len, GFP_KERNEL);
42172 + memcpy (tmp, s, len);
42176 +/* Instead of directly calling the send function below, the functions will
42177 + * create one of these.
42178 + * Which exist only because I cannot stick the lock_t onto two lists
42181 + * this could use some clean up.
42183 +typedef struct send_req_s {
42184 + struct list_head sr_list;
42185 + enum { sr_lock, sr_act, sr_cancel, sr_drop } type;
42186 + gulm_lock_t *who;
42188 + lock_table_t *lt;
42193 + * alloc_send_req -
42197 + * Returns: send_req_t
42200 +alloc_send_req (void)
42203 + tmp = kmalloc (sizeof (send_req_t), GFP_KERNEL);
42204 + GULM_ASSERT (tmp != NULL,); /* so evil.... */
42209 + * send_drop_exp -
42218 +send_drop_exp (gulm_fs_t * fs, lock_table_t * lt, char *name)
42222 + sr = alloc_send_req ();
42223 + INIT_LIST_HEAD (&sr->sr_list);
42224 + sr->type = sr_drop;
42228 + if (name != NULL) {
42229 + sr->name = my_strdup (name);
42234 + spin_lock (<->queue_sender);
42235 + list_add (&sr->sr_list, <->to_be_sent);
42236 + spin_unlock (<->queue_sender);
42238 + wake_up (<->send_wchan);
42243 + * add_lock_to_send_req_queue -
42251 +add_lock_to_send_req_queue (lock_table_t * lt, gulm_lock_t * lck, int type)
42255 + sr = alloc_send_req ();
42256 + INIT_LIST_HEAD (&sr->sr_list);
42262 + if (type != sr_cancel)
42263 + lck->in_to_be_sent = TRUE;
42267 + spin_lock (<->queue_sender);
42268 + list_add (&sr->sr_list, <->to_be_sent);
42269 + spin_unlock (<->queue_sender);
42271 + wake_up (<->send_wchan);
42281 +static __inline__ int
42282 +queue_empty (lock_table_t * lt)
42285 + spin_lock (<->queue_sender);
42286 + ret = list_empty (<->to_be_sent);
42287 + spin_unlock (<->queue_sender);
42292 + * lt_io_sender_thread -
42295 + * Right now, only gfs lock requests should go through this thread.
42296 + * Must look, May not even need this.
42297 + * well, it is nice to get the socket io off of what ever process the user
42298 + * is running that is going through gfs into here. ?is it?
42304 +lt_io_sender_thread (void *data)
42306 + lock_table_t *lt = (lock_table_t *) data;
42307 + struct list_head *tmp;
42308 + send_req_t *sr = NULL;
42311 + daemonize ("gulm_LT_sender");
42312 + lt->sender_task = current;
42313 + complete (<->startup);
42315 + while (lt->running) {
42317 + DECLARE_WAITQUEUE (__wait_chan, current);
42318 + current->state = TASK_INTERRUPTIBLE;
42319 + add_wait_queue (<->send_wchan, &__wait_chan);
42320 + if (queue_empty (lt))
42322 + remove_wait_queue (<->send_wchan, &__wait_chan);
42323 + current->state = TASK_RUNNING;
42325 + if (!lt->running)
42328 + /* check to make sure socket is ok. */
42329 + down (<->sender);
42331 + /* pop next item to be sent
42332 + * (it will get pushed back if there was problems.)
42334 + spin_lock (<->queue_sender);
42335 + if (list_empty (<->to_be_sent)) {
42336 + spin_unlock (<->queue_sender);
42337 + up (<->sender);
42340 + tmp = (<->to_be_sent)->prev;
42342 + spin_unlock (<->queue_sender);
42343 + sr = list_entry (tmp, send_req_t, sr_list);
42346 + if (sr->type == sr_lock) {
42347 + err = send_lock_req (sr->who);
42349 + sr->who->in_to_be_sent = FALSE;
42350 + unmark_and_release_lock (sr->who);
42352 + } else if (sr->type == sr_act) {
42353 + err = send_lock_action (sr->who, sr->who->action);
42355 + sr->who->in_to_be_sent = FALSE;
42356 + unmark_and_release_lock (sr->who);
42358 + } else if (sr->type == sr_cancel) {
42360 + lg_lock_cancel_req (gulm_cm.hookup, sr->who->key,
42361 + sr->who->keylen);
42363 + unmark_and_release_lock (sr->who);
42364 + } else if (sr->type == sr_drop) {
42365 + /* XXX sr->lt isn't really needed.
42366 + * just lt should be fine.
42367 + * look into it someday.
42369 + err = send_drop_exp_inter (sr->fs, sr->lt, sr->name);
42371 + log_err ("Unknown send_req type! %d\n", sr->type);
42373 + up (<->sender);
42375 + /* if no errors, remove from queue. */
42377 + if (sr->type == sr_drop && sr->name != NULL)
42378 + kfree (sr->name);
42382 + /* if errors, re-queue.
42383 + * the send_* funcs already reported the error, so we won't
42386 + spin_lock (<->queue_sender);
42387 + /* reset the pointers. otherwise things get weird. */
42388 + INIT_LIST_HEAD (&sr->sr_list);
42389 + list_add_tail (&sr->sr_list, <->to_be_sent);
42390 + spin_unlock (<->queue_sender);
42392 + current->state = TASK_INTERRUPTIBLE;
42393 + schedule_timeout (3 * HZ);
42395 + /* gotta break shit up.
42396 + * else this loops hard and fast.
42399 + } /* while( lt->running ) */
42401 + complete (<->startup);
42406 + * cancel_pending_sender -
42409 + * want to cancel a lock request that we haven't sent to the server yet.
42411 + * this must skip over unlock requests. (never cancel unlocks)
42416 +cancel_pending_sender (gulm_lock_t * lck)
42418 + lock_table_t *lt = &gulm_cm.ltpx;
42419 + struct list_head *tmp, *nxt;
42421 + int found = FALSE;
42423 + spin_lock (<->queue_sender);
42425 + list_for_each_safe (tmp, nxt, <->to_be_sent) {
42426 + sr = list_entry (tmp, send_req_t, sr_list);
42427 + if (sr->who == lck) { /* good enough? */
42428 + if (lck->req_type == sr_cancel)
42430 + if (lck->req_state == LM_ST_UNLOCKED)
42431 + continue; /*donot cancel unlocks */
42435 + lck->in_to_be_sent = FALSE;
42437 + /* Now we need to tell the waiting lock req that it got canceled.
42438 + * basically, we need to fake a lg_err_Canceled return....
42440 + lck->result = LM_OUT_CANCELED | lck->cur_state;
42441 + lck->actuallypending = FALSE;
42442 + lck->req_type = glck_nothing;
42443 + atomic_dec (<->locks_pending);
42444 +#ifndef USE_SYNC_LOCKING
42445 + send_async_reply (lck);
42447 + complete (&lck->actsleep);
42449 + unmark_and_release_lock (lck);
42454 + spin_unlock (<->queue_sender);
42459 + * gulm_lt_login_reply -
42468 +gulm_lt_login_reply (void *misc, uint32_t error, uint8_t which)
42470 + if (error != 0) {
42471 + gulm_cm.ltpx.running = FALSE;
42472 + log_err ("LTPX: Got a %d from the login request.\n", error);
42474 + log_msg (lgm_Network2, "Logged into local LTPX.\n");
42480 + * gulm_lt_logout_reply -
42487 +gulm_lt_logout_reply (void *misc)
42489 + gulm_cm.ltpx.running = FALSE;
42490 + log_msg (lgm_Network2, "Logged out of local LTPX.\n");
42495 + * gulm_lt_lock_state -
42509 +gulm_lt_lock_state (void *misc, uint8_t * key, uint16_t keylen,
42510 + uint8_t state, uint32_t flags, uint32_t error,
42511 + uint8_t * LVB, uint16_t LVBlen)
42513 + gulm_lock_t *lck;
42515 + if (key[0] == 'J') {
42516 + jid_state_reply (key, keylen, LVB, LVBlen);
42520 + if (!find_and_mark_lock (key, keylen, &lck)) {
42521 + log_err_lk (key, keylen, "Got a lock state reply for a lock "
42522 + "that we don't know of. state:%#x flags:%#x error:%#x\n",
42523 + state, flags, error);
42527 + lck->result = calc_lock_result (lck, state, error, flags);
42529 + if ((lck->result & LM_OUT_ST_MASK) != LM_ST_UNLOCKED &&
42530 + lck->lvb != NULL) {
42531 + memcpy (lck->lvb, LVB, MIN (lck->fs->lvb_size, LVBlen));
42534 + lck->actuallypending = FALSE;
42535 + lck->req_type = glck_nothing;
42536 + atomic_dec (&gulm_cm.ltpx.locks_pending);
42537 +#ifndef USE_SYNC_LOCKING
42538 + send_async_reply (lck);
42540 + complete (&lck->actsleep);
42543 + if (error != 0 && error != lg_err_TryFailed && error != lg_err_Canceled)
42544 + log_msg_lck (lck, "Error: %d:%s (req:%#x rpl:%#x lss:%#x)\n",
42545 + error, gio_Err_to_str (error),
42546 + lck->req_state, state, lck->last_suc_state);
42548 + unmark_and_release_lock (lck);
42553 + * gulm_lt_lock_action -
42564 +gulm_lt_lock_action (void *misc, uint8_t * key, uint16_t keylen,
42565 + uint8_t action, uint32_t error)
42567 + gulm_lock_t *lck;
42569 + if (key[0] == 'J') {
42570 + jid_action_reply (key, keylen);
42574 + if (!find_and_mark_lock (key, keylen, &lck)) {
42575 + log_err_lk (key, keylen, "Got a lock action reply for a lock "
42576 + "that we don't know of. action:%#x error:%#x\n",
42581 + if (action == lg_lock_act_HoldLVB ||
42582 + action == lg_lock_act_UnHoldLVB || action == lg_lock_act_SyncLVB) {
42584 + lck->result = error;
42585 + if (error != lg_err_Ok) {
42586 + log_err ("on action reply act:%d err:%d\n", action,
42589 + lck->req_type = glck_nothing;
42590 + lck->actuallypending = FALSE;
42591 + complete (&lck->actsleep);
42593 + log_err_lck (lck, "Got strange Action %#x\n", action);
42595 + unmark_and_release_lock (lck);
42600 + * gulm_lt_drop_lock_req -
42610 +gulm_lt_drop_lock_req (void *misc, uint8_t * key, uint16_t keylen,
42613 + gulm_lock_t *lck;
42615 + if (key[0] == 'J') {
42616 + jid_header_lock_drop (key, keylen);
42620 + if (!find_and_mark_lock (key, keylen, &lck)) {
42621 + log_err_lk (key, keylen, "Got a drop lcok request for a lock "
42622 + "that we don't know of. state:%#x\n", state);
42626 + do_drop_lock_req (lck->fs, state, key);
42628 + unmark_and_release_lock (lck);
42633 + * gulm_lt_drop_all -
42640 +gulm_lt_drop_all (void *misc)
42642 + passup_droplocks ();
42647 + * gulm_lt_error -
42655 +gulm_lt_error (void *misc, uint32_t err)
42657 + log_err ("LTPX: RANDOM ERROR %d\n", err);
42661 +static lg_lockspace_callbacks_t lock_cb = {
42662 + login_reply:gulm_lt_login_reply,
42663 + logout_reply:gulm_lt_logout_reply,
42664 + lock_state:gulm_lt_lock_state,
42665 + lock_action:gulm_lt_lock_action,
42666 + drop_lock_req:gulm_lt_drop_lock_req,
42667 + drop_all:gulm_lt_drop_all,
42668 + error:gulm_lt_error
42672 + * lt_io_recving_thread -
42679 +lt_io_recving_thread (void *data)
42681 + lock_table_t *lt = &gulm_cm.ltpx;
42684 + daemonize ("gulm_LT_recver");
42685 + lt->recver_task = current;
42686 + complete (&lt->startup);
42688 + while (lt->running) {
42689 + err = lg_lock_handle_messages (gulm_cm.hookup, &lock_cb, NULL);
42691 + log_err ("gulm_LT_recver err %d\n", err);
42692 + lt->running = FALSE; /* should stop the sender thread. */
42693 + wake_up (&lt->send_wchan);
42696 + } /* while( lt->running ) */
42698 + complete (&lt->startup);
42703 + * lt_logout - log out of all of the lock tables
42708 + lock_table_t *lt = &gulm_cm.ltpx;
42711 + if (lt->running) {
42712 + lt->running = FALSE;
42714 + /* stop sender thread */
42715 + wake_up (&lt->send_wchan);
42716 + wait_for_completion (&lt->startup);
42718 + /* stop recver thread */
42719 + down (&lt->sender);
42720 + err = lg_lock_logout (gulm_cm.hookup);
42721 + up (&lt->sender);
42723 + /* wait for thread to finish */
42724 + wait_for_completion (&lt->startup);
42730 + * lt_login - login to lock tables.
42738 + lock_table_t *lt = &gulm_cm.ltpx;
42742 + ("Trying to log into LTPX when it appears to be logged in!\n");
42744 + err = lg_lock_login (gulm_cm.hookup, "GFS ");
42746 + log_err ("Failed to send login request. %d\n", err);
42750 + /* start recver thread. */
42751 + lt->running = TRUE;
42752 + err = kernel_thread (lt_io_recving_thread, lt, 0);
42754 + log_err ("Failed to start gulm_lt_IOd. (%d)\n", err);
42757 + wait_for_completion (&lt->startup);
42759 + /* start sender thread */
42760 + err = kernel_thread (lt_io_sender_thread, lt, 0);
42762 + log_err ("Failed to start gulm_LT_sender. (%d)\n", err);
42765 + wait_for_completion (&lt->startup);
42770 + log_msg (lgm_Always, "Exiting lt_login. err:%d\n", err);
42774 +/****************************************************************************/
42777 + * internal_gulm_get_lock -
42784 + * Returns: 0 on success, -EXXX on failure
42787 +internal_gulm_get_lock (gulm_fs_t * fs, uint8_t * key, uint8_t keylen,
42788 + gulm_lock_t ** lockp)
42790 + int found = FALSE;
42792 + gulm_lock_t *lck = NULL;
42794 + found = find_and_mark_lock (key, keylen, &lck);
42796 + /* malloc space */
42798 + GULM_ASSERT (lck->magic_one == 0xAAAAAAAA,);
42800 + lck = kmalloc (sizeof (gulm_lock_t), GFP_KERNEL);
42801 + if (lck == NULL) {
42803 + ("fsid=%s: Out of memory for lock struct in get_lock!\n",
42807 + memset (lck, 0, sizeof (gulm_lock_t));
42808 + INIT_LIST_HEAD (&lck->gl_list);
42809 + atomic_set (&lck->count, 1);
42810 + lck->magic_one = 0xAAAAAAAA;
42812 + memcpy (lck->key, key, keylen);
42813 + lck->keylen = keylen;
42815 + init_completion (&lck->actsleep);
42816 + lck->actuallypending = FALSE;
42817 + lck->in_to_be_sent = FALSE;
42819 + lck->action = -1;
42820 + lck->req_type = glck_nothing;
42821 + lck->last_suc_state = LM_ST_UNLOCKED;
42823 + gulm_cm.ltpx.locks_total++;
42824 + gulm_cm.ltpx.locks_unl++;
42826 + bkt = hash_lock_key (key, keylen);
42827 + bkt %= gulm_cm.ltpx.hashbuckets;
42829 + spin_lock (&gulm_cm.ltpx.hshlk[bkt]);
42830 + list_add (&lck->gl_list, &gulm_cm.ltpx.lkhsh[bkt]);
42831 + spin_unlock (&gulm_cm.ltpx.hshlk[bkt]);
42840 + * gulm_get_lock -
42845 + * Returns: 0 on success, -EXXX on failure
42848 +gulm_get_lock (lm_lockspace_t * lockspace, struct lm_lockname *name,
42849 + lm_lock_t ** lockp)
42852 + gulm_fs_t *fs = (gulm_fs_t *) lockspace;
42853 + uint8_t key[GIO_KEY_SIZE];
42855 + /* i could add a per fs lock to force only one gulm_get_lock at a time.
42857 + down (&fs->get_lock);
42859 + memset (key, 0, GIO_KEY_SIZE);
42860 + /* pack lockname */
42861 + key[0] = 'G'; /* G: fs lock, F: First mounter, J: JID mapping lock */
42862 + key[1] = name->ln_type & 0xff;
42863 + key[2] = (name->ln_number >> 56) & 0xff;
42864 + key[3] = (name->ln_number >> 48) & 0xff;
42865 + key[4] = (name->ln_number >> 40) & 0xff;
42866 + key[5] = (name->ln_number >> 32) & 0xff;
42867 + key[6] = (name->ln_number >> 24) & 0xff;
42868 + key[7] = (name->ln_number >> 16) & 0xff;
42869 + key[8] = (name->ln_number >> 8) & 0xff;
42870 + key[9] = (name->ln_number >> 0) & 0xff;
42872 + /* Now stick the fsname into the remaining space. */
42873 + len = strlen (fs->fs_name);
42874 + strncpy (&key[10], fs->fs_name, GIO_KEY_SIZE - 16);
42876 + len = MIN (len, GIO_KEY_SIZE - 16);
42877 + len += 11; /* 10 for the encoded buf, 1 for the '\0' after the fs name */
42878 + err = internal_gulm_get_lock (fs, key, len, (gulm_lock_t **) lockp);
42880 + up (&fs->get_lock);
42886 + * gulm_put_lock -
42893 +gulm_put_lock (lm_lock_t * lock)
42895 + gulm_lock_t *lck = (gulm_lock_t *) lock;
42896 + lock_table_t *lt = &gulm_cm.ltpx;
42897 + gulm_fs_t *fs = lck->fs;
42899 + down (&fs->get_lock);
42901 + GULM_ASSERT (lt != NULL,);
42903 + if (lck->last_suc_state != LM_ST_UNLOCKED) {
42904 + log_err_lck (lck,
42905 + "fsid=%s: gulm_put_lock called on a lock that is not unlocked!"
42906 + " Current state:%#x\n", lck->fs->fs_name,
42907 + lck->last_suc_state);
42908 + /* I'm still not sure about this one. We should never see it, so I
42909 + * don't think it is that big of a deal, but i duno.
42911 + * Maybe should just make it an assertion.
42913 + * with the mark/unmark code, is it even a concern?
42917 + unmark_and_release_lock (lck);
42918 + /* lck = NULL; */
42920 + up (&fs->get_lock);
42925 +valid_trasition (unsigned int cur, unsigned int req)
42927 + int lock_state_changes[16] = { /* unl exl def shr */
42928 + FALSE, TRUE, TRUE, TRUE, /* unl */
42929 + TRUE, FALSE, TRUE, TRUE, /* exl */
42930 + TRUE, TRUE, FALSE, TRUE, /* def */
42931 + TRUE, TRUE, TRUE, FALSE /* shr */
42933 + GULM_ASSERT (cur < 4
42934 + && req < 4, log_err ("cur:%d req:%d\n", cur, req););
42936 + return (lock_state_changes[4 * cur + req]);
42940 + * verify_gulm_lock_t -
42943 + * wonder if I should add some other checks.
42948 +verify_gulm_lock_t (gulm_lock_t * lck)
42950 + if (lck == NULL) {
42951 + log_err ("Lock pointer was NULL!\n");
42954 + if (lck->fs == NULL) {
42955 + log_err ("This lock has no filesystem!!!\n");
42972 +gulm_lock (lm_lock_t * lock, unsigned int cur_state,
42973 + unsigned int req_state, unsigned int flags)
42975 + gulm_lock_t *lck = NULL;
42977 + lock_table_t *lt;
42979 + /* verify vars. */
42980 + lck = (gulm_lock_t *) lock;
42981 + if (verify_gulm_lock_t (lck) != 0) {
42984 + lt = &gulm_cm.ltpx;
42987 + GULM_ASSERT (valid_trasition (cur_state, req_state),
42988 + log_err_lck (lck, "want %d with %s thinks:%d\n", req_state,
42989 + (LM_FLAG_TRY & flags) ? "try" : (LM_FLAG_NOEXP
42991 + "noexp" : "no flags", cur_state);
42994 + GULM_ASSERT (lck->actuallypending == FALSE, dump_gulm_lock_t (lck););
42996 + /* save the details of this request. */
42997 + lck->req_type = glck_state;
42999 + lck->cur_state = cur_state;
43000 + lck->req_state = req_state;
43001 + lck->flags = flags;
43003 + /* moving these here fixes a race on the s390 that ben found.
43004 + * basically, the request was sent to the server, the server receives
43005 + * it, the server processes, the server sends a reply, the client
43006 + * receives the reply, and the client tries to processe the reply before
43007 + * this thread could mark it as actuallypending.
43009 + lck->actuallypending = TRUE;
43010 + atomic_inc (&lt->locks_pending);
43011 + add_lock_to_send_req_queue (lt, lck, sr_lock);
43014 +#ifdef USE_SYNC_LOCKING
43015 + wait_for_completion (&lck->actsleep);
43018 +#ifdef USE_SYNC_LOCKING
43019 + return lck->result;
43021 + return LM_OUT_ASYNC;
43034 +gulm_unlock (lm_lock_t * lock, unsigned int cur_state)
43037 + e = gulm_lock (lock, cur_state, LM_ST_UNLOCKED, 0);
43047 +gulm_cancel (lm_lock_t * lock)
43049 + gulm_lock_t *lck;
43051 + lock_table_t *lt;
43053 + /* verify vars. */
43054 + lck = (gulm_lock_t *) lock;
43055 + if (verify_gulm_lock_t (lck) != 0) {
43058 + lt = &gulm_cm.ltpx;
43061 + if (lck->actuallypending) {
43062 + if (lck->in_to_be_sent) {
43063 + /* this should pull the req out of the send queue and have it
43064 + * return with a cancel code without going to the server.
43066 + cancel_pending_sender (lck);
43068 + add_lock_to_send_req_queue (lt, lck, sr_cancel);
43071 + log_msg_lck (lck, "Cancel called with no pending request.\n");
43077 + * gulm_hold_lvb -
43082 + * Returns: 0 on success, -EXXX on failure
43085 +gulm_hold_lvb (lm_lock_t * lock, char **lvbp)
43087 + gulm_lock_t *lck;
43089 + lock_table_t *lt;
43092 + /* verify vars. */
43093 + lck = (gulm_lock_t *) lock;
43094 + if (verify_gulm_lock_t (lck) != 0) {
43097 + lt = &gulm_cm.ltpx;
43100 + /* what where these for? */
43101 + GULM_ASSERT (lck->magic_one == 0xAAAAAAAA,
43102 + log_msg_lck (lck, "Bad gulm_lock magic.\n"););
43103 + GULM_ASSERT (lt->magic_one == 0xAAAAAAAA,
43104 + log_msg_lck (lck, "Bad lock_table magic.\n"););
43106 + lvb_log_msg_lk (lck->key, "Entering gulm_hold_lvb\n");
43108 + GULM_ASSERT (lck->lvb == NULL,
43109 + log_msg_lck (lck,
43110 + "fsid=%s: Lvb data wasn't null! must be held "
43111 + "already.\n", fs->fs_name);
43114 + GULM_ASSERT (lck->actuallypending == FALSE, dump_gulm_lock_t (lck););
43116 + lck->lvb = kmalloc (fs->lvb_size, GFP_KERNEL);
43117 + if (lck->lvb == NULL) {
43121 + memset (lck->lvb, 0, fs->lvb_size);
43123 + lck->req_type = glck_action;
43124 + lck->action = lg_lock_act_HoldLVB;
43126 + lck->actuallypending = TRUE;
43127 + add_lock_to_send_req_queue (lt, lck, sr_act);
43129 + wait_for_completion (&lck->actsleep);
43131 + if (lck->result != lg_err_Ok) {
43132 + log_err ("fsid=%s: Got error %d on hold lvb request.\n",
43133 + fs->fs_name, lck->result);
43134 + kfree (lck->lvb);
43139 + lt->locks_lvbs++;
43141 + *lvbp = lck->lvb;
43143 + lvb_log_msg_lk (lck->key, "fsid=%s: Exiting gulm_hold_lvb\n",
43148 + log_msg (lgm_Always,
43149 + "fsid=%s: Exiting gulm_hold_lvb with errors (%d)\n",
43150 + fs->fs_name, err);
43155 + * gulm_unhold_lvb -
43163 +gulm_unhold_lvb (lm_lock_t * lock, char *lvb)
43165 + gulm_lock_t *lck = NULL;
43167 + lock_table_t *lt;
43169 + /* verify vars. */
43170 + lck = (gulm_lock_t *) lock;
43171 + if (verify_gulm_lock_t (lck) != 0) {
43174 + lt = &gulm_cm.ltpx;
43177 + GULM_ASSERT (lck->actuallypending == FALSE, dump_gulm_lock_t (lck););
43179 + if (lck->lvb != lvb) {
43180 + log_err ("fsid=%s: AH! LVB pointer missmatch! %p != %p\n",
43181 + fs->fs_name, lck->lvb, lvb);
43185 + lvb_log_msg_lk (lck->key, "Entering gulm_unhold_lvb\n");
43187 + lck->req_type = glck_action;
43188 + lck->action = lg_lock_act_UnHoldLVB;
43190 + lck->actuallypending = TRUE;
43191 + add_lock_to_send_req_queue (lt, lck, sr_act);
43193 + wait_for_completion (&lck->actsleep);
43195 + /* XXX ummm, is it sane to not free the memory if the command fails?
43196 + * gfs will still think that the lvb was dropped sucessfully....
43197 + * (it assumes it is always sucessful)
43198 + * Maybe I should retry the drop request then?
43200 + if (lck->result != lg_err_Ok) {
43201 + log_err ("fsid=%s: Got error %d on unhold LVB request.\n",
43202 + lck->fs->fs_name, lck->result);
43204 + if (lck->lvb != NULL)
43205 + kfree (lck->lvb);
43207 + lt->locks_lvbs--;
43210 + lvb_log_msg ("Exiting gulm_unhold_lvb\n");
43214 + * gulm_sync_lvb -
43218 + * umm, is this even used anymore? yes.
43223 +gulm_sync_lvb (lm_lock_t * lock, char *lvb)
43225 + gulm_lock_t *lck = NULL;
43227 + lock_table_t *lt;
43229 + /* verify vars. */
43230 + lck = (gulm_lock_t *) lock;
43231 + if (verify_gulm_lock_t (lck) != 0) {
43234 + lt = &gulm_cm.ltpx;
43237 + GULM_ASSERT (lck->actuallypending == FALSE, dump_gulm_lock_t (lck););
43239 + /* this check is also in the server, so it isn't really needed here. */
43240 + if (lck->last_suc_state != LM_ST_EXCLUSIVE) {
43241 + log_err ("sync_lvb: You must hold the lock Exclusive first.\n");
43242 + goto exit; /*cannot do anything */
43244 + if (lck->lvb == NULL) {
43245 + log_err ("sync_lvb: You forgot to call hold lvb first.\n");
43248 + if (lck->lvb != lvb) {
43249 + log_err ("fsid=%s: AH! LVB pointer missmatch! %p != %p\n",
43250 + fs->fs_name, lck->lvb, lvb);
43254 + lvb_log_msg_lk (lck->key, "Entering gulm_sync_lvb\n");
43256 + lck->req_type = glck_action;
43257 + lck->action = lg_lock_act_SyncLVB;
43259 + lck->actuallypending = TRUE;
43260 + add_lock_to_send_req_queue (lt, lck, sr_act);
43262 + wait_for_completion (&lck->actsleep);
43264 + /* XXX? retry if I get an error? */
43265 + if (lck->result != lg_err_Ok) {
43266 + log_err_lck (lck,
43267 + "fsid=%s: Got error %d:%s on Sync LVB request.\n",
43268 + fs->fs_name, lck->result,
43269 + gio_Err_to_str (lck->result));
43272 + lvb_log_msg ("Exiting gulm_sync_lvb\n");
43275 +/*****************************************************************************/
43277 +gulm_plock_get (lm_lockspace_t * lockspace,
43278 + struct lm_lockname *name, unsigned long owner,
43279 + uint64_t * start, uint64_t * end, int *exclusive,
43280 + unsigned long *rowner)
43286 +gulm_plock (lm_lockspace_t * lockspace,
43287 + struct lm_lockname *name, unsigned long owner,
43288 + int wait, int exclusive, uint64_t start, uint64_t end)
43294 +gulm_punlock (lm_lockspace_t * lockspace,
43295 + struct lm_lockname *name, unsigned long owner,
43296 + uint64_t start, uint64_t end)
43301 +/****************************************************************************/
43302 +/****************************************************************************/
43303 +/****************************************************************************/
43304 +/* should move the firstmounter lock stuff into its own file perhaps? */
43306 + * get_special_lock -
43307 + * @fs: <> filesystem we're getting special lock for
43309 + * Returns: gulm_lock_t
43311 +STATIC gulm_lock_t *
43312 +get_special_lock (gulm_fs_t * fs)
43315 + gulm_lock_t *lck = NULL;
43316 + uint8_t key[GIO_KEY_SIZE];
43318 + /* pack lockname */
43319 + memset (key, 0, GIO_KEY_SIZE);
43320 + /* The F at the beginning doesn't mash with the G that prefixes every fs
43323 + memcpy (key, "FirstMount", 10);
43324 + len = strlen (fs->fs_name);
43325 + strncpy (&key[10], fs->fs_name, GIO_KEY_SIZE - 21);
43326 + len = MIN (len, GIO_KEY_SIZE - 21);
43329 + err = internal_gulm_get_lock (fs, key, len, &lck);
43331 + /* return pointer */
43336 + * do_lock_time_out -
43339 + * after timeout, set cancel request on the handler queue. (since we cannot
43340 + * call it from within the timer code.
43344 +do_lock_time_out (unsigned long d)
43346 + gulm_lock_t *lck = (gulm_lock_t *) d;
43347 + qu_function_call (&lck->fs->cq, gulm_cancel, lck);
43351 + * get_mount_lock -
43355 + * Get the Firstmount lock.
43356 + * We try to grab it Exl. IF we get that, then we are the first client
43357 + * mounting this fs. Otherwise we grab it shared to show that there are
43358 + * clients using this fs.
43363 +get_mount_lock (gulm_fs_t * fs, int *first)
43366 + struct timer_list locktimeout;
43367 + gulm_lock_t *lck = NULL;
43369 + * first we need to get the lock into the hash.
43370 + * then we can try to get it Exl with try and noexp.
43371 + * if the try fails, grab it shared.
43374 + lck = get_special_lock (fs); /* there is only a mount lock. */
43375 + if (lck == NULL) {
43380 + fs->mountlock = lck;
43382 + *first = FALSE; /* assume we're not first */
43384 + err = gulm_lock (lck, LM_ST_UNLOCKED, LM_ST_EXCLUSIVE,
43385 + LM_FLAG_TRY | LM_FLAG_NOEXP);
43386 +#ifndef USE_SYNC_LOCKING
43387 + wait_for_completion (&fs->sleep);
43390 + if ((lck->result & LM_OUT_ST_MASK) == LM_ST_EXCLUSIVE) {
43391 + /* we got the lock, we're the first mounter. */
43393 + log_msg (lgm_locking, "fsid=%s: Got mount lock Exclusive.\n",
43396 + } else if ((lck->result & LM_OUT_ST_MASK) == LM_ST_UNLOCKED) {
43397 + log_msg (lgm_locking,
43398 + "fsid=%s: Didn't get mount lock Exl, someone else "
43399 + "was first, trying for shared.\n", fs->fs_name);
43401 + /* the try failed, pick it up shared. */
43402 + /* There was a case (bug #220) where we could hang here.
43404 + * To handle this, we put up a timer for a couple of
43405 + * minutes. That if it trips, it cancels our shared
43406 + * request. Which we then see, so we go back and try the
43407 + * EXL again. If the Firstmounter is fine and is just
43408 + * taking a damn long time to do its work, this just ends
43409 + * back here, no worse for the wear.
43411 + * Another way to do this, is to wait for a killed message
43412 + * for the master. When we get that, && we're pending
43413 + * shared here, send the gulm_canel for the mounter lock.
43414 + * (too bad we are not in the fs list yet at this point.
43415 + * (well, maybe that *isn't* a bad thing))
43417 + init_timer (&locktimeout);
43418 + locktimeout.function = do_lock_time_out;
43419 + locktimeout.data = (unsigned long) lck;
43420 + mod_timer (&locktimeout, jiffies + (120 * HZ));
43421 + err = gulm_lock (lck, LM_ST_UNLOCKED, LM_ST_SHARED, 0);
43422 +#ifndef USE_SYNC_LOCKING
43423 + wait_for_completion (&fs->sleep);
43425 + del_timer (&locktimeout);
43427 + if ((lck->result & LM_OUT_ST_MASK) == LM_ST_SHARED) {
43428 + /* kewl we got it. */
43429 + log_msg (lgm_locking,
43430 + "fsid=%s: Got mount lock shared.\n",
43435 + log_msg (lgm_locking,
43436 + "fsid=%s: Shared req timed out, trying Exl again.\n",
43438 + goto try_it_again;
43441 + log_err ("Exit get_mount_lock err=%d\n", err);
43446 + * downgrade_mount_lock -
43449 + * drop the Firstmount lock down to shared. This lets other mount.
43454 +downgrade_mount_lock (gulm_fs_t * fs)
43457 + gulm_lock_t *lck = (gulm_lock_t *) fs->mountlock;
43458 + /* we were first, so we have it exl.
43459 + * shift it to shared so others may mount.
43461 + err = gulm_lock (lck, LM_ST_EXCLUSIVE, LM_ST_SHARED, LM_FLAG_NOEXP);
43462 +#ifndef USE_SYNC_LOCKING
43463 + wait_for_completion (&fs->sleep);
43466 + if ((lck->result & LM_OUT_ST_MASK) != LM_ST_SHARED) {
43468 + ("fsid=%s: Couldn't downgrade mount lock to shared!!!!!\n",
43475 + * drop_mount_lock - drop our hold on the firstmount lock.
43476 + * @fs: <> the filesystem pointer.
43481 +drop_mount_lock (gulm_fs_t * fs)
43484 + gulm_lock_t *lck = (gulm_lock_t *) fs->mountlock;
43486 + if (fs->mountlock == NULL) {
43487 + log_err ("fsid=%s: There's no Mount lock!!!!!\n", fs->fs_name);
43490 + err = gulm_unlock (lck, LM_ST_SHARED);
43491 +#ifndef USE_SYNC_LOCKING
43492 + wait_for_completion (&fs->sleep);
43495 + if (lck->result != LM_ST_UNLOCKED)
43496 + log_err ("fsid=%s: Couldn't unlock mount lock!!!!!!\n",
43498 + gulm_put_lock (fs->mountlock);
43499 + fs->mountlock = NULL;
43503 +/*****************************************************************************/
43504 +struct lm_lockops gulm_ops = {
43505 + lm_proto_name:PROTO_NAME,
43506 + lm_mount:gulm_mount,
43507 + lm_others_may_mount:gulm_others_may_mount,
43508 + lm_unmount:gulm_unmount,
43509 + lm_get_lock:gulm_get_lock,
43510 + lm_put_lock:gulm_put_lock,
43511 + lm_lock:gulm_lock,
43512 + lm_unlock:gulm_unlock,
43513 + lm_cancel:gulm_cancel,
43514 + lm_hold_lvb:gulm_hold_lvb,
43515 + lm_unhold_lvb:gulm_unhold_lvb,
43516 + lm_sync_lvb:gulm_sync_lvb,
43517 + lm_plock_get:gulm_plock_get,
43518 + lm_plock:gulm_plock,
43519 + lm_punlock:gulm_punlock,
43520 + lm_recovery_done:gulm_recovery_done,
43521 + lm_owner:THIS_MODULE,
43523 +/* vim: set ai cin noet sw=8 ts=8 : */
43524 diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_prints.h linux-patched/fs/gfs_locking/lock_gulm/gulm_prints.h
43525 --- linux-orig/fs/gfs_locking/lock_gulm/gulm_prints.h 1969-12-31 18:00:00.000000000 -0600
43526 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm_prints.h 2004-06-16 12:03:21.957894998 -0500
43528 +/******************************************************************************
43529 +*******************************************************************************
43531 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
43532 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
43534 +** This copyrighted material is made available to anyone wishing to use,
43535 +** modify, copy, or redistribute it subject to the terms and conditions
43536 +** of the GNU General Public License v.2.
43538 +*******************************************************************************
43539 +******************************************************************************/
43541 +#ifndef __gulm_prints_h__
43542 +#define __gulm_prints_h__
43543 +#include "gulm_log_msg_bits.h"
43545 +#define PROTO_NAME "lock_gulm"
43547 +#ifdef GULM_ASSERT
43548 +#undef GULM_ASSERT
43550 +#define GULM_ASSERT(x, do) \
43554 + printk("\n"PROTO_NAME": Assertion failed on line %d of file %s\n" \
43555 + PROTO_NAME": assertion: \"%s\"\n", \
43556 + __LINE__, __FILE__, #x ); \
43558 + panic("\n"PROTO_NAME": Record message above and reboot.\n"); \
43562 +#define log_msg(v, fmt, args...) if(((v)&gulm_cm.verbosity)==(v)||(v)==lgm_Always) {\
43563 + printk(PROTO_NAME ": " fmt, ## args); \
43565 +#define log_err(fmt, args...) {\
43566 + printk(KERN_ERR PROTO_NAME ": ERROR " fmt, ## args); \
43569 +#define log_nop(fmt, args...)
43570 +#define TICK printk("TICK==>" PROTO_NAME ": [%s:%d] pid:%ld\n",__FILE__,__LINE__,osi_pid())
43572 +#endif /*__gulm_prints_h__*/
43573 diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_procinfo.c linux-patched/fs/gfs_locking/lock_gulm/gulm_procinfo.c
43574 --- linux-orig/fs/gfs_locking/lock_gulm/gulm_procinfo.c 1969-12-31 18:00:00.000000000 -0600
43575 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm_procinfo.c 2004-06-16 12:03:21.957894998 -0500
43577 +/******************************************************************************
43578 +*******************************************************************************
43580 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
43581 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
43583 +** This copyrighted material is made available to anyone wishing to use,
43584 +** modify, copy, or redistribute it subject to the terms and conditions
43585 +** of the GNU General Public License v.2.
43587 +*******************************************************************************
43588 +******************************************************************************/
43591 +#include <linux/kernel.h>
43592 +#include <linux/proc_fs.h>
43595 +extern gulm_cm_t gulm_cm;
43597 +struct proc_dir_entry *gulm_proc_dir;
43598 +struct proc_dir_entry *gulm_fs_proc_dir;
43600 +/* the read operating function. */
43602 +gulm_fs_proc_read (char *buf, char **start, off_t off, int count, int *eof,
43605 + gulm_fs_t *fs = (gulm_fs_t *) data;
43606 + count = 0; /* ignore how much it wants */
43608 + count += sprintf (buf + count, "Filesystem: %s\nJID: %d\n"
43609 + "handler_queue_cur: %d\n"
43610 + "handler_queue_max: %d\n",
43611 + fs->fs_name, fs->fsJID,
43612 + fs->cq.task_count, fs->cq.task_max);
43615 + if (off >= count)
43617 + *start = buf + off;
43618 + return (count - off);
43621 +/* read the stuff for all */
43623 +gulm_core_proc_read (char *buf, char **start, off_t off, int count,
43624 + int *eof, void *data)
43626 + count = 0; /* ignore how much it wants */
43628 + count = sprintf (buf,
43629 + "cluster id: %s\n"
43630 + "my name: %s\n", gulm_cm.clusterID, gulm_cm.myName);
43633 + if (off >= count)
43635 + *start = buf + off;
43636 + return (count - off);
43640 +gulm_lt_proc_read (char *buf, char **start, off_t off, int count,
43641 + int *eof, void *data)
43643 + lock_table_t *lt = (lock_table_t *) data;
43644 + count = 0; /* ignore how much it wants */
43646 + count += sprintf (buf + count, "\n"
43661 + atomic_read (&lt->locks_pending),
43662 + lt->locks_lvbs, lt->lops);
43665 + if (off >= count)
43667 + *start = buf + off;
43668 + return (count - off);
43671 +/* add entry to our proc folder
43672 + * call this on mount.
43675 +add_to_proc (gulm_fs_t * fs)
43677 + if (!(create_proc_read_entry (fs->fs_name, S_IFREG | S_IRUGO,
43678 + gulm_fs_proc_dir, gulm_fs_proc_read,
43680 + log_err ("couldn't register proc entry for %s\n", fs->fs_name);
43687 + * this on umount.
43690 +remove_from_proc (gulm_fs_t * fs)
43692 + remove_proc_entry (fs->fs_name, gulm_fs_proc_dir);
43695 + /* create our own root dir.
43699 +init_proc_dir (void)
43701 + if ((gulm_proc_dir = proc_mkdir ("gulm", &proc_root)) == NULL) {
43702 + log_err ("cannot create the gulm directory in /proc\n");
43705 + if (!(create_proc_read_entry ("core", S_IFREG | S_IRUGO, gulm_proc_dir,
43706 + gulm_core_proc_read, NULL))) {
43707 + log_err ("couldn't register proc entry for core\n");
43708 + remove_proc_entry ("gulm", &proc_root);
43711 + if ((gulm_fs_proc_dir =
43712 + proc_mkdir ("filesystems", gulm_proc_dir)) == NULL) {
43714 + ("cannot create the filesystems directory in /proc/gulm\n");
43715 + remove_proc_entry ("core", gulm_proc_dir);
43716 + remove_proc_entry ("gulm", &proc_root);
43719 + if (!(create_proc_read_entry ("lockspace", S_IFREG | S_IRUGO,
43720 + gulm_proc_dir, gulm_lt_proc_read,
43721 + (void *) &gulm_cm.ltpx))) {
43722 + remove_proc_entry ("filesystems", gulm_proc_dir);
43723 + remove_proc_entry ("core", gulm_proc_dir);
43724 + remove_proc_entry ("gulm", &proc_root);
43735 +remove_proc_dir (void)
43737 + remove_proc_entry ("lockspace", gulm_proc_dir);
43738 + remove_proc_entry ("filesystems", gulm_proc_dir);
43739 + remove_proc_entry ("core", gulm_proc_dir);
43740 + remove_proc_entry ("gulm", &proc_root);
43742 diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_procinfo.h linux-patched/fs/gfs_locking/lock_gulm/gulm_procinfo.h
43743 --- linux-orig/fs/gfs_locking/lock_gulm/gulm_procinfo.h 1969-12-31 18:00:00.000000000 -0600
43744 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm_procinfo.h 2004-06-16 12:03:21.958894765 -0500
43746 +/******************************************************************************
43747 +*******************************************************************************
43749 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
43750 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
43752 +** This copyrighted material is made available to anyone wishing to use,
43753 +** modify, copy, or redistribute it subject to the terms and conditions
43754 +** of the GNU General Public License v.2.
43756 +*******************************************************************************
43757 +******************************************************************************/
43759 +#ifndef __procinfo_h__
43760 +#define __procinfo_h__
43761 +int add_to_proc (gulm_fs_t * fs);
43762 +void remove_from_proc (gulm_fs_t * fs);
43763 +void remove_locktables_from_proc (void);
43764 +void add_locktables_to_proc (void);
43765 +int init_proc_dir (void);
43766 +void remove_proc_dir (void);
43767 +#endif /*__procinfo_h__*/
43768 diff -urN linux-orig/fs/gfs_locking/lock_gulm/handler.c linux-patched/fs/gfs_locking/lock_gulm/handler.c
43769 --- linux-orig/fs/gfs_locking/lock_gulm/handler.c 1969-12-31 18:00:00.000000000 -0600
43770 +++ linux-patched/fs/gfs_locking/lock_gulm/handler.c 2004-06-16 12:03:21.958894765 -0500
43772 +/******************************************************************************
43773 +*******************************************************************************
43775 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
43776 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
43778 +** This copyrighted material is made available to anyone wishing to use,
43779 +** modify, copy, or redistribute it subject to the terms and conditions
43780 +** of the GNU General Public License v.2.
43782 +*******************************************************************************
43783 +******************************************************************************/
43787 +#include <linux/kernel.h>
43788 +#include <linux/slab.h>
43789 +#include <linux/fs.h>
43790 +#include <linux/smp_lock.h>
43791 +#define __KERNEL_SYSCALLS__
43792 +#include <linux/unistd.h>
43794 +#include "handler.h"
43796 +/* things about myself
43797 + * mostly just for verbosity here.
43799 +extern gulm_cm_t gulm_cm;
43801 +/* the task struct */
43802 +typedef struct runtask_s {
43803 + struct list_head rt_list;
43806 + lm_callback_t cb;
43807 + lm_fsdata_t *fsdata;
43810 + unsigned int lmtype;
43815 +#define LM_CB_GULM_FN 169
43816 +#if LM_CB_GULM_FN == LM_CB_NEED_E || \
43817 + LM_CB_GULM_FN == LM_CB_NEED_D || \
43818 + LM_CB_GULM_FN == LM_CB_NEED_S || \
43819 + LM_CB_GULM_FN == LM_CB_NEED_RECOVERY || \
43820 + LM_CB_GULM_FN == LM_CB_DROPLOCKS || \
43821 + LM_CB_GULM_FN == LM_CB_ASYNC
43822 +#error "LM_CB_GULM_FN collision with other LM_CB_*"
43825 +static __inline__ int
43826 +queue_empty (callback_qu_t * cq)
43829 + spin_lock (&cq->list_lock);
43830 + ret = list_empty (&cq->run_tasks);
43831 + spin_unlock (&cq->list_lock);
43845 + callback_qu_t *cq = (callback_qu_t *) d;
43847 + struct list_head *tmp;
43848 + struct lm_lockname lockname;
43849 + struct lm_async_cb acb;
43851 + daemonize ("gulm_Cb_Handler");
43852 + atomic_inc (&cq->num_threads);
43853 + complete (&cq->startup);
43855 + while (cq->running) {
43857 + DECLARE_WAITQUEUE (__wait_chan, current);
43858 + current->state = TASK_INTERRUPTIBLE;
43859 + add_wait_queue (&cq->waiter, &__wait_chan);
43860 + if (queue_empty (cq))
43862 + remove_wait_queue (&cq->waiter, &__wait_chan);
43863 + current->state = TASK_RUNNING;
43866 + if (!cq->running)
43868 + /* remove item from list */
43869 + spin_lock (&cq->list_lock);
43870 + if (list_empty (&cq->run_tasks)) {
43871 + spin_unlock (&cq->list_lock);
43872 + continue; /* nothing here. move on */
43874 + /* take items off the end of the list, since we add them to the
43877 + tmp = (&cq->run_tasks)->prev;
43879 + cq->task_count--;
43880 + spin_unlock (&cq->list_lock);
43882 + rt = list_entry (tmp, runtask_t, rt_list);
43884 + if (rt->type == LM_CB_ASYNC) {
43885 + acb.lc_name.ln_number = rt->lmnum;
43886 + acb.lc_name.ln_type = rt->lmtype;
43887 + acb.lc_ret = rt->result;
43888 + rt->cb (rt->fsdata, rt->type, &acb);
43889 + } else if (rt->type == LM_CB_GULM_FN) {
43890 + rt->fn (rt->fsdata);
43892 + lockname.ln_number = rt->lmnum;
43893 + lockname.ln_type = rt->lmtype;
43894 + rt->cb (rt->fsdata, rt->type, &lockname);
43899 + } /*while(running) */
43901 + atomic_dec (&cq->num_threads);
43902 + complete (&cq->startup);
43907 + * display_handler_queue -
43910 + * remember, items are added to the head, and removed from the tail.
43911 + * So the last item listed, is the next item to be handled.
43915 +display_handler_queue (callback_qu_t * cq)
43917 + struct list_head *lltmp;
43920 + log_msg (lgm_Always, "Dumping Handler queue with %d items, max %d\n",
43921 + cq->task_count, cq->task_max);
43922 + spin_lock (&cq->list_lock);
43923 + list_for_each (lltmp, &cq->run_tasks) {
43924 + rt = list_entry (lltmp, runtask_t, rt_list);
43925 + if (rt->type == LM_CB_ASYNC) {
43926 + log_msg (lgm_Always,
43927 + "%4d ASYNC (%" PRIu64 ", %u) result:%#x\n",
43928 + i, rt->lmnum, rt->lmtype, rt->result);
43929 + } else if (rt->type == LM_CB_GULM_FN) {
43930 + log_msg (lgm_Always, "%4d GULM FN func:%p data:%p\n",
43931 + i, rt->fn, rt->fsdata);
43932 + } else { /* callback. */
43933 + log_msg (lgm_Always,
43934 + "%4d CALLBACK req:%u (%" PRIu64 ", %u)\n", i,
43935 + rt->type, rt->lmnum, rt->lmtype);
43939 + spin_unlock (&cq->list_lock);
43943 + * alloc_runtask -
43944 + * Returns: runtask_t
43947 +alloc_runtask (void)
43950 + rt = kmalloc (sizeof (runtask_t), GFP_KERNEL);
43955 + * qu_function_call -
43960 + * Generic function execing on the handler thread. Mostly so I can add
43961 + * single things quick without having to build all the details into the
43962 + * handler queues.
43967 +qu_function_call (callback_qu_t * cq, gulm_fn fn, void *data)
43970 + rt = alloc_runtask ();
43975 + rt->fsdata = data;
43976 + rt->type = LM_CB_GULM_FN;
43980 + INIT_LIST_HEAD (&rt->rt_list);
43981 + spin_lock (&cq->list_lock);
43982 + list_add (&rt->rt_list, &cq->run_tasks);
43983 + cq->task_count++;
43984 + if (cq->task_count > cq->task_max)
43985 + cq->task_max = cq->task_count;
43986 + spin_unlock (&cq->list_lock);
43987 + wake_up (&cq->waiter);
44003 +qu_async_rpl (callback_qu_t * cq, lm_callback_t cb, lm_fsdata_t * fsdata,
44004 + struct lm_lockname *lockname, int result)
44007 + rt = alloc_runtask ();
44011 + rt->fsdata = fsdata;
44012 + rt->type = LM_CB_ASYNC;
44013 + rt->lmtype = lockname->ln_type;
44014 + rt->lmnum = lockname->ln_number;
44015 + rt->result = result;
44016 + INIT_LIST_HEAD (&rt->rt_list);
44017 + spin_lock (&cq->list_lock);
44018 + list_add (&rt->rt_list, &cq->run_tasks);
44019 + cq->task_count++;
44020 + if (cq->task_count > cq->task_max)
44021 + cq->task_max = cq->task_count;
44022 + spin_unlock (&cq->list_lock);
44023 + wake_up (&cq->waiter);
44030 + * Returns: <0:Error; =0:Ok
44033 +qu_drop_req (callback_qu_t * cq, lm_callback_t cb, lm_fsdata_t * fsdata,
44034 + int type, uint8_t lmtype, uint64_t lmnum)
44037 + rt = alloc_runtask ();
44041 + rt->fsdata = fsdata;
44043 + rt->lmtype = lmtype;
44044 + rt->lmnum = lmnum;
44046 + INIT_LIST_HEAD (&rt->rt_list);
44047 + spin_lock (&cq->list_lock);
44048 + list_add (&rt->rt_list, &cq->run_tasks);
44049 + cq->task_count++;
44050 + if (cq->task_count > cq->task_max)
44051 + cq->task_max = cq->task_count;
44052 + spin_unlock (&cq->list_lock);
44053 + wake_up (&cq->waiter);
44058 + * stop_callback_qu - stop the handler thread
44061 +stop_callback_qu (callback_qu_t * cq)
44063 + struct list_head *lltmp, *tmp;
44066 + if (cq->running) {
44067 + cq->running = FALSE;
44068 + /* make sure all threads stop.
44070 + while (atomic_read (&cq->num_threads) > 0) {
44071 + wake_up (&cq->waiter);
44072 + wait_for_completion (&cq->startup);
44074 + /* clear out any left overs. */
44075 + list_for_each_safe (tmp, lltmp, &cq->run_tasks) {
44076 + rt = list_entry (tmp, runtask_t, rt_list);
44084 + * start_callback_qu -
44086 + * Returns: <0:Error, >=0:Ok
44089 +start_callback_qu (callback_qu_t * cq, int cnt)
44092 + INIT_LIST_HEAD (&cq->run_tasks);
44093 + spin_lock_init (&cq->list_lock);
44094 + init_completion (&cq->startup);
44095 + init_waitqueue_head (&cq->waiter);
44096 + atomic_set (&cq->num_threads, 0);
44097 + cq->running = TRUE;
44098 + cq->task_count = 0;
44099 + cq->task_max = 0;
44102 + for (; cnt > 0; cnt--) {
44103 + err = kernel_thread (handler, cq, 0); /* XXX linux part */
44105 + stop_callback_qu (cq);
44106 + /* calling stop here might not behave correctly in all error
44111 + wait_for_completion (&cq->startup);
44115 diff -urN linux-orig/fs/gfs_locking/lock_gulm/handler.h linux-patched/fs/gfs_locking/lock_gulm/handler.h
44116 --- linux-orig/fs/gfs_locking/lock_gulm/handler.h 1969-12-31 18:00:00.000000000 -0600
44117 +++ linux-patched/fs/gfs_locking/lock_gulm/handler.h 2004-06-16 12:03:21.958894765 -0500
44119 +/******************************************************************************
44120 +*******************************************************************************
44122 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
44123 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
44125 +** This copyrighted material is made available to anyone wishing to use,
44126 +** modify, copy, or redistribute it subject to the terms and conditions
44127 +** of the GNU General Public License v.2.
44129 +*******************************************************************************
44130 +******************************************************************************/
44132 +#ifndef __handler_c__
44133 +#define __handler_c__
44134 +#include <linux/lm_interface.h>
44136 +struct callback_qu_s {
44137 + struct completion startup;
44141 + struct list_head run_tasks;
44142 + spinlock_t list_lock;
44143 + wait_queue_head_t waiter;
44144 + atomic_t num_threads;
44146 +typedef struct callback_qu_s callback_qu_t;
44148 +/* kinda an excess overloading */
44149 +typedef void (*gulm_fn) (void *);
44150 +int qu_function_call (callback_qu_t * cq, gulm_fn fn, void *data);
44152 +int qu_async_rpl (callback_qu_t * cq, lm_callback_t cb, lm_fsdata_t * fsdata,
44153 + struct lm_lockname *lockname, int result);
44154 +int qu_drop_req (callback_qu_t * cq, lm_callback_t cb, lm_fsdata_t * fsdata,
44155 + int type, uint8_t lmtype, uint64_t lmnum);
44156 +int start_callback_qu (callback_qu_t * cq, int cnt);
44157 +void stop_callback_qu (callback_qu_t * cq);
44158 +void display_handler_queue (callback_qu_t * cq);
44160 +#endif /*__handler_c__*/
44161 diff -urN linux-orig/fs/gfs_locking/lock_gulm/lg_core.c linux-patched/fs/gfs_locking/lock_gulm/lg_core.c
44162 --- linux-orig/fs/gfs_locking/lock_gulm/lg_core.c 1969-12-31 18:00:00.000000000 -0600
44163 +++ linux-patched/fs/gfs_locking/lock_gulm/lg_core.c 2004-06-16 12:03:21.958894765 -0500
44165 +/******************************************************************************
44166 +*******************************************************************************
44168 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
44169 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
44171 +** This copyrighted material is made available to anyone wishing to use,
44172 +** modify, copy, or redistribute it subject to the terms and conditions
44173 +** of the GNU General Public License v.2.
44175 +*******************************************************************************
44176 +******************************************************************************/
44178 +/* All of the core related functions for services are here. */
44180 +#include "lg_priv.h"
44183 + * lg_core_selector -
44184 + * @ulm_interface_p:
44190 +lg_core_selector (gulm_interface_p lgp)
44192 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
44193 + /* make sure it is a gulm_interface_p. */
44194 + if (lg == NULL || lg->first_magic != LGMAGIC
44195 + || lg->last_magic != LGMAGIC)
44202 + return lg->core_fd;
44206 + * lg_core_handle_messages -
44207 + * @ulm_interface_p:
44208 + * @lg_core_callbacks_t:
44214 +lg_core_handle_messages (gulm_interface_p lgp, lg_core_callbacks_t * ccbp,
44217 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
44221 + uint32_t x_code, x_error, x_rank;
44222 + struct in6_addr x_ip;
44223 + uint8_t x_state, x_mode;
44225 + /* make sure it is a gulm_interface_p. */
44228 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
44231 + if (lg->core_enc == NULL || lg->core_dec == NULL)
44234 + down (&lg->core_recver);
44235 + if (lg->in_core_hm)
44237 + lg->in_core_hm = TRUE;
44238 + up (&lg->core_recver);
44240 + dec = lg->core_dec;
44242 + err = xdr_dec_uint32 (dec, &x_code);
44246 + if (gulm_core_login_rpl == x_code) {
44248 + if ((err = xdr_dec_uint64 (dec, &x_gen)) < 0)
44250 + if ((err = xdr_dec_uint32 (dec, &x_error)) < 0)
44252 + if ((err = xdr_dec_uint32 (dec, &x_rank)) < 0)
44254 + if ((err = xdr_dec_uint8 (dec, &x_state)) < 0)
44259 + if (ccbp->login_reply == NULL) {
44263 + err = ccbp->login_reply (misc, x_gen, x_error, x_rank, x_state);
44265 + } else if (gulm_core_logout_rpl == x_code) {
44266 + if ((err = xdr_dec_uint32 (dec, &x_error)) != 0)
44268 + if (ccbp->logout_reply != NULL) {
44269 + err = ccbp->logout_reply (misc);
44272 + xdr_close (&lg->core_fd);
44273 + xdr_enc_release (lg->core_enc);
44274 + lg->core_enc = NULL;
44275 + xdr_dec_release (lg->core_dec);
44276 + lg->core_dec = NULL;
44279 + } else if (gulm_core_mbr_lstrpl == x_code) {
44280 + if (ccbp->nodelist != NULL) {
44281 + err = ccbp->nodelist (misc, lglcb_start, NULL, 0, 0);
44286 + if ((err = xdr_dec_list_start (dec)) != 0)
44288 + while (xdr_dec_list_stop (dec) != 0) {
44290 + xdr_dec_string_ag (dec, &lg->cfba,
44291 + &lg->cfba_len)) != 0)
44293 + if ((err = xdr_dec_ipv6 (dec, &x_ip)) != 0)
44295 + if ((err = xdr_dec_uint8 (dec, &x_state)) != 0)
44297 + if ((err = xdr_dec_uint8 (dec, &x_mode)) != 0)
44299 + if ((err = xdr_dec_uint8 (dec, &x_mode)) != 0)
44301 + if ((err = xdr_dec_uint32 (dec, &x_rank)) != 0)
44303 + if ((err = xdr_dec_uint64 (dec, &x_gen)) != 0)
44305 + if ((err = xdr_dec_uint64 (dec, &x_gen)) != 0)
44307 + if ((err = xdr_dec_uint64 (dec, &x_gen)) != 0)
44310 + if (ccbp->nodelist != NULL) {
44312 + ccbp->nodelist (misc, lglcb_item,
44324 + if (ccbp->nodelist == NULL) {
44328 + err = ccbp->nodelist (misc, lglcb_stop, NULL, 0, 0);
44330 + } else if (gulm_core_state_chgs == x_code) {
44332 + if ((err = xdr_dec_uint8 (dec, &x_state)) != 0)
44334 + if (x_state == gio_Mbr_ama_Slave) {
44335 + if ((err = xdr_dec_ipv6 (dec, &x_ip)) != 0)
44338 + xdr_dec_string_ag (dec, &lg->cfba,
44339 + &lg->cfba_len)) != 0)
44346 + if (ccbp->statechange == NULL) {
44350 + err = ccbp->statechange (misc, x_state, &x_ip, lg->cfba);
44352 + } else if (gulm_core_mbr_updt == x_code) {
44355 + xdr_dec_string_ag (dec, &lg->cfba,
44356 + &lg->cfba_len)) != 0)
44358 + if ((err = xdr_dec_ipv6 (dec, &x_ip)) != 0)
44360 + if ((err = xdr_dec_uint8 (dec, &x_state)) != 0)
44366 + if (ccbp->nodechange == NULL) {
44370 + err = ccbp->nodechange (misc, lg->cfba, &x_ip, x_state);
44372 + } else if (gulm_core_res_list == x_code) {
44373 + if (ccbp->service_list != NULL) {
44375 + ccbp->service_list (misc, lglcb_start, NULL)) != 0)
44379 + if ((err = xdr_dec_list_start (dec)) != 0)
44381 + while (xdr_dec_list_stop (dec)) {
44383 + xdr_dec_string_ag (dec, &lg->cfba,
44384 + &lg->cfba_len)) != 0)
44386 + if (ccbp->service_list != NULL) {
44388 + ccbp->service_list (misc,
44400 + if (ccbp->service_list == NULL) {
44404 + err = ccbp->service_list (misc, lglcb_stop, NULL);
44406 + } else if (gulm_info_stats_rpl == x_code) {
44407 + if (ccbp->status != NULL) {
44409 + ccbp->status (misc, lglcb_start, NULL, NULL)) != 0)
44413 + if ((err = xdr_dec_list_start (dec)) != 0)
44415 + while (xdr_dec_list_stop (dec) != 0) {
44417 + xdr_dec_string_ag (dec, &lg->cfba,
44418 + &lg->cfba_len)) != 0)
44421 + xdr_dec_string_ag (dec, &lg->cfbb,
44422 + &lg->cfbb_len)) != 0)
44424 + if (ccbp->status != NULL) {
44426 + ccbp->status (misc, lglcb_item,
44428 + lg->cfbb)) != 0) {
44437 + if (ccbp->status == NULL) {
44441 + err = ccbp->status (misc, lglcb_stop, NULL, NULL);
44443 + } else if (gulm_err_reply == x_code) {
44444 + if ((err = xdr_dec_uint32 (dec, &x_code)) != 0)
44446 + if ((err = xdr_dec_uint32 (dec, &x_error)) != 0)
44448 + if (ccbp->error == NULL) {
44452 + err = ccbp->error (misc, x_error);
44455 + /* unknown code. what to do? */
44461 + lg->in_core_hm = FALSE;
44466 + * lg_core_login -
44470 + * On any error, things are closed and released to the state of things
44471 + * before you called login.
44476 +lg_core_login (gulm_interface_p lgp, int important)
44478 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
44479 + struct sockaddr_in6 adr;
44485 + /* make sure it is a gulm_interface_p. */
44488 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
44491 + adr.sin6_family = AF_INET6;
44492 + adr.sin6_addr = in6addr_loopback;
44493 + adr.sin6_port = htons (lg->core_port);
44495 + if ((err = xdr_open (&cfd)) < 0) {
44499 + if ((err = xdr_connect (&adr, cfd)) < 0) {
44500 + xdr_close (&cfd);
44504 + enc = xdr_enc_init (cfd, 128);
44505 + if (enc == NULL) {
44506 + xdr_close (&cfd);
44510 + dec = xdr_dec_init (cfd, 128);
44511 + if (enc == NULL) {
44512 + xdr_enc_release (enc);
44513 + xdr_close (&cfd);
44518 + if ((err = xdr_enc_uint32 (enc, gulm_core_reslgn_req)) < 0)
44520 + if ((err = xdr_enc_uint32 (enc, GIO_WIREPROT_VERS)) < 0)
44522 + if ((err = xdr_enc_string (enc, lg->clusterID)) < 0)
44524 + if ((err = xdr_enc_string (enc, lg->service_name)) < 0)
44527 + xdr_enc_uint32 (enc,
44528 + important ? gulm_svc_opt_important : 0)) !=
44531 + if ((err = xdr_enc_flush (enc)) < 0)
44535 + xdr_dec_release (dec);
44536 + xdr_enc_release (enc);
44537 + xdr_close (&cfd);
44541 + down (&lg->core_sender);
44542 + lg->core_fd = cfd;
44543 + lg->core_enc = enc;
44544 + lg->core_dec = dec;
44545 + up (&lg->core_sender);
44551 + * lg_core_logout -
44558 +lg_core_logout (gulm_interface_p lgp)
44560 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
44564 + /* make sure it is a gulm_interface_p. */
44567 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
44570 + if (lg->core_fd < 0 || lg->core_enc == NULL || lg->core_dec == NULL)
44573 + enc = lg->core_enc;
44575 + down (&lg->core_sender);
44577 + if ((err = xdr_enc_uint32 (enc, gulm_core_logout_req)) != 0)
44579 + if ((err = xdr_enc_string (enc, lg->service_name)) != 0)
44581 + if ((err = xdr_enc_uint8 (enc, gio_Mbr_ama_Resource)) != 0)
44583 + if ((err = xdr_enc_flush (enc)) != 0)
44586 + up (&lg->core_sender);
44591 + * lg_core_nodeinfo -
44599 +lg_core_nodeinfo (gulm_interface_p lgp, char *nodename)
44601 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
44605 + /* make sure it is a gulm_interface_p. */
44608 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
44611 + if (lg->core_fd < 0 || lg->core_enc == NULL || lg->core_dec == NULL)
44614 + if (nodename == NULL)
44617 + enc = lg->core_enc;
44619 + down (&lg->core_sender);
44621 + if ((err = xdr_enc_uint32 (enc, gulm_core_mbr_req)) != 0)
44623 + if ((err = xdr_enc_string (enc, nodename)) != 0)
44625 + if ((err = xdr_enc_flush (enc)) != 0)
44628 + up (&lg->core_sender);
44633 + * lg_core_nodelist -
44640 +lg_core_nodelist (gulm_interface_p lgp)
44642 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
44646 + /* make sure it is a gulm_interface_p. */
44649 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
44652 + if (lg->core_fd < 0 || lg->core_enc == NULL || lg->core_dec == NULL)
44655 + enc = lg->core_enc;
44657 + down (&lg->core_sender);
44659 + if ((err = xdr_enc_uint32 (enc, gulm_core_mbr_lstreq)) != 0)
44661 + if ((err = xdr_enc_flush (enc)) != 0)
44664 + up (&lg->core_sender);
44669 + * lg_core_servicelist -
44676 +lg_core_servicelist (gulm_interface_p lgp)
44678 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
44682 + /* make sure it is a gulm_interface_p. */
44685 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
44688 + if (lg->core_fd < 0 || lg->core_enc == NULL || lg->core_dec == NULL)
44691 + enc = lg->core_enc;
44693 + down (&lg->core_sender);
44695 + if ((err = xdr_enc_uint32 (enc, gulm_core_res_req)) != 0)
44697 + if ((err = xdr_enc_flush (enc)) != 0)
44700 + up (&lg->core_sender);
44705 + * lg_core_corestate -
44712 +lg_core_corestate (gulm_interface_p lgp)
44714 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
44718 + /* make sure it is a gulm_interface_p. */
44721 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
44724 + if (lg->core_fd < 0 || lg->core_enc == NULL || lg->core_dec == NULL)
44727 + enc = lg->core_enc;
44729 + down (&lg->core_sender);
44731 + if ((err = xdr_enc_uint32 (enc, gulm_core_state_req)) != 0)
44733 + if ((err = xdr_enc_flush (enc)) != 0)
44736 + up (&lg->core_sender);
44741 + * lg_core_shutdown -
44748 +lg_core_shutdown (gulm_interface_p lgp)
44750 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
44754 + /* make sure it is a gulm_interface_p. */
44757 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
44760 + if (lg->core_fd < 0 || lg->core_enc == NULL || lg->core_dec == NULL)
44763 + enc = lg->core_enc;
44765 + down (&lg->core_sender);
44767 + if ((err = xdr_enc_uint32 (enc, gulm_core_shutdown)) != 0)
44769 + if ((err = xdr_enc_flush (enc)) != 0)
44772 + up (&lg->core_sender);
44777 + * lg_core_forceexpire -
44785 +lg_core_forceexpire (gulm_interface_p lgp, char *nodename)
44787 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
44791 + /* make sure it is a gulm_interface_p. */
44794 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
44797 + if (lg->core_fd < 0 || lg->core_enc == NULL || lg->core_dec == NULL)
44800 + if (nodename == NULL)
44803 + enc = lg->core_enc;
44805 + down (&lg->core_sender);
44807 + if ((err = xdr_enc_uint32 (enc, gulm_core_mbr_force)) != 0)
44809 + if ((err = xdr_enc_string (enc, nodename)) != 0)
44811 + if ((err = xdr_enc_flush (enc)) != 0)
44814 + up (&lg->core_sender);
44819 + * lg_core_forcepending -
44826 +lg_core_forcepending (gulm_interface_p lgp)
44828 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
44832 + /* make sure it is a gulm_interface_p. */
44835 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
44838 + if (lg->core_fd < 0 || lg->core_enc == NULL || lg->core_dec == NULL)
44841 + enc = lg->core_enc;
44843 + down (&lg->core_sender);
44845 + if ((err = xdr_enc_uint32 (enc, gulm_core_forcepend)) != 0)
44847 + if ((err = xdr_enc_flush (enc)) != 0)
44850 + up (&lg->core_sender);
44855 + * lg_core_status -
44862 +lg_core_status (gulm_interface_p lgp)
44864 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
44868 + /* make sure it is a gulm_interface_p. */
44871 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
44874 + if (lg->core_fd < 0 || lg->core_enc == NULL || lg->core_dec == NULL)
44877 + enc = lg->core_enc;
44879 + down (&lg->core_sender);
44881 + if ((err = xdr_enc_uint32 (enc, gulm_info_stats_req)) != 0)
44883 + if ((err = xdr_enc_flush (enc)) != 0)
44886 + up (&lg->core_sender);
44889 diff -urN linux-orig/fs/gfs_locking/lock_gulm/lg_lock.c linux-patched/fs/gfs_locking/lock_gulm/lg_lock.c
44890 --- linux-orig/fs/gfs_locking/lock_gulm/lg_lock.c 1969-12-31 18:00:00.000000000 -0600
44891 +++ linux-patched/fs/gfs_locking/lock_gulm/lg_lock.c 2004-06-16 12:03:21.958894765 -0500
44893 +/******************************************************************************
44894 +*******************************************************************************
44896 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
44897 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
44899 +** This copyrighted material is made available to anyone wishing to use,
44900 +** modify, copy, or redistribute it subject to the terms and conditions
44901 +** of the GNU General Public License v.2.
44903 +*******************************************************************************
44904 +******************************************************************************/
44906 +/* All of the lock related functions are here. */
44907 +#include "lg_priv.h"
44910 + * lg_lock_selector -
44911 + * @ulm_interface_p:
44917 +lg_lock_selector (gulm_interface_p lgp)
44919 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
44920 + /* make sure it is a gulm_interface_p. */
44921 + if (lg == NULL || lg->first_magic != LGMAGIC
44922 + || lg->last_magic != LGMAGIC)
44929 + return lg->lock_fd;
44933 + * lg_lock_handle_messages -
44934 + * @ulm_interface_p:
44935 + * @lg_lockspace_callbacks_t:
44940 +lg_lock_handle_messages (gulm_interface_p lgp, lg_lockspace_callbacks_t * cbp,
44943 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
44946 + uint32_t x_code, x_error, x_flags;
44947 + uint16_t x_keylen, x_lvblen = 0;
44950 + /* make sure it is a gulm_interface_p. */
44953 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
44956 + if (lg->core_enc == NULL || lg->core_dec == NULL)
44959 + down (&lg->lock_recver);
44960 + if (lg->in_lock_hm)
44962 + lg->in_lock_hm = TRUE;
44963 + up (&lg->lock_recver);
44965 + dec = lg->lock_dec;
44967 + err = xdr_dec_uint32 (dec, &x_code);
44971 + if (gulm_lock_login_rpl == x_code) {
44973 + if ((err = xdr_dec_uint32 (dec, &x_error)) != 0)
44975 + if ((err = xdr_dec_uint8 (dec, &x_state)) != 0)
44980 + if (cbp->login_reply == NULL) {
44984 + err = cbp->login_reply (misc, x_error, x_state);
44986 + } else if (gulm_lock_logout_rpl == x_code) {
44987 + if (cbp->logout_reply != NULL) {
44988 + err = cbp->logout_reply (misc);
44991 + xdr_close (&lg->lock_fd);
44992 + xdr_enc_release (lg->lock_enc);
44993 + lg->lock_enc = NULL;
44994 + xdr_dec_release (lg->lock_dec);
44995 + lg->lock_dec = NULL;
44998 + } else if (gulm_lock_state_rpl == x_code) {
45001 + xdr_dec_raw_ag (dec, (void **) &lg->lfba,
45002 + &lg->lfba_len, &x_keylen)) != 0)
45004 + if ((err = xdr_dec_uint8 (dec, &x_state)) != 0)
45006 + if ((err = xdr_dec_uint32 (dec, &x_flags)) != 0)
45008 + if ((err = xdr_dec_uint32 (dec, &x_error)) != 0)
45010 + if (x_flags & gio_lck_fg_hasLVB) {
45012 + xdr_dec_raw_ag (dec, (void **) &lg->lfbb,
45014 + &x_lvblen)) != 0)
45021 + if (x_keylen <= 4) {
45022 + err = -EPROTO; /* or something */
45025 + if (cbp->lock_state == NULL) {
45029 + err = cbp->lock_state (misc, &lg->lfba[4], x_keylen - 4,
45030 + x_state, x_flags, x_error,
45031 + lg->lfbb, x_lvblen);
45033 + } else if (gulm_lock_action_rpl == x_code) {
45036 + xdr_dec_raw_ag (dec, (void **) &lg->lfba,
45037 + &lg->lfba_len, &x_keylen)) != 0)
45039 + if ((err = xdr_dec_uint8 (dec, &x_state)) != 0)
45041 + if ((err = xdr_dec_uint32 (dec, &x_error)) != 0)
45047 + if (x_keylen <= 4) {
45048 + err = -EPROTO; /* or something */
45051 + if (cbp->lock_action == NULL) {
45056 + cbp->lock_action (misc, &lg->lfba[4], x_keylen - 4, x_state,
45059 + } else if (gulm_lock_cb_state == x_code) {
45062 + xdr_dec_raw_ag (dec, (void **) &lg->lfba,
45063 + &lg->lfba_len, &x_keylen)) != 0)
45065 + if ((err = xdr_dec_uint8 (dec, &x_state)) != 0)
45071 + if (cbp->drop_lock_req == NULL) {
45076 + cbp->drop_lock_req (misc, &lg->lfba[4], x_keylen - 4,
45079 + } else if (gulm_lock_cb_dropall == x_code) {
45080 + if (cbp->drop_all == NULL) {
45084 + err = cbp->drop_all (misc);
45086 + } else if (gulm_info_stats_rpl == x_code) {
45087 + if (cbp->status != NULL) {
45089 + cbp->status (misc, lglcb_start, NULL, NULL)) != 0)
45093 + if ((err = xdr_dec_list_start (dec)) != 0)
45095 + while (xdr_dec_list_stop (dec) != 0) {
45097 + xdr_dec_string_ag (dec, &lg->lfba,
45098 + &lg->lfba_len)) != 0)
45101 + xdr_dec_string_ag (dec, &lg->lfbb,
45102 + &lg->lfbb_len)) != 0)
45104 + if (cbp->status != NULL) {
45106 + cbp->status (misc, lglcb_item,
45108 + lg->lfbb)) != 0) {
45117 + if (cbp->status == NULL) {
45121 + err = cbp->status (misc, lglcb_stop, NULL, NULL);
45123 + } else if (gulm_err_reply == x_code) {
45125 + if ((err = xdr_dec_uint32 (dec, &x_code)) != 0)
45127 + if ((err = xdr_dec_uint32 (dec, &x_error)) != 0)
45132 + if (cbp->error == NULL) {
45136 + err = cbp->error (misc, x_error);
45144 + lg->in_lock_hm = FALSE;
45149 + * lg_lock_login -
45150 + * @ulm_interface_p:
45157 +lg_lock_login (gulm_interface_p lgp, uint8_t lockspace[4])
45159 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
45160 + struct sockaddr_in6 adr;
45166 + /* make sure it is a gulm_interface_p. */
45169 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
45172 + adr.sin6_family = AF_INET6;
45173 + adr.sin6_addr = in6addr_loopback;
45174 + adr.sin6_port = htons (lg->lock_port);
45176 + if ((err = xdr_open (&cfd)) < 0) {
45180 + if ((err = xdr_connect (&adr, cfd)) < 0) {
45181 + xdr_close (&cfd);
45185 + enc = xdr_enc_init (cfd, 512);
45186 + if (enc == NULL) {
45187 + xdr_close (&cfd);
45191 + dec = xdr_dec_init (cfd, 512);
45192 + if (enc == NULL) {
45193 + xdr_enc_release (enc);
45194 + xdr_close (&cfd);
45199 + if ((err = xdr_enc_uint32 (enc, gulm_lock_login_req)) < 0)
45201 + if ((err = xdr_enc_uint32 (enc, GIO_WIREPROT_VERS)) < 0)
45203 + if ((err = xdr_enc_string (enc, lg->service_name)) < 0)
45205 + if ((err = xdr_enc_uint8 (enc, gio_lck_st_Client)) < 0)
45207 + if ((err = xdr_enc_flush (enc)) < 0)
45210 + if ((err = xdr_enc_uint32 (enc, gulm_lock_sel_lckspc)) < 0)
45212 + if ((err = xdr_enc_raw (enc, lockspace, 4)) < 0)
45214 + /* don't flush here.
45215 + * dumb programmer stunt. This way, the lockspace selection won't
45216 + * happen until the next thing the user of this lib sends. Which
45217 + * means it will be after we have received the login reply.
45219 + * Is there really a good reason not to flush here?
45223 + xdr_dec_release (dec);
45224 + xdr_enc_release (enc);
45225 + xdr_close (&cfd);
45229 + down (&lg->lock_sender);
45230 + lg->lock_fd = cfd;
45231 + lg->lock_enc = enc;
45232 + lg->lock_dec = dec;
45234 + memcpy (lg->lockspace, lockspace, 4);
45235 + up (&lg->lock_sender);
45241 + * lg_lock_logout -
45242 + * @ulm_interface_p:
45248 +lg_lock_logout (gulm_interface_p lgp)
45250 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
45254 + /* make sure it is a gulm_interface_p. */
45257 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
45260 + if (lg->lock_fd < 0 || lg->lock_enc == NULL || lg->lock_dec == NULL)
45263 + enc = lg->lock_enc;
45265 + down (&lg->lock_sender);
45267 + if ((err = xdr_enc_uint32 (enc, gulm_lock_logout_req)) != 0)
45269 + if ((err = xdr_enc_flush (enc)) != 0)
45272 + up (&lg->lock_sender);
45277 + * lg_lock_state_req -
45290 +lg_lock_state_req (gulm_interface_p lgp, uint8_t * key, uint16_t keylen,
45291 + uint8_t state, uint32_t flags, uint8_t * LVB,
45294 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
45295 + struct iovec iov[2];
45297 + uint32_t iflgs = 0;
45300 + /* make sure it is a gulm_interface_p. */
45303 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
45306 + if (lg->lock_fd < 0 || lg->lock_enc == NULL || lg->lock_dec == NULL)
45309 + if (state != lg_lock_state_Unlock &&
45310 + state != lg_lock_state_Exclusive &&
45311 + state != lg_lock_state_Deferred && state != lg_lock_state_Shared)
45314 + /* make sure only the accepted flags get passed through. */
45315 + if ((flags & lg_lock_flag_DoCB) == lg_lock_flag_DoCB)
45316 + iflgs |= lg_lock_flag_DoCB;
45317 + if ((flags & lg_lock_flag_Try) == lg_lock_flag_Try)
45318 + iflgs |= lg_lock_flag_Try;
45319 + if ((flags & lg_lock_flag_Any) == lg_lock_flag_Any)
45320 + iflgs |= lg_lock_flag_Any;
45321 + if ((flags & lg_lock_flag_IgnoreExp) == lg_lock_flag_IgnoreExp)
45322 + iflgs |= lg_lock_flag_IgnoreExp;
45323 + if ((flags & lg_lock_flag_Piority) == lg_lock_flag_Piority)
45324 + iflgs |= lg_lock_flag_Piority;
45326 + enc = lg->lock_enc;
45328 + if (LVB != NULL && LVBlen > 0)
45329 + iflgs |= gio_lck_fg_hasLVB;
45331 + iov[0].iov_base = lg->lockspace;
45332 + iov[0].iov_len = 4;
45333 + iov[1].iov_base = key;
45334 + iov[1].iov_len = keylen;
45336 + down (&lg->lock_sender);
45338 + if ((err = xdr_enc_uint32 (enc, gulm_lock_state_req)) != 0)
45340 + if ((err = xdr_enc_raw_iov (enc, 2, iov)) != 0)
45342 + if ((err = xdr_enc_uint8 (enc, state)) != 0)
45344 + if ((err = xdr_enc_uint32 (enc, iflgs)) != 0)
45346 + if (iflgs & gio_lck_fg_hasLVB)
45347 + if ((err = xdr_enc_raw (enc, LVB, LVBlen)) != 0)
45349 + if ((err = xdr_enc_flush (enc)) != 0)
45352 + up (&lg->lock_sender);
45357 + * lg_lock_cancel_req -
45366 +lg_lock_cancel_req (gulm_interface_p lgp, uint8_t * key, uint16_t keylen)
45368 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
45369 + struct iovec iov[2];
45373 + /* make sure it is a gulm_interface_p. */
45376 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
45379 + if (lg->lock_fd < 0 || lg->lock_enc == NULL || lg->lock_dec == NULL)
45382 + enc = lg->lock_enc;
45384 + iov[0].iov_base = lg->lockspace;
45385 + iov[0].iov_len = 4;
45386 + iov[1].iov_base = key;
45387 + iov[1].iov_len = keylen;
45389 + down (&lg->lock_sender);
45391 + if ((err = xdr_enc_uint32 (enc, gulm_lock_action_req)) != 0)
45393 + if ((err = xdr_enc_raw_iov (enc, 2, iov)) != 0)
45395 + if ((err = xdr_enc_uint8 (enc, gio_lck_st_Cancel)) != 0)
45397 + if ((err = xdr_enc_flush (enc)) != 0)
45400 + up (&lg->lock_sender);
45405 + * lg_lock_action_req -
45414 + * I wonder if I should actually break this into three separate calls for
45415 + * the lvb stuff. Does it really matter?
45420 +lg_lock_action_req (gulm_interface_p lgp, uint8_t * key, uint16_t keylen,
45421 + uint8_t action, uint8_t * LVB, uint16_t LVBlen)
45423 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
45424 + struct iovec iov[2];
45428 + /* make sure it is a gulm_interface_p. */
45431 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
45434 + if (lg->lock_fd < 0 || lg->lock_enc == NULL || lg->lock_dec == NULL)
45437 + if (action != lg_lock_act_HoldLVB &&
45438 + action != lg_lock_act_UnHoldLVB && action != lg_lock_act_SyncLVB)
45441 + enc = lg->lock_enc;
45443 + iov[0].iov_base = lg->lockspace;
45444 + iov[0].iov_len = 4;
45445 + iov[1].iov_base = key;
45446 + iov[1].iov_len = keylen;
45448 + down (&lg->lock_sender);
45450 + if ((err = xdr_enc_uint32 (enc, gulm_lock_action_req)) != 0)
45452 + if ((err = xdr_enc_raw_iov (enc, 2, iov)) != 0)
45454 + if ((err = xdr_enc_uint8 (enc, action)) != 0)
45456 + if (action == gio_lck_st_SyncLVB)
45457 + if ((err = xdr_enc_raw (enc, LVB, LVBlen)) != 0)
45459 + if ((err = xdr_enc_flush (enc)) != 0)
45462 + up (&lg->lock_sender);
45467 + * lg_lock_drop_exp -
45468 + * @ulm_interface_p:
45473 + * holder is the node name of the expired holder that you want to clear.
45474 + * Only locks matching the keymask will be looked at. (most of the time you
45475 + * will just set key to a bunch of 0xff to match all) The keymask lets you
45476 + * basically subdivide your lockspace into smaller separate parts.
45477 + * (example, there is one gfs lockspace, but each filesystem gets its own
45478 + * subpart of that larger space)
45480 + * If holder is NULL, all expired holders in your lockspace will get
45486 +lg_lock_drop_exp (gulm_interface_p lgp, uint8_t * holder, uint8_t * key,
45489 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
45490 + struct iovec iov[2];
45494 + /* make sure it is a gulm_interface_p. */
45497 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
45500 + if (lg->lock_fd < 0 || lg->lock_enc == NULL || lg->lock_dec == NULL)
45503 + enc = lg->lock_enc;
45505 + iov[0].iov_base = lg->lockspace;
45506 + iov[0].iov_len = 4;
45507 + iov[1].iov_base = key;
45508 + iov[1].iov_len = (key != NULL) ? keylen : 0;
45510 + down (&lg->lock_sender);
45512 + if ((err = xdr_enc_uint32 (enc, gulm_lock_drop_exp)) != 0)
45514 + if ((err = xdr_enc_string (enc, holder)) != 0)
45516 + if ((err = xdr_enc_raw_iov (enc, 2, iov)) != 0)
45518 + if ((err = xdr_enc_flush (enc)) != 0)
45521 + up (&lg->lock_sender);
45526 + * lg_lock_status -
45533 +lg_lock_status (gulm_interface_p lgp)
45535 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
45539 + /* make sure it is a gulm_interface_p. */
45542 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
45545 + if (lg->lock_fd < 0 || lg->lock_enc == NULL || lg->lock_dec == NULL)
45548 + enc = lg->lock_enc;
45550 + down (&lg->lock_sender);
45552 + if ((err = xdr_enc_uint32 (enc, gulm_info_stats_req)) != 0)
45554 + if ((err = xdr_enc_flush (enc)) != 0)
45557 + up (&lg->lock_sender);
45560 diff -urN linux-orig/fs/gfs_locking/lock_gulm/lg_main.c linux-patched/fs/gfs_locking/lock_gulm/lg_main.c
45561 --- linux-orig/fs/gfs_locking/lock_gulm/lg_main.c 1969-12-31 18:00:00.000000000 -0600
45562 +++ linux-patched/fs/gfs_locking/lock_gulm/lg_main.c 2004-06-16 12:03:21.958894765 -0500
45564 +/******************************************************************************
45565 +*******************************************************************************
45567 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
45568 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
45570 +** This copyrighted material is made available to anyone wishing to use,
45571 +** modify, copy, or redistribute it subject to the terms and conditions
45572 +** of the GNU General Public License v.2.
45574 +*******************************************************************************
45575 +******************************************************************************/
45577 +/* This is where all of the library specific functions exist.
45578 + * Not many, but keeps things clean.
45581 +#include "lg_priv.h"
45583 +extern gulm_cm_t gulm_cm;
45586 + * lg_initialize -
45587 + * @gulm_interface_p:
45591 + * if returning an error, nothing was done to the value of gulm_interface_p
45593 + * Returns: gulm_interface_p
45596 +lg_initialize (gulm_interface_p * ret, char *cluster_name, char *service_name)
45598 + gulm_interface_t *lg;
45601 + lg = kmalloc (sizeof (gulm_interface_t), GFP_KERNEL);
45605 + memset (lg, 0, sizeof (gulm_interface_t));
45606 + lg->first_magic = LGMAGIC;
45607 + lg->last_magic = LGMAGIC;
45609 + if (cluster_name == NULL)
45610 + cluster_name = "cluster";
45611 + len = strlen (cluster_name) + 1;
45612 + lg->clusterID = kmalloc (len, GFP_KERNEL);
45613 + if (lg->clusterID == NULL) {
45617 + memcpy (lg->clusterID, cluster_name, len);
45619 + len = strlen (service_name) + 1;
45620 + lg->service_name = kmalloc (len, GFP_KERNEL);
45621 + if (lg->service_name == NULL) {
45625 + memcpy (lg->service_name, service_name, len);
45627 + /* set up flutter bufs. */
45628 + lg->cfba_len = 64;
45629 + lg->cfba = kmalloc (lg->cfba_len, GFP_KERNEL);
45630 + if (lg->cfba == NULL) {
45635 + lg->cfbb_len = 64;
45636 + lg->cfbb = kmalloc (lg->cfbb_len, GFP_KERNEL);
45637 + if (lg->cfbb == NULL) {
45642 + lg->lfba_len = 128;
45643 + lg->lfba = kmalloc (lg->lfba_len, GFP_KERNEL);
45644 + if (lg->lfba == NULL) {
45649 + lg->lfbb_len = 128;
45650 + lg->lfbb = kmalloc (lg->lfbb_len, GFP_KERNEL);
45651 + if (lg->lfbb == NULL) {
45656 + /* setup mutexes */
45657 + init_MUTEX (&lg->core_sender);
45658 + init_MUTEX (&lg->core_recver);
45659 + init_MUTEX (&lg->lock_sender);
45660 + init_MUTEX (&lg->lock_recver);
45662 + lg->core_port = 40040;
45663 + lg->lock_port = 40042;
45668 + if (lg->clusterID != NULL)
45669 + kfree (lg->clusterID);
45670 + if (lg->service_name != NULL)
45671 + kfree (lg->service_name);
45672 + if (lg->cfba != NULL)
45673 + kfree (lg->cfba);
45674 + if (lg->cfbb != NULL)
45675 + kfree (lg->cfbb);
45676 + if (lg->lfba != NULL)
45677 + kfree (lg->lfba);
45678 + if (lg->lfbb != NULL)
45679 + kfree (lg->lfbb);
45690 +lg_release (gulm_interface_p lgp)
45692 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
45695 + /* make sure it is a gulm_interface_p. */
45696 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
45699 + if (lg->service_name != NULL)
45700 + kfree (lg->service_name);
45701 + if (lg->clusterID != NULL)
45702 + kfree (lg->clusterID);
45704 + /* wonder if I should send a logout packet? */
45705 + if (lg->core_enc != NULL)
45706 + xdr_enc_release (lg->core_enc);
45707 + if (lg->core_dec != NULL)
45708 + xdr_dec_release (lg->core_dec);
45709 + xdr_close (&lg->core_fd);
45711 + if (lg->lock_enc != NULL)
45712 + xdr_enc_release (lg->lock_enc);
45713 + if (lg->lock_dec != NULL)
45714 + xdr_dec_release (lg->lock_dec);
45715 + xdr_close (&lg->lock_fd);
45717 + if (lg->cfba != NULL)
45718 + kfree (lg->cfba);
45719 + if (lg->cfbb != NULL)
45720 + kfree (lg->cfbb);
45721 + if (lg->lfba != NULL)
45722 + kfree (lg->lfba);
45723 + if (lg->lfbb != NULL)
45724 + kfree (lg->lfbb);
45730 + * lg_set_core_port -
45738 +lg_set_core_port (gulm_interface_p lgp, uint16_t new)
45740 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
45743 + /* make sure it is a gulm_interface_p. */
45744 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
45747 + lg->core_port = new;
45752 + * lg_set_ltpx_port -
45760 +lg_set_lock_port (gulm_interface_p lgp, uint16_t new)
45762 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
45765 + /* make sure it is a gulm_interface_p. */
45766 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
45769 + lg->lock_port = new;
45773 diff -urN linux-orig/fs/gfs_locking/lock_gulm/lg_priv.h linux-patched/fs/gfs_locking/lock_gulm/lg_priv.h
45774 --- linux-orig/fs/gfs_locking/lock_gulm/lg_priv.h 1969-12-31 18:00:00.000000000 -0600
45775 +++ linux-patched/fs/gfs_locking/lock_gulm/lg_priv.h 2004-06-16 12:03:21.958894765 -0500
45777 +/******************************************************************************
45778 +*******************************************************************************
45780 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
45781 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
45783 +** This copyrighted material is made available to anyone wishing to use,
45784 +** modify, copy, or redistribute it subject to the terms and conditions
45785 +** of the GNU General Public License v.2.
45787 +*******************************************************************************
45788 +******************************************************************************/
45790 +#ifndef __lg_priv_h__
45791 +#define __lg_priv_h__
45792 +/* private details that we don't want to give the users of this lib access
45797 +#include <linux/kernel.h>
45798 +#include <linux/sched.h>
45799 +#define __KERNEL_SYSCALLS__
45800 +#include <linux/unistd.h>
45801 +#endif /*__linux__*/
45804 +#include "gio_wiretypes.h"
45805 +#include "libgulm.h"
45807 +#define LGMAGIC (0x474d4354)
45809 +struct gulm_interface_s {
45810 + /* since we've masked this to a void* to the users, it is a nice safety
45811 + * net to put a little magic in here so we know things stay good.
45813 + uint32_t first_magic;
45815 + /* WHAT IS YOUR NAME?!? */
45816 + char *service_name;
45820 + uint16_t core_port;
45821 + xdr_socket core_fd;
45822 + xdr_enc_t *core_enc;
45823 + xdr_dec_t *core_dec;
45824 + struct semaphore core_sender;
45825 + struct semaphore core_recver;
45828 + uint16_t lock_port;
45829 + xdr_socket lock_fd;
45830 + xdr_enc_t *lock_enc;
45831 + xdr_dec_t *lock_dec;
45832 + struct semaphore lock_sender;
45833 + struct semaphore lock_recver;
45835 + uint8_t lockspace[4];
45837 + /* in the message recver func, we read data into these buffers and pass
45838 + * them to the callback function. This way we avoid doing mallocs and
45839 + * frees on every callback.
45841 + uint16_t cfba_len;
45843 + uint16_t cfbb_len;
45845 + uint16_t lfba_len;
45847 + uint16_t lfbb_len;
45850 + uint32_t last_magic;
45852 +typedef struct gulm_interface_s gulm_interface_t;
45862 +#endif /*__lg_priv_h__*/
45863 diff -urN linux-orig/fs/gfs_locking/lock_gulm/libgulm.h linux-patched/fs/gfs_locking/lock_gulm/libgulm.h
45864 --- linux-orig/fs/gfs_locking/lock_gulm/libgulm.h 1969-12-31 18:00:00.000000000 -0600
45865 +++ linux-patched/fs/gfs_locking/lock_gulm/libgulm.h 2004-06-16 12:03:21.958894765 -0500
45867 +/******************************************************************************
45868 +*******************************************************************************
45870 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
45871 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
45873 +** This copyrighted material is made available to anyone wishing to use,
45874 +** modify, copy, or redistribute it subject to the terms and conditions
45875 +** of the GNU General Public License v.2.
45877 +*******************************************************************************
45878 +******************************************************************************/
45880 +#ifndef __libgulm_h__
45881 +#define __libgulm_h__
45883 +/* bit messy, but we need this to be rather seamless in both kernel and
45884 + * userspace. and this seems the easiest way to do it.
45888 +#include <linux/in6.h>
45889 +typedef struct socket *lg_socket;
45890 +#endif /*__linux__*/
45892 +typedef void *gulm_interface_p;
45894 +/* mallocs the interface structure.
45896 +int lg_initialize (gulm_interface_p *, char *cluster_name, char *service_name);
45899 +void lg_release (gulm_interface_p);
45901 +/* Determines where we are with an itemlist callback */
45902 +typedef enum { lglcb_start, lglcb_item, lglcb_stop } lglcb_t;
45904 +/****** Core specifics ******/
45906 +/* leaving a callback pointer as NULL, will cause that message type to
45908 +typedef struct lg_core_callbacks_s {
45909 + int (*login_reply) (void *misc, uint64_t gen, uint32_t error,
45910 + uint32_t rank, uint8_t corestate);
45911 + int (*logout_reply) (void *misc);
45912 + int (*nodelist) (void *misc, lglcb_t type, char *name,
45913 + struct in6_addr * ip, uint8_t state);
45914 + int (*statechange) (void *misc, uint8_t corestate,
45915 + struct in6_addr * masterip, char *mastername);
45916 + int (*nodechange) (void *misc, char *nodename,
45917 + struct in6_addr * nodeip, uint8_t nodestate);
45918 + int (*service_list) (void *misc, lglcb_t type, char *service);
45919 + int (*status) (void *misc, lglcb_t type, char *key, char *value);
45920 + int (*error) (void *misc, uint32_t err);
45921 +} lg_core_callbacks_t;
45923 +/* this will trigger a callback from gulm_core_callbacks_t
45924 + * handles one message! Either stick this inside of a thread,
45925 + * or in a poll()/select() loop using the function below.
45926 + * This will block until there is a message sent from core.
45928 +int lg_core_handle_messages (gulm_interface_p, lg_core_callbacks_t *,
45931 +/* this returns the filedescriptor that the library is using to
45932 + * communicate with the core. This is only for using in a poll()
45933 + * or select() call to avoid having the gulm_core_handle_messages()
45936 +lg_socket lg_core_selector (gulm_interface_p);
45938 +/* Queue requests. */
45939 +int lg_core_login (gulm_interface_p, int important);
45940 +int lg_core_logout (gulm_interface_p);
45941 +int lg_core_nodeinfo (gulm_interface_p, char *nodename);
45942 +int lg_core_nodelist (gulm_interface_p);
45943 +int lg_core_servicelist (gulm_interface_p);
45944 +int lg_core_corestate (gulm_interface_p);
45946 +/* for completeness mostly. */
45947 +int lg_core_shutdown (gulm_interface_p);
45948 +int lg_core_forceexpire (gulm_interface_p, char *node_name);
45949 +int lg_core_forcepending (gulm_interface_p);
45951 +int lg_core_status (gulm_interface_p);
45954 + * First three are actual states, as well as changes. Last is only a node
45955 + * change message.
45957 +#define lg_core_Logged_in (0x05)
45958 +#define lg_core_Logged_out (0x06)
45959 +#define lg_core_Expired (0x07)
45960 +#define lg_core_Fenced (0x08)
45962 +#define lg_core_Slave (0x01)
45963 +#define lg_core_Master (0x02)
45964 +#define lg_core_Pending (0x03)
45965 +#define lg_core_Arbitrating (0x04)
45966 +#define lg_core_Client (0x06)
45968 +/****** lock space specifics *****/
45969 +/* note that this library masks out the lock table separation.
45972 +typedef struct lg_lockspace_callbacks_s {
45973 + int (*login_reply) (void *misc, uint32_t error, uint8_t which);
45974 + int (*logout_reply) (void *misc);
45975 + int (*lock_state) (void *misc, uint8_t * key, uint16_t keylen,
45976 + uint8_t state, uint32_t flags, uint32_t error,
45977 + uint8_t * LVB, uint16_t LVBlen);
45978 + int (*lock_action) (void *misc, uint8_t * key, uint16_t keylen,
45979 + uint8_t action, uint32_t error);
45980 + int (*cancel_reply) (void *misc, uint8_t * key, uint16_t keylen,
45982 + int (*drop_lock_req) (void *misc, uint8_t * key, uint16_t keylen,
45984 + int (*drop_all) (void *misc);
45985 + int (*status) (void *misc, lglcb_t type, char *key, char *value);
45986 + int (*error) (void *misc, uint32_t err);
45987 +} lg_lockspace_callbacks_t;
45989 +/* Like the core handle messages function, but for the lockspace.
45990 + * Handles one message, blocks.
45993 +int lg_lock_handle_messages (gulm_interface_p, lg_lockspace_callbacks_t *,
45996 +/* this returns the filedescriptor that the library is using to
45997 + * communicate with the ltpx. This is only for using in a poll()
45998 + * or select() call to avoid having the gulm_lock_handle_messages()
46001 +lg_socket lg_lock_selector (gulm_interface_p);
46003 +/* Lockspace request calls */
46004 +int lg_lock_login (gulm_interface_p, uint8_t lockspace[4]);
46005 +int lg_lock_logout (gulm_interface_p);
46006 +int lg_lock_state_req (gulm_interface_p, uint8_t * key, uint16_t keylen,
46007 + uint8_t state, uint32_t flags, uint8_t * LVB,
46008 + uint16_t LVBlen);
46009 +int lg_lock_cancel_req (gulm_interface_p, uint8_t * key, uint16_t keylen);
46010 +int lg_lock_action_req (gulm_interface_p, uint8_t * key,
46011 + uint16_t keylen, uint8_t action,
46012 + uint8_t * LVB, uint16_t LVBlen);
46013 +int lg_lock_drop_exp (gulm_interface_p, uint8_t * holder,
46014 + uint8_t * keymask, uint16_t kmlen);
46015 +int lg_lock_status (gulm_interface_p);
46017 +/* state requests */
46018 +#define lg_lock_state_Unlock (0x00)
46019 +#define lg_lock_state_Exclusive (0x01)
46020 +#define lg_lock_state_Deferred (0x02)
46021 +#define lg_lock_state_Shared (0x03)
46024 +#define lg_lock_act_HoldLVB (0x0b)
46025 +#define lg_lock_act_UnHoldLVB (0x0c)
46026 +#define lg_lock_act_SyncLVB (0x0d)
46029 +#define lg_lock_flag_DoCB (0x00000001)
46030 +#define lg_lock_flag_Try (0x00000002)
46031 +#define lg_lock_flag_Any (0x00000004)
46032 +#define lg_lock_flag_IgnoreExp (0x00000008)
46033 +#define lg_lock_flag_Cachable (0x00000020)
46034 +#define lg_lock_flag_Piority (0x00000040)
46036 +/* These are the possible values that can be in the error fields. */
46037 +#define lg_err_Ok (0)
46038 +#define lg_err_BadLogin (1001)
46039 +#define lg_err_BadCluster (1003)
46040 +#define lg_err_BadConfig (1004)
46041 +#define lg_err_BadGeneration (1005)
46042 +#define lg_err_BadWireProto (1019)
46044 +#define lg_err_NotAllowed (1006)
46045 +#define lg_err_Unknown_Cs (1007)
46046 +#define lg_err_BadStateChg (1008)
46047 +#define lg_err_MemoryIssues (1009)
46049 +#define lg_err_TryFailed (1011)
46050 +#define lg_err_AlreadyPend (1013)
46051 +#define lg_err_Canceled (1015)
46053 +#define lg_err_NoSuchFS (1016)
46054 +#define lg_err_NoSuchJID (1017)
46055 +#define lg_err_NoSuchName (1018)
46057 +#endif /*__libgulm_h__*/
46058 diff -urN linux-orig/fs/gfs_locking/lock_gulm/linux_gulm_main.c linux-patched/fs/gfs_locking/lock_gulm/linux_gulm_main.c
46059 --- linux-orig/fs/gfs_locking/lock_gulm/linux_gulm_main.c 1969-12-31 18:00:00.000000000 -0600
46060 +++ linux-patched/fs/gfs_locking/lock_gulm/linux_gulm_main.c 2004-06-16 12:03:21.958894765 -0500
46062 +/******************************************************************************
46063 +*******************************************************************************
46065 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
46066 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
46068 +** This copyrighted material is made available to anyone wishing to use,
46069 +** modify, copy, or redistribute it subject to the terms and conditions
46070 +** of the GNU General Public License v.2.
46072 +*******************************************************************************
46073 +******************************************************************************/
46075 +#define EXPORT_SYMTAB
46076 +#define WANT_DEBUG_NAMES
46077 +#define WANT_GMALLOC_NAMES
46081 +#include <linux/init.h>
46084 +#include "gulm_procinfo.h"
46086 +MODULE_DESCRIPTION ("Grand Unified Locking Module " GULM_RELEASE_NAME);
46087 +MODULE_AUTHOR ("Red Hat, Inc.");
46088 +MODULE_LICENSE ("GPL");
46090 +extern gulm_cm_t gulm_cm;
46093 + * init_gulm - Initialize the gulm module
46095 + * Returns: 0 on success, -EXXX on failure
46102 + memset (&gulm_cm, 0, sizeof (gulm_cm_t));
46103 + gulm_cm.loaded = FALSE;
46104 + gulm_cm.hookup = NULL;
46106 + /* register with the lm layers. */
46107 + error = lm_register_proto (&gulm_ops);
46111 + error = init_proc_dir ();
46112 + if (error != 0) {
46118 + printk ("Gulm %s (built %s %s) installed\n",
46119 + GULM_RELEASE_NAME, __DATE__, __TIME__);
46124 + lm_unregister_proto (&gulm_ops);
46131 + * exit_gulm - cleanup the gulm module
46138 + remove_proc_dir ();
46139 + lm_unregister_proto (&gulm_ops);
46142 +module_init (init_gulm);
46143 +module_exit (exit_gulm);
46145 +/* the libgulm.h interface. */
46146 +EXPORT_SYMBOL (lg_initialize);
46147 +EXPORT_SYMBOL (lg_release);
46149 +EXPORT_SYMBOL (lg_core_handle_messages);
46150 +EXPORT_SYMBOL (lg_core_selector);
46151 +EXPORT_SYMBOL (lg_core_login);
46152 +EXPORT_SYMBOL (lg_core_logout);
46153 +EXPORT_SYMBOL (lg_core_nodeinfo);
46154 +EXPORT_SYMBOL (lg_core_nodelist);
46155 +EXPORT_SYMBOL (lg_core_servicelist);
46156 +EXPORT_SYMBOL (lg_core_corestate);
46157 +EXPORT_SYMBOL (lg_core_shutdown);
46158 +EXPORT_SYMBOL (lg_core_forceexpire);
46159 +EXPORT_SYMBOL (lg_core_forcepending);
46160 +EXPORT_SYMBOL (lg_core_status);
46162 +EXPORT_SYMBOL (lg_lock_handle_messages);
46163 +EXPORT_SYMBOL (lg_lock_selector);
46164 +EXPORT_SYMBOL (lg_lock_login);
46165 +EXPORT_SYMBOL (lg_lock_logout);
46166 +EXPORT_SYMBOL (lg_lock_state_req);
46167 +EXPORT_SYMBOL (lg_lock_cancel_req);
46168 +EXPORT_SYMBOL (lg_lock_action_req);
46169 +EXPORT_SYMBOL (lg_lock_drop_exp);
46170 +EXPORT_SYMBOL (lg_lock_status);
46171 diff -urN linux-orig/fs/gfs_locking/lock_gulm/load_info.c linux-patched/fs/gfs_locking/lock_gulm/load_info.c
46172 --- linux-orig/fs/gfs_locking/lock_gulm/load_info.c 1969-12-31 18:00:00.000000000 -0600
46173 +++ linux-patched/fs/gfs_locking/lock_gulm/load_info.c 2004-06-16 12:03:21.958894765 -0500
46175 +/******************************************************************************
46176 +*******************************************************************************
46178 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
46179 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
46181 +** This copyrighted material is made available to anyone wishing to use,
46182 +** modify, copy, or redistribute it subject to the terms and conditions
46183 +** of the GNU General Public License v.2.
46185 +*******************************************************************************
46186 +******************************************************************************/
46190 +#include <linux/kernel.h>
46191 +#include <linux/sched.h>
46192 +#define __KERNEL_SYSCALLS__
46193 +#include <linux/unistd.h>
46195 +#include <linux/utsname.h> /* for extern system_utsname */
46198 +#include "utils_verb_flags.h"
46200 +gulm_cm_t gulm_cm;
46209 + lock_table_t *lt = &gulm_cm.ltpx;
46211 + INIT_LIST_HEAD (<->to_be_sent);
46212 + spin_lock_init (<->queue_sender);
46213 + init_waitqueue_head (<->send_wchan);
46214 + lt->magic_one = 0xAAAAAAAA;
46215 + init_MUTEX (<->sender);
46216 + init_completion (<->startup);
46217 + atomic_set (<->locks_pending, 0);
46218 + lt->hashbuckets = 8191;
46219 + lt->hshlk = kmalloc (sizeof (spinlock_t) * lt->hashbuckets, GFP_KERNEL);
46220 + if (lt->hshlk == NULL)
46223 + kmalloc (sizeof (struct list_head) * lt->hashbuckets, GFP_KERNEL);
46224 + if (lt->lkhsh == NULL) {
46225 + kfree (lt->hshlk);
46228 + for (j = 0; j < lt->hashbuckets; j++) {
46229 + spin_lock_init (<->hshlk[j]);
46230 + INIT_LIST_HEAD (<->lkhsh[j]);
46237 + * @hostdata: < optionally override the name of this node.
46242 +load_info (char *hostdata)
46246 + if (gulm_cm.loaded)
46249 + gulm_cm.verbosity = 0;
46250 + if (hostdata != NULL && strlen (hostdata) > 0) {
46251 + strncpy (gulm_cm.myName, hostdata, 64);
46253 + strncpy (gulm_cm.myName, system_utsname.nodename, 64);
46255 + gulm_cm.myName[63] = '\0';
46257 + /* breaking away from ccs. just hardcoding defaults here.
46258 + * Noone really used these anyways and if ppl want them badly, we'll
46259 + * find another way to set them. (modprobe options for example.)
46261 + gulm_cm.handler_threads = 2;
46262 + set_verbosity ("Default", &gulm_cm.verbosity);
46266 + gulm_cm.loaded = TRUE;
46270 +/* vim: set ai cin noet sw=8 ts=8 : */
46271 diff -urN linux-orig/fs/gfs_locking/lock_gulm/load_info.h linux-patched/fs/gfs_locking/lock_gulm/load_info.h
46272 --- linux-orig/fs/gfs_locking/lock_gulm/load_info.h 1969-12-31 18:00:00.000000000 -0600
46273 +++ linux-patched/fs/gfs_locking/lock_gulm/load_info.h 2004-06-16 12:03:21.958894765 -0500
46275 +/******************************************************************************
46276 +*******************************************************************************
46278 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
46279 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
46281 +** This copyrighted material is made available to anyone wishing to use,
46282 +** modify, copy, or redistribute it subject to the terms and conditions
46283 +** of the GNU General Public License v.2.
46285 +*******************************************************************************
46286 +******************************************************************************/
46288 +#ifndef __load_info_h__
46289 +#define __load_info_h__
46290 +int load_info (char *);
46291 +#endif /*__load_info_h__*/
46292 diff -urN linux-orig/fs/gfs_locking/lock_gulm/util.c linux-patched/fs/gfs_locking/lock_gulm/util.c
46293 --- linux-orig/fs/gfs_locking/lock_gulm/util.c 1969-12-31 18:00:00.000000000 -0600
46294 +++ linux-patched/fs/gfs_locking/lock_gulm/util.c 2004-06-16 12:03:21.958894765 -0500
46296 +/******************************************************************************
46297 +*******************************************************************************
46299 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
46300 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
46302 +** This copyrighted material is made available to anyone wishing to use,
46303 +** modify, copy, or redistribute it subject to the terms and conditions
46304 +** of the GNU General Public License v.2.
46306 +*******************************************************************************
46307 +******************************************************************************/
46309 +#include <linux/kernel.h>
46310 +#include <linux/types.h>
46311 +#include <linux/string.h>
46312 +#include "utils_crc.h"
46326 + while ('0' <= *c && *c <= '9') {
46327 + x = x * 10 + (*c - '0');
46343 +inet_aton (char *ascii, uint32_t * ip)
46350 + for (x = 0; x < 4; x++) {
46351 + value = atoi (ascii);
46355 + *ip = (*ip << 8) | value;
46358 + for (; *ascii != '.' && *ascii != '\0'; ascii++) {
46359 + if (*ascii < '0' || *ascii > '9') {
46360 + /* not a number. stop */
46364 + if (*ascii == '\0')
46382 +inet_ntoa (uint32_t ip, char *buf)
46389 + for (i = 3; i >= 0; i--) {
46390 + p += sprintf (p, "%d", (ip >> (8 * i)) & 0xFF);
46397 +/* public functions */
46398 +#define hash_init_val 0x6d696b65
46400 +uint32_t __inline__
46401 +hash_lock_key (uint8_t * in, uint8_t len)
46402 +{ /* other hash function was to variable */
46403 + return crc32 (in, len, hash_init_val);
46405 diff -urN linux-orig/fs/gfs_locking/lock_gulm/util.h linux-patched/fs/gfs_locking/lock_gulm/util.h
46406 --- linux-orig/fs/gfs_locking/lock_gulm/util.h 1969-12-31 18:00:00.000000000 -0600
46407 +++ linux-patched/fs/gfs_locking/lock_gulm/util.h 2004-06-16 12:03:21.959894533 -0500
46409 +/******************************************************************************
46410 +*******************************************************************************
46412 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
46413 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
46415 +** This copyrighted material is made available to anyone wishing to use,
46416 +** modify, copy, or redistribute it subject to the terms and conditions
46417 +** of the GNU General Public License v.2.
46419 +*******************************************************************************
46420 +******************************************************************************/
46422 +#ifndef __UTIL_DOT_H__
46423 +#define __UTIL_DOT_H__
46425 +int atoi (char *c);
46426 +int inet_aton (char *ascii, uint32_t * ip);
46427 +void inet_ntoa (uint32_t ip, char *buf);
46428 +void dump_buffer (void *buf, int len);
46430 +uint32_t __inline__ hash_lock_key (uint8_t * in, uint8_t len);
46431 +uint8_t __inline__ fourtoone (uint32_t);
46433 +__inline__ int testbit (uint16_t bit, uint8_t * set);
46434 +__inline__ void setbit (uint16_t bit, uint8_t * set);
46435 +__inline__ void clearbit (uint16_t bit, uint8_t * set);
46437 +#endif /* __UTIL_DOT_H__ */
46438 diff -urN linux-orig/fs/gfs_locking/lock_gulm/utils_crc.c linux-patched/fs/gfs_locking/lock_gulm/utils_crc.c
46439 --- linux-orig/fs/gfs_locking/lock_gulm/utils_crc.c 1969-12-31 18:00:00.000000000 -0600
46440 +++ linux-patched/fs/gfs_locking/lock_gulm/utils_crc.c 2004-06-16 12:03:21.959894533 -0500
46442 +/******************************************************************************
46443 +*******************************************************************************
46445 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
46446 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
46448 +** This copyrighted material is made available to anyone wishing to use,
46449 +** modify, copy, or redistribute it subject to the terms and conditions
46450 +** of the GNU General Public License v.2.
46452 +*******************************************************************************
46453 +******************************************************************************/
46455 +#include <linux/types.h>
46457 +static const uint32_t crc_32_tab[] = {
46458 + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
46459 + 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
46460 + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
46461 + 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
46462 + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
46463 + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
46464 + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c,
46465 + 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
46466 + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
46467 + 0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
46468 + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106,
46469 + 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
46470 + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
46471 + 0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
46472 + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
46473 + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
46474 + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
46475 + 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
46476 + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
46477 + 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
46478 + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
46479 + 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
46480 + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
46481 + 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
46482 + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
46483 + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
46484 + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
46485 + 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
46486 + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
46487 + 0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
46488 + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
46489 + 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
46490 + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
46491 + 0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
46492 + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
46493 + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
46494 + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
46495 + 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
46496 + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
46497 + 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
46498 + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
46499 + 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
46500 + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
46504 + * crc32 - hash an array of data
46505 + * @data: the data to be hashed
46506 + * @len: the length of data to be hashed
46508 + * completely copied from GFS/src/fs.c
46510 + * Take some data and convert it to a 32-bit hash.
46512 + * The hash function is a 32-bit CRC of the data. The algorithm uses
46513 + * the crc_32_tab table above.
46515 + * This may not be the fastest hash function, but it does a fair bit better
46516 + * at providing uniform results than the others I've looked at. That's
46517 + * really important for efficient directories.
46519 + * Returns: the hash
46523 +crc32 (const char *data, int len, uint32_t init)
46525 + uint32_t hash = init;
46527 + for (; len--; data++)
46528 + hash = crc_32_tab[(hash ^ *data) & 0xFF] ^ (hash >> 8);
46534 diff -urN linux-orig/fs/gfs_locking/lock_gulm/utils_crc.h linux-patched/fs/gfs_locking/lock_gulm/utils_crc.h
46535 --- linux-orig/fs/gfs_locking/lock_gulm/utils_crc.h 1969-12-31 18:00:00.000000000 -0600
46536 +++ linux-patched/fs/gfs_locking/lock_gulm/utils_crc.h 2004-06-16 12:03:21.959894533 -0500
46538 +/******************************************************************************
46539 +*******************************************************************************
46541 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
46542 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
46544 +** This copyrighted material is made available to anyone wishing to use,
46545 +** modify, copy, or redistribute it subject to the terms and conditions
46546 +** of the GNU General Public License v.2.
46548 +*******************************************************************************
46549 +******************************************************************************/
46551 +#ifndef __utils_crc_h__
46552 +#define __utils_crc_h__
46553 +uint32_t crc32 (const char *data, int len, uint32_t init);
46554 +#endif /*__utils_crc_h__*/
46555 diff -urN linux-orig/fs/gfs_locking/lock_gulm/utils_tostr.c linux-patched/fs/gfs_locking/lock_gulm/utils_tostr.c
46556 --- linux-orig/fs/gfs_locking/lock_gulm/utils_tostr.c 1969-12-31 18:00:00.000000000 -0600
46557 +++ linux-patched/fs/gfs_locking/lock_gulm/utils_tostr.c 2004-06-16 12:03:21.959894533 -0500
46559 +/******************************************************************************
46560 +*******************************************************************************
46562 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
46563 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
46565 +** This copyrighted material is made available to anyone wishing to use,
46566 +** modify, copy, or redistribute it subject to the terms and conditions
46567 +** of the GNU General Public License v.2.
46569 +*******************************************************************************
46570 +******************************************************************************/
46572 +#include "gio_wiretypes.h"
46575 +gio_Err_to_str (int x)
46577 + char *t = "Unknown GULM Err";
46583 + case gio_Err_BadLogin:
46586 + case gio_Err_BadCluster:
46587 + t = "Bad Cluster ID";
46589 + case gio_Err_BadConfig:
46590 + t = "Incompatible configurations";
46592 + case gio_Err_BadGeneration:
46593 + t = "Bad Generation ID";
46595 + case gio_Err_BadWireProto:
46596 + t = "Bad Wire Protocol Version";
46599 + case gio_Err_NotAllowed:
46600 + t = "Not Allowed";
46602 + case gio_Err_Unknown_Cs:
46603 + t = "Uknown Client";
46605 + case gio_Err_BadStateChg:
46606 + t = "Bad State Change";
46608 + case gio_Err_MemoryIssues:
46609 + t = "Memory Problems";
46612 + case gio_Err_PushQu:
46613 + t = "Push Queue";
46615 + case gio_Err_TryFailed:
46616 + t = "Try Failed";
46618 + case gio_Err_AlreadyPend:
46619 + t = "Request Already Pending";
46621 + case gio_Err_Canceled:
46622 + t = "Request Canceled";
46625 + case gio_Err_NoSuchFS:
46626 + t = "No Such Filesystem";
46628 + case gio_Err_NoSuchJID:
46629 + t = "No Such JID";
46631 + case gio_Err_NoSuchName:
46632 + t = "No Such Node";
46639 +gio_mbrupdate_to_str (int x)
46641 + char *t = "Unknown Membership Update";
46643 + case gio_Mbr_Logged_in:
46646 + case gio_Mbr_Logged_out:
46647 + t = "Logged out";
46649 + case gio_Mbr_Expired:
46652 + case gio_Mbr_Killed:
46655 + case gio_Mbr_OM_lgin:
46656 + t = "Was Logged in";
46663 +gio_I_am_to_str (int x)
46666 + case gio_Mbr_ama_Slave:
46669 + case gio_Mbr_ama_Pending:
46670 + return "Pending";
46672 + case gio_Mbr_ama_Arbitrating:
46673 + return "Arbitrating";
46675 + case gio_Mbr_ama_Master:
46678 + case gio_Mbr_ama_Resource:
46679 + return "Service";
46681 + case gio_Mbr_ama_Client:
46685 + return "Unknown I_am state";
46691 +gio_license_states (int x)
46698 + return "expired";
46701 + return "invalid";
46704 + return "unknown";
46710 +gio_opcodes (int x)
46713 +#define CP(x) case (x): return #x ; break
46714 + CP (gulm_err_reply);
46716 + CP (gulm_core_login_req);
46717 + CP (gulm_core_login_rpl);
46718 + CP (gulm_core_logout_req);
46719 + CP (gulm_core_logout_rpl);
46720 + CP (gulm_core_reslgn_req);
46721 + CP (gulm_core_beat_req);
46722 + CP (gulm_core_beat_rpl);
46723 + CP (gulm_core_mbr_req);
46724 + CP (gulm_core_mbr_updt);
46725 + CP (gulm_core_mbr_lstreq);
46726 + CP (gulm_core_mbr_lstrpl);
46727 + CP (gulm_core_mbr_force);
46728 + CP (gulm_core_res_req);
46729 + CP (gulm_core_res_list);
46730 + CP (gulm_core_state_req);
46731 + CP (gulm_core_state_chgs);
46732 + CP (gulm_core_shutdown);
46733 + CP (gulm_core_forcepend);
46735 + CP (gulm_info_stats_req);
46736 + CP (gulm_info_stats_rpl);
46737 + CP (gulm_info_set_verbosity);
46738 + CP (gulm_socket_close);
46739 + CP (gulm_info_slave_list_req);
46740 + CP (gulm_info_slave_list_rpl);
46742 + CP (gulm_lock_login_req);
46743 + CP (gulm_lock_login_rpl);
46744 + CP (gulm_lock_logout_req);
46745 + CP (gulm_lock_logout_rpl);
46746 + CP (gulm_lock_state_req);
46747 + CP (gulm_lock_state_rpl);
46748 + CP (gulm_lock_state_updt);
46749 + CP (gulm_lock_action_req);
46750 + CP (gulm_lock_action_rpl);
46751 + CP (gulm_lock_action_updt);
46752 + CP (gulm_lock_update_rpl);
46753 + CP (gulm_lock_cb_state);
46754 + CP (gulm_lock_cb_dropall);
46755 + CP (gulm_lock_drop_exp);
46756 + CP (gulm_lock_dump_req);
46757 + CP (gulm_lock_dump_rpl);
46758 + CP (gulm_lock_rerunqueues);
46762 + return "Unknown Op Code";
46766 diff -urN linux-orig/fs/gfs_locking/lock_gulm/utils_tostr.h linux-patched/fs/gfs_locking/lock_gulm/utils_tostr.h
46767 --- linux-orig/fs/gfs_locking/lock_gulm/utils_tostr.h 1969-12-31 18:00:00.000000000 -0600
46768 +++ linux-patched/fs/gfs_locking/lock_gulm/utils_tostr.h 2004-06-16 12:03:21.959894533 -0500
46770 +/******************************************************************************
46771 +*******************************************************************************
46773 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
46774 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
46776 +** This copyrighted material is made available to anyone wishing to use,
46777 +** modify, copy, or redistribute it subject to the terms and conditions
46778 +** of the GNU General Public License v.2.
46780 +*******************************************************************************
46781 +******************************************************************************/
46783 +#ifndef __utils_tostr_h__
46784 +#define __utils_tostr_h__
46785 +char *gio_Err_to_str (int x);
46786 +char *gio_mbrupdate_to_str (int x);
46787 +char *gio_mbrama_to_str (int x);
46788 +char *gio_I_am_to_str (int x);
46789 +char *gio_license_states (int x);
46790 +char *gio_opcodes (int x);
46791 +#endif /*__utils_tostr_h__*/
46792 diff -urN linux-orig/fs/gfs_locking/lock_gulm/utils_verb_flags.c linux-patched/fs/gfs_locking/lock_gulm/utils_verb_flags.c
46793 --- linux-orig/fs/gfs_locking/lock_gulm/utils_verb_flags.c 1969-12-31 18:00:00.000000000 -0600
46794 +++ linux-patched/fs/gfs_locking/lock_gulm/utils_verb_flags.c 2004-06-16 12:03:21.959894533 -0500
46796 +/******************************************************************************
46797 +*******************************************************************************
46799 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
46800 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
46802 +** This copyrighted material is made available to anyone wishing to use,
46803 +** modify, copy, or redistribute it subject to the terms and conditions
46804 +** of the GNU General Public License v.2.
46806 +*******************************************************************************
46807 +******************************************************************************/
46810 +#include <linux/kernel.h>
46811 +#include <linux/sched.h>
46812 +#define __KERNEL_SYSCALLS__
46813 +#include <linux/unistd.h>
46814 +#endif /*__linux__*/
46816 +#include "gulm_log_msg_bits.h"
46818 +static __inline__ int
46819 +strncasecmp (const char *s1, const char *s2, size_t l)
46821 + char c1 = '\0', c2 = '\0';
46823 + while (*s1 && *s2 && l-- > 0) {
46827 + if (c1 >= 'A' && c1 <= 'Z')
46830 + if (c2 >= 'A' && c2 <= 'Z')
46836 + return (c1 - c2);
46839 +static int bit_array[16] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
46841 +#define BITCOUNT(x) (bit_array[x & 0x000F] + \
46842 + bit_array[(x >> 4) & 0x000F] + \
46843 + bit_array[(x >> 8) & 0x000F] + \
46844 + bit_array[(x >> 12) & 0x000F] + \
46845 + bit_array[(x >> 16) & 0x000F] + \
46846 + bit_array[(x >> 20) & 0x000F] + \
46847 + bit_array[(x >> 24) & 0x000F] + \
46848 + bit_array[(x >> 28) & 0x000F])
46853 +} verbose_flags[] = {
46855 + "Network", lgm_Network,}, {
46856 + "Network2", lgm_Network2,}, {
46857 + "Network3", lgm_Network3,}, {
46858 + "Fencing", lgm_Stomith,}, {
46859 + "Heartbeat", lgm_Heartbeat,}, {
46860 + "Locking", lgm_locking,}, {
46861 + "Forking", lgm_Forking,}, {
46862 + "JIDMap", lgm_JIDMap,}, {
46863 + "JIDUpdates", lgm_JIDUpdates,}, {
46864 + "Subscribers", lgm_Subscribers,}, {
46865 + "LockUpdates", lgm_LockUpdates,}, {
46866 + "LoginLoops", lgm_LoginLoops,}, {
46867 + "ServerState", lgm_ServerState,}, {
46868 + "Default", lgm_Network | lgm_Stomith | lgm_Forking,},
46869 +/* Since I really don't want people really doing *all* flags with all,
46870 + * there is AlmostAll, which users really get, and ReallyAll, which is all
46872 + * This is mostly due to Network3, which dumps messages on nearly
46873 + * every packet. (should actually be every packet.)
46874 + * Also drop the slave updates, since that is on every packet as well.
46879 + ~(lgm_Network3 | lgm_JIDUpdates |
46880 + lgm_LockUpdates)),}, {
46882 + lgm_ReallyAll & ~(lgm_Network3 | lgm_JIDUpdates |
46883 + lgm_LockUpdates),}, {
46884 + "ReallyAll", lgm_ReallyAll,}
46888 +add_string (char *name, size_t * cur, char *str, size_t slen)
46892 + nl = strlen (name);
46893 + if (*cur + nl > slen) {
46894 + memcpy (str + *cur, "...", 3);
46896 + str[*cur] = '\0';
46899 + memcpy (str + *cur, name, nl);
46908 + * get_verbosity_string -
46916 +get_verbosity_string (char *str, size_t slen, uint32_t verb)
46918 + int i, vlen = sizeof (verbose_flags) / sizeof (verbose_flags[0]);
46920 + int combo_match = -1, error = 0;
46922 + memset (str, 0, slen);
46923 + slen -= 4; /* leave room for dots and null */
46926 + error = add_string ("Quiet", &cur, str, slen);
46930 + /* Combo verb flag phase */
46931 + for (i = 0; i < vlen; i++) {
46932 + if (BITCOUNT (verbose_flags[i].val) > 1) {
46933 + /* check to see if this flag matches exclusively */
46934 + if ((verbose_flags[i].val ^ verb) == 0) {
46936 + add_string (verbose_flags[i].name, &cur,
46941 + if ((verbose_flags[i].val & verb) ==
46942 + verbose_flags[i].val) {
46943 + if (combo_match < 0) {
46946 + /* Compare this combo with the one in combo_match */
46947 + if (BITCOUNT (verbose_flags[i].val) >
46948 + BITCOUNT (verbose_flags
46949 + [combo_match].val)) {
46957 + /* Add the best combo to the string */
46958 + if (combo_match > -1) {
46960 + (verbose_flags[combo_match].name, &cur, str, slen) == -1) {
46966 + /* Single verb flag phase */
46967 + for (i = 0; i < vlen; i++) {
46968 + if (BITCOUNT (verbose_flags[i].val) == 1) {
46969 + if (combo_match > -1) {
46970 + if ((verbose_flags[combo_match].
46971 + val & verbose_flags[i].val) ==
46972 + verbose_flags[i].val) {
46977 + if ((verbose_flags[i].val & verb) ==
46978 + verbose_flags[i].val) {
46980 + (verbose_flags[i].name, &cur, str,
46989 + /* Clear trailing ',' */
46990 + if (str[cur - 1] == ',') {
46991 + str[cur - 1] = '\0';
46997 + * set_verbosity -
47001 + * toggle bits according to the `rules' in the str.
47002 + * str is a list of verb flags. can be prefexed with '+' or '-'
47003 + * No prefix is the same as '+' prefix
47005 + * '-' unsets bits.
47006 + * special 'clear' unsets all.
47009 +set_verbosity (char *str, uint32_t * verb)
47011 + char *token, *next;
47012 + int i, wl, tl, len = sizeof (verbose_flags) / sizeof (verbose_flags[0]);
47017 + wl = strlen (str);
47020 + for (token = str, tl = 0; tl < wl &&
47021 + token[tl] != ',' &&
47022 + token[tl] != ' ' && token[tl] != '|' && token[tl] != '\0'; tl++) ;
47023 + next = token + tl + 1;
47026 + if (token[0] == '-') {
47028 + for (i = 0; i < len; i++) {
47030 + (token, verbose_flags[i].name, tl) == 0) {
47031 + (*verb) &= ~(verbose_flags[i].val);
47034 + } else if (token[0] == '+') {
47036 + for (i = 0; i < len; i++) {
47038 + (token, verbose_flags[i].name, tl) == 0) {
47039 + (*verb) |= verbose_flags[i].val;
47043 + if (strncasecmp (token, "clear", tl) == 0) {
47046 + for (i = 0; i < len; i++) {
47048 + (token, verbose_flags[i].name,
47050 + (*verb) |= verbose_flags[i].val;
47056 + if (next >= str + wl)
47058 + for (token = next, tl = 0;
47060 + token[tl] != ',' &&
47061 + token[tl] != ' ' &&
47062 + token[tl] != '|' && token[tl] != '\0'; tl++) ;
47063 + next = token + tl + 1;
47067 diff -urN linux-orig/fs/gfs_locking/lock_gulm/utils_verb_flags.h linux-patched/fs/gfs_locking/lock_gulm/utils_verb_flags.h
47068 --- linux-orig/fs/gfs_locking/lock_gulm/utils_verb_flags.h 1969-12-31 18:00:00.000000000 -0600
47069 +++ linux-patched/fs/gfs_locking/lock_gulm/utils_verb_flags.h 2004-06-16 12:03:21.959894533 -0500
47071 +/******************************************************************************
47072 +*******************************************************************************
47074 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
47075 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
47077 +** This copyrighted material is made available to anyone wishing to use,
47078 +** modify, copy, or redistribute it subject to the terms and conditions
47079 +** of the GNU General Public License v.2.
47081 +*******************************************************************************
47082 +******************************************************************************/
47084 +#ifndef __utils_verb_flags_h__
47085 +#define __utils_verb_flags_h__
47086 +int get_verbosity_string (char *str, size_t slen, uint32_t verb);
47087 +void set_verbosity (char *str, uint32_t * verb);
47088 +#endif /*__utils_verb_flags_h__*/
47089 diff -urN linux-orig/fs/gfs_locking/lock_gulm/xdr.h linux-patched/fs/gfs_locking/lock_gulm/xdr.h
47090 --- linux-orig/fs/gfs_locking/lock_gulm/xdr.h 1969-12-31 18:00:00.000000000 -0600
47091 +++ linux-patched/fs/gfs_locking/lock_gulm/xdr.h 2004-06-16 12:03:21.959894533 -0500
47093 +/******************************************************************************
47094 +*******************************************************************************
47096 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
47097 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
47099 +** This copyrighted material is made available to anyone wishing to use,
47100 +** modify, copy, or redistribute it subject to the terms and conditions
47101 +** of the GNU General Public License v.2.
47103 +*******************************************************************************
47104 +******************************************************************************/
47106 +#ifndef __gulm_xdr_h__
47107 +#define __gulm_xdr_h__
47108 +typedef struct xdr_enc_s xdr_enc_t;
47109 +typedef struct xdr_dec_s xdr_dec_t;
47111 +/* sockets in kernel space are done a bit different than socket in
47112 + * userspace. But we need to have them appear to be the same.
47117 +#include <linux/net.h>
47118 +#include <linux/in.h>
47119 +#include <linux/in6.h>
47120 +#include <linux/socket.h>
47121 +#include <net/sock.h>
47123 +typedef struct socket *xdr_socket;
47124 +#endif /*__linux__*/
47125 +#else /*__KERNEL__*/
47126 +#include <sys/types.h>
47127 +#include <sys/uio.h>
47128 +#include <sys/socket.h>
47129 +#include <netinet/in.h>
47130 +#include <netinet/tcp.h>
47131 +#include <unistd.h>
47132 +#include <errno.h>
47133 +typedef int xdr_socket;
47134 +#endif /*__KERNEL__*/
47136 +/* start things up */
47137 +int xdr_open (xdr_socket * sk);
47138 +int xdr_connect (struct sockaddr_in6 *adr, xdr_socket sk);
47139 +void xdr_close (xdr_socket * sk);
47141 +/* deep, basic io */
47144 +size_t xdr_send (struct socket *sock, void *buf, size_t size);
47145 +size_t xdr_recv (struct socket *sock, void *buf, size_t size);
47146 +#endif /*__linux__*/
47147 +#else /*__KERNEL__*/
47148 +ssize_t xdr_recv (int fd, void *buf, size_t len);
47149 +ssize_t xdr_send (int fd, void *buf, size_t len);
47150 +#endif /*__KERNEL__*/
47152 +xdr_enc_t *xdr_enc_init (xdr_socket sk, int buffer_size);
47153 +xdr_dec_t *xdr_dec_init (xdr_socket sk, int buffer_size);
47154 +int xdr_enc_flush (xdr_enc_t * xdr);
47155 +int xdr_enc_release (xdr_enc_t * xdr); /* calls xdr_enc_flush() */
47156 +void xdr_enc_force_release (xdr_enc_t * xdr); /* doesn't call xdr_enc_flush() */
47157 +void xdr_dec_release (xdr_dec_t * xdr);
47158 +/* xdr_enc_force_release() is for when you get and error sending and you
47159 + * want to free that stuff up right away. If you use the regular release
47160 + * for enc, it will fail if it cannot send data over the filedesciptor.
47163 +/* encoders add to a stream */
47164 +int __inline__ xdr_enc_uint64 (xdr_enc_t * xdr, uint64_t i);
47165 +int __inline__ xdr_enc_uint32 (xdr_enc_t * xdr, uint32_t i);
47166 +int __inline__ xdr_enc_uint16 (xdr_enc_t * xdr, uint16_t i);
47167 +int __inline__ xdr_enc_uint8 (xdr_enc_t * xdr, uint8_t i);
47168 +int __inline__ xdr_enc_ipv6 (xdr_enc_t * enc, struct in6_addr *ip);
47169 +int xdr_enc_raw (xdr_enc_t * xdr, void *pointer, uint16_t len);
47170 +int xdr_enc_raw_iov (xdr_enc_t * xdr, int count, struct iovec *iov);
47171 +int xdr_enc_string (xdr_enc_t * xdr, uint8_t * s);
47172 +int xdr_enc_list_start (xdr_enc_t * xdr);
47173 +int xdr_enc_list_stop (xdr_enc_t * xdr);
47175 +/* decoders remove from stream */
47176 +int xdr_dec_uint64 (xdr_dec_t * xdr, uint64_t * i);
47177 +int xdr_dec_uint32 (xdr_dec_t * xdr, uint32_t * i);
47178 +int xdr_dec_uint16 (xdr_dec_t * xdr, uint16_t * i);
47179 +int xdr_dec_uint8 (xdr_dec_t * xdr, uint8_t * i);
47180 +int xdr_dec_ipv6 (xdr_dec_t * xdr, struct in6_addr *ip);
47181 +int xdr_dec_raw (xdr_dec_t * xdr, void *p, uint16_t * l); /* no malloc */
47182 +int xdr_dec_raw_m (xdr_dec_t * xdr, void **p, uint16_t * l); /* mallocs p */
47183 +int xdr_dec_raw_ag (xdr_dec_t * xdr, void **p, uint16_t * bl, uint16_t * rl);
47184 +int xdr_dec_string (xdr_dec_t * xdr, uint8_t ** strp); /* mallocs s */
47185 +int xdr_dec_string_nm (xdr_dec_t * xdr, uint8_t * strp, size_t l); /* no malloc */
47186 +int xdr_dec_string_ag (xdr_dec_t * xdr, uint8_t ** s, uint16_t * bl);
47187 +int xdr_dec_list_start (xdr_dec_t * xdr);
47188 +int xdr_dec_list_stop (xdr_dec_t * xdr);
47190 +#endif /*__gulm_xdr_h__*/
47191 diff -urN linux-orig/fs/gfs_locking/lock_gulm/xdr_base.c linux-patched/fs/gfs_locking/lock_gulm/xdr_base.c
47192 --- linux-orig/fs/gfs_locking/lock_gulm/xdr_base.c 1969-12-31 18:00:00.000000000 -0600
47193 +++ linux-patched/fs/gfs_locking/lock_gulm/xdr_base.c 2004-06-16 12:03:21.959894533 -0500
47195 +/******************************************************************************
47196 +*******************************************************************************
47198 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
47199 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
47201 +** This copyrighted material is made available to anyone wishing to use,
47202 +** modify, copy, or redistribute it subject to the terms and conditions
47203 +** of the GNU General Public License v.2.
47205 +*******************************************************************************
47206 +******************************************************************************/
47209 + * This is a bit of an abstraction layer to get this working in both kernel
47214 +#define MIN(a,b) ((a<b)?a:b)
47217 +#include <linux/kernel.h>
47218 +#include <linux/sched.h>
47219 +#include <linux/slab.h>
47220 +#include <linux/vmalloc.h>
47221 +#define __KERNEL_SYSCALLS__
47222 +#include <linux/unistd.h>
47223 +#endif /*__linux__*/
47228 + * xdr_realloc - a realloc for kernel space.
47229 + * @a: < pointer to realloc
47230 + * @nl: < desired new size
47231 + * @ol: < current old size
47233 + * Not as good as the real realloc, since it always moves memory. But good
47234 + * enough for as little as it will get used here.
47236 + * XXX this is broken.
47241 +xdr_realloc (void *a, size_t nl, size_t ol)
47245 + } else if (nl == 0) {
47248 + } else if (a == NULL && nl > 0) {
47249 + return kmalloc (nl, GFP_KERNEL);
47252 + tmp = kmalloc (nl, GFP_KERNEL);
47255 + memcpy (tmp, a, MIN (nl, ol));
47261 +typedef enum { xdr_enc, xdr_dec } xdr_type;
47263 +/* encoders have this sorta non-blocking, growing buffering stunt.
47264 + * makes them a bit different from the decoders now.
47266 +struct xdr_enc_s {
47267 + size_t default_buf_size;
47275 +/* decoders only pull a single item off of the socket at a time.
47276 + * so this is all they need.
47278 +struct xdr_dec_s {
47279 + size_t length; /* total byte length of the stream */
47280 + size_t curloc; /* current byte offset from start */
47281 + uint8_t *stream; /* start of the encoded stream. */
47286 +/* the types of data we support. */
47288 +#define XDR_NULL 0x00 /* NOT A VALID TAG!!! used in dec code. */
47289 +#define XDR_LIST_START 0x01
47290 +#define XDR_LIST_STOP 0x02
47291 +/* list is a variable length device. It is a start tag, some number of
47292 + * xdr_enc_*, then an stop tag. It's main purpose is to provide a method
47293 + * of encasing data.
47295 +#define XDR_STRING 0x04
47296 +/* string tag is followed by a uint16 which is the byte length */
47297 +#define XDR_RAW 0x05
47298 +/* raw tag is followed by a uint16 which is the byte length
47299 + * if 65535 bytes isn't enough, split your data and put multiples of these
47300 + * back to back. (idea of xdr is to avoid this twit.)
47303 +/* note, if the size of these should variate, I'm screwed. Should consider
47304 + * changing this all to the bit shift and array access to be more concrete.
47307 +#define XDR_UINT64 0x06
47308 +#define XDR_UINT32 0x07
47309 +#define XDR_UINT16 0x08
47310 +#define XDR_UINT8 0x09
47311 +/* should add signed ints */
47313 +#define XDR_IPv6 0x0a /* 16 bytes, IPv6 address */
47315 +/* any other base types?
47318 +#define XDR_DEFAULT_BUFFER_SIZE 4096
47319 +/*****************************************************************************/
47327 + * Returns: xdr_enc_t*
47330 +xdr_enc_init (xdr_socket fd, int buffer_size)
47334 + if (buffer_size <= 0)
47335 + buffer_size = XDR_DEFAULT_BUFFER_SIZE;
47337 + xdr = kmalloc (sizeof (xdr_enc_t), GFP_KERNEL);
47340 + xdr->stream = kmalloc (buffer_size, GFP_KERNEL);
47341 + if (xdr->stream == NULL) {
47346 + xdr->type = xdr_enc;
47347 + xdr->default_buf_size = buffer_size;
47348 + xdr->length = buffer_size;
47360 + * Returns: xdr_dec_t*
47363 +xdr_dec_init (xdr_socket fd, int buffer_size)
47367 + if (buffer_size <= 0)
47368 + buffer_size = XDR_DEFAULT_BUFFER_SIZE;
47370 + xdr = kmalloc (sizeof (xdr_dec_t), GFP_KERNEL);
47373 + xdr->length = buffer_size;
47375 + xdr->stream = kmalloc (buffer_size, GFP_KERNEL);
47377 + xdr->type = xdr_dec;
47378 + if (xdr->stream == NULL) {
47382 + *(xdr->stream) = XDR_NULL; /* so the first dec_call will call get_next */
47386 +/*****************************************************************************/
47388 + * xdr_enc_flush -
47394 +xdr_enc_flush (xdr_enc_t * xdr)
47399 + if (xdr->type != xdr_enc)
47401 + if (xdr->curloc == 0)
47404 + err = xdr_send (xdr->fd, xdr->stream, xdr->curloc);
47408 + return -EPROTO; /* why? */
47418 + * Free the memory, losing whatever may be there.
47421 +xdr_dec_release (xdr_dec_t * xdr)
47425 + kfree (xdr->stream);
47430 + * xdr_enc_force_release -
47433 + * Free the memory, losing whatever may be there.
47436 +xdr_enc_force_release (xdr_enc_t * xdr)
47440 + if (xdr->stream != NULL)
47441 + kfree (xdr->stream);
47446 + * xdr_enc_release -
47449 + * Free things up, trying to send any possible leftover data first.
47454 +xdr_enc_release (xdr_enc_t * xdr)
47459 + if ((e = xdr_enc_flush (xdr)) != 0)
47461 + xdr_enc_force_release (xdr);
47465 +/*****************************************************************************/
47471 + * each single encoded call needs to fit within a buffer. So we make sure
47472 + * the buffer is big enough.
47474 + * If the buffer is big enough, but just doesn't have room, we send the
47475 + * data in the buffer, emptying it, first.
47480 +grow_stream (xdr_enc_t * enc, size_t len)
47485 + /* buffer must be big enough for one type entry. */
47486 + if (len > enc->length) {
47487 + c = xdr_realloc (enc->stream, len, enc->length);
47491 + enc->length = len;
47494 + /* if there isn't room on the end of this chunk,
47495 + * try sending what we've got.
47497 + if (enc->curloc + len > enc->length) {
47498 + err = xdr_enc_flush (enc);
47500 + /* error, better pass this up. */
47519 +append_bytes (xdr_enc_t * xdr, uint8_t xdr_type, void *bytes, size_t len)
47524 + if (xdr->type != xdr_enc)
47527 + /* len + 1; need the one byte for the type code. */
47528 + if ((e = grow_stream (xdr, len + 1)) != 0)
47530 + *(xdr->stream + xdr->curloc) = xdr_type;
47531 + xdr->curloc += 1;
47532 + memcpy ((xdr->stream + xdr->curloc), bytes, len);
47533 + xdr->curloc += len;
47539 +xdr_enc_uint64 (xdr_enc_t * xdr, uint64_t i)
47541 + uint64_t b = cpu_to_be64 (i);
47542 + return append_bytes (xdr, XDR_UINT64, &b, sizeof (uint64_t));
47546 +xdr_enc_uint32 (xdr_enc_t * xdr, uint32_t i)
47548 + uint32_t b = cpu_to_be32 (i);
47549 + return append_bytes (xdr, XDR_UINT32, &b, sizeof (uint32_t));
47553 +xdr_enc_uint16 (xdr_enc_t * xdr, uint16_t i)
47555 + uint16_t b = cpu_to_be16 (i);
47556 + return append_bytes (xdr, XDR_UINT16, &b, sizeof (uint16_t));
47560 +xdr_enc_uint8 (xdr_enc_t * xdr, uint8_t i)
47562 + return append_bytes (xdr, XDR_UINT8, &i, sizeof (uint8_t));
47566 +xdr_enc_ipv6 (xdr_enc_t * xdr, struct in6_addr *ip)
47567 +{ /* bytes should already be in the right order. */
47568 + return append_bytes (xdr, XDR_IPv6, ip->s6_addr, 16);
47572 +xdr_enc_raw (xdr_enc_t * xdr, void *p, uint16_t len)
47577 + if ((e = grow_stream (xdr, len + 3)) != 0)
47579 + *(xdr->stream + xdr->curloc) = XDR_RAW;
47580 + xdr->curloc += 1;
47581 + (uint16_t) * ((uint16_t *) (xdr->stream + xdr->curloc)) =
47582 + cpu_to_be16 (len);
47583 + xdr->curloc += 2;
47584 + memcpy ((xdr->stream + xdr->curloc), p, len);
47585 + xdr->curloc += len;
47590 +xdr_enc_raw_iov (xdr_enc_t * xdr, int count, struct iovec *iov)
47592 + size_t total = 0;
47594 + if (xdr == NULL || count < 1 || iov == NULL)
47596 + for (i = 0; i < count; i++)
47597 + total += iov[i].iov_len;
47598 + /* make sure it fits in a uint16_t */
47599 + if (total > 0xffff)
47601 + /* grow to fit */
47602 + if ((err = grow_stream (xdr, total + 3)) != 0)
47604 + /* copy in header and size */
47605 + *(xdr->stream + xdr->curloc) = XDR_RAW;
47606 + xdr->curloc += 1;
47607 + (uint16_t) * ((uint16_t *) (xdr->stream + xdr->curloc)) =
47608 + cpu_to_be16 (total);
47609 + xdr->curloc += 2;
47610 + /* copy in all iovbufs */
47611 + for (i = 0; i < count; i++) {
47612 + if (iov[i].iov_base == NULL)
47614 + memcpy ((xdr->stream + xdr->curloc), iov[i].iov_base,
47616 + xdr->curloc += iov[i].iov_len;
47622 +xdr_enc_string (xdr_enc_t * xdr, uint8_t * s)
47630 + len = strlen (s);
47631 + if ((e = grow_stream (xdr, len + 3)) != 0)
47633 + *(xdr->stream + xdr->curloc) = XDR_STRING;
47634 + xdr->curloc += 1;
47635 + (uint16_t) * ((uint16_t *) (xdr->stream + xdr->curloc)) =
47636 + cpu_to_be16 (len);
47637 + xdr->curloc += 2;
47639 + memcpy ((xdr->stream + xdr->curloc), s, len);
47640 + xdr->curloc += len;
47646 +xdr_enc_list_start (xdr_enc_t * xdr)
47651 + if ((e = grow_stream (xdr, 1)) != 0)
47653 + *(xdr->stream + xdr->curloc) = XDR_LIST_START;
47654 + xdr->curloc += 1;
47659 +xdr_enc_list_stop (xdr_enc_t * xdr)
47664 + if ((e = grow_stream (xdr, 1)) != 0)
47666 + *(xdr->stream + xdr->curloc) = XDR_LIST_STOP;
47667 + xdr->curloc += 1;
47671 +/*****************************************************************************/
47677 + * get what ever may be next, and put it into the buffer.
47682 +get_next (xdr_dec_t * xdr)
47686 + if ((err = xdr_recv (xdr->fd, xdr->stream, 1)) < 0)
47691 + if (*(xdr->stream) == XDR_UINT64) {
47692 + len = sizeof (uint64_t);
47693 + } else if (*(xdr->stream) == XDR_UINT32) {
47694 + len = sizeof (uint32_t);
47695 + } else if (*(xdr->stream) == XDR_UINT16) {
47696 + len = sizeof (uint16_t);
47697 + } else if (*(xdr->stream) == XDR_UINT8) {
47698 + len = sizeof (uint8_t);
47699 + } else if (*(xdr->stream) == XDR_IPv6) {
47701 + } else if (*(xdr->stream) == XDR_STRING) {
47702 + if ((err = xdr_recv (xdr->fd, (xdr->stream + 1), 2)) < 0)
47706 + len = be16_to_cpu (*((uint16_t *) (xdr->stream + xdr->curloc)));
47707 + xdr->curloc += 2;
47708 + } else if (*(xdr->stream) == XDR_RAW) {
47709 + if ((err = xdr_recv (xdr->fd, (xdr->stream + 1), 2)) < 0)
47713 + len = be16_to_cpu (*((uint16_t *) (xdr->stream + xdr->curloc)));
47714 + xdr->curloc += 2;
47715 + } else if (*(xdr->stream) == XDR_LIST_START) {
47718 + } else if (*(xdr->stream) == XDR_LIST_STOP) {
47725 + /* grow buffer if need be. */
47726 + if (xdr->curloc + len > xdr->length) {
47728 + c = xdr_realloc (xdr->stream, xdr->curloc + len, xdr->length);
47732 + xdr->length = xdr->curloc + len;
47737 + xdr_recv (xdr->fd, (xdr->stream + xdr->curloc), len)) < 0)
47747 +xdr_dec_uint64 (xdr_dec_t * xdr, uint64_t * i)
47750 + if (xdr == NULL || i == NULL)
47752 + if (*(xdr->stream) == XDR_NULL) {
47753 + if ((err = get_next (xdr)) != 0)
47756 + if (*(xdr->stream) != XDR_UINT64)
47758 + *i = be64_to_cpu (*((uint64_t *) (xdr->stream + 1)));
47759 + /* read the item out, mark that */
47760 + *(xdr->stream) = XDR_NULL;
47765 +xdr_dec_uint32 (xdr_dec_t * xdr, uint32_t * i)
47768 + if (xdr == NULL || i == NULL)
47770 + if (*(xdr->stream) == XDR_NULL) {
47771 + if ((err = get_next (xdr)) != 0)
47774 + if (*(xdr->stream) != XDR_UINT32)
47776 + *i = be32_to_cpu (*((uint32_t *) (xdr->stream + 1)));
47777 + /* read the item out, mark that */
47778 + *(xdr->stream) = XDR_NULL;
47783 +xdr_dec_uint16 (xdr_dec_t * xdr, uint16_t * i)
47786 + if (xdr == NULL || i == NULL)
47788 + if (*(xdr->stream) == XDR_NULL) {
47789 + if ((err = get_next (xdr)) != 0)
47792 + if (*(xdr->stream) != XDR_UINT16)
47794 + *i = be16_to_cpu (*((uint16_t *) (xdr->stream + 1)));
47795 + /* read the item out, mark that */
47796 + *(xdr->stream) = XDR_NULL;
47801 +xdr_dec_uint8 (xdr_dec_t * xdr, uint8_t * i)
47804 + if (xdr == NULL || i == NULL)
47807 + if (*(xdr->stream) == XDR_NULL) {
47808 + if ((err = get_next (xdr)) != 0)
47811 + if (*(xdr->stream) != XDR_UINT8)
47813 + *i = *((uint8_t *) (xdr->stream + 1));
47814 + /* read the item out, mark that */
47815 + *(xdr->stream) = XDR_NULL;
47820 +xdr_dec_ipv6 (xdr_dec_t * xdr, struct in6_addr *ip)
47823 + if (xdr == NULL || ip == NULL)
47825 + if (*(xdr->stream) == XDR_NULL) {
47826 + if ((err = get_next (xdr)) != 0)
47829 + if (*(xdr->stream) != XDR_IPv6)
47831 + memcpy (ip, xdr->stream + 1, 16);
47832 + /* read the item out, mark that */
47833 + *(xdr->stream) = XDR_NULL;
47837 +/* mallocing version */
47839 +xdr_dec_raw_m (xdr_dec_t * xdr, void **p, uint16_t * l)
47845 + if (xdr == NULL || p == NULL || l == NULL)
47847 + if (*(xdr->stream) == XDR_NULL) {
47848 + if ((err = get_next (xdr)) != 0)
47851 + if (*(xdr->stream) != XDR_RAW)
47855 + len = be16_to_cpu (*((uint16_t *) (xdr->stream + xdr->curloc)));
47856 + xdr->curloc += 2;
47858 + str = kmalloc (len, GFP_KERNEL);
47861 + memcpy (str, (xdr->stream + xdr->curloc), len);
47862 + xdr->curloc += len;
47866 + /* read the item out, mark that */
47867 + *(xdr->stream) = XDR_NULL;
47871 +/* non-mallocing version */
47873 +xdr_dec_raw (xdr_dec_t * xdr, void *p, uint16_t * l)
47878 + if (xdr == NULL || p == NULL || l == NULL)
47880 + if (*(xdr->stream) == XDR_NULL) {
47881 + if ((err = get_next (xdr)) != 0)
47884 + if (*(xdr->stream) != XDR_RAW)
47888 + len = be16_to_cpu (*((uint16_t *) (xdr->stream + xdr->curloc)));
47889 + xdr->curloc += 2;
47894 + memcpy (p, (xdr->stream + xdr->curloc), len);
47895 + xdr->curloc += len;
47899 + /* read the item out, mark that */
47900 + *(xdr->stream) = XDR_NULL;
47905 + * xdr_dec_raw_ag - auto-growing version
47907 + * @p: <> pointer to buffer
47908 + * @bl: <> size of the buffer
47909 + * @rl: > size of data read from stream
47911 + * This form of xdr_dec_raw will increase the size of a pre-malloced buffer
47912 + * to fit the data it is reading. It is kind of a merger of the
47913 + * non-mallocing and mallocing versions.
47918 +xdr_dec_raw_ag (xdr_dec_t * xdr, void **p, uint16_t * bl, uint16_t * rl)
47923 + if (xdr == NULL || p == NULL || bl == NULL || rl == NULL)
47925 + if (*(xdr->stream) == XDR_NULL) {
47926 + if ((err = get_next (xdr)) != 0)
47929 + if (*(xdr->stream) != XDR_RAW)
47933 + len = be16_to_cpu (*((uint16_t *) (xdr->stream + xdr->curloc)));
47934 + xdr->curloc += 2;
47936 + if (len > *bl) { /* grow p */
47938 + temp = xdr_realloc (*p, len, *bl);
47939 + if (temp == NULL)
47945 + memcpy (*p, (xdr->stream + xdr->curloc), len);
47946 + xdr->curloc += len;
47950 + *(xdr->stream) = XDR_NULL;
47954 +/* mallocing version */
47956 +xdr_dec_string (xdr_dec_t * xdr, uint8_t ** strp)
47961 + if (xdr == NULL || strp == NULL)
47963 + if (*(xdr->stream) == XDR_NULL) {
47964 + if ((err = get_next (xdr)) != 0)
47967 + if (*(xdr->stream) != XDR_STRING)
47971 + len = be16_to_cpu (*((uint16_t *) (xdr->stream + xdr->curloc)));
47972 + xdr->curloc += 2;
47975 + str = kmalloc (len + 1, GFP_KERNEL);
47979 + memcpy (str, (xdr->stream + xdr->curloc), len);
47980 + xdr->curloc += len;
47987 + /* read the item out, mark that */
47988 + *(xdr->stream) = XDR_NULL;
47992 +/* non-mallocing version */
47994 +xdr_dec_string_nm (xdr_dec_t * xdr, uint8_t * string, size_t l)
47998 + if (xdr == NULL || string == NULL)
48000 + if (*(xdr->stream) == XDR_NULL) {
48001 + if ((err = get_next (xdr)) != 0)
48004 + if (*(xdr->stream) != XDR_STRING)
48008 + len = be16_to_cpu (*((uint16_t *) (xdr->stream + xdr->curloc)));
48009 + xdr->curloc += 2;
48012 + memcpy (string, (xdr->stream + xdr->curloc), MIN (len, l));
48014 + string[len] = '\0';
48016 + string[l - 1] = '\0';
48018 + string[0] = '\0';
48021 + /* read the item out, mark that */
48022 + *(xdr->stream) = XDR_NULL;
48027 +xdr_dec_string_ag (xdr_dec_t * xdr, uint8_t ** s, uint16_t * bl)
48031 + if (xdr == NULL || s == NULL || bl == NULL)
48033 + if (*(xdr->stream) == XDR_NULL) {
48034 + if ((err = get_next (xdr)) != 0)
48037 + if (*(xdr->stream) != XDR_STRING)
48041 + len = be16_to_cpu (*((uint16_t *) (xdr->stream + xdr->curloc)));
48042 + xdr->curloc += 2;
48044 + if (len == 0) { /* empty string */
48046 + *(xdr->stream) = XDR_NULL;
48050 + if (len >= *bl) { /* grow s */
48052 + temp = xdr_realloc (*s, len + 1, *bl);
48053 + if (temp == NULL)
48059 + memcpy (*s, (xdr->stream + xdr->curloc), len);
48060 + (*s)[len] = '\0';
48062 + *(xdr->stream) = XDR_NULL;
48067 +xdr_dec_list_start (xdr_dec_t * xdr)
48072 + if (*(xdr->stream) == XDR_NULL) {
48073 + if ((err = get_next (xdr)) != 0)
48076 + if (*(xdr->stream) != XDR_LIST_START)
48078 + /* read the item out, mark that */
48079 + *(xdr->stream) = XDR_NULL;
48084 +xdr_dec_list_stop (xdr_dec_t * xdr)
48089 + if (*(xdr->stream) == XDR_NULL) {
48090 + if ((err = get_next (xdr)) != 0)
48093 + if (*(xdr->stream) != XDR_LIST_STOP)
48095 + /* read the item out, mark that */
48096 + *(xdr->stream) = XDR_NULL;
48099 diff -urN linux-orig/fs/gfs_locking/lock_gulm/xdr_io.c linux-patched/fs/gfs_locking/lock_gulm/xdr_io.c
48100 --- linux-orig/fs/gfs_locking/lock_gulm/xdr_io.c 1969-12-31 18:00:00.000000000 -0600
48101 +++ linux-patched/fs/gfs_locking/lock_gulm/xdr_io.c 2004-06-16 12:03:21.959894533 -0500
48103 +/******************************************************************************
48104 +*******************************************************************************
48106 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
48107 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
48109 +** This copyrighted material is made available to anyone wishing to use,
48110 +** modify, copy, or redistribute it subject to the terms and conditions
48111 +** of the GNU General Public License v.2.
48113 +*******************************************************************************
48114 +******************************************************************************/
48117 + * does the lowest level of reads and writes.
48118 + * In kernel and/or userspace.
48125 +#include <linux/net.h>
48126 +#include <linux/in.h>
48127 +#include <linux/socket.h>
48128 +#include <net/sock.h>
48129 +#include "asm/uaccess.h"
48132 + * do_tfer - transfers data over a socket
48133 + * @sock: < socket
48134 + * @iov: <> iovec of buffers
48135 + * @n: < how many iovecs
48136 + * @size: < total data size to send/recv
48137 + * @dir: < send or recv
48138 + * @timeout: < how many sec to wait. 0 == forever.
48140 + * Returns: <0: Error
48141 + * >=0: Bytes transfered
48144 +do_tfer (struct socket *sock, struct iovec *iov, int n, int size, int dir)
48146 + unsigned long flags;
48150 + int rv, moved = 0;
48153 + set_fs (get_ds ());
48155 + /* XXX do I still want the signal stuff? */
47956 + spin_lock_irqsave (&current->sighand->siglock, flags);
48157 + oldset = current->blocked;
47958 + siginitsetinv (&current->blocked,
47959 + sigmask (SIGKILL) | sigmask (SIGTERM));
48160 + recalc_sigpending ();
48161 + spin_unlock_irqrestore (&current->sighand->siglock, flags);
48163 + memset (&m, 0, sizeof (struct msghdr));
48166 + m.msg_iovlen = n;
48167 + m.msg_flags = MSG_NOSIGNAL;
48170 + rv = sock_sendmsg (sock, &m, size - moved);
48172 + rv = sock_recvmsg (sock, &m, size - moved, 0);
48178 + if (moved >= size)
48181 + /* adjust iov's for next transfer */
48182 + while (iov->iov_len == 0) {
48190 + spin_lock_irqsave (&current->sighand->siglock, flags);
48191 + current->blocked = oldset;
48192 + recalc_sigpending ();
48193 + spin_unlock_irqrestore (&current->sighand->siglock, flags);
48201 +xdr_send (struct socket * sock, void *buf, size_t size)
48203 + struct iovec iov;
48206 + iov.iov_base = buf;
48207 + iov.iov_len = size;
48209 + res = do_tfer (sock, &iov, 1, size, 1);
48215 +xdr_recv (struct socket * sock, void *buf, size_t size)
48217 + struct iovec iov;
48220 + iov.iov_base = buf;
48221 + iov.iov_len = size;
48223 + res = do_tfer (sock, &iov, 1, size, 0);
48228 +#endif /*__linux__*/
48229 +#else /*__KERNEL__*/
48231 +#include <errno.h>
48232 +#include <sys/types.h>
48233 +#include <sys/socket.h>
48236 +xdr_recv (int fd, void *buf, size_t len)
48240 + while (len > 0) {
48241 + cnt = recv (fd, buf, len, 0);
48254 +xdr_send (int fd, void *buf, size_t len)
48258 + while (len > 0) {
48259 + cnt = send (fd, buf, len, 0);
48271 +#endif /*__KERNEL__*/
48272 diff -urN linux-orig/fs/gfs_locking/lock_gulm/xdr_socket.c linux-patched/fs/gfs_locking/lock_gulm/xdr_socket.c
48273 --- linux-orig/fs/gfs_locking/lock_gulm/xdr_socket.c 1969-12-31 18:00:00.000000000 -0600
48274 +++ linux-patched/fs/gfs_locking/lock_gulm/xdr_socket.c 2004-06-16 12:03:21.959894533 -0500
48276 +/******************************************************************************
48277 +*******************************************************************************
48279 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
48280 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
48282 +** This copyrighted material is made available to anyone wishing to use,
48283 +** modify, copy, or redistribute it subject to the terms and conditions
48284 +** of the GNU General Public License v.2.
48286 +*******************************************************************************
48287 +******************************************************************************/
48290 + * This file opens and closes a socket.
48291 + * In kernel and/or userspace.
48300 +xdr_open (xdr_socket * xsk)
48302 + return sock_create (AF_INET6, SOCK_STREAM, 0, xsk);
48306 +xdr_connect (struct sockaddr_in6 *adr, xdr_socket xsk)
48308 + return xsk->ops->connect (xsk,
48309 + (struct sockaddr *) adr,
48310 + sizeof (struct sockaddr_in6), 0);
48314 +xdr_close (xdr_socket * xsk)
48316 + if (*xsk == NULL)
48318 + sock_release (*xsk);
48322 +#endif /*__linux__*/
48323 +#else /*__KERNEL__*/
48326 +xdr_open (xdr_socket * xsk)
48329 + sk = socket (AF_INET6, SOCK_STREAM, 0);
48337 +xdr_connect (struct sockaddr_in6 *adr, xdr_socket xsk)
48341 + connect (xsk, (struct sockaddr *) adr,
48342 + sizeof (struct sockaddr_in6));
48349 +xdr_close (xdr_socket * xsk)
48357 +#endif /*__KERNEL__*/
48358 diff -urN linux-orig/fs/gfs_locking/lock_harness/main.c linux-patched/fs/gfs_locking/lock_harness/main.c
48359 --- linux-orig/fs/gfs_locking/lock_harness/main.c 1969-12-31 18:00:00.000000000 -0600
48360 +++ linux-patched/fs/gfs_locking/lock_harness/main.c 2004-06-16 12:03:10.006671787 -0500
48362 +/******************************************************************************
48363 +*******************************************************************************
48365 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
48366 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
48368 +** This copyrighted material is made available to anyone wishing to use,
48369 +** modify, copy, or redistribute it subject to the terms and conditions
48370 +** of the GNU General Public License v.2.
48372 +*******************************************************************************
48373 +******************************************************************************/
48375 +#include <linux/module.h>
48376 +#include <linux/init.h>
48377 +#include <linux/string.h>
48378 +#include <linux/slab.h>
48379 +#include <linux/wait.h>
48380 +#include <linux/sched.h>
48381 +#include <linux/kmod.h>
48382 +#include <linux/lm_interface.h>
48384 +#define RELEASE_NAME "<CVS>"
48386 +struct lmh_wrapper {
48387 + struct list_head lw_list;
48388 + struct lm_lockops *lw_ops;
48391 +static struct semaphore lmh_lock;
48392 +static struct list_head lmh_list;
48395 + * lm_register_proto - Register a low-level locking protocol
48396 + * @proto: the protocol definition
48398 + * Returns: 0 on success, -EXXX on failure
48402 +lm_register_proto(struct lm_lockops *proto)
48404 + struct list_head *tmp, *head;
48405 + struct lmh_wrapper *lw;
48409 + for (head = &lmh_list, tmp = head->next; tmp != head; tmp = tmp->next) {
48410 + lw = list_entry(tmp, struct lmh_wrapper, lw_list);
48412 + if (strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name) == 0) {
48414 + printk("lock_harness: protocol %s already exists\n",
48415 + proto->lm_proto_name);
48420 + lw = kmalloc(sizeof (struct lmh_wrapper), GFP_KERNEL);
48425 + memset(lw, 0, sizeof (struct lmh_wrapper));
48427 + lw->lw_ops = proto;
48428 + list_add(&lw->lw_list, &lmh_list);
48436 + * lm_unregister_proto - Unregister a low-level locking protocol
48437 + * @proto: the protocol definition
48442 +lm_unregister_proto(struct lm_lockops *proto)
48444 + struct list_head *tmp, *head;
48445 + struct lmh_wrapper *lw = NULL;
48449 + for (head = &lmh_list, tmp = head->next; tmp != head; tmp = tmp->next) {
48450 + lw = list_entry(tmp, struct lmh_wrapper, lw_list);
48452 + if (strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name) == 0) {
48453 + list_del(&lw->lw_list);
48462 + printk("lock_harness: can't unregister lock protocol %s\n",
48463 + proto->lm_proto_name);
48467 + * lm_mount - Mount a lock protocol
48468 + * @proto_name - the name of the protocol
48469 + * @table_name - the name of the lock space
48470 + * @host_data - data specific to this host
48471 + * @cb - the callback to the code using the lock module
48472 + * @fsdata - data to pass back with the callback
48473 + * @min_lvb_size - the mininum LVB size that the caller can deal with
48474 + * @lockstruct - a structure returned describing the mount
48476 + * Returns: 0 on success, -EXXX on failure
48480 +lm_mount(char *proto_name, char *table_name, char *host_data,
48481 + lm_callback_t cb, lm_fsdata_t * fsdata,
48482 + unsigned int min_lvb_size, struct lm_lockstruct *lockstruct)
48484 + struct list_head *tmp;
48485 + struct lmh_wrapper *lw = NULL;
48492 + for (tmp = lmh_list.next; tmp != &lmh_list; tmp = tmp->next) {
48493 + lw = list_entry(tmp, struct lmh_wrapper, lw_list);
48495 + if (strcmp(lw->lw_ops->lm_proto_name, proto_name) == 0)
48502 + if (!try && capable(CAP_SYS_MODULE)) {
48505 + request_module(proto_name);
48508 + printk("lock_harness: can't find protocol %s\n", proto_name);
48513 + if (!try_module_get(lw->lw_ops->lm_owner)) {
48516 + current->state = TASK_UNINTERRUPTIBLE;
48517 + schedule_timeout(HZ);
48521 + error = lw->lw_ops->lm_mount(table_name, host_data,
48522 + cb, fsdata, min_lvb_size, lockstruct);
48524 + module_put(lw->lw_ops->lm_owner);
48533 + * lm_unmount - unmount a lock module
48534 + * @lockstruct: the lockstruct passed into mount
48539 +lm_unmount(struct lm_lockstruct *lockstruct)
48542 + lockstruct->ls_ops->lm_unmount(lockstruct->ls_lockspace);
48543 + if (lockstruct->ls_ops->lm_owner)
48544 + module_put(lockstruct->ls_ops->lm_owner);
48549 + * init_lmh - Initialize the lock module harness
48551 + * Returns: 0 on success, -EXXX on failure
48557 + init_MUTEX(&lmh_lock);
48558 + INIT_LIST_HEAD(&lmh_list);
48560 + printk("Lock_Harness %s (built %s %s) installed\n",
48561 + RELEASE_NAME, __DATE__, __TIME__);
48567 + * exit_lmh - cleanup the Lock Module Harness
48569 + * Returns: 0 on success, -EXXX on failure
48577 +module_init(init_lmh);
48578 +module_exit(exit_lmh);
48580 +MODULE_DESCRIPTION("GFS Lock Module Harness " RELEASE_NAME);
48581 +MODULE_AUTHOR("Red Hat, Inc.");
48582 +MODULE_LICENSE("GPL");
48584 +EXPORT_SYMBOL_GPL(lm_register_proto);
48585 +EXPORT_SYMBOL_GPL(lm_unregister_proto);
48586 +EXPORT_SYMBOL_GPL(lm_mount);
48587 +EXPORT_SYMBOL_GPL(lm_unmount);
48588 diff -urN linux-orig/include/linux/lm_interface.h linux-patched/include/linux/lm_interface.h
48589 --- linux-orig/include/linux/lm_interface.h 1969-12-31 18:00:00.000000000 -0600
48590 +++ linux-patched/include/linux/lm_interface.h 2004-06-16 12:03:10.005672019 -0500
48592 +/******************************************************************************
48593 +*******************************************************************************
48595 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
48596 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
48598 +** This copyrighted material is made available to anyone wishing to use,
48599 +** modify, copy, or redistribute it subject to the terms and conditions
48600 +** of the GNU General Public License v.2.
48602 +*******************************************************************************
48603 +******************************************************************************/
48607 + Sooner or later, I need to put all the documentation back into this file.
48608 + In the mean time, here are some notes.
48610 + - The lock module is now responsible for STOMITHing the an expired
48611 + client before calling the callback with type LM_CB_NEED_RECOVERY.
48613 + - If mount() operation returns first == TRUE, GFS will check all the
48614 + journals. GFS itself can't/shouldn't stomith the machines, so the lock module
48615 + needs to make sure that there are no zombie machines on any of the
48616 + journals. (i.e. this should probably be on the first mount of the lock
48617 + space where all mounts by other machines are blocked.) GFS will call
48618 + others_may_mount() when the filesystem is in a consistent state.
48620 + - GFS can issue multiple simultaneous get_lock()s for the same lockname.
48621 + The lock module needs to deal with it, either by 1) building a hash table
48622 + to lookup the structures and keeping a reference count so there is only
48623 + on lm_lock_t for a given lockname. or 2) just dealing with multiple
48624 + lm_lock_t structures for a given lockname.
48628 +#ifndef __LM_INTERFACE_DOT_H__
48629 +#define __LM_INTERFACE_DOT_H__
48631 +typedef void lm_lockspace_t;
48632 +typedef void lm_lock_t;
48633 +typedef void lm_fsdata_t;
48634 +typedef void (*lm_callback_t) (lm_fsdata_t *fsdata, unsigned int type,
48637 +/* Flags for the struct lm_lockstruct->ls_flags field */
48639 +#define LM_LSFLAG_LOCAL (0x00000001)
48640 +#define LM_LSFLAG_ASYNC (0x00000002)
48644 +#define LM_TYPE_RESERVED (0x00)
48645 +#define LM_TYPE_NONDISK (0x01)
48646 +#define LM_TYPE_INODE (0x02)
48647 +#define LM_TYPE_RGRP (0x03)
48648 +#define LM_TYPE_META (0x04)
48649 +#define LM_TYPE_IOPEN (0x05)
48650 +#define LM_TYPE_FLOCK (0x06)
48651 +#define LM_TYPE_PLOCK (0x07)
48652 +#define LM_TYPE_QUOTA (0x08)
48654 +/* States passed to lock() */
48656 +#define LM_ST_UNLOCKED (0)
48657 +#define LM_ST_EXCLUSIVE (1)
48658 +#define LM_ST_DEFERRED (2)
48659 +#define LM_ST_SHARED (3)
48661 +/* Flags passed to lock() */
48663 +#define LM_FLAG_TRY (0x00000001)
48664 +#define LM_FLAG_TRY_1CB (0x00000002)
48665 +#define LM_FLAG_NOEXP (0x00000004)
48666 +#define LM_FLAG_ANY (0x00000008)
48667 +#define LM_FLAG_PRIORITY (0x00000010)
48669 +/* Flags returned by lock() */
48671 +#define LM_OUT_ST_MASK (0x00000003)
48672 +#define LM_OUT_CACHEABLE (0x00000004)
48673 +#define LM_OUT_CANCELED (0x00000008)
48674 +#define LM_OUT_NEED_E (0x00000010)
48675 +#define LM_OUT_NEED_D (0x00000020)
48676 +#define LM_OUT_NEED_S (0x00000040)
48677 +#define LM_OUT_ASYNC (0x00000080)
48678 +#define LM_OUT_LVB_INVALID (0x00000100)
48680 +/* Callback types */
48682 +#define LM_CB_NEED_E (257)
48683 +#define LM_CB_NEED_D (258)
48684 +#define LM_CB_NEED_S (259)
48685 +#define LM_CB_NEED_RECOVERY (260)
48686 +#define LM_CB_DROPLOCKS (261)
48687 +#define LM_CB_ASYNC (262)
48689 +/* Reset_exp messages */
48691 +#define LM_RD_GAVEUP (308)
48692 +#define LM_RD_SUCCESS (309)
48694 +struct lm_lockname {
48695 + uint64_t ln_number;
48696 + unsigned int ln_type;
48699 +#define lm_name_equal(name1, name2) \
48700 +(((name1)->ln_number == (name2)->ln_number) && \
48701 + ((name1)->ln_type == (name2)->ln_type)) \
48703 +struct lm_async_cb {
48704 + struct lm_lockname lc_name;
48708 +struct lm_lockstruct;
48710 +struct lm_lockops {
48711 + char lm_proto_name[256];
48713 + /* Mount/Unmount */
48715 + int (*lm_mount) (char *table_name, char *host_data,
48716 + lm_callback_t cb, lm_fsdata_t *fsdata,
48717 + unsigned int min_lvb_size,
48718 + struct lm_lockstruct *lockstruct);
48719 + void (*lm_others_may_mount) (lm_lockspace_t *lockspace);
48720 + void (*lm_unmount) (lm_lockspace_t *lockspace);
48722 + /* Lock oriented operations */
48724 + int (*lm_get_lock) (lm_lockspace_t *lockspace,
48725 + struct lm_lockname *name, lm_lock_t **lockp);
48726 + void (*lm_put_lock) (lm_lock_t *lock);
48728 + unsigned int (*lm_lock) (lm_lock_t *lock, unsigned int cur_state,
48729 + unsigned int req_state, unsigned int flags);
48730 + unsigned int (*lm_unlock) (lm_lock_t *lock, unsigned int cur_state);
48732 + void (*lm_cancel) (lm_lock_t *lock);
48734 + int (*lm_hold_lvb) (lm_lock_t *lock, char **lvbp);
48735 + void (*lm_unhold_lvb) (lm_lock_t *lock, char *lvb);
48736 + void (*lm_sync_lvb) (lm_lock_t *lock, char *lvb);
48738 + /* Posix Lock oriented operations */
48740 + int (*lm_plock_get) (lm_lockspace_t *lockspace,
48741 + struct lm_lockname *name, unsigned long owner,
48742 + uint64_t *start, uint64_t *end, int *exclusive,
48743 + unsigned long *rowner);
48745 + int (*lm_plock) (lm_lockspace_t *lockspace,
48746 + struct lm_lockname *name, unsigned long owner,
48747 + int wait, int exclusive, uint64_t start,
48750 + int (*lm_punlock) (lm_lockspace_t *lockspace,
48751 + struct lm_lockname *name, unsigned long owner,
48752 + uint64_t start, uint64_t end);
48754 + /* Client oriented operations */
48756 + void (*lm_recovery_done) (lm_lockspace_t *lockspace, unsigned int jid,
48757 + unsigned int message);
48759 + struct module *lm_owner;
48762 +struct lm_lockstruct {
48763 + unsigned int ls_jid;
48764 + unsigned int ls_first;
48765 + unsigned int ls_lvb_size;
48766 + lm_lockspace_t *ls_lockspace;
48767 + struct lm_lockops *ls_ops;
48771 +/* Bottom interface */
48773 +int lm_register_proto(struct lm_lockops *proto);
48774 +void lm_unregister_proto(struct lm_lockops *proto);
48776 +/* Top interface */
48778 +int lm_mount(char *proto_name,
48779 + char *table_name, char *host_data,
48780 + lm_callback_t cb, lm_fsdata_t *fsdata,
48781 + unsigned int min_lvb_size, struct lm_lockstruct *lockstruct);
48782 +void lm_unmount(struct lm_lockstruct *lockstruct);
48784 +#endif /* __LM_INTERFACE_DOT_H__ */
48785 diff -urN linux-orig/fs/gfs_locking/lock_nolock/main.c linux-patched/fs/gfs_locking/lock_nolock/main.c
48786 --- linux-orig/fs/gfs_locking/lock_nolock/main.c 1969-12-31 18:00:00.000000000 -0600
48787 +++ linux-patched/fs/gfs_locking/lock_nolock/main.c 2004-06-16 12:03:13.918762838 -0500
48789 +/******************************************************************************
48790 +*******************************************************************************
48792 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
48793 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
48795 +** This copyrighted material is made available to anyone wishing to use,
48796 +** modify, copy, or redistribute it subject to the terms and conditions
48797 +** of the GNU General Public License v.2.
48799 +*******************************************************************************
48800 +******************************************************************************/
48802 +#include <linux/module.h>
48803 +#include <linux/slab.h>
48804 +#include <linux/module.h>
48805 +#include <linux/init.h>
48806 +#include <linux/types.h>
48807 +#include <linux/lm_interface.h>
48809 +#define RELEASE_NAME "<CVS>"
48811 +struct nolock_lockspace {
48812 + unsigned int nl_lvb_size;
48815 +struct lm_lockops nolock_ops;
48818 + * nolock_mount - mount a nolock lockspace
48819 + * @table_name: the name of the space to mount
48820 + * @host_data: host specific data
48821 + * @cb: the callback
48822 + * @lockstruct: the structure of crap to fill in
48824 + * Returns: 0 on success, -EXXX on failure
48828 +nolock_mount(char *table_name, char *host_data,
48829 + lm_callback_t cb, lm_fsdata_t *fsdata,
48830 + unsigned int min_lvb_size, struct lm_lockstruct *lockstruct)
48833 + unsigned int jid;
48834 + struct nolock_lockspace *nl;
48836 + /* If there is a "jid=" in the hostdata, return that jid.
48837 + Otherwise, return zero. */
48839 + c = strstr(host_data, "jid=");
48844 + sscanf(c, "%u", &jid);
48847 + nl = kmalloc(sizeof(struct nolock_lockspace), GFP_KERNEL);
48851 + memset(nl, 0, sizeof(struct nolock_lockspace));
48852 + nl->nl_lvb_size = min_lvb_size;
48854 + lockstruct->ls_jid = jid;
48855 + lockstruct->ls_first = 1;
48856 + lockstruct->ls_lvb_size = min_lvb_size;
48857 + lockstruct->ls_lockspace = (lm_lockspace_t *)nl;
48858 + lockstruct->ls_ops = &nolock_ops;
48859 + lockstruct->ls_flags = LM_LSFLAG_LOCAL | LM_LSFLAG_ASYNC;
48865 + * nolock_others_may_mount - unmount a lock space
48866 + * @lockspace: the lockspace to unmount
48871 +nolock_others_may_mount(lm_lockspace_t *lockspace)
48876 + * nolock_unmount - unmount a lock space
48877 + * @lockspace: the lockspace to unmount
48882 +nolock_unmount(lm_lockspace_t *lockspace)
48884 + struct nolock_lockspace *nl = (struct nolock_lockspace *)lockspace;
48889 + * nolock_get_lock - get a lm_lock_t given a descripton of the lock
48890 + * @lockspace: the lockspace the lock lives in
48891 + * @name: the name of the lock
48892 + * @lockp: return the lm_lock_t here
48894 + * Returns: 0 on success, -EXXX on failure
48898 +nolock_get_lock(lm_lockspace_t *lockspace, struct lm_lockname *name,
48899 + lm_lock_t ** lockp)
48901 + *lockp = (lm_lock_t *)lockspace;
48906 + * nolock_put_lock - get rid of a lock structure
48907 + * @lock: the lock to throw away
48912 +nolock_put_lock(lm_lock_t *lock)
48917 + * nolock_lock - acquire a lock
48918 + * @lock: the lock to manipulate
48919 + * @cur_state: the current state
48920 + * @req_state: the requested state
48921 + * @flags: modifier flags
48923 + * Returns: A bitmap of LM_OUT_*
48926 +static unsigned int
48927 +nolock_lock(lm_lock_t *lock, unsigned int cur_state, unsigned int req_state,
48928 + unsigned int flags)
48930 + return req_state | LM_OUT_CACHEABLE;
48934 + * nolock_unlock - unlock a lock
48935 + * @lock: the lock to manipulate
48936 + * @cur_state: the current state
48941 +static unsigned int
48942 +nolock_unlock(lm_lock_t *lock, unsigned int cur_state)
48948 + * nolock_cancel - cancel a request on a lock
48949 + * @lock: the lock to cancel request for
48954 +nolock_cancel(lm_lock_t *lock)
48959 + * nolock_hold_lvb - hold on to a lock value block
48960 + * @lock: the lock the LVB is associated with
48961 + * @lvbp: return the lm_lvb_t here
48963 + * Returns: 0 on success, -EXXX on failure
48967 +nolock_hold_lvb(lm_lock_t *lock, char **lvbp)
48969 + struct nolock_lockspace *nl = (struct nolock_lockspace *)lock;
48972 + *lvbp = kmalloc(nl->nl_lvb_size, GFP_KERNEL);
48974 + memset(*lvbp, 0, nl->nl_lvb_size);
48982 + * nolock_unhold_lvb - release a LVB
48983 + * @lock: the lock the LVB is associated with
48984 + * @lvb: the lock value block
48989 +nolock_unhold_lvb(lm_lock_t *lock, char *lvb)
48995 + * nolock_sync_lvb - sync out the value of a lvb
48996 + * @lock: the lock the LVB is associated with
48997 + * @lvb: the lock value block
49002 +nolock_sync_lvb(lm_lock_t *lock, char *lvb)
49007 + * nolock_plock_get -
49008 + * @lockspace: the lockspace
49019 +nolock_plock_get(lm_lockspace_t *lockspace,
49020 + struct lm_lockname *name, unsigned long owner,
49021 + uint64_t *start, uint64_t *end, int *exclusive,
49022 + unsigned long *rowner)
49029 + * @lockspace: the lockspace
49040 +nolock_plock(lm_lockspace_t *lockspace,
49041 + struct lm_lockname *name, unsigned long owner,
49042 + int wait, int exclusive, uint64_t start,
49049 + * nolock_punlock -
49050 + * @lockspace: the lockspace
49059 +nolock_punlock(lm_lockspace_t *lockspace,
49060 + struct lm_lockname *name, unsigned long owner,
49061 + uint64_t start, uint64_t end)
49067 + * nolock_recovery_done - reset the expired locks for a given jid
49068 + * @lockspace: the lockspace
49074 +nolock_recovery_done(lm_lockspace_t *lockspace, unsigned int jid,
49075 + unsigned int message)
49079 +struct lm_lockops nolock_ops = {
49080 + .lm_proto_name = "lock_nolock",
49081 + .lm_mount = nolock_mount,
49082 + .lm_others_may_mount = nolock_others_may_mount,
49083 + .lm_unmount = nolock_unmount,
49084 + .lm_get_lock = nolock_get_lock,
49085 + .lm_put_lock = nolock_put_lock,
49086 + .lm_lock = nolock_lock,
49087 + .lm_unlock = nolock_unlock,
49088 + .lm_cancel = nolock_cancel,
49089 + .lm_hold_lvb = nolock_hold_lvb,
49090 + .lm_unhold_lvb = nolock_unhold_lvb,
49091 + .lm_sync_lvb = nolock_sync_lvb,
49092 + .lm_plock_get = nolock_plock_get,
49093 + .lm_plock = nolock_plock,
49094 + .lm_punlock = nolock_punlock,
49095 + .lm_recovery_done = nolock_recovery_done,
49096 + .lm_owner = THIS_MODULE,
49100 + * init_nolock - Initialize the nolock module
49102 + * Returns: 0 on success, -EXXX on failure
49110 + error = lm_register_proto(&nolock_ops);
49112 + printk("lock_nolock: can't register protocol: %d\n", error);
49116 + printk("Lock_Nolock %s (built %s %s) installed\n",
49117 + RELEASE_NAME, __DATE__, __TIME__);
49123 + * exit_nolock - cleanup the nolock module
49130 + lm_unregister_proto(&nolock_ops);
49133 +module_init(init_nolock);
49134 +module_exit(exit_nolock);
49136 +MODULE_DESCRIPTION("GFS Nolock Locking Module " RELEASE_NAME);
49137 +MODULE_AUTHOR("Red Hat, Inc.");
49138 +MODULE_LICENSE("GPL");