# Make the VFS call down into the FS on flock calls.

diff -urN -p linux-2.6.7/fs/locks.c linux/fs/locks.c
--- linux-2.6.7/fs/locks.c	2004-06-16 12:00:44.567463632 -0500
+++ linux/fs/locks.c	2004-06-16 12:01:58.844205936 -0500
@@ -1294,6 +1294,27 @@ out_unlock:
 	return error;
 }
 
+/*
+ * Wrapper function around the file_operations lock routine when called for
+ * flock().  The lock routine is called for both fcntl() and flock(), so
+ * the flock parameters must be translated to an equivalent fcntl()-like
+ * lock.
+ *
+ * Don't use locks_alloc_lock() (or flock_make_lock()) here, as
+ * this is just a temporary lock structure.  We especially don't
+ * want to fail because we couldn't allocate a lock structure if
+ * this is an unlock operation.
+ */
+int flock_fs_file(struct file *filp, int type, int wait)
+{
+	struct file_lock fl = { .fl_flags = FL_FLOCK,
+				.fl_type = type };
+
+	return filp->f_op->lock(filp,
+				(wait) ? F_SETLKW : F_SETLK,
+				&fl);
+}
+
 /**
  *	sys_flock: - flock() system call.
  *	@fd: the file descriptor to lock.
@@ -1342,6 +1363,50 @@ asmlinkage long sys_flock(unsigned int f
 	if (error)
 		goto out_free;
 
+	/*
+	 * Execute any filesystem-specific flock routines.  The filesystem may
+	 * maintain supplemental locks.  This code allows the supplemental locks
+	 * to be kept in sync with the vfs flock lock.  If flock() is called on
+	 * a lock already held for the given filp, the current flock lock is
+	 * dropped before obtaining the requested lock.  This unlock operation
+	 * must be completed for any filesystem-specific locks and the vfs
+	 * flock lock before proceeding with obtaining the requested lock.  When
+	 * the filesystem routine drops a lock for such a request, it must
+	 * return -EDEADLK, allowing the vfs lock to be dropped, and the
+	 * filesystem code is then re-executed to obtain the lock.
+	 *
+	 * A non-blocking request that returns EWOULDBLOCK also causes any vfs
+	 * flock lock to be released, but then returns the error to the caller.
+	 */
+	if (filp->f_op && filp->f_op->lock) {
+	repeat:
+		error = flock_fs_file(filp, lock->fl_type, can_sleep);
+		if (error < 0) {
+			/*
+			 * We may have dropped a lock.  We need to
+			 * finish unlocking before returning or
+			 * continuing with lock acquisition.
+			 */
+			if (error != -ENOLCK)
+				flock_lock_file(filp, &(struct file_lock){ .fl_type = F_UNLCK });
+
+			/*
+			 * We already held the lock in some mode, and
+			 * had to drop filesystem-specific locks before
+			 * proceeding.  We come back through this
+			 * routine to unlock the vfs flock lock.  Now go
+			 * back and try again.  Using EAGAIN as the
+			 * error here would be better, but the one valid
+			 * error value defined for flock(), EWOULDBLOCK,
+			 * is defined as EAGAIN.
+			 */
+			if (error == -EDEADLK)
+				goto repeat;
+
+			goto out_free;
+		}
+	}
+
 	for (;;) {
 		error = flock_lock_file(filp, lock);
 		if ((error != -EAGAIN) || !can_sleep)
@@ -1354,6 +1419,13 @@ asmlinkage long sys_flock(unsigned int f
 		break;
 	}
 
+	/*
+	 * If we failed to get the vfs flock, we need to clean up any
+	 * filesystem-specific lock state that we previously obtained.
+	 */
+	if (error && filp->f_op && filp->f_op->lock)
+		flock_fs_file(filp, F_UNLCK, 1);
+
 out_free:
 	if (list_empty(&lock->fl_link)) {
 		locks_free_lock(lock);
@@ -1714,6 +1786,8 @@ void locks_remove_flock(struct file *fil
 		if (fl->fl_file == filp) {
 			if (IS_FLOCK(fl)) {
 				locks_delete_lock(before);
+				if (filp->f_op && filp->f_op->lock)
+					flock_fs_file(filp, F_UNLCK, 1);
 				continue;
 			}
 			if (IS_LEASE(fl)) {
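# The drop-then-reacquire behavior the patch comments describe matches the
# documented flock(2) semantics: converting an existing lock (shared to
# exclusive or vice versa) is not atomic, because the old lock is removed
# before the new one is requested. A minimal standalone demo of that
# conversion window, for illustration only (the file path, and everything
# else here, is arbitrary and not part of the patch):

```c
/* Demo: flock() lock conversion drops the old lock before taking the new. */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/file.h>

int main(void)
{
	int fd = open("/tmp/flock-demo", O_CREAT | O_RDWR, 0600);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	if (flock(fd, LOCK_SH) < 0)		/* take a shared lock */
		perror("LOCK_SH");

	/*
	 * Converting LOCK_SH -> LOCK_EX is not atomic: the kernel (and,
	 * with this patch, the filesystem's ->lock method) releases the
	 * shared lock before acquiring the exclusive one, which is why
	 * sys_flock() above retries after -EDEADLK.  A non-blocking
	 * request that loses the race fails with EWOULDBLOCK, and the
	 * old lock stays dropped -- matching the patch comment.
	 */
	if (flock(fd, LOCK_EX | LOCK_NB) < 0)
		perror("LOCK_EX|LOCK_NB");	/* EWOULDBLOCK if contended */

	flock(fd, LOCK_UN);
	close(fd);
	return 0;
}
```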
# Add lock harness to the build system.

diff -urN -p linux-2.6.7/fs/Kconfig linux/fs/Kconfig
--- linux-2.6.7/fs/Kconfig	2004-06-16 12:00:44.558465722 -0500
+++ linux/fs/Kconfig	2004-06-16 12:02:02.401379449 -0500
@@ -1669,6 +1669,14 @@ config AFS_FS
 config RXRPC
 	tristate
 
+config LOCK_HARNESS
+	tristate "GFS Lock Harness"
+	help
+	  The module that connects GFS to the modules that provide
+	  locking for GFS.
+
+	  If you want to use GFS (a cluster filesystem) say Y here.
+
 endmenu
 
 menu "Partition Types"
diff -urN -p linux-2.6.7/fs/Makefile linux/fs/Makefile
--- linux-2.6.7/fs/Makefile	2004-06-16 12:00:44.558465722 -0500
+++ linux/fs/Makefile	2004-06-16 12:02:02.402379216 -0500
@@ -91,3 +91,4 @@ obj-$(CONFIG_JFS_FS)		+= jfs/
 obj-$(CONFIG_XFS_FS)		+= xfs/
 obj-$(CONFIG_AFS_FS)		+= afs/
 obj-$(CONFIG_BEFS_FS)		+= befs/
+obj-$(CONFIG_LOCK_HARNESS)	+= gfs_locking/
diff -urN -p linux-2.6.7/fs/gfs_locking/Makefile linux/fs/gfs_locking/Makefile
--- linux-2.6.7/fs/gfs_locking/Makefile	1969-12-31 18:00:00.000000000 -0600
+++ linux/fs/gfs_locking/Makefile	2004-06-16 12:02:02.402379216 -0500
@@ -0,0 +1,14 @@
+###############################################################################
+###############################################################################
+##
+##  Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+##
+##  This copyrighted material is made available to anyone wishing to use,
+##  modify, copy, or redistribute it subject to the terms and conditions
+##  of the GNU General Public License v.2.
+##
+###############################################################################
+###############################################################################
+
+obj-$(CONFIG_LOCK_HARNESS) += lock_harness/
+
diff -urN -p linux-2.6.7/fs/gfs_locking/lock_harness/Makefile linux/fs/gfs_locking/lock_harness/Makefile
--- linux-2.6.7/fs/gfs_locking/lock_harness/Makefile	1969-12-31 18:00:00.000000000 -0600
+++ linux/fs/gfs_locking/lock_harness/Makefile	2004-06-16 12:02:02.402379216 -0500
@@ -0,0 +1,16 @@
+###############################################################################
+###############################################################################
+##
+##  Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+##
+##  This copyrighted material is made available to anyone wishing to use,
+##  modify, copy, or redistribute it subject to the terms and conditions
+##  of the GNU General Public License v.2.
+##
+###############################################################################
+###############################################################################
+
+obj-$(CONFIG_LOCK_HARNESS) += lock_harness.o
+
+lock_harness-y := main.o
+

# Add GFS to the build system.

diff -urN -p linux-2.6.7/fs/Kconfig linux/fs/Kconfig
--- linux-2.6.7/fs/Kconfig	2004-06-25 13:57:24.435829621 -0500
+++ linux/fs/Kconfig	2004-06-25 13:59:16.786347614 -0500
@@ -316,13 +316,13 @@ config JFS_STATISTICS
 	  to be made available to the user in the /proc/fs/jfs/ directory.
 
 config FS_POSIX_ACL
-# Posix ACL utility routines (for now, only ext2/ext3/jfs/reiserfs)
+# Posix ACL utility routines (for now, only ext2/ext3/jfs/reiserfs/GFS)
 #
 # NOTE: you can implement Posix ACLs without these helpers (XFS does).
 #       Never use this symbol for ifdefs.
 #
 	bool
-	depends on EXT2_FS_POSIX_ACL || EXT3_FS_POSIX_ACL || JFS_POSIX_ACL || REISERFS_FS_POSIX_ACL
+	depends on EXT2_FS_POSIX_ACL || EXT3_FS_POSIX_ACL || JFS_POSIX_ACL || REISERFS_FS_POSIX_ACL || GFS_FS
 	default y
 
 config XFS_FS
@@ -1677,6 +1677,20 @@ config LOCK_HARNESS
 
 	  If you want to use GFS (a cluster filesystem) say Y here.
 
+config GFS_FS
+	tristate "GFS file system support"
+	depends on LOCK_HARNESS
+	help
+	  A cluster filesystem.
+
+	  Allows a cluster of computers to simultaneously use a block device
+	  that is shared between them (with FC, iSCSI, NBD, etc...).  GFS reads
+	  and writes to the block device like a local filesystem, but also uses
+	  a lock module to allow the computers to coordinate their I/O so
+	  filesystem consistency is maintained.  One of the nifty features of
+	  GFS is perfect consistency -- changes made to the filesystem on one
+	  machine show up immediately on all other machines in the cluster.
+
 endmenu
 
 menu "Partition Types"
diff -urN -p linux-2.6.7/fs/Makefile linux/fs/Makefile
--- linux-2.6.7/fs/Makefile	2004-06-25 13:57:24.436829391 -0500
+++ linux/fs/Makefile	2004-06-25 13:57:24.447826863 -0500
@@ -92,3 +92,4 @@ obj-$(CONFIG_XFS_FS)		+= xfs/
 obj-$(CONFIG_AFS_FS)		+= afs/
 obj-$(CONFIG_BEFS_FS)		+= befs/
 obj-$(CONFIG_LOCK_HARNESS)	+= gfs_locking/
+obj-$(CONFIG_GFS_FS)		+= gfs/
diff -urN -p linux-2.6.7/fs/gfs/Makefile linux/fs/gfs/Makefile
--- linux-2.6.7/fs/gfs/Makefile	1969-12-31 18:00:00.000000000 -0600
+++ linux/fs/gfs/Makefile	2004-06-25 13:57:24.448826633 -0500
@@ -0,0 +1,51 @@
+###############################################################################
+###############################################################################
+##
+##  Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+##
+##  This copyrighted material is made available to anyone wishing to use,
+##  modify, copy, or redistribute it subject to the terms and conditions
+##  of the GNU General Public License v.2.
+##
+###############################################################################
+###############################################################################
+
+obj-$(CONFIG_GFS_FS) += gfs.o
+
+gfs-y := acl.o \
+	bits.o \
+	bmap.o \
+	daemon.o \
+	dio.o \
+	dir.o \
+	eattr.o \
+	file.o \
+	flock.o \
+	glock.o \
+	glops.o \
+	inode.o \
+	ioctl.o \
+	locking.o \
+	log.o \
+	lops.o \
+	lvb.o \
+	main.o \
+	mount.o \
+	ondisk.o \
+	ops_address.o \
+	ops_dentry.o \
+	ops_export.o \
+	ops_file.o \
+	ops_fstype.o \
+	ops_inode.o \
+	ops_super.o \
+	ops_vm.o \
+	page.o \
+	quota.o \
+	recovery.o \
+	rgrp.o \
+	super.o \
+	trans.o \
+	unlinked.o \
+	util.o
+

# Add lock_nolock to the build system.

diff -urN -p linux-2.6.7/fs/Kconfig linux/fs/Kconfig
--- linux-2.6.7/fs/Kconfig	2004-06-16 12:02:09.563715325 -0500
+++ linux/fs/Kconfig	2004-06-16 12:02:09.574712769 -0500
@@ -1691,6 +1691,12 @@ config GFS_FS
 	  GFS is perfect consistency -- changes made to the filesystem on one
 	  machine show up immediately on all other machines in the cluster.
 
+config LOCK_NOLOCK
+	tristate "Lock Nolock"
+	depends on LOCK_HARNESS
+	help
+	  A "fake" lock module that allows GFS to run as a local filesystem.
+
 endmenu
 
 menu "Partition Types"
diff -urN -p linux-2.6.7/fs/gfs_locking/Makefile linux/fs/gfs_locking/Makefile
--- linux-2.6.7/fs/gfs_locking/Makefile	2004-06-16 12:02:05.985546690 -0500
+++ linux/fs/gfs_locking/Makefile	2004-06-16 12:02:09.574712769 -0500
@@ -11,4 +11,5 @@
 ###############################################################################
 
 obj-$(CONFIG_LOCK_HARNESS) += lock_harness/
+obj-$(CONFIG_LOCK_NOLOCK) += lock_nolock/
 
diff -urN -p linux-2.6.7/fs/gfs_locking/lock_nolock/Makefile linux/fs/gfs_locking/lock_nolock/Makefile
--- linux-2.6.7/fs/gfs_locking/lock_nolock/Makefile	1969-12-31 18:00:00.000000000 -0600
+++ linux/fs/gfs_locking/lock_nolock/Makefile	2004-06-16 12:02:09.575712537 -0500
@@ -0,0 +1,16 @@
+###############################################################################
+###############################################################################
+##
+##  Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+##
+##  This copyrighted material is made available to anyone wishing to use,
+##  modify, copy, or redistribute it subject to the terms and conditions
+##  of the GNU General Public License v.2.
+##
+###############################################################################
+###############################################################################
+
+obj-$(CONFIG_LOCK_NOLOCK) += lock_nolock.o
+
+lock_nolock-y := main.o
+

# Add lock_dlm to the build system.

diff -urN -p linux-2.6.7/fs/Kconfig linux/fs/Kconfig
--- linux-2.6.7/fs/Kconfig	2004-06-16 12:02:13.145883030 -0500
+++ linux/fs/Kconfig	2004-06-16 12:02:13.157880243 -0500
@@ -1697,6 +1697,12 @@ config LOCK_NOLOCK
 	help
 	  A "fake" lock module that allows GFS to run as a local filesystem.
 
+config LOCK_DLM
+	tristate "Lock DLM"
+	depends on LOCK_HARNESS
+	help
+	  A lock module that allows GFS to use a Distributed Lock Manager.
+
 endmenu
 
 menu "Partition Types"
diff -urN -p linux-2.6.7/fs/gfs_locking/Makefile linux/fs/gfs_locking/Makefile
--- linux-2.6.7/fs/gfs_locking/Makefile	2004-06-16 12:02:13.146882798 -0500
+++ linux/fs/gfs_locking/Makefile	2004-06-16 12:02:13.157880243 -0500
@@ -12,4 +12,5 @@
 
 obj-$(CONFIG_LOCK_HARNESS) += lock_harness/
 obj-$(CONFIG_LOCK_NOLOCK) += lock_nolock/
+obj-$(CONFIG_LOCK_DLM) += lock_dlm/
 
diff -urN -p linux-2.6.7/fs/gfs_locking/lock_dlm/Makefile linux/fs/gfs_locking/lock_dlm/Makefile
--- linux-2.6.7/fs/gfs_locking/lock_dlm/Makefile	1969-12-31 18:00:00.000000000 -0600
+++ linux/fs/gfs_locking/lock_dlm/Makefile	2004-06-16 12:02:13.157880243 -0500
@@ -0,0 +1,16 @@
+###############################################################################
+###############################################################################
+##
+##  Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+##
+##  This copyrighted material is made available to anyone wishing to use,
+##  modify, copy, or redistribute it subject to the terms and conditions
+##  of the GNU General Public License v.2.
+##
+###############################################################################
+###############################################################################
+
+obj-$(CONFIG_LOCK_DLM) += lock_dlm.o
+
+lock_dlm-y := main.o group.o lock.o mount.o thread.o plock.o
+

# Add lock_gulm to the build system.

diff -urN -p linux-2.6.7/fs/Kconfig linux/fs/Kconfig
--- linux-2.6.7/fs/Kconfig	2004-06-16 12:02:16.816030294 -0500
+++ linux/fs/Kconfig	2004-06-16 12:02:16.827027739 -0500
@@ -1703,6 +1703,12 @@ config LOCK_DLM
 	help
 	  A lock module that allows GFS to use a Distributed Lock Manager.
 
+config LOCK_GULM
+	tristate "Lock GULM"
+	depends on LOCK_HARNESS
+	help
+	  A lock module that allows GFS to use a Failover Lock Manager.
+
 endmenu
 
 menu "Partition Types"
diff -urN -p linux-2.6.7/fs/gfs_locking/Makefile linux/fs/gfs_locking/Makefile
--- linux-2.6.7/fs/gfs_locking/Makefile	2004-06-16 12:02:16.817030062 -0500
+++ linux/fs/gfs_locking/Makefile	2004-06-16 12:02:16.828027507 -0500
@@ -13,4 +13,5 @@
 obj-$(CONFIG_LOCK_HARNESS) += lock_harness/
 obj-$(CONFIG_LOCK_NOLOCK) += lock_nolock/
 obj-$(CONFIG_LOCK_DLM) += lock_dlm/
+obj-$(CONFIG_LOCK_GULM) += lock_gulm/
 
diff -urN -p linux-2.6.7/fs/gfs_locking/lock_gulm/Makefile linux/fs/gfs_locking/lock_gulm/Makefile
--- linux-2.6.7/fs/gfs_locking/lock_gulm/Makefile	1969-12-31 18:00:00.000000000 -0600
+++ linux/fs/gfs_locking/lock_gulm/Makefile	2004-06-16 12:02:16.828027507 -0500
@@ -0,0 +1,33 @@
+###############################################################################
+###############################################################################
+##
+##  Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+##
+##  This copyrighted material is made available to anyone wishing to use,
+##  modify, copy, or redistribute it subject to the terms and conditions
+##  of the GNU General Public License v.2.
+##
+###############################################################################
+###############################################################################
+
+obj-$(CONFIG_LOCK_GULM) += lock_gulm.o
+
+lock_gulm-y := gulm_core.o \
+	gulm_fs.o \
+	gulm_jid.o \
+	gulm_lt.o \
+	gulm_procinfo.o \
+	handler.o \
+	lg_core.o \
+	lg_lock.o \
+	lg_main.o \
+	linux_gulm_main.o \
+	load_info.o \
+	util.o \
+	utils_crc.o \
+	utils_tostr.o \
+	utils_verb_flags.o \
+	xdr_base.o \
+	xdr_io.o \
+	xdr_socket.o
+
diff -urN linux-orig/fs/gfs/acl.c linux-patched/fs/gfs/acl.c
--- linux-orig/fs/gfs/acl.c	1969-12-31 18:00:00.000000000 -0600
+++ linux-patched/fs/gfs/acl.c	2004-06-30 13:27:49.332713682 -0500
@@ -0,0 +1,397 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  2003  All rights reserved.
+**  Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "gfs.h"
+#include "acl.h"
+#include "dio.h"
+#include "eattr.h"
+#include "glock.h"
+#include "trans.h"
+#include "inode.h"
+
+/*
+ * Check to make sure that the acl is actually valid
+ */
+int
+gfs_validate_acl(struct gfs_inode *ip, const char *value, int size, int access)
+{
+	int err = 0;
+	struct posix_acl *acl = NULL;
+	struct gfs_sbd *sdp = ip->i_sbd;
+
+	if ((current->fsuid != ip->i_di.di_uid) && !capable(CAP_FOWNER))
+		return -EPERM;
+	if (ip->i_di.di_type == GFS_FILE_LNK)
+		return -EOPNOTSUPP;
+	if (!access && ip->i_di.di_type != GFS_FILE_DIR)
+		return -EACCES;
+	if (!sdp->sd_args.ar_posixacls)
+		return -EOPNOTSUPP;
+
+	if (value) {
+		acl = posix_acl_from_xattr(value, size);
+		if (IS_ERR(acl))
+			return PTR_ERR(acl);
+		else if (acl) {
+			err = posix_acl_valid(acl);
+			posix_acl_release(acl);
+		}
+	}
+	return err;
+}
+
+void
+gfs_acl_set_mode(struct gfs_inode *ip, struct posix_acl *acl)
+{
+	struct inode *inode;
+	mode_t mode;
+
+	inode = gfs_iget(ip, NO_CREATE);
+	mode = inode->i_mode;
+	posix_acl_equiv_mode(acl, &mode);
+	inode->i_mode = mode;
+	iput(inode);
+	gfs_inode_attr_out(ip);
+}
+
+/**
+ * gfs_replace_acl - replace the value of the ea with the value of the acl
+ *
+ * NOTE: The new value must be the same size as the old one.
+ */
+int
+gfs_replace_acl(struct inode *inode, struct posix_acl *acl, int access,
+		struct gfs_ea_location location)
+{
+	struct gfs_inode *ip = vn2ip(inode);
+	struct gfs_easet_io req;
+	int size;
+	void *data;
+	int error;
+
+	size = posix_acl_to_xattr(acl, NULL, 0);
+	GFS_ASSERT(size == GFS_EA_DATA_LEN(location.ea),
+		   printk("new acl size = %d, ea size = %u\n", size,
+			  GFS_EA_DATA_LEN(location.ea)););
+
+	data = gmalloc(size);
+
+	posix_acl_to_xattr(acl, data, size);
+
+	req.es_data = data;
+	req.es_name = (access) ? GFS_POSIX_ACL_ACCESS : GFS_POSIX_ACL_DEFAULT;
+	req.es_data_len = size;
+	req.es_name_len = (access) ? GFS_POSIX_ACL_ACCESS_LEN : GFS_POSIX_ACL_DEFAULT_LEN;
+	req.es_cmd = GFS_EACMD_REPLACE;
+	req.es_type = GFS_EATYPE_SYS;
+
+	error = replace_ea(ip->i_sbd, ip, location.ea, &req);
+	if (!error)
+		gfs_trans_add_bh(ip->i_gl, location.bh);
+
+	kfree(data);
+
+	return error;
+}
+
+/**
+ * gfs_findacl - returns the requested posix acl
+ *
+ * this function does not log the inode.  It assumes that a lock is already
+ * held on it.
+ */
+int
+gfs_findacl(struct gfs_inode *ip, int access, struct posix_acl **acl_ptr,
+	    struct gfs_ea_location *location)
+{
+	struct gfs_sbd *sdp = ip->i_sbd;
+	struct posix_acl *acl;
+	uint32_t avail_size;
+	void *data;
+	int error;
+
+	avail_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs_meta_header);
+	*acl_ptr = NULL;
+
+	if (!ip->i_di.di_eattr)
+		return 0;
+
+	error = find_eattr(ip,
+			   (access) ? GFS_POSIX_ACL_ACCESS : GFS_POSIX_ACL_DEFAULT,
+			   (access) ? GFS_POSIX_ACL_ACCESS_LEN : GFS_POSIX_ACL_DEFAULT_LEN,
+			   GFS_EATYPE_SYS, location);
+	if (error <= 0)
+		return error;
+
+	data = gmalloc(GFS_EA_DATA_LEN(location->ea));
+
+	error = 0;
+	if (GFS_EA_IS_UNSTUFFED(location->ea))
+		error = read_unstuffed(data, ip, sdp, location->ea, avail_size,
+				       gfs_ea_memcpy);
+	else
+		gfs_ea_memcpy(data, GFS_EA_DATA(location->ea),
+			      GFS_EA_DATA_LEN(location->ea));
+	if (error)
+		goto out;
+
+	acl = posix_acl_from_xattr(data, GFS_EA_DATA_LEN(location->ea));
+	if (IS_ERR(acl))
+		error = PTR_ERR(acl);
+	else
+		*acl_ptr = acl;
+
+ out:
+	kfree(data);
+	if (error)
+		brelse(location->bh);
+
+	return error;
+}
+
+int
+gfs_getacl(struct inode *inode, int access, struct posix_acl **acl_ptr)
+{
+	struct gfs_inode *ip = vn2ip(inode);
+	struct gfs_sbd *sdp = ip->i_sbd;
+	struct gfs_eaget_io req;
+	struct posix_acl *acl;
+	int size;
+	void *data;
+	int error = 0;
+
+	*acl_ptr = NULL;
+
+	if (!sdp->sd_args.ar_posixacls)
+		return 0;
+
+	req.eg_name = (access) ? GFS_POSIX_ACL_ACCESS : GFS_POSIX_ACL_DEFAULT;
+	req.eg_name_len = (access) ? GFS_POSIX_ACL_ACCESS_LEN : GFS_POSIX_ACL_DEFAULT_LEN;
+	req.eg_type = GFS_EATYPE_SYS;
+	req.eg_len = NULL;
+	req.eg_data = NULL;
+	req.eg_data_len = 0;
+
+	error = gfs_ea_read_permission(&req, ip);
+	if (error)
+		return error;
+
+	if (!ip->i_di.di_eattr)
+		return error;
+
+	size = get_ea(sdp, ip, &req, gfs_ea_memcpy);
+	if (size < 0) {
+		if (size != -ENODATA)
+			error = size;
+		return error;
+	}
+
+	data = gmalloc(size);
+
+	req.eg_data = data;
+	req.eg_data_len = size;
+
+	size = get_ea(sdp, ip, &req, gfs_ea_memcpy);
+	if (size < 0) {
+		error = size;
+		goto out_free;
+	}
+
+	acl = posix_acl_from_xattr(data, size);
+	if (IS_ERR(acl))
+		error = PTR_ERR(acl);
+	else
+		*acl_ptr = acl;
+
+ out_free:
+	kfree(data);
+
+	return error;
+}
+
+int
+gfs_setup_new_acl(struct gfs_inode *dip,
+		  unsigned int type, unsigned int *mode,
+		  struct posix_acl **acl_ptr)
+{
+	struct gfs_ea_location location;
+	struct posix_acl *acl = NULL;
+	mode_t access_mode = *mode;
+	int error;
+
+	if (type == GFS_FILE_LNK)
+		return 0;
+
+	error = gfs_findacl(dip, FALSE, &acl, &location);
+	if (error)
+		return error;
+	if (!acl) {
+		(*mode) &= ~current->fs->umask;
+		return 0;
+	}
+	brelse(location.bh);
+
+	if (type == GFS_FILE_DIR) {
+		*acl_ptr = acl;
+		return 0;
+	}
+
+	error = posix_acl_create_masq(acl, &access_mode);
+	*mode = access_mode;
+	if (error > 0) {
+		*acl_ptr = acl;
+		return 0;
+	}
+
+	posix_acl_release(acl);
+
+	return error;
+}
+
+/**
+ * gfs_create_default_acl - creates the default acl
+ *
+ * NOTE: gfs_init_access_acl must be called first
+ */
+int
+gfs_create_default_acl(struct gfs_inode *dip, struct gfs_inode *ip, void *data,
+		       int size)
+{
+	struct gfs_easet_io req;
+	struct gfs_ea_location avail;
+	int error;
+
+	memset(&avail, 0, sizeof(struct gfs_ea_location));
+
+	req.es_data = data;
+	req.es_name = GFS_POSIX_ACL_DEFAULT;
+	req.es_data_len = size;
+	req.es_name_len = GFS_POSIX_ACL_DEFAULT_LEN;
+	req.es_cmd = GFS_EACMD_CREATE;
+	req.es_type = GFS_EATYPE_SYS;
+
+	error = find_sys_space(dip, ip, size, &avail);
+	if (error)
+		return error;
+
+	avail.ea = prep_ea(avail.ea);
+
+	error = write_ea(ip->i_sbd, dip, ip, avail.ea, &req);
+	if (!error)
+		gfs_trans_add_bh(ip->i_gl, avail.bh); /* Huh!?! */
+
+	brelse(avail.bh);
+
+	return error;
+}
+
+/**
+ * gfs_init_access_acl - initializes the access acl
+ *
+ * NOTE: This must be the first extended attribute that is created for
+ * this inode.
+ */
+int
+gfs_init_access_acl(struct gfs_inode *dip, struct gfs_inode *ip, void *data,
+		    int size)
+{
+	struct gfs_easet_io req;
+
+	req.es_data = data;
+	req.es_name = GFS_POSIX_ACL_ACCESS;
+	req.es_data_len = size;
+	req.es_name_len = GFS_POSIX_ACL_ACCESS_LEN;
+	req.es_cmd = GFS_EACMD_CREATE;
+	req.es_type = GFS_EATYPE_SYS;
+
+	return init_new_inode_eattr(dip, ip, &req);
+}
+
+int
+gfs_init_acl(struct gfs_inode *dip, struct gfs_inode *ip, unsigned int type,
+	     struct posix_acl *acl)
+{
+	struct buffer_head *dibh;
+	void *data;
+	int size;
+	int error;
+
+	size = posix_acl_to_xattr(acl, NULL, 0);
+
+	data = gmalloc(size);
+
+	posix_acl_to_xattr(acl, data, size);
+
+	error = gfs_get_inode_buffer(ip, &dibh);
+	if (error)
+		goto out;
+
+	error = gfs_init_access_acl(dip, ip, data, size);
+	if (error)
+		goto out_relse;
+
+	if (type == GFS_FILE_DIR) {
+		error = gfs_create_default_acl(dip, ip, data, size);
+		if (error)
+			goto out_relse;
+	}
+
+	gfs_trans_add_bh(ip->i_gl, dibh);
+	gfs_dinode_out(&ip->i_di, dibh->b_data);
+
+ out_relse:
+	brelse(dibh);
+
+ out:
+	kfree(data);
+	posix_acl_release(acl);
+
+	return error;
+}
+
+int
+gfs_acl_setattr(struct inode *inode)
+{
+	struct gfs_inode *ip = vn2ip(inode);
+	struct posix_acl *acl;
+	struct gfs_ea_location location;
+	int error;
+
+	if (S_ISLNK(inode->i_mode))
+		return 0;
+
+	memset(&location, 0, sizeof(struct gfs_ea_location));
+
+	error = gfs_findacl(ip, TRUE, &acl, &location); /* Check error here? */
+	if (!location.ea)
+		return error;
+
+	error = posix_acl_chmod_masq(acl, inode->i_mode);
+	if (!error)
+		error = gfs_replace_acl(inode, acl, TRUE, location);
+
+	posix_acl_release(acl);
+	brelse(location.bh);
+
+	return error;
+}
diff -urN linux-orig/fs/gfs/acl.h linux-patched/fs/gfs/acl.h
--- linux-orig/fs/gfs/acl.h	1969-12-31 18:00:00.000000000 -0600
+++ linux-patched/fs/gfs/acl.h	2004-06-30 13:27:49.332713682 -0500
@@ -0,0 +1,28 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  2003  All rights reserved.
+**  Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#ifndef __ACL_DOT_H__
+#define __ACL_DOT_H__
+
+int gfs_setup_new_acl(struct gfs_inode *dip,
+		      unsigned int type, unsigned int *mode,
+		      struct posix_acl **acl_ptr);
+int gfs_getacl(struct inode *inode, int access, struct posix_acl **acl_ptr);
+int gfs_init_acl(struct gfs_inode *dip, struct gfs_inode *ip, unsigned int type,
+		 struct posix_acl *acl);
+int gfs_acl_setattr(struct inode *inode);
+int gfs_validate_acl(struct gfs_inode *ip, const char *value, int size,
+		     int access);
+void gfs_acl_set_mode(struct gfs_inode *ip, struct posix_acl *acl);
+
+#endif /* __ACL_DOT_H__ */
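# The blobs stored above are the standard posix_acl_to_xattr() encoding, the
# same format other Linux filesystems expose through the generic
# system.posix_acl_access / system.posix_acl_default attribute names. A small
# userspace sketch of reading one, not GFS-specific and assuming only an
# ACL-enabled filesystem; note the same query-size-then-read pattern
# gfs_getacl() uses with get_ea():

```c
/* Dump the size of the raw POSIX ACL xattr blob for a file (illustrative). */
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/xattr.h>

int main(int argc, char **argv)
{
	const char *path = (argc > 1) ? argv[1] : ".";

	/* First call with a NULL buffer just asks for the blob size. */
	ssize_t size = getxattr(path, "system.posix_acl_access", NULL, 0);
	if (size < 0) {
		perror("getxattr");	/* ENODATA: no access ACL stored */
		return 1;
	}

	char *buf = malloc(size);
	if (!buf)
		return 1;

	/* Second call actually fetches the posix_acl_to_xattr() blob. */
	size = getxattr(path, "system.posix_acl_access", buf, size);
	if (size >= 0)
		printf("access ACL: %zd bytes of xattr data\n", size);

	free(buf);
	return 0;
}
```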
diff -urN linux-orig/fs/gfs/bits.c linux-patched/fs/gfs/bits.c
--- linux-orig/fs/gfs/bits.c	1969-12-31 18:00:00.000000000 -0600
+++ linux-patched/fs/gfs/bits.c	2004-06-30 13:27:49.332713682 -0500
@@ -0,0 +1,183 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+/*
+ * These routines are used by the resource group routines (rgrp.c)
+ * to keep track of block allocation.  Each block is represented by two
+ * bits.  One bit indicates whether or not the block is used.  (1=used,
+ * 0=free)  The other bit indicates whether or not the block contains a
+ * dinode or not.  (1=dinode, 0=data block)  So, each byte represents
+ * GFS_NBBY (i.e. 4) blocks.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "gfs.h"
+#include "bits.h"
+
+static const char valid_change[16] = {
+	        /* current */
+
+	/* n */ 0, 1, 1, 1,
+	/* e */ 1, 0, 0, 0,
+	/* w */ 1, 0, 0, 1,
+	        0, 0, 1, 0
+};
+
+/**
+ * gfs_setbit - Set a bit in the bitmaps
+ * @buffer: the buffer that holds the bitmaps
+ * @buflen: the length (in bytes) of the buffer
+ * @block: the block to set
+ * @new_state: the new state of the block
+ *
+ */
+
+void
+gfs_setbit(struct gfs_rgrpd *rgd,
+	   unsigned char *buffer, unsigned int buflen,
+	   uint32_t block, unsigned char new_state)
+{
+	unsigned char *byte, *end, cur_state;
+	unsigned int bit;
+
+	byte = buffer + (block / GFS_NBBY);
+	bit = (block % GFS_NBBY) * GFS_BIT_SIZE;
+	end = buffer + buflen;
+
+	GFS_ASSERT_RGRPD(byte < end, rgd,);
+
+	cur_state = (*byte >> bit) & GFS_BIT_MASK;
+	GFS_ASSERT_RGRPD(valid_change[new_state * 4 + cur_state], rgd,
+			 printk("cur_state = %u, new_state = %u\n",
+				cur_state, new_state););
+
+	*byte ^= cur_state << bit;
+	*byte |= new_state << bit;
+}
+
+/**
+ * gfs_testbit - test a bit in the bitmaps
+ * @buffer: the buffer that holds the bitmaps
+ * @buflen: the length (in bytes) of the buffer
+ * @block: the block to read
+ *
+ */
+
+unsigned char
+gfs_testbit(struct gfs_rgrpd *rgd,
+	    unsigned char *buffer, unsigned int buflen, uint32_t block)
+{
+	unsigned char *byte, *end, cur_state;
+	unsigned int bit;
+
+	byte = buffer + (block / GFS_NBBY);
+	bit = (block % GFS_NBBY) * GFS_BIT_SIZE;
+	end = buffer + buflen;
+
+	GFS_ASSERT_RGRPD(byte < end, rgd,);
+
+	cur_state = (*byte >> bit) & GFS_BIT_MASK;
+
+	return cur_state;
+}
+
+/**
+ * gfs_bitfit - Find a free block in the bitmaps
+ * @buffer: the buffer that holds the bitmaps
+ * @buflen: the length (in bytes) of the buffer
+ * @goal: the block to try to allocate
+ * @old_state: the state of the block we're looking for
+ *
+ * Return: the block number that was allocated
+ */
+
+uint32_t
+gfs_bitfit(struct gfs_rgrpd *rgd,
+	   unsigned char *buffer, unsigned int buflen,
+	   uint32_t goal, unsigned char old_state)
+{
+	unsigned char *byte, *end, alloc;
+	uint32_t blk = goal;
+	unsigned int bit;
+
+	byte = buffer + (goal / GFS_NBBY);
+	bit = (goal % GFS_NBBY) * GFS_BIT_SIZE;
+	end = buffer + buflen;
+	alloc = (old_state & 1) ? 0 : 0x55;
+
+	while (byte < end) {
+		if ((*byte & 0x55) == alloc) {
+			blk += (8 - bit) >> 1;
+
+			bit = 0;
+			byte++;
+
+			continue;
+		}
+
+		if (((*byte >> bit) & GFS_BIT_MASK) == old_state)
+			return blk;
+
+		bit += GFS_BIT_SIZE;
+		if (bit >= 8) {
+			bit = 0;
+			byte++;
+		}
+
+		blk++;
+	}
+
+	return BFITNOENT;
+}
+
+/**
+ * gfs_bitcount - count the number of bits in a certain state
+ * @buffer: the buffer that holds the bitmaps
+ * @buflen: the length (in bytes) of the buffer
+ * @state: the state of the block we're looking for
+ *
+ * Returns: The number of bits
+ */
+
+uint32_t
+gfs_bitcount(struct gfs_rgrpd *rgd,
+	     unsigned char *buffer, unsigned int buflen,
+	     unsigned char state)
+{
+	unsigned char *byte = buffer;
+	unsigned char *end = buffer + buflen;
+	unsigned char state1 = state << 2;
+	unsigned char state2 = state << 4;
+	unsigned char state3 = state << 6;
+	uint32_t count = 0;
+
+	for (; byte < end; byte++) {
+		if (((*byte) & 0x03) == state)
+			count++;
+		if (((*byte) & 0x0C) == state1)
+			count++;
+		if (((*byte) & 0x30) == state2)
+			count++;
+		if (((*byte) & 0xC0) == state3)
+			count++;
+	}
+
+	return count;
+}
diff -urN linux-orig/fs/gfs/bits.h linux-patched/fs/gfs/bits.h
--- linux-orig/fs/gfs/bits.h	1969-12-31 18:00:00.000000000 -0600
+++ linux-patched/fs/gfs/bits.h	2004-06-30 13:27:49.332713682 -0500
@@ -0,0 +1,32 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#ifndef __BITS_DOT_H__
+#define __BITS_DOT_H__
+
+#define BFITNOENT (0xFFFFFFFF)
+
+void gfs_setbit(struct gfs_rgrpd *rgd,
+		unsigned char *buffer, unsigned int buflen,
+		uint32_t block, unsigned char new_state);
+unsigned char gfs_testbit(struct gfs_rgrpd *rgd,
+			  unsigned char *buffer, unsigned int buflen,
+			  uint32_t block);
+uint32_t gfs_bitfit(struct gfs_rgrpd *rgd,
+		    unsigned char *buffer, unsigned int buflen,
+		    uint32_t goal, unsigned char old_state);
+uint32_t gfs_bitcount(struct gfs_rgrpd *rgd,
+		      unsigned char *buffer, unsigned int buflen,
+		      unsigned char state);
+
+#endif /* __BITS_DOT_H__ */
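# To make the two-bits-per-block encoding described at the top of bits.c
# concrete, here is a standalone demo of the same byte/bit arithmetic.
# GFS_NBBY, GFS_BIT_SIZE, and GFS_BIT_MASK match the values the kernel code
# implies (4 blocks per byte, 2 bits per block, mask 3); the helper names and
# the main() driver are illustrative only, not part of the patch:

```c
/* Standalone demo of the gfs two-bit block-state bitmap arithmetic. */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define GFS_NBBY	4	/* blocks represented per byte */
#define GFS_BIT_SIZE	2	/* bits per block */
#define GFS_BIT_MASK	3

static void demo_setbit(unsigned char *buf, uint32_t block,
			unsigned char new_state)
{
	unsigned char *byte = buf + block / GFS_NBBY;
	unsigned int bit = (block % GFS_NBBY) * GFS_BIT_SIZE;

	/* Same xor-then-or sequence as gfs_setbit(): clear, then set. */
	*byte ^= ((*byte >> bit) & GFS_BIT_MASK) << bit;
	*byte |= new_state << bit;
}

static unsigned char demo_testbit(const unsigned char *buf, uint32_t block)
{
	return (buf[block / GFS_NBBY] >>
		((block % GFS_NBBY) * GFS_BIT_SIZE)) & GFS_BIT_MASK;
}

int main(void)
{
	unsigned char bitmap[2];	/* room for 8 blocks */
	uint32_t b;

	memset(bitmap, 0, sizeof(bitmap));
	demo_setbit(bitmap, 5, 1);	/* block 5: used data block (01) */
	demo_setbit(bitmap, 6, 3);	/* block 6: used dinode     (11) */

	for (b = 0; b < 8; b++)
		printf("block %u -> state %u\n", b, demo_testbit(bitmap, b));
	return 0;
}
```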
+** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include "gfs.h" +#include "bmap.h" +#include "dio.h" +#include "glock.h" +#include "inode.h" +#include "ioctl.h" +#include "quota.h" +#include "rgrp.h" +#include "trans.h" + +struct metapath { + unsigned int mp_list[GFS_MAX_META_HEIGHT]; +}; + +typedef int (*block_call_t) (struct gfs_inode *ip, struct buffer_head *dibh, + struct buffer_head *bh, uint64_t *top, + uint64_t *bottom, unsigned int height, + void *data); + +struct strip_mine { + int sm_first; + unsigned int sm_height; +}; + +/** + * gfs_unstuffer_sync - unstuff a dinode synchronously + * @ip: the inode + * @dibh: the dinode buffer + * @block: the block number that was allocated + * @private: not used + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_unstuffer_sync(struct gfs_inode *ip, struct buffer_head *dibh, + uint64_t block, void *private) +{ + struct gfs_sbd *sdp = ip->i_sbd; + struct buffer_head *bh; + int error; + + error = gfs_get_data_buffer(ip, block, TRUE, &bh); + if (error) + return error; + + gfs_buffer_copy_tail(bh, 0, dibh, sizeof(struct gfs_dinode)); + + error = gfs_dwrite(sdp, bh, DIO_DIRTY | DIO_START | DIO_WAIT); + + brelse(bh); + + return error; +} + +/** + * gfs_unstuffer_async - unstuff a dinode asynchronously + * @ip: the inode + * @dibh: the dinode buffer + * @block: the block number that was allocated + * @private: not used + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_unstuffer_async(struct gfs_inode *ip, struct buffer_head *dibh, + uint64_t block, void *private) +{ + struct gfs_sbd *sdp = ip->i_sbd; + struct buffer_head *bh; + int error; + + error = gfs_get_data_buffer(ip, block, TRUE, &bh); + if (error) + return error; + + gfs_buffer_copy_tail(bh, 0, dibh, sizeof(struct gfs_dinode)); + + error = gfs_dwrite(sdp, bh, DIO_DIRTY); + + brelse(bh); + + return error; +} + +/** + * gfs_unstuff_dinode - Unstuff a dinode when the data has grown too big + * @ip: The GFS inode to unstuff + * @unstuffer: the routine that handles unstuffing a non-zero length file + * @private: private data for the unstuffer + * + * This routine unstuffs a dinode and returns it to a "normal" state such + * that the height can be grown in the traditional way. 
+ * + * Returns: 0 on success, -EXXXX on failure + */ + +int +gfs_unstuff_dinode(struct gfs_inode *ip, gfs_unstuffer_t unstuffer, + void *private) +{ + struct buffer_head *bh, *dibh; + uint64_t block = 0; + int journaled = gfs_is_jdata(ip); + int error; + + GFS_ASSERT_INODE(gfs_is_stuffed(ip), ip,); + + error = gfs_get_inode_buffer(ip, &dibh); + if (error) + return error; + + if (ip->i_di.di_size) { + /* Get a free block, fill it with the stuffed data, + and write it out to disk */ + + if (journaled) { + error = gfs_metaalloc(ip, &block); + if (error) + goto fail; + + error = gfs_get_data_buffer(ip, block, TRUE, &bh); + if (error) + goto fail; + + gfs_buffer_copy_tail(bh, sizeof(struct gfs_meta_header), + dibh, sizeof(struct gfs_dinode)); + + brelse(bh); + } else { + gfs_blkalloc(ip, &block); + + error = unstuffer(ip, dibh, block, private); + if (error) + goto fail; + } + } + + /* Set up the pointer to the new block */ + + gfs_trans_add_bh(ip->i_gl, dibh); + + gfs_buffer_clear_tail(dibh, sizeof(struct gfs_dinode)); + + if (ip->i_di.di_size) { + *(uint64_t *)(dibh->b_data + sizeof(struct gfs_dinode)) = cpu_to_gfs64(block); + ip->i_di.di_blocks++; + } + + ip->i_di.di_height = 1; + + gfs_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + + return 0; + + fail: + brelse(dibh); + + return error; +} + +/** + * calc_tree_height - Calculate the height of a metadata tree + * @ip: The GFS inode + * @size: The proposed size of the file + * + * Work out how tall a metadata tree needs to be in order to accommodate a + * file of a particular size. If size is less than the current size of + * the inode, then the current size of the inode is used instead of the + * supplied one. + * + * Returns: the height the tree should be + */ + +static unsigned int +calc_tree_height(struct gfs_inode *ip, uint64_t size) +{ + struct gfs_sbd *sdp = ip->i_sbd; + uint64_t *arr; + unsigned int max, height; + + if (ip->i_di.di_size > size) + size = ip->i_di.di_size; + + if (gfs_is_jdata(ip)) { + arr = sdp->sd_jheightsize; + max = sdp->sd_max_jheight; + } else { + arr = sdp->sd_heightsize; + max = sdp->sd_max_height; + } + + for (height = 0; height < max; height++) + if (arr[height] >= size) + break; + + return height; +} + +/** + * build_height - Build a metadata tree of the requested height + * @ip: The GFS inode + * @height: The height to build to + * + * This routine makes sure that the metadata tree is tall enough to hold + * "size" bytes of data. 
+ * + * Returns: 0 on success, -EXXXX on failure + */ + +static int +build_height(struct gfs_inode *ip, int height) +{ + struct gfs_sbd *sdp = ip->i_sbd; + struct buffer_head *bh, *dibh; + uint64_t block, *bp; + unsigned int x; + int new_block; + int error; + + while (ip->i_di.di_height < height) { + error = gfs_get_inode_buffer(ip, &dibh); + if (error) + return error; + + new_block = FALSE; + bp = (uint64_t *)(dibh->b_data + sizeof(struct gfs_dinode)); + for (x = 0; x < sdp->sd_diptrs; x++, bp++) + if (*bp) { + new_block = TRUE; + break; + } + + if (new_block) { + /* Get a new block, fill it with the old direct pointers, + and write it out */ + + error = gfs_metaalloc(ip, &block); + if (error) + goto fail; + + error = gfs_dread(sdp, block, ip->i_gl, + DIO_NEW | DIO_START | DIO_WAIT, &bh); + if (error) + goto fail; + + gfs_trans_add_bh(ip->i_gl, bh); + gfs_metatype_set(sdp, bh, GFS_METATYPE_IN, + GFS_FORMAT_IN); + memset(bh->b_data + sizeof(struct gfs_meta_header), + 0, + sizeof(struct gfs_indirect) - + sizeof(struct gfs_meta_header)); + gfs_buffer_copy_tail(bh, sizeof(struct gfs_indirect), + dibh, sizeof(struct gfs_dinode)); + + brelse(bh); + } + + /* Set up the new direct pointer and write it out to disk */ + + gfs_trans_add_bh(ip->i_gl, dibh); + + gfs_buffer_clear_tail(dibh, sizeof(struct gfs_dinode)); + + if (new_block) { + *(uint64_t *)(dibh->b_data + sizeof(struct gfs_dinode)) = cpu_to_gfs64(block); + ip->i_di.di_blocks++; + } + + ip->i_di.di_height++; + + gfs_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + } + + return 0; + + fail: + brelse(dibh); + + return error; +} + +/** + * find_metapath - Find path through the metadata tree + * @ip: The inode pointer + * @mp: The metapath to return the result in + * @block: The disk block to look up + * + * This routine returns a struct metapath structure that defines a path through + * the metadata of inode "ip" to get to block "block". + * + * Example: + * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a + * filesystem with a blocksize of 4096. + * + * find_metapath() would return a struct metapath structure set to: + * mp_offset = 101342453, mp_height = 3, mp_list[0] = 0, mp_list[1] = 48, + * and mp_list[2] = 165. + * + * That means that in order to get to the block containing the byte at + * offset 101342453, we would load the indirect block pointed to by pointer + * 0 in the dinode. We would then load the indirect block pointed to by + * pointer 48 in that indirect block. We would then load the data block + * pointed to by pointer 165 in that indirect block. 
+ * + * ---------------------------------------- + * | Dinode | | + * | | 4| + * | |0 1 2 3 4 5 9| + * | | 6| + * ---------------------------------------- + * | + * | + * V + * ---------------------------------------- + * | Indirect Block | + * | 5| + * | 4 4 4 4 4 5 5 1| + * |0 5 6 7 8 9 0 1 2| + * ---------------------------------------- + * | + * | + * V + * ---------------------------------------- + * | Indirect Block | + * | 1 1 1 1 1 5| + * | 6 6 6 6 6 1| + * |0 3 4 5 6 7 2| + * ---------------------------------------- + * | + * | + * V + * ---------------------------------------- + * | Data block containing offset | + * | 101342453 | + * | | + * | | + * ---------------------------------------- + * + */ + +static struct metapath * +find_metapath(struct gfs_inode *ip, uint64_t block) +{ + struct gfs_sbd *sdp = ip->i_sbd; + struct metapath *mp; + uint64_t b = block; + unsigned int i; + + mp = gmalloc(sizeof(struct metapath)); + memset(mp, 0, sizeof(struct metapath)); + + for (i = ip->i_di.di_height; i--;) + mp->mp_list[i] = do_div(b, sdp->sd_inptrs); + + return mp; +} + +/** + * metapointer - Return pointer to start of metadata in a buffer + * @bh: The buffer + * @height: The metadata height (0 = dinode) + * @mp: The metapath + * + * Return a pointer to the block number of the next height of the metadata + * tree given a buffer containing the pointer to the current height of the + * metadata tree. + */ + +static __inline__ uint64_t * +metapointer(struct buffer_head *bh, unsigned int height, struct metapath *mp) +{ + unsigned int head_size = (height > 0) ? + sizeof(struct gfs_indirect) : sizeof(struct gfs_dinode); + + return ((uint64_t *)(bh->b_data + head_size)) + mp->mp_list[height]; +} + +/** + * get_metablock - Get the next metadata block in metadata tree + * @ip: The GFS inode + * @bh: Buffer containing the pointers to metadata blocks + * @height: The height of the tree (0 = dinode) + * @mp: The metapath + * @create: Non-zero if we may create a new meatdata block + * @new: Used to indicate if we did create a new metadata block + * @block: the returned disk block number + * + * Given a metatree, complete to a particular height, checks to see if the next + * height of the tree exists. If not the next height of the tree is created. + * The block number of the next height of the metadata tree is returned. + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +get_metablock(struct gfs_inode *ip, + struct buffer_head *bh, unsigned int height, struct metapath *mp, + int create, int *new, uint64_t *block) +{ + uint64_t *ptr = metapointer(bh, height, mp); + int error; + + if (*ptr) { + *block = gfs64_to_cpu(*ptr); + return 0; + } + + *block = 0; + + if (!create) + return 0; + + error = gfs_metaalloc(ip, block); + if (error) + return error; + + gfs_trans_add_bh(ip->i_gl, bh); + + *ptr = cpu_to_gfs64(*block); + ip->i_di.di_blocks++; + + *new = 1; + + return 0; +} + +/** + * get_datablock - Get datablock number from metadata block + * @ip: The GFS inode + * @bh: The buffer containing pointers to datablocks + * @mp: The metapath + * @create: Non-zero if we may create a new data block + * @new: Used to indicate if we created a new data block + * @block: the returned disk block number + * + * Given a fully built metadata tree, checks to see if a particular data + * block exists. It is created if it does not exist and the block number + * on disk is returned. 
+ * + * Returns: 0 on success, -EXXX on failure + */ + +static int +get_datablock(struct gfs_inode *ip, + struct buffer_head *bh, struct metapath *mp, + int create, int *new, uint64_t *block) +{ + uint64_t *ptr = metapointer(bh, ip->i_di.di_height - 1, mp); + + if (*ptr) { + *block = gfs64_to_cpu(*ptr); + return 0; + } + + *block = 0; + + if (!create) + return 0; + + if (gfs_is_jdata(ip)) { + int error; + error = gfs_metaalloc(ip, block); + if (error) + return error; + } else + gfs_blkalloc(ip, block); + + gfs_trans_add_bh(ip->i_gl, bh); + + *ptr = cpu_to_gfs64(*block); + ip->i_di.di_blocks++; + + *new = 1; + + return 0; +} + +/** + * gfs_block_map - Map a block from an inode to a disk block + * @ip: The GFS inode + * @lblock: The logical block number + * @new: Value/Result argument (1 = may create/did create new blocks) + * @dblock: the disk block number of the start of an extent + * @extlen: the size of the extent + * + * Find the block number on the current device which corresponds to an + * inode's block. If the block had to be created, "new" will be set. + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_block_map(struct gfs_inode *ip, + uint64_t lblock, int *new, + uint64_t *dblock, uint32_t *extlen) +{ + struct gfs_sbd *sdp = ip->i_sbd; + struct buffer_head *bh; + struct metapath *mp; + int create = *new; + unsigned int bsize; + unsigned int height; + unsigned int end_of_metadata; + unsigned int x; + int error; + + *new = 0; + *dblock = 0; + if (extlen) + *extlen = 0; + + if (gfs_is_stuffed(ip)) { + if (!lblock) { + *dblock = ip->i_num.no_addr; + if (extlen) + *extlen = 1; + } + return 0; + } + + bsize = (gfs_is_jdata(ip)) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize; + + height = calc_tree_height(ip, (lblock + 1) * bsize); + if (ip->i_di.di_height < height) { + if (!create) + return 0; + + error = build_height(ip, height); + if (error) + return error; + } + + mp = find_metapath(ip, lblock); + end_of_metadata = ip->i_di.di_height - 1; + + error = gfs_get_inode_buffer(ip, &bh); + if (error) + goto out; + + for (x = 0; x < end_of_metadata; x++) { + error = get_metablock(ip, bh, x, mp, create, new, dblock); + brelse(bh); + if (error || !*dblock) + goto out; + + error = gfs_get_meta_buffer(ip, x + 1, *dblock, *new, &bh); + if (error) + goto out; + } + + error = get_datablock(ip, bh, mp, create, new, dblock); + if (error) { + brelse(bh); + goto out; + } + + if (extlen && *dblock) { + *extlen = 1; + + if (!*new) { + uint64_t tmp_dblock; + int tmp_new; + unsigned int nptrs; + + nptrs = (end_of_metadata) ? sdp->sd_inptrs : sdp->sd_diptrs; + + while (++mp->mp_list[end_of_metadata] < nptrs) { + get_datablock(ip, bh, mp, + FALSE, &tmp_new, + &tmp_dblock); + + if (*dblock + *extlen != tmp_dblock) + break; + + (*extlen)++; + } + } + } + + brelse(bh); + + if (*new) { + error = gfs_get_inode_buffer(ip, &bh); + if (!error) { + gfs_trans_add_bh(ip->i_gl, bh); + gfs_dinode_out(&ip->i_di, bh->b_data); + brelse(bh); + } + } + + out: + kfree(mp); + + return error; +} + +/** + * do_grow - Make a file look bigger than it is + * @ip: the inode + * @size: the size to set the file to + * + * Called with an exclusive lock on @ip. 
+ * + * Returns: 0 on succes, -EXXX on failure + */ + +static int +do_grow(struct gfs_inode *ip, uint64_t size) +{ + struct gfs_sbd *sdp = ip->i_sbd; + struct gfs_alloc *al; + struct buffer_head *dibh; + unsigned int h; + int journaled = gfs_is_jdata(ip); + int error; + + al = gfs_alloc_get(ip); + + error = gfs_quota_lock_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto fail; + + error = gfs_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid); + if (error) + goto fail_gunlock_q; + + if (journaled) + al->al_requested_meta = sdp->sd_max_height + 1; + else { + al->al_requested_meta = sdp->sd_max_height; + al->al_requested_data = 1; + } + + error = gfs_inplace_reserve(ip); + if (error) + goto fail_gunlock_q; + + /* Trans may require: + Full extention of the metadata tree, block allocation, + a dinode modification, and a quota change */ + + error = gfs_trans_begin(sdp, + sdp->sd_max_height + al->al_rgd->rd_ri.ri_length + + 1 + !!journaled, + 1); + if (error) + goto fail_ipres; + + if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode)) { + if (gfs_is_stuffed(ip)) { + error = gfs_unstuff_dinode(ip, gfs_unstuffer_sync, NULL); + if (error) + goto fail_end_trans; + } + + h = calc_tree_height(ip, size); + if (ip->i_di.di_height < h) { + error = build_height(ip, h); + if (error) + goto fail_end_trans; + } + } + + ip->i_di.di_size = size; + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + + error = gfs_get_inode_buffer(ip, &dibh); + if (error) + goto fail_end_trans; + + gfs_trans_add_bh(ip->i_gl, dibh); + gfs_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + + gfs_trans_end(sdp); + + gfs_inplace_release(ip); + gfs_quota_unlock_m(ip); + gfs_alloc_put(ip); + + return 0; + + fail_end_trans: + gfs_trans_end(sdp); + + fail_ipres: + gfs_inplace_release(ip); + + fail_gunlock_q: + gfs_quota_unlock_m(ip); + + fail: + gfs_alloc_put(ip); + + return error; +} + +/** + * recursive_scan - recursively scan through the end of a file + * @ip: the inode + * @dibh: the dinode buffer + * @mp: the path through the metadata to the point to start + * @height: the height the recursion is at + * @block: the indirect block to look at + * @first: TRUE if this is the first block + * @bc: the call to make for each piece of metadata + * @data: data opaque to this function to pass to @bc + * + * When this is first called @height and @block should be zero and + * @first should be TRUE. + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +recursive_scan(struct gfs_inode *ip, struct buffer_head *dibh, + struct metapath *mp, unsigned int height, uint64_t block, + int first, block_call_t bc, void *data) +{ + struct gfs_sbd *sdp = ip->i_sbd; + struct buffer_head *bh = NULL; + uint64_t *top, *bottom; + uint64_t bn; + int error; + + if (!height) { + error = gfs_get_inode_buffer(ip, &bh); + if (error) + goto fail; + dibh = bh; + + top = (uint64_t *)(bh->b_data + sizeof(struct gfs_dinode)) + + mp->mp_list[0]; + bottom = (uint64_t *)(bh->b_data + sizeof(struct gfs_dinode)) + + sdp->sd_diptrs; + } else { + error = gfs_get_meta_buffer(ip, height, block, FALSE, &bh); + if (error) + goto fail; + + top = (uint64_t *)(bh->b_data + sizeof(struct gfs_indirect)) + + ((first) ? 
mp->mp_list[height] : 0); + bottom = (uint64_t *)(bh->b_data + sizeof(struct gfs_indirect)) + + sdp->sd_inptrs; + } + + error = bc(ip, dibh, bh, top, bottom, height, data); + if (error) + goto fail; + + if (height < ip->i_di.di_height - 1) + for (; top < bottom; top++, first = FALSE) { + if (!*top) + continue; + + bn = gfs64_to_cpu(*top); + + error = recursive_scan(ip, dibh, mp, + height + 1, bn, first, + bc, data); + if (error) + goto fail; + } + + brelse(bh); + + return 0; + + fail: + if (bh) + brelse(bh); + + return error; +} + +/** + * do_strip - Look for a layer a particular layer of the file and strip it off + * @ip: the inode + * @dibh: the dinode buffer + * @bh: A buffer of pointers + * @top: The first pointer in the buffer + * @bottom: One more than the last pointer + * @height: the height this buffer is at + * @data: a pointer to a struct strip_mine + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +do_strip(struct gfs_inode *ip, struct buffer_head *dibh, + struct buffer_head *bh, uint64_t *top, uint64_t *bottom, + unsigned int height, void *data) +{ + struct strip_mine *sm = (struct strip_mine *)data; + struct gfs_sbd *sdp = ip->i_sbd; + struct gfs_holder ri_gh; + struct gfs_rgrp_list rlist; + uint64_t bn, bstart; + uint32_t blen; + uint64_t *p; + unsigned int rg_blocks = 0; + int metadata; + int x; + int error; + + if (!*top) + sm->sm_first = FALSE; + + if (height != sm->sm_height) + return 0; + + if (sm->sm_first) { + top++; + sm->sm_first = FALSE; + } + + metadata = (height != ip->i_di.di_height - 1) || gfs_is_jdata(ip); + + error = gfs_rindex_hold(sdp, &ri_gh); + if (error) + return error; + + memset(&rlist, 0, sizeof(struct gfs_rgrp_list)); + bstart = 0; + blen = 0; + + for (p = top; p < bottom; p++) { + if (!*p) + continue; + + bn = gfs64_to_cpu(*p); + + if (bstart + blen == bn) + blen++; + else { + if (bstart) + gfs_rlist_add(sdp, &rlist, bstart); + + bstart = bn; + blen = 1; + } + } + + if (bstart) + gfs_rlist_add(sdp, &rlist, bstart); + else + goto out; /* Nothing to do */ + + gfs_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0); + + error = gfs_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs); + if (error) + goto fail; + + for (x = 0; x < rlist.rl_rgrps; x++) { + struct gfs_rgrpd *rgd; + rgd = gl2rgd(rlist.rl_ghs[x].gh_gl); + rg_blocks += rgd->rd_ri.ri_length; + } + + /* Trans may require: + All the bitmaps that were reserved. + One block for the dinode. + One block for the indirect block being cleared. + One block for a quota change. 
*/ + + error = gfs_trans_begin(sdp, rg_blocks + 2, 1); + if (error) + goto fail_rg_gunlock; + + gfs_trans_add_bh(ip->i_gl, dibh); + gfs_trans_add_bh(ip->i_gl, bh); + + bstart = 0; + blen = 0; + + for (p = top; p < bottom; p++) { + if (!*p) + continue; + + bn = gfs64_to_cpu(*p); + + if (bstart + blen == bn) + blen++; + else { + if (bstart) { + if (metadata) + gfs_metafree(ip, bstart, blen); + else + gfs_blkfree(ip, bstart, blen); + } + + bstart = bn; + blen = 1; + } + + *p = 0; + ip->i_di.di_blocks--; + } + + if (bstart) { + if (metadata) + gfs_metafree(ip, bstart, blen); + else + gfs_blkfree(ip, bstart, blen); + } + + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + + gfs_dinode_out(&ip->i_di, dibh->b_data); + + gfs_trans_end(sdp); + + gfs_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs); + gfs_rlist_free(&rlist); + + out: + gfs_glock_dq_uninit(&ri_gh); + + return 0; + + fail_rg_gunlock: + gfs_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs); + + fail: + gfs_rlist_free(&rlist); + + gfs_glock_dq_uninit(&ri_gh); + + return error; +} + +/** + * gfs_truncator_default - truncate a partial data block + * @ip: the inode + * @size: the size the file should be + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_truncator_default(struct gfs_inode *ip, uint64_t size) +{ + struct gfs_sbd *sdp = ip->i_sbd; + struct buffer_head *bh; + uint64_t bn; + int not_new = 0; + int error; + + error = gfs_block_map(ip, size >> sdp->sd_sb.sb_bsize_shift, ¬_new, + &bn, NULL); + if (error) + return error; + if (!bn) + return 0; + + error = gfs_get_data_buffer(ip, bn, FALSE, &bh); + if (error) + return error; + + gfs_buffer_clear_tail(bh, size & (sdp->sd_sb.sb_bsize - 1)); + + error = gfs_dwrite(sdp, bh, DIO_DIRTY); + + brelse(bh); + + return error; +} + +/** + * truncator_journaled - truncate a partial data block + * @ip: the inode + * @size: the size the file should be + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +truncator_journaled(struct gfs_inode *ip, uint64_t size) +{ + struct gfs_sbd *sdp = ip->i_sbd; + struct buffer_head *bh; + uint64_t lbn, dbn; + uint32_t off; + int not_new = 0; + int error; + + lbn = size; + off = do_div(lbn, sdp->sd_jbsize); + + error = gfs_block_map(ip, lbn, ¬_new, &dbn, NULL); + if (error) + return error; + if (!dbn) + return 0; + + error = gfs_trans_begin(sdp, 1, 0); + if (error) + return error; + + error = gfs_get_data_buffer(ip, dbn, FALSE, &bh); + if (!error) { + gfs_trans_add_bh(ip->i_gl, bh); + gfs_buffer_clear_tail(bh, + sizeof(struct gfs_meta_header) + + off); + brelse(bh); + } + + gfs_trans_end(sdp); + + return error; +} + +/** + * gfs_shrink - make a file smaller + * @ip: the inode + * @size: the size to make the file + * @truncator: function to truncate the last partial block + * + * Called with an exclusive lock on @ip. 
+ * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_shrink(struct gfs_inode *ip, uint64_t size, gfs_truncator_t truncator) +{ + struct gfs_sbd *sdp = ip->i_sbd; + struct gfs_holder ri_gh; + struct gfs_rgrpd *rgd; + struct buffer_head *dibh; + uint64_t block; + unsigned int height; + int journaled = gfs_is_jdata(ip); + int error; + + if (!size) + block = 0; + else if (journaled) { + block = size - 1; + do_div(block, sdp->sd_jbsize); + } + else + block = (size - 1) >> sdp->sd_sb.sb_bsize_shift; + + /* Get rid of all the data/metadata blocks */ + + height = ip->i_di.di_height; + if (height) { + struct metapath *mp = find_metapath(ip, block); + gfs_alloc_get(ip); + + error = gfs_quota_hold_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) { + gfs_alloc_put(ip); + kfree(mp); + return error; + } + + while (height--) { + struct strip_mine sm; + + sm.sm_first = (size) ? TRUE : FALSE; + sm.sm_height = height; + + error = recursive_scan(ip, NULL, mp, 0, 0, TRUE, + do_strip, &sm); + if (error) { + gfs_quota_unhold_m(ip); + gfs_alloc_put(ip); + kfree(mp); + return error; + } + } + + gfs_quota_unhold_m(ip); + gfs_alloc_put(ip); + kfree(mp); + } + + /* If we truncated in the middle of a block, zero out the leftovers. */ + + if (gfs_is_stuffed(ip)) { + /* Do nothing */ + } else if (journaled) { + if (do_mod(size, sdp->sd_jbsize)) { + error = truncator_journaled(ip, size); + if (error) + return error; + } + } else if (size & (uint64_t)(sdp->sd_sb.sb_bsize - 1)) { + error = truncator(ip, size); + if (error) + return error; + } + + /* Set the new size (and possibly the height) */ + + if (!size) { + error = gfs_rindex_hold(sdp, &ri_gh); + if (error) + return error; + } + + error = gfs_trans_begin(sdp, 1, 0); + if (error) + goto out; + + error = gfs_get_inode_buffer(ip, &dibh); + if (error) + goto out_end_trans; + + if (!size) { + ip->i_di.di_height = 0; + + rgd = gfs_blk2rgrpd(sdp, ip->i_num.no_addr); + GFS_ASSERT_INODE(rgd, ip,); + + ip->i_di.di_goal_rgrp = rgd->rd_ri.ri_addr; + ip->i_di.di_goal_dblk = + ip->i_di.di_goal_mblk = + ip->i_num.no_addr - rgd->rd_ri.ri_data1; + } + + ip->i_di.di_size = size; + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + + gfs_trans_add_bh(ip->i_gl, dibh); + + if (!ip->i_di.di_height && + size < sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode)) + gfs_buffer_clear_tail(dibh, sizeof(struct gfs_dinode) + size); + + gfs_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + + out_end_trans: + gfs_trans_end(sdp); + + out: + if (!size) + gfs_glock_dq_uninit(&ri_gh); + + return error; +} + +/** + * do_same - truncate to same size (update time stamps) + * @ip: + * + * Returns: errno + */ + +static int +do_same(struct gfs_inode *ip) +{ + struct gfs_sbd *sdp = ip->i_sbd; + struct buffer_head *dibh; + int error; + + error = gfs_trans_begin(sdp, 1, 0); + if (error) + return error; + + error = gfs_get_inode_buffer(ip, &dibh); + if (error) + goto out; + + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + + gfs_trans_add_bh(ip->i_gl, dibh); + gfs_dinode_out(&ip->i_di, dibh->b_data); + + brelse(dibh); + + out: + gfs_trans_end(sdp); + + return error; +} + +/** + * gfs_truncatei - make a file a give size + * @ip: the inode + * @size: the size to make the file + * @truncator: function to truncate the last partial block + * + * The file size can grow, shrink, or stay the same size. 
+ * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_truncatei(struct gfs_inode *ip, uint64_t size, + gfs_truncator_t truncator) +{ + GFS_ASSERT_INODE(ip->i_di.di_type == GFS_FILE_REG, ip,); + + if (size == ip->i_di.di_size) + return do_same(ip); + else if (size > ip->i_di.di_size) + return do_grow(ip, size); + else + return gfs_shrink(ip, size, truncator); +} + +/** + * gfs_write_calc_reserv - calculate the number of blocks needed to write to a file + * @ip: the file + * @len: the number of bytes to be written to the file + * @data_blocks: returns the number of data blocks required + * @ind_blocks: returns the number of indirect blocks required + * + */ + +void +gfs_write_calc_reserv(struct gfs_inode *ip, unsigned int len, + unsigned int *data_blocks, unsigned int *ind_blocks) +{ + struct gfs_sbd *sdp = ip->i_sbd; + unsigned int tmp; + + if (gfs_is_jdata(ip)) { + *data_blocks = DIV_RU(len, sdp->sd_jbsize) + 2; + *ind_blocks = 3 * (sdp->sd_max_jheight - 1); + } else { + *data_blocks = (len >> sdp->sd_sb.sb_bsize_shift) + 3; + *ind_blocks = 3 * (sdp->sd_max_height - 1); + } + + for (tmp = *data_blocks; tmp > sdp->sd_diptrs;) { + tmp = DIV_RU(tmp, sdp->sd_inptrs); + *ind_blocks += tmp; + } +} + +/** + * gfs_write_alloc_required - figure out if a write is going to require an allocation + * @ip: the file being written to + * @offset: the offset to write to + * @len: the number of bytes being written + * @alloc_required: the int is set to TRUE if an alloc is required, FALSE otherwise + * + * Returns: 0 on success, -EXXX on error + */ + +int +gfs_write_alloc_required(struct gfs_inode *ip, + uint64_t offset, unsigned int len, + int *alloc_required) +{ + struct gfs_sbd *sdp = ip->i_sbd; + uint64_t lblock, lblock_stop, dblock; + uint32_t extlen; + int not_new = FALSE; + int error = 0; + + *alloc_required = FALSE; + + if (!len) + return 0; + + if (gfs_is_stuffed(ip)) { + if (offset + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode)) + *alloc_required = TRUE; + return 0; + } + + if (gfs_is_jdata(ip)) { + unsigned int bsize = sdp->sd_jbsize; + lblock = offset; + do_div(lblock, bsize); + lblock_stop = offset + len + bsize - 1; + do_div(lblock_stop, bsize); + } else { + unsigned int shift = sdp->sd_sb.sb_bsize_shift; + lblock = offset >> shift; + lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; + } + + for (; lblock < lblock_stop; lblock += extlen) { + error = gfs_block_map(ip, lblock, &not_new, &dblock, &extlen); + if (error) + return error; + + if (!dblock) { + *alloc_required = TRUE; + return 0; + } + } + + return 0; +}
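The two helpers above are designed to be used together: gfs_write_alloc_required() decides whether a write needs any new blocks at all, and gfs_write_calc_reserv() gives the worst-case block count to reserve in the transaction. A sketch of a hypothetical caller (example_prepare_write() is illustrative, not part of the patch):

/* Hypothetical caller combining the two helpers above (illustrative only). */
static int example_prepare_write(struct gfs_inode *ip,
				 uint64_t offset, unsigned int len)
{
	unsigned int data_blocks, ind_blocks;
	int alloc_required;
	int error;

	error = gfs_write_alloc_required(ip, offset, len, &alloc_required);
	if (error)
		return error;

	if (!alloc_required)
		return 0;	/* every block is already mapped */

	/* Worst case: each data block may need new indirect blocks, too. */
	gfs_write_calc_reserv(ip, len, &data_blocks, &ind_blocks);

	/* A real caller would now reserve data_blocks + ind_blocks in its
	   transaction and acquire an allocation before copying in data. */
	return 0;
}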
+ +/** + * do_gfm - Copy out the dinode/indirect blocks of a file + * @ip: the file + * @dibh: the dinode buffer + * @bh: the indirect buffer we're looking at + * @top: the first pointer in the block + * @bottom: one more than the last pointer in the block + * @height: the height the block is at + * @data: a pointer to a struct gfs_user_buffer structure + * + * If this is a journaled file, copy out the data too. + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +do_gfm(struct gfs_inode *ip, struct buffer_head *dibh, + struct buffer_head *bh, uint64_t *top, uint64_t *bottom, + unsigned int height, void *data) +{ + struct gfs_sbd *sdp = ip->i_sbd; + struct gfs_user_buffer *ub = (struct gfs_user_buffer *)data; + struct buffer_head *data_bh; + uint64_t *bp, bn; + int error; + + error = gfs_add_bh_to_ub(ub, bh); + if (error) + return error; + + if (ip->i_di.di_type != GFS_FILE_DIR || + height + 1 != ip->i_di.di_height) + return 0; + + for (bp = top; bp < bottom; bp++) + if (*bp) { + bn = gfs64_to_cpu(*bp); + + error = gfs_dread(sdp, bn, ip->i_gl, + DIO_START | DIO_WAIT, &data_bh); + if (error) + return error; + + error = gfs_add_bh_to_ub(ub, data_bh); + + brelse(data_bh); + + if (error) + return error; + } + + return 0; +} + +/** + * gfs_get_file_meta - return all the metadata for a file + * @ip: the file + * @ub: the structure representing the metadata + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_get_file_meta(struct gfs_inode *ip, struct gfs_user_buffer *ub) +{ + struct buffer_head *dibh; + struct metapath *mp; + int error; + + if (gfs_is_stuffed(ip)) { + error = gfs_get_inode_buffer(ip, &dibh); + if (!error) { + error = gfs_add_bh_to_ub(ub, dibh); + brelse(dibh); + } + } else { + mp = find_metapath(ip, 0); + error = recursive_scan(ip, NULL, mp, 0, 0, TRUE, do_gfm, ub); + kfree(mp); + } + + return error; +} diff -urN linux-orig/fs/gfs/bmap.h linux-patched/fs/gfs/bmap.h --- linux-orig/fs/gfs/bmap.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/bmap.h 2004-06-30 13:27:49.333713450 -0500 @@ -0,0 +1,48 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2.
+** +******************************************************************************* +******************************************************************************/ + +#ifndef __BMAP_DOT_H__ +#define __BMAP_DOT_H__ + +typedef int (*gfs_unstuffer_t) (struct gfs_inode * ip, + struct buffer_head * dibh, uint64_t block, + void *private); + +int gfs_unstuffer_sync(struct gfs_inode *ip, struct buffer_head *dibh, + uint64_t block, void *private); +int gfs_unstuffer_async(struct gfs_inode *ip, struct buffer_head *dibh, + uint64_t block, void *private); + +int gfs_unstuff_dinode(struct gfs_inode *ip, gfs_unstuffer_t unstuffer, + void *private); + +int gfs_block_map(struct gfs_inode *ip, + uint64_t lblock, int *new, + uint64_t *dblock, uint32_t *extlen); + +typedef int (*gfs_truncator_t) (struct gfs_inode * ip, uint64_t size); + +int gfs_truncator_default(struct gfs_inode *ip, uint64_t size); + +int gfs_shrink(struct gfs_inode *ip, uint64_t size, gfs_truncator_t truncator); +int gfs_truncatei(struct gfs_inode *ip, uint64_t size, + gfs_truncator_t truncator); + +void gfs_write_calc_reserv(struct gfs_inode *ip, unsigned int len, + unsigned int *data_blocks, unsigned int *ind_blocks); +int gfs_write_alloc_required(struct gfs_inode *ip, uint64_t offset, + unsigned int len, int *alloc_required); + +int gfs_get_file_meta(struct gfs_inode *ip, struct gfs_user_buffer *ub); + +#endif /* __BMAP_DOT_H__ */ diff -urN linux-orig/fs/gfs/daemon.c linux-patched/fs/gfs/daemon.c --- linux-orig/fs/gfs/daemon.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/daemon.c 2004-06-30 13:27:49.333713450 -0500 @@ -0,0 +1,259 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
+** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include "gfs.h" +#include "daemon.h" +#include "glock.h" +#include "log.h" +#include "quota.h" +#include "recovery.h" +#include "super.h" +#include "unlinked.h" + +/** + * gfs_scand - Look for cached glocks to toss from memory + * @sdp: Pointer to GFS superblock + * + */ + +int +gfs_scand(void *data) +{ + struct gfs_sbd *sdp = (struct gfs_sbd *)data; + + daemonize("gfs_scand"); + sdp->sd_scand_process = current; + set_bit(SDF_SCAND_RUN, &sdp->sd_flags); + complete(&sdp->sd_thread_completion); + + for (;;) { + gfs_scand_internal(sdp); + + if (!test_bit(SDF_SCAND_RUN, &sdp->sd_flags)) + break; + + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(sdp->sd_tune.gt_scand_secs * HZ); + } + + down(&sdp->sd_thread_lock); + up(&sdp->sd_thread_lock); + + complete(&sdp->sd_thread_completion); + + return 0; +} + +/** + * gfs_glockd - Reclaim unused glock structures + * @sdp: Pointer to GFS superblock + * + */ + +int +gfs_glockd(void *data) +{ + struct gfs_sbd *sdp = (struct gfs_sbd *)data; + + daemonize("gfs_glockd"); + set_bit(SDF_GLOCKD_RUN, &sdp->sd_flags); + complete(&sdp->sd_thread_completion); + + for (;;) { + while (atomic_read(&sdp->sd_reclaim_count)) + gfs_reclaim_glock(sdp); + + if (!test_bit(SDF_GLOCKD_RUN, &sdp->sd_flags)) + break; + + { + DECLARE_WAITQUEUE(__wait_chan, current); + current->state = TASK_INTERRUPTIBLE; + add_wait_queue(&sdp->sd_reclaim_wchan, &__wait_chan); + if (!atomic_read(&sdp->sd_reclaim_count) + && test_bit(SDF_GLOCKD_RUN, &sdp->sd_flags)) + schedule(); + remove_wait_queue(&sdp->sd_reclaim_wchan, &__wait_chan); + current->state = TASK_RUNNING; + } + } + + complete(&sdp->sd_thread_completion); + + return 0; +} + +/** + * gfs_recoverd - Recovery of dead machine's journals + * @sdp: Pointer to GFS superblock + * + */ + +int +gfs_recoverd(void *data) +{ + struct gfs_sbd *sdp = (struct gfs_sbd *)data; + + daemonize("gfs_recoverd"); + sdp->sd_recoverd_process = current; + set_bit(SDF_RECOVERD_RUN, &sdp->sd_flags); + complete(&sdp->sd_thread_completion); + + for (;;) { + gfs_check_journals(sdp); + + if (!test_bit(SDF_RECOVERD_RUN, &sdp->sd_flags)) + break; + + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(sdp->sd_tune.gt_recoverd_secs * HZ); + } + + down(&sdp->sd_thread_lock); + up(&sdp->sd_thread_lock); + + complete(&sdp->sd_thread_completion); + + return 0; +} + +/** + * gfs_logd - Writing of cached log changes into the log file + * @sdp: Pointer to GFS superblock + * + */ + +int +gfs_logd(void *data) +{ + struct gfs_sbd *sdp = (struct gfs_sbd *)data; + struct gfs_holder ji_gh; + + daemonize("gfs_logd"); + sdp->sd_logd_process = current; + set_bit(SDF_LOGD_RUN, &sdp->sd_flags); + complete(&sdp->sd_thread_completion); + + for (;;) { + gfs_ail_empty(sdp); + + if (time_after_eq(jiffies, + sdp->sd_jindex_refresh_time + + sdp->sd_tune.gt_jindex_refresh_secs * HZ)) { + if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags) && + !gfs_jindex_hold(sdp, &ji_gh)) + gfs_glock_dq_uninit(&ji_gh); + sdp->sd_jindex_refresh_time = jiffies; + } + + if (!test_bit(SDF_LOGD_RUN, &sdp->sd_flags)) + break; + + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(sdp->sd_tune.gt_logd_secs * HZ); + } + + down(&sdp->sd_thread_lock); + up(&sdp->sd_thread_lock); + + complete(&sdp->sd_thread_completion); + + return 0; +}
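gfs_scand(), gfs_recoverd(), and gfs_logd() above, and the two daemons below, all share one loop shape: daemonize, announce readiness, do a round of work, check the run flag, then sleep for a tunable interval. A condensed, hypothetical skeleton of that pattern, with the per-daemon work and interval factored out:

/* Hypothetical condensation of the daemon loop used throughout this file;
   not part of the patch, shown only to make the shared structure explicit. */
static int gfs_daemon_skeleton(struct gfs_sbd *sdp,
			       void (*work)(struct gfs_sbd *sdp),
			       unsigned int *interval_secs,
			       int run_flag)
{
	for (;;) {
		work(sdp);				/* one round of work */

		if (!test_bit(run_flag, &sdp->sd_flags))
			break;				/* asked to exit */

		current->state = TASK_INTERRUPTIBLE;
		schedule_timeout(*interval_secs * HZ);	/* sleep until next round */
	}

	down(&sdp->sd_thread_lock);	/* let the stopping thread finish its */
	up(&sdp->sd_thread_lock);	/* bookkeeping before we complete     */

	complete(&sdp->sd_thread_completion);

	return 0;
}

gfs_glockd() deviates from the skeleton only in how it sleeps: it waits on sd_reclaim_wchan instead of a timeout, so it wakes as soon as there are glocks to reclaim.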
+ +/** + * gfs_quotad - Writing of cached quota changes into the quota file + * @sdp: Pointer to GFS superblock + * + */ + +int +gfs_quotad(void *data) +{ + struct gfs_sbd *sdp = (struct gfs_sbd *)data; + int error; + + daemonize("gfs_quotad"); + sdp->sd_quotad_process = current; + set_bit(SDF_QUOTAD_RUN, &sdp->sd_flags); + complete(&sdp->sd_thread_completion); + + for (;;) { + if (time_after_eq(jiffies, + sdp->sd_quota_sync_time + + sdp->sd_tune.gt_quota_quantum * HZ)) { + error = gfs_quota_sync(sdp); + if (error && error != -EROFS) + printk("GFS: fsid=%s: quotad: error = %d\n", + sdp->sd_fsname, error); + sdp->sd_quota_sync_time = jiffies; + } + + gfs_quota_scan(sdp); + + if (!test_bit(SDF_QUOTAD_RUN, &sdp->sd_flags)) + break; + + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(sdp->sd_tune.gt_quotad_secs * HZ); + } + + down(&sdp->sd_thread_lock); + up(&sdp->sd_thread_lock); + + complete(&sdp->sd_thread_completion); + + return 0; +} + +/** + * gfs_inoded - Deallocation of unlinked inodes + * @sdp: Pointer to GFS superblock + * + */ + +int +gfs_inoded(void *data) +{ + struct gfs_sbd *sdp = (struct gfs_sbd *)data; + + daemonize("gfs_inoded"); + sdp->sd_inoded_process = current; + set_bit(SDF_INODED_RUN, &sdp->sd_flags); + complete(&sdp->sd_thread_completion); + + for (;;) { + gfs_unlinked_dealloc(sdp); + + if (!test_bit(SDF_INODED_RUN, &sdp->sd_flags)) + break; + + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(sdp->sd_tune.gt_inoded_secs * HZ); + } + + down(&sdp->sd_thread_lock); + up(&sdp->sd_thread_lock); + + complete(&sdp->sd_thread_completion); + + return 0; +} diff -urN linux-orig/fs/gfs/daemon.h linux-patched/fs/gfs/daemon.h --- linux-orig/fs/gfs/daemon.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/daemon.h 2004-06-30 13:27:49.334713218 -0500 @@ -0,0 +1,24 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __DAEMON_DOT_H__ +#define __DAEMON_DOT_H__ + +int gfs_scand(void *data); +int gfs_glockd(void *data); +int gfs_recoverd(void *data); +int gfs_logd(void *data); +int gfs_quotad(void *data); +int gfs_inoded(void *data); + +#endif /* __DAEMON_DOT_H__ */ diff -urN linux-orig/fs/gfs/dio.c linux-patched/fs/gfs/dio.c --- linux-orig/fs/gfs/dio.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/dio.c 2004-06-30 13:27:49.334713218 -0500 @@ -0,0 +1,1302 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2.
+** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs.h" +#include "dio.h" +#include "glock.h" +#include "glops.h" +#include "inode.h" +#include "log.h" +#include "lops.h" +#include "rgrp.h" +#include "trans.h" + +#define buffer_busy(bh) ((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock))) + +/** + * aspace_get_block - + * @inode: + * @lblock: + * @bh_result: + * @create: + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +aspace_get_block(struct inode *inode, sector_t lblock, + struct buffer_head *bh_result, int create) +{ + struct gfs_sbd *sdp = vfs2sdp(inode->i_sb); + GFS_ASSERT_SBD(FALSE, sdp,); +} + +/** + * gfs_aspace_writepage - write an aspace page + * @page: the page + * @wbc: + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +gfs_aspace_writepage(struct page *page, struct writeback_control *wbc) +{ + return block_write_full_page(page, aspace_get_block, wbc); +} + +/** + * stuck_releasepage - We're stuck in gfs_releasepage(). Print stuff out. + * @bh: the buffer we're stuck on + * + */ + +static void +stuck_releasepage(struct buffer_head *bh) +{ + struct gfs_sbd *sdp = vfs2sdp(bh->b_page->mapping->host->i_sb); + struct gfs_bufdata *bd = bh2bd(bh); + + printk("GFS: fsid=%s: stuck in gfs_releasepage()...\n", sdp->sd_fsname); + printk("GFS: fsid=%s: blkno = %"PRIu64", bh->b_count = %d\n", + sdp->sd_fsname, + (uint64_t)bh->b_blocknr, + atomic_read(&bh->b_count)); + printk("GFS: fsid=%s: bh2bd(bh) = %s\n", + sdp->sd_fsname, + (bd) ? "!NULL" : "NULL"); + + if (bd) { + struct gfs_glock *gl = bd->bd_gl; + + printk("GFS: fsid=%s: gl = (%u, %"PRIu64")\n", + sdp->sd_fsname, + gl->gl_name.ln_type, + gl->gl_name.ln_number); + + printk("GFS: fsid=%s: bd_new_le.le_trans = %s\n", + sdp->sd_fsname, + (bd->bd_new_le.le_trans) ? "!NULL" : "NULL"); + printk("GFS: fsid=%s: bd_incore_le.le_trans = %s\n", + sdp->sd_fsname, + (bd->bd_incore_le.le_trans) ? "!NULL" : "NULL"); + printk("GFS: fsid=%s: bd_frozen = %s\n", + sdp->sd_fsname, + (bd->bd_frozen) ? "!NULL" : "NULL"); + printk("GFS: fsid=%s: bd_pinned = %u\n", + sdp->sd_fsname, bd->bd_pinned); + printk("GFS: fsid=%s: bd_ail_tr_list = %s\n", + sdp->sd_fsname, + (list_empty(&bd->bd_ail_tr_list)) ? "Empty" : "!Empty"); + + if (gl->gl_ops == &gfs_inode_glops) { + struct gfs_inode *ip = gl2ip(gl); + + if (ip) { + unsigned int x; + + printk("GFS: fsid=%s: ip = %"PRIu64"/%"PRIu64"\n", + sdp->sd_fsname, + ip->i_num.no_formal_ino, + ip->i_num.no_addr); + printk("GFS: fsid=%s: ip->i_count = %d, ip->i_vnode = %s\n", + sdp->sd_fsname, + atomic_read(&ip->i_count), + (ip->i_vnode) ? "!NULL" : "NULL"); + for (x = 0; x < GFS_MAX_META_HEIGHT; x++) + printk("GFS: fsid=%s: ip->i_cache[%u] = %s\n", + sdp->sd_fsname, x, + (ip->i_cache[x]) ? "!NULL" : "NULL"); + } + } + } +} + +/** + * gfs_aspace_releasepage - free the metadata associated with a page + * @page: the page that's being released + * @gfp_mask: huh?? + * + * Call try_to_free_buffers() if the buffers in this page can be + * released. 
+ * + * Returns: 0 + */ + +static int +gfs_aspace_releasepage(struct page *page, int gfp_mask) +{ + struct inode *aspace = page->mapping->host; + struct gfs_sbd *sdp = vfs2sdp(aspace->i_sb); + struct buffer_head *bh, *head; + struct gfs_bufdata *bd; + unsigned long t; + + if (!page_has_buffers(page)) + goto out; + + head = bh = page_buffers(page); + do { + t = jiffies; + + while (atomic_read(&bh->b_count)) { + if (atomic_read(&aspace->i_writecount)) { + if (time_after_eq(jiffies, + t + + sdp->sd_tune.gt_stall_secs * HZ)) { + stuck_releasepage(bh); + t = jiffies; + } + + yield(); + continue; + } + + return 0; + } + + bd = bh2bd(bh); + if (bd) { + GFS_ASSERT_SBD(bd->bd_bh == bh, sdp,); + GFS_ASSERT_SBD(!bd->bd_new_le.le_trans, sdp,); + GFS_ASSERT_SBD(!bd->bd_incore_le.le_trans, sdp,); + GFS_ASSERT_SBD(!bd->bd_frozen, sdp,); + GFS_ASSERT_SBD(!bd->bd_pinned, sdp,); + GFS_ASSERT_SBD(list_empty(&bd->bd_ail_tr_list), sdp,); + kmem_cache_free(gfs_bufdata_cachep, bd); + atomic_dec(&sdp->sd_bufdata_count); + bh2bd(bh) = NULL; + } + + bh = bh->b_this_page; + } + while (bh != head); + + out: + return try_to_free_buffers(page); +} + +static struct address_space_operations aspace_aops = { + .writepage = gfs_aspace_writepage, + .releasepage = gfs_aspace_releasepage, +}; + +/** + * gfs_aspace_get - Get and initialize a struct inode structure + * @sdp: the filesystem the aspace is in + * + * Right now a struct inode is just a struct inode. Maybe Linux + * will supply a more lightweight address space construct (that works) + * in the future. + * + * Make sure pages/buffers in this aspace aren't in high memory. + * + * Returns: the aspace + */ + +struct inode * +gfs_aspace_get(struct gfs_sbd *sdp) +{ + struct inode *aspace; + + aspace = new_inode(sdp->sd_vfs); + if (aspace) { + mapping_set_gfp_mask(aspace->i_mapping, GFP_KERNEL); + aspace->i_mapping->a_ops = &aspace_aops; + aspace->i_size = ~0ULL; + vn2ip(aspace) = NULL; + insert_inode_hash(aspace); + } + + return aspace; +} + +/** + * gfs_aspace_put - get rid of an aspace + * @aspace: + * + */ + +void +gfs_aspace_put(struct inode *aspace) +{ + remove_inode_hash(aspace); + iput(aspace); +} + +/** + * gfs_ail_start_trans - Start I/O on a part of the AIL + * @sdp: the filesystem + * @tr: the part of the AIL + * + */ + +void +gfs_ail_start_trans(struct gfs_sbd *sdp, struct gfs_trans *tr) +{ + struct list_head *head, *tmp, *prev; + struct gfs_bufdata *bd; + struct buffer_head *bh; + int retry; + + do { + retry = FALSE; + + spin_lock(&sdp->sd_ail_lock); + + for (head = &tr->tr_ail_bufs, tmp = head->prev, prev = tmp->prev; + tmp != head; + tmp = prev, prev = tmp->prev) { + bd = list_entry(tmp, struct gfs_bufdata, bd_ail_tr_list); + bh = bd->bd_bh; + + if (gfs_trylock_buffer(bh)) + continue; + + if (bd->bd_pinned) { + gfs_unlock_buffer(bh); + continue; + } + + if (!buffer_busy(bh)) { + if (!buffer_uptodate(bh)) + gfs_io_error_bh(sdp, bh); + + list_del_init(&bd->bd_ail_tr_list); + list_del(&bd->bd_ail_gl_list); + + gfs_unlock_buffer(bh); + brelse(bh); + continue; + } + + if (buffer_dirty(bh)) { + list_move(&bd->bd_ail_tr_list, head); + + spin_unlock(&sdp->sd_ail_lock); + wait_on_buffer(bh); + ll_rw_block(WRITE, 1, &bh); + spin_lock(&sdp->sd_ail_lock); + + gfs_unlock_buffer(bh); + retry = TRUE; + break; + } + + gfs_unlock_buffer(bh); + } + + spin_unlock(&sdp->sd_ail_lock); + } while (retry); +} + +/** + * gfs_ail_empty_trans - Check whether or not a trans in the AIL has been synced + * @sdp: the filesystem + * @tr: the transaction + * + */ + +int 
+gfs_ail_empty_trans(struct gfs_sbd *sdp, struct gfs_trans *tr) +{ + struct list_head *head, *tmp, *prev; + struct gfs_bufdata *bd; + struct buffer_head *bh; + int ret; + + spin_lock(&sdp->sd_ail_lock); + + for (head = &tr->tr_ail_bufs, tmp = head->prev, prev = tmp->prev; + tmp != head; + tmp = prev, prev = tmp->prev) { + bd = list_entry(tmp, struct gfs_bufdata, bd_ail_tr_list); + bh = bd->bd_bh; + + if (gfs_trylock_buffer(bh)) + continue; + + if (bd->bd_pinned || buffer_busy(bh)) { + gfs_unlock_buffer(bh); + continue; + } + + if (!buffer_uptodate(bh)) + gfs_io_error_bh(sdp, bh); + + list_del_init(&bd->bd_ail_tr_list); + list_del(&bd->bd_ail_gl_list); + + gfs_unlock_buffer(bh); + brelse(bh); + } + + ret = list_empty(head); + + spin_unlock(&sdp->sd_ail_lock); + + return ret; +} + +/** + * ail_empty_gl - remove all buffers for a given lock from the AIL + * @gl: the glock + * + * None of the buffers should be dirty, locked, or pinned. + */ + +static void +ail_empty_gl(struct gfs_glock *gl) +{ + struct gfs_sbd *sdp = gl->gl_sbd; + struct gfs_bufdata *bd; + struct buffer_head *bh; + + spin_lock(&sdp->sd_ail_lock); + + while (!list_empty(&gl->gl_ail_bufs)) { + bd = list_entry(gl->gl_ail_bufs.next, + struct gfs_bufdata, bd_ail_gl_list); + bh = bd->bd_bh; + + GFS_ASSERT_GLOCK(!bd->bd_pinned && !buffer_busy(bh), gl, + printk("%u %.8lX\n", bd->bd_pinned, bh->b_state);); + if (!buffer_uptodate(bh)) + gfs_io_error_bh(sdp, bh); + + list_del_init(&bd->bd_ail_tr_list); + list_del(&bd->bd_ail_gl_list); + + brelse(bh); + } + + spin_unlock(&sdp->sd_ail_lock); +} + +/** + * gfs_inval_buf - Invalidate all buffers associated with a glock + * @gl: the glock + * + */ + +void +gfs_inval_buf(struct gfs_glock *gl) +{ + struct inode *aspace = gl->gl_aspace; + struct address_space *mapping = gl->gl_aspace->i_mapping; + + ail_empty_gl(gl); + + atomic_inc(&aspace->i_writecount); + truncate_inode_pages(mapping, 0); + atomic_dec(&aspace->i_writecount); + + GFS_ASSERT_GLOCK(!mapping->nrpages, gl,); +} + +/** + * gfs_sync_buf - Sync all buffers associated with a glock + * @gl: The glock + * @flags: DIO_START | DIO_WAIT + * + */ + +void +gfs_sync_buf(struct gfs_glock *gl, int flags) +{ + struct address_space *mapping = gl->gl_aspace->i_mapping; + int error = 0; + + if (flags & DIO_START) + error = filemap_fdatawrite(mapping); + if (!error && (flags & DIO_WAIT)) + error = filemap_fdatawait(mapping); + if (!error && (flags & (DIO_INVISIBLE | DIO_CHECK)) == DIO_CHECK) + ail_empty_gl(gl); + + if (error) + gfs_io_error(gl->gl_sbd); +} + +/** + * getbuf - Get a buffer with a given address space + * @sdp: the filesystem + * @aspace: the address space + * @blkno: the block number + * @create: TRUE if the buffer should be created + * + * Returns: the buffer + */ + +static struct buffer_head * +getbuf(struct gfs_sbd *sdp, struct inode *aspace, uint64_t blkno, int create) +{ + struct page *page; + struct buffer_head *bh; + unsigned int shift; + unsigned long index; + unsigned int bufnum; + + shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift; + index = blkno >> shift; + bufnum = blkno - (index << shift); + + if (create) { + RETRY_MALLOC(page = grab_cache_page(aspace->i_mapping, index), page); + } else { + page = find_lock_page(aspace->i_mapping, index); + if (!page) + return NULL; + } + + if (!page_has_buffers(page)) + create_empty_buffers(page, sdp->sd_sb.sb_bsize, 0); + + for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page) + /* Do nothing */; + get_bh(bh); + + if (!buffer_mapped(bh)) + map_bh(bh, sdp->sd_vfs, blkno); + 
else + GFS_ASSERT_SBD(bh->b_bdev == sdp->sd_vfs->s_bdev && + bh->b_blocknr == blkno, + sdp,); + + unlock_page(page); + page_cache_release(page); + + return bh; +} + +/** + * gfs_dgetblk - Get a block + * @sdp: The GFS superblock + * @blkno: The block number + * @gl: The glock associated with this block + * + * Returns: The buffer + */ + +struct buffer_head * +gfs_dgetblk(struct gfs_sbd *sdp, uint64_t blkno, struct gfs_glock *gl) +{ + struct buffer_head *bh; + + if (gl) + bh = getbuf(sdp, gl->gl_aspace, blkno, CREATE); + else + bh = sb_getblk(sdp->sd_vfs, blkno); + + return bh; +} + +/** + * gfs_dread - Read a block from disk + * @sdp: The GFS superblock + * @blkno: The block number + * @gl: The glock covering the block + * @flags: flags to gfs_dreread() + * @bhp: the place where the buffer is returned + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_dread(struct gfs_sbd *sdp, uint64_t blkno, struct gfs_glock *gl, int flags, + struct buffer_head **bhp) +{ + int error; + + *bhp = gfs_dgetblk(sdp, blkno, gl); + error = gfs_dreread(sdp, *bhp, flags); + if (error) + brelse(*bhp); + + return error; +} + +/** + * gfs_prep_new_buffer - Mark a new buffer we just gfs_dgetblk()ed uptodate + * @bh: the buffer + * + */ + +void +gfs_prep_new_buffer(struct buffer_head *bh) +{ + wait_on_buffer(bh); + clear_buffer_dirty(bh); + set_buffer_uptodate(bh); +} + +/** + * gfs_dreread - Reread a block from disk + * @sdp: the filesystem + * @bh: The block to read + * @flags: Flags that control the read + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_dreread(struct gfs_sbd *sdp, struct buffer_head *bh, int flags) +{ + int error = 0; + + if (flags & DIO_NEW) { + if (gfs_mhc_fish(sdp, bh)) + return 0; + clear_buffer_uptodate(bh); + } + + if (flags & DIO_FORCE) + clear_buffer_uptodate(bh); + + if ((flags & DIO_START) && !buffer_uptodate(bh)) + ll_rw_block(READ, 1, &bh); + + if (flags & DIO_WAIT) { + wait_on_buffer(bh); + + if (!buffer_uptodate(bh)) { + gfs_io_error_bh(sdp, bh); + error = -EIO; + } + } + + return error; +} + +/** + * gfs_dwrite - Write a buffer + * @sdp: the filesystem + * @bh: The buffer to write + * @flags: The type of write operation to do + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_dwrite(struct gfs_sbd *sdp, struct buffer_head *bh, int flags) +{ + int error = 0; + + GFS_ASSERT_SBD(buffer_uptodate(bh), sdp,); + GFS_ASSERT_SBD(!test_bit(SDF_ROFS, &sdp->sd_flags), sdp,); + + if (flags & DIO_CLEAN) { + lock_buffer(bh); + clear_buffer_dirty(bh); + unlock_buffer(bh); + } + + if (flags & DIO_DIRTY) + mark_buffer_dirty(bh); + + if ((flags & DIO_START) && buffer_dirty(bh)) { + wait_on_buffer(bh); + ll_rw_block(WRITE, 1, &bh); + } + + if (flags & DIO_WAIT) { + wait_on_buffer(bh); + + if (!buffer_uptodate(bh) || buffer_dirty(bh)) { + gfs_io_error_bh(sdp, bh); + error = -EIO; + } + } + + return error; +} + +/** + * gfs_attach_bufdata - attach a struct gfs_bufdata structure to a buffer + * @bh: The buffer to be attached to + * @gl: the glock the buffer belongs to + * + */ + +void +gfs_attach_bufdata(struct buffer_head *bh, struct gfs_glock *gl) +{ + struct gfs_bufdata *bd; + + lock_page(bh->b_page); + + if (bh2bd(bh)) { + unlock_page(bh->b_page); + return; + } + + RETRY_MALLOC(bd = kmem_cache_alloc(gfs_bufdata_cachep, GFP_KERNEL), bd); + atomic_inc(&gl->gl_sbd->sd_bufdata_count); + + memset(bd, 0, sizeof(struct gfs_bufdata)); + + bd->bd_bh = bh; + bd->bd_gl = gl; + + INIT_LE(&bd->bd_new_le, &gfs_buf_lops); + INIT_LE(&bd->bd_incore_le, &gfs_buf_lops);
+ + init_MUTEX(&bd->bd_lock); + + INIT_LIST_HEAD(&bd->bd_ail_tr_list); + + bh2bd(bh) = bd; + + unlock_page(bh->b_page); +} + +/** + * gfs_is_pinned - Figure out if a buffer is pinned or not + * @sdp: the filesystem the buffer belongs to + * @bh: The buffer to be pinned + * + * Returns: TRUE if the buffer is pinned, FALSE otherwise + */ + +int +gfs_is_pinned(struct gfs_sbd *sdp, struct buffer_head *bh) +{ + struct gfs_bufdata *bd = bh2bd(bh); + int ret = FALSE; + + if (bd) { + gfs_lock_buffer(bh); + if (bd->bd_pinned) + ret = TRUE; + gfs_unlock_buffer(bh); + } + + return ret; +} + +/** + * gfs_dpin - Pin a metadata buffer in memory + * @sdp: the filesystem the buffer belongs to + * @bh: The buffer to be pinned + * + */ + +void +gfs_dpin(struct gfs_sbd *sdp, struct buffer_head *bh) +{ + struct gfs_bufdata *bd; + char *data; + + GFS_ASSERT_SBD(buffer_uptodate(bh), sdp,); + GFS_ASSERT_SBD(!test_bit(SDF_ROFS, &sdp->sd_flags), sdp,); + + bd = bh2bd(bh); + GFS_ASSERT_SBD(bd, sdp,); + + gfs_lock_buffer(bh); + + GFS_ASSERT_GLOCK(!bd->bd_frozen, bd->bd_gl,); + + if (!bd->bd_pinned++) { + wait_on_buffer(bh); + + /* If this buffer is in the AIL and it has already been written, + remove it from the AIL. */ + + spin_lock(&sdp->sd_ail_lock); + if (!list_empty(&bd->bd_ail_tr_list) && !buffer_busy(bh)) { + list_del_init(&bd->bd_ail_tr_list); + list_del(&bd->bd_ail_gl_list); + brelse(bh); + } + spin_unlock(&sdp->sd_ail_lock); + + clear_buffer_dirty(bh); + wait_on_buffer(bh); + + if (!buffer_uptodate(bh)) + gfs_io_error_bh(sdp, bh); + } else { + gfs_unlock_buffer(bh); + + data = gmalloc(sdp->sd_sb.sb_bsize); + + gfs_lock_buffer(bh); + if (bd->bd_pinned > 1) { + memcpy(data, bh->b_data, sdp->sd_sb.sb_bsize); + bd->bd_frozen = data; + } else + kfree(data); + } + + gfs_unlock_buffer(bh); + + get_bh(bh); +} + +/** + * gfs_dunpin - Unpin a buffer + * @sdp: the filesystem the buffer belongs to + * @bh: The buffer to unpin + * @tr: The transaction in the AIL that contains this buffer + * + */ + +void +gfs_dunpin(struct gfs_sbd *sdp, struct buffer_head *bh, struct gfs_trans *tr) +{ + struct gfs_bufdata *bd; + + GFS_ASSERT_SBD(buffer_uptodate(bh), sdp,); + + bd = bh2bd(bh); + GFS_ASSERT_SBD(bd, sdp,); + + gfs_lock_buffer(bh); + + GFS_ASSERT_GLOCK(bd->bd_pinned, bd->bd_gl,); + + if (bd->bd_pinned == 1) + mark_buffer_dirty(bh); + + bd->bd_pinned--; + + gfs_unlock_buffer(bh); + + /* Add the buffer to the AIL + and get rid of an old reference if there is one */ + + if (tr) { + spin_lock(&sdp->sd_ail_lock); + + if (list_empty(&bd->bd_ail_tr_list)) + list_add(&bd->bd_ail_gl_list, &bd->bd_gl->gl_ail_bufs); + else { + list_del_init(&bd->bd_ail_tr_list); + brelse(bh); + } + list_add(&bd->bd_ail_tr_list, &tr->tr_ail_bufs); + + spin_unlock(&sdp->sd_ail_lock); + } else + brelse(bh); +} + +/** + * logbh_end_io - called at the end of a logbh write + * @bh: the buffer + * @uptodate: whether or not the write succeeded + * + * Don't do ENTER() AND EXIT() here. 
+ * + */ + +static void +logbh_end_io(struct buffer_head *bh, int uptodate) +{ + if (uptodate) + set_buffer_uptodate(bh); + else + clear_buffer_uptodate(bh); + unlock_buffer(bh); +} + +/** + * gfs_logbh_init - Initialize a fake buffer head + * @sdp: the filesystem + * @bh: the buffer to initialize + * @blkno: the block address of the buffer + * @data: the data to be written + * + */ + +void +gfs_logbh_init(struct gfs_sbd *sdp, struct buffer_head *bh, + uint64_t blkno, char *data) +{ + memset(bh, 0, sizeof(struct buffer_head)); + bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate) | (1 << BH_Lock); + atomic_set(&bh->b_count, 1); + set_bh_page(bh, virt_to_page(data), ((unsigned long)data) & (PAGE_SIZE - 1)); + bh->b_blocknr = blkno; + bh->b_size = sdp->sd_sb.sb_bsize; + bh->b_bdev = sdp->sd_vfs->s_bdev; + init_buffer(bh, logbh_end_io, NULL); + INIT_LIST_HEAD(&bh->b_assoc_buffers); +} + +/** + * gfs_logbh_uninit - Clean up a fake buffer head + * @sdp: the filesystem + * @bh: the buffer to clean + * + */ + +void +gfs_logbh_uninit(struct gfs_sbd *sdp, struct buffer_head *bh) +{ + GFS_ASSERT_SBD(!buffer_busy(bh) && + atomic_read(&bh->b_count) == 1, + sdp,); +} + +/** + * gfs_logbh_start - Start writing a fake buffer head + * @sdp: the filesystem + * @bh: the buffer to write + * + * Returns: 0 on success, -EXXX on error; + */ + +int +gfs_logbh_start(struct gfs_sbd *sdp, struct buffer_head *bh) +{ + submit_bh(WRITE, bh); + return 0; +} + +/** + * gfs_logbh_wait - Wait for the write of a fake buffer head to complete + * @sdp: the filesystem + * @bh: the buffer to write + * + * Returns: 0 on success, -EXXX on error; + */ + +int +gfs_logbh_wait(struct gfs_sbd *sdp, struct buffer_head *bh) +{ + int error = 0; + + wait_on_buffer(bh); + + if (!buffer_uptodate(bh) || buffer_dirty(bh)) { + gfs_io_error_bh(sdp, bh); + error = -EIO; + } + + return error; +} + +/** + * gfs_replay_buf - write a log buffer to its inplace location + * @gl: the journal's glock + * @bh: the buffer + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_replay_buf(struct gfs_glock *gl, struct buffer_head *bh) +{ + struct gfs_sbd *sdp = gl->gl_sbd; + struct gfs_bufdata *bd; + + bd = bh2bd(bh); + if (!bd) { + gfs_attach_bufdata(bh, gl); + bd = bh2bd(bh); + } + + mark_buffer_dirty(bh); + + if (list_empty(&bd->bd_ail_tr_list)) { + get_bh(bh); + list_add(&bd->bd_ail_tr_list, &sdp->sd_recovery_bufs); + } + + return 0; +} + +/** + * gfs_replay_check - Check up on journal replay + * @sdp: the filesystem + * + */ + +void +gfs_replay_check(struct gfs_sbd *sdp) +{ + struct buffer_head *bh; + struct gfs_bufdata *bd; + + while (!list_empty(&sdp->sd_recovery_bufs)) { + bd = list_entry(sdp->sd_recovery_bufs.prev, + struct gfs_bufdata, bd_ail_tr_list); + bh = bd->bd_bh; + + if (buffer_busy(bh)) { + list_move(&bd->bd_ail_tr_list, + &sdp->sd_recovery_bufs); + break; + } else { + list_del_init(&bd->bd_ail_tr_list); + if (!buffer_uptodate(bh)) + gfs_io_error_bh(sdp, bh); + brelse(bh); + } + } +} + +/** + * gfs_replay_wait - Wait for all replayed buffers to hit the disk + * @sdp: the filesystem + * + */ + +void +gfs_replay_wait(struct gfs_sbd *sdp) +{ + struct list_head *head, *tmp, *prev; + struct buffer_head *bh; + struct gfs_bufdata *bd; + + for (head = &sdp->sd_recovery_bufs, tmp = head->prev, prev = tmp->prev; + tmp != head; + tmp = prev, prev = tmp->prev) { + bd = list_entry(tmp, struct gfs_bufdata, bd_ail_tr_list); + bh = bd->bd_bh; + + if (!buffer_busy(bh)) { + list_del_init(&bd->bd_ail_tr_list); + if (!buffer_uptodate(bh)) + 
gfs_io_error_bh(sdp, bh); + brelse(bh); + continue; + } + + if (buffer_dirty(bh)) { + wait_on_buffer(bh); + ll_rw_block(WRITE, 1, &bh); + } + } + + while (!list_empty(head)) { + bd = list_entry(head->prev, struct gfs_bufdata, bd_ail_tr_list); + bh = bd->bd_bh; + + wait_on_buffer(bh); + + GFS_ASSERT_SBD(!buffer_busy(bh), sdp,); + + list_del_init(&bd->bd_ail_tr_list); + if (!buffer_uptodate(bh)) + gfs_io_error_bh(sdp, bh); + brelse(bh); + } +} + +/** + * gfs_wipe_buffers - make buffers so they aren't dirty/pinned anymore + * @ip: the inode who owns the buffers + * @bstart: the first buffer in the run + * @blen: the number of buffers in the run + * + */ + +void +gfs_wipe_buffers(struct gfs_inode *ip, struct gfs_rgrpd *rgd, + uint64_t bstart, uint32_t blen) +{ + struct gfs_sbd *sdp = ip->i_sbd; + struct inode *aspace = ip->i_gl->gl_aspace; + struct buffer_head *bh; + struct gfs_bufdata *bd; + int busy; + int add = FALSE; + + while (blen) { + bh = getbuf(sdp, aspace, bstart, NO_CREATE); + if (bh) { + + bd = bh2bd(bh); + + if (buffer_uptodate(bh)) { + if (bd) { + gfs_lock_buffer(bh); + gfs_mhc_add(rgd, &bh, 1); + busy = bd->bd_pinned || buffer_busy(bh); + gfs_unlock_buffer(bh); + + if (busy) + add = TRUE; + else { + spin_lock(&sdp->sd_ail_lock); + if (!list_empty(&bd->bd_ail_tr_list)) { + list_del_init(&bd->bd_ail_tr_list); + list_del(&bd->bd_ail_gl_list); + brelse(bh); + } + spin_unlock(&sdp->sd_ail_lock); + } + } else { + GFS_ASSERT_INODE(!buffer_dirty(bh), ip,); + wait_on_buffer(bh); + GFS_ASSERT_INODE(!buffer_busy(bh), ip,); + gfs_mhc_add(rgd, &bh, 1); + } + } else { + GFS_ASSERT_INODE(!bd || !bd->bd_pinned, ip,); + GFS_ASSERT_INODE(!buffer_dirty(bh), ip,); + wait_on_buffer(bh); + GFS_ASSERT_INODE(!buffer_busy(bh), ip,); + } + + brelse(bh); + } + + bstart++; + blen--; + } + + if (add) + gfs_depend_add(rgd, ip->i_num.no_formal_ino); +} + +/** + * gfs_sync_meta - sync all the buffers in a filesystem + * @sdp: the filesystem + * + */ + +void +gfs_sync_meta(struct gfs_sbd *sdp) +{ + gfs_log_flush(sdp); + for (;;) { + gfs_ail_start(sdp, DIO_ALL); + if (gfs_ail_empty(sdp)) + break; + + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(HZ / 10); + } +} + +/** + * gfs_flush_meta_cache - get rid of any references on buffers for this inode + * @ip: The GFS inode + * + */ + +void +gfs_flush_meta_cache(struct gfs_inode *ip) +{ + struct buffer_head **bh_slot; + unsigned int x; + + spin_lock(&ip->i_lock); + + for (x = 0; x < GFS_MAX_META_HEIGHT; x++) { + bh_slot = &ip->i_cache[x]; + if (*bh_slot) { + brelse(*bh_slot); + *bh_slot = NULL; + } + } + + spin_unlock(&ip->i_lock); +} + +/** + * gfs_get_meta_buffer - Get a metadata buffer + * @ip: The GFS inode + * @depth: The depth in the metadata tree + * @num: The block number (device relative) of the buffer + * @new: Non-zero if we may create a new buffer + * @bhp: the buffer is returned here + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_get_meta_buffer(struct gfs_inode *ip, int height, uint64_t num, int new, + struct buffer_head **bhp) +{ + struct gfs_sbd *sdp = ip->i_sbd; + struct buffer_head *bh, **bh_slot = &ip->i_cache[height]; + int flags = ((new) ? 
DIO_NEW : 0) | DIO_START | DIO_WAIT; + int error; + + spin_lock(&ip->i_lock); + bh = *bh_slot; + if (bh) { + if (bh->b_blocknr == num) + get_bh(bh); + else + bh = NULL; + } + spin_unlock(&ip->i_lock); + + if (bh) { + error = gfs_dreread(sdp, bh, flags); + if (error) { + brelse(bh); + return error; + } + } else { + error = gfs_dread(sdp, num, ip->i_gl, flags, &bh); + if (error) + return error; + + spin_lock(&ip->i_lock); + if (*bh_slot != bh) { + if (*bh_slot) + brelse(*bh_slot); + *bh_slot = bh; + get_bh(bh); + } + spin_unlock(&ip->i_lock); + } + + if (new) { + GFS_ASSERT_INODE(height, ip,); + + gfs_trans_add_bh(ip->i_gl, bh); + gfs_metatype_set(sdp, bh, GFS_METATYPE_IN, GFS_FORMAT_IN); + gfs_buffer_clear_tail(bh, sizeof(struct gfs_meta_header)); + } else + gfs_metatype_check(sdp, bh, + (height) ? GFS_METATYPE_IN : GFS_METATYPE_DI); + + *bhp = bh; + + return 0; +} + +/** + * gfs_get_data_buffer - Get a data buffer + * @ip: The GFS inode + * @num: The block number (device relative) of the data block + * @new: Non-zero if this is a new allocation + * @bhp: the buffer is returned here + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_get_data_buffer(struct gfs_inode *ip, uint64_t block, int new, + struct buffer_head **bhp) +{ + struct gfs_sbd *sdp = ip->i_sbd; + struct buffer_head *bh; + int error = 0; + + if (block == ip->i_num.no_addr) { + GFS_ASSERT_INODE(!new, ip,); + + error = gfs_dread(sdp, block, ip->i_gl, DIO_START | DIO_WAIT, &bh); + if (error) + return error; + gfs_metatype_check(sdp, bh, GFS_METATYPE_DI); + } else if (gfs_is_jdata(ip)) { + if (new) { + error = gfs_dread(sdp, block, ip->i_gl, + DIO_NEW | DIO_START | DIO_WAIT, &bh); + if (error) + return error; + gfs_trans_add_bh(ip->i_gl, bh); + gfs_metatype_set(sdp, bh, GFS_METATYPE_JD, GFS_FORMAT_JD); + gfs_buffer_clear_tail(bh, sizeof(struct gfs_meta_header)); + } else { + error = gfs_dread(sdp, block, ip->i_gl, + DIO_START | DIO_WAIT, &bh); + if (error) + return error; + gfs_metatype_check(sdp, bh, GFS_METATYPE_JD); + } + } else { + if (new) { + bh = gfs_dgetblk(sdp, block, ip->i_gl); + gfs_prep_new_buffer(bh); + } else { + error = gfs_dread(sdp, block, ip->i_gl, + DIO_START | DIO_WAIT, &bh); + if (error) + return error; + } + } + + *bhp = bh; + + return 0; +} + +/** + * gfs_start_ra - start readahead on an extent of a file + * @gl: the glock the blocks belong to + * @dblock: the starting disk block + * @extlen: the number of blocks in the extent + * + */ + +void +gfs_start_ra(struct gfs_glock *gl, uint64_t dblock, uint32_t extlen) +{ + struct gfs_sbd *sdp = gl->gl_sbd; + struct inode *aspace = gl->gl_aspace; + struct buffer_head *first_bh, *bh; + uint32_t max_ra = sdp->sd_tune.gt_max_readahead >> sdp->sd_sb.sb_bsize_shift; + int error; + + GFS_ASSERT_GLOCK(extlen, gl,); + if (!max_ra) + return; + if (extlen > max_ra) + extlen = max_ra; + + first_bh = getbuf(sdp, aspace, dblock, CREATE); + + if (buffer_uptodate(first_bh)) + goto out; + if (!buffer_locked(first_bh)) { + error = gfs_dreread(sdp, first_bh, DIO_START); + if (error) + goto out; + } + + dblock++; + extlen--; + + while (extlen) { + bh = getbuf(sdp, aspace, dblock, CREATE); + + if (!buffer_uptodate(bh) && !buffer_locked(bh)) { + error = gfs_dreread(sdp, bh, DIO_START); + brelse(bh); + if (error) + goto out; + } else + brelse(bh); + + dblock++; + extlen--; + + if (buffer_uptodate(first_bh)) + break; + } + + out: + brelse(first_bh); +} diff -urN linux-orig/fs/gfs/dio.h linux-patched/fs/gfs/dio.h --- linux-orig/fs/gfs/dio.h 1969-12-31 18:00:00.000000000 
-0600 +++ linux-patched/fs/gfs/dio.h 2004-06-30 13:27:49.335712986 -0500 @@ -0,0 +1,195 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __DIO_DOT_H__ +#define __DIO_DOT_H__ + +void gfs_ail_start_trans(struct gfs_sbd *sdp, struct gfs_trans *tr); +int gfs_ail_empty_trans(struct gfs_sbd *sdp, struct gfs_trans *tr); + +/* Asynchronous I/O Routines */ + +struct buffer_head *gfs_dgetblk(struct gfs_sbd *sdp, uint64_t blkno, + struct gfs_glock *gl); +int gfs_dread(struct gfs_sbd *sdp, uint64_t blkno, struct gfs_glock *gl, + int flags, struct buffer_head **bhp); + +void gfs_prep_new_buffer(struct buffer_head *bh); +int gfs_dreread(struct gfs_sbd *sdp, struct buffer_head *bh, int flags); +int gfs_dwrite(struct gfs_sbd *sdp, struct buffer_head *bh, int flags); + +void gfs_attach_bufdata(struct buffer_head *bh, struct gfs_glock *gl); +int gfs_is_pinned(struct gfs_sbd *sdp, struct buffer_head *bh); +void gfs_dpin(struct gfs_sbd *sdp, struct buffer_head *bh); +void gfs_dunpin(struct gfs_sbd *sdp, struct buffer_head *bh, + struct gfs_trans *tr); + +static __inline__ +void gfs_lock_buffer(struct buffer_head *bh) +{ + struct gfs_bufdata *bd = bh2bd(bh); + down(&bd->bd_lock); +} +static __inline__ +int gfs_trylock_buffer(struct buffer_head *bh) +{ + struct gfs_bufdata *bd = bh2bd(bh); + return down_trylock(&bd->bd_lock); +} +static __inline__ +void gfs_unlock_buffer(struct buffer_head *bh) +{ + struct gfs_bufdata *bd = bh2bd(bh); + up(&bd->bd_lock); +} + +void gfs_logbh_init(struct gfs_sbd *sdp, struct buffer_head *bh, uint64_t blkno, + char *data); +void gfs_logbh_uninit(struct gfs_sbd *sdp, struct buffer_head *bh); +int gfs_logbh_start(struct gfs_sbd *sdp, struct buffer_head *bh); +int gfs_logbh_wait(struct gfs_sbd *sdp, struct buffer_head *bh); + +int gfs_replay_buf(struct gfs_glock *gl, struct buffer_head *bh); +void gfs_replay_check(struct gfs_sbd *sdp); +void gfs_replay_wait(struct gfs_sbd *sdp); + +void gfs_wipe_buffers(struct gfs_inode *ip, struct gfs_rgrpd *rgd, + uint64_t bstart, uint32_t blen); + +void gfs_sync_meta(struct gfs_sbd *sdp); + +/* Buffer Caching routines */ + +int gfs_get_meta_buffer(struct gfs_inode *ip, int height, uint64_t num, int new, + struct buffer_head **bhp); +int gfs_get_data_buffer(struct gfs_inode *ip, uint64_t block, int new, + struct buffer_head **bhp); +void gfs_start_ra(struct gfs_glock *gl, uint64_t dblock, uint32_t extlen); + +static __inline__ int +gfs_get_inode_buffer(struct gfs_inode *ip, struct buffer_head **bhp) +{ + return gfs_get_meta_buffer(ip, 0, ip->i_num.no_addr, FALSE, bhp); +} + +struct inode *gfs_aspace_get(struct gfs_sbd *sdp); +void gfs_aspace_put(struct inode *aspace); + +void gfs_inval_buf(struct gfs_glock *gl); +void gfs_sync_buf(struct gfs_glock *gl, int flags); + +void gfs_flush_meta_cache(struct gfs_inode *ip); + +/* Buffer Content Functions */ + +/** + * gfs_buffer_clear - Zeros out a buffer + * @ip: The GFS inode + * @bh: The buffer 
to zero + * + */ + +static __inline__ void +gfs_buffer_clear(struct buffer_head *bh) +{ + memset(bh->b_data, 0, bh->b_size); +} + +/** + * gfs_buffer_clear_tail - Clear buffer beyond the dinode + * @bh: The buffer containing the on-disk inode + * @head: the size of the head of the buffer + * + * Clears the remaining part of an on-disk inode that is not a dinode. + * i.e. The data part of a stuffed inode, or the top level of metadata + * of a non-stuffed inode. + */ + +static __inline__ void +gfs_buffer_clear_tail(struct buffer_head *bh, int head) +{ + memset(bh->b_data + head, 0, bh->b_size - head); +} + +/** + * gfs_buffer_clear_ends - Zero out any bits of a buffer which are not being written + * @bh: The buffer + * @offset: Offset in buffer where write starts + * @amount: Amount of data being written + * @journaled: TRUE if this is a journaled buffer + * + */ + +static __inline__ void +gfs_buffer_clear_ends(struct buffer_head *bh, int offset, int amount, + int journaled) +{ + int z_off1 = (journaled) ? sizeof(struct gfs_meta_header) : 0; + int z_len1 = offset - z_off1; + int z_off2 = offset + amount; + int z_len2 = (bh)->b_size - z_off2; + + if (z_len1) + memset(bh->b_data + z_off1, 0, z_len1); + + if (z_len2) + memset(bh->b_data + z_off2, 0, z_len2); +} + +/** + * gfs_buffer_copy_tail - copies the tail of one buffer to another + * @to_bh: the buffer to copy to + * @to_head: the size of the head of to_bh + * @from_bh: the buffer to copy from + * @from_head: the size of the head of from_bh + * + * from_head is guaranteed to be bigger than to_head + */ + +static __inline__ void +gfs_buffer_copy_tail(struct buffer_head *to_bh, int to_head, + struct buffer_head *from_bh, int from_head) +{ + memcpy(to_bh->b_data + to_head, + from_bh->b_data + from_head, + from_bh->b_size - from_head); + memset(to_bh->b_data + to_bh->b_size + to_head - from_head, + 0, + from_head - to_head); +} + +/** + * gfs_buffer_print - print a buffer to the debug console + * @bh: the buffer + * @string: what to print before the contents of the buffer + * + */ + +static __inline__ void +gfs_buffer_print(struct buffer_head *bh, char *string) +{ + unsigned int x, size = (bh)->b_size; + unsigned char *c = (bh)->b_data; + + printk("%s\n", string); + + for (x = 0; x < size; x++) { + printk("%.2X ", c[x]); + if (x % 16 == 15) + printk("\n"); + } + if (x % 16 != 0) + printk("\n"); +} + +#endif /* __DIO_DOT_H__ */
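The offset arithmetic in gfs_buffer_clear_ends() is easiest to check with concrete numbers; a worked example (the block and header sizes here are assumptions for illustration only):

/* gfs_buffer_clear_ends() with b_size = 4096, journaled = TRUE,
 * offset = 600, amount = 100, and an assumed
 * sizeof(struct gfs_meta_header) of 24:
 *
 *   z_off1 = 24                  zeroing starts after the metadata header
 *   z_len1 = 600 - 24   = 576    gap between the header and the write
 *   z_off2 = 600 + 100  = 700    first byte past the write
 *   z_len2 = 4096 - 700 = 3396   tail of the block
 *
 * Bytes [24, 600) and [700, 4096) are zeroed; the header and the region
 * about to be written, [600, 700), are left untouched. */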
diff -urN linux-orig/fs/gfs/dir.c linux-patched/fs/gfs/dir.c --- linux-orig/fs/gfs/dir.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/dir.c 2004-06-30 13:27:49.335712986 -0500 @@ -0,0 +1,2273 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +/* +* Implements Extendible Hashing as described in: +* "Extendible Hashing" by Fagin, et al in +* __ACM Trans. on Database Systems__, Sept 1979. +* +* +* Here's the layout of dirents which is essentially the same as that of ext2 +* within a single block. The field de_name_len is the number of bytes +* actually required for the name (no null terminator). The field de_rec_len +* is the number of bytes allocated to the dirent. The offset of the next +* dirent in the block is (dirent + dirent->de_rec_len). When a dirent is +* deleted, the preceding dirent inherits its allocated space, i.e. +* prev->de_rec_len += deleted->de_rec_len. Since the next dirent is obtained +* by adding de_rec_len to the current dirent, this essentially causes the +* deleted dirent to get jumped over when iterating through all the dirents. +* When deleting the first dirent in a block, there is no previous dirent so +* the field de_ino is set to zero to designate it as deleted. When allocating +* a dirent, gfs_dirent_alloc iterates through the dirents in a block. If the +* first dirent has (de_ino == 0) and de_rec_len is large enough, this first +* dirent is allocated. Otherwise it must go through all the 'used' dirents +* searching for one in which the amount of total space minus the amount of +* used space will provide enough space for the new dirent. +* There are two types of blocks in which dirents reside. In a stuffed dinode, +* the dirents begin at offset sizeof(struct gfs_dinode) from the beginning of the block. +* In leaves, they begin at offset sizeof (struct gfs_leaf) from the beginning of the +* leaf block. The dirents reside in leaves when +* +* dip->i_di.di_regime == GFS_DIR_EXHASH. +* +* The dirents are in the stuffed dinode when dip->i_di.di_regime == GFS_DIR_LINEAR. +* When the dirents are in leaves, the actual contents of the directory file are +* used as an array of 64-bit block pointers pointing to the leaf blocks. The +* dirents are NOT in the directory file itself. There can be more than one block +* pointer in the array that points to the same leaf. In fact, when a directory is +* first converted from linear to exhash, all of the pointers point to the same +* leaf. When a leaf is completely full, the size of the hash table can be doubled +* unless it is already at the maximum size, which is hard coded into +* GFS_DIR_MAX_DEPTH. Only after the hash table has reached that maximum size are +* leaves chained together in a linked list. +*/ + +#include +#include +#include +#include +#include +#include +#include + +#include "gfs.h" +#include "dio.h" +#include "dir.h" +#include "file.h" +#include "glock.h" +#include "inode.h" +#include "ioctl.h" +#include "quota.h" +#include "rgrp.h" +#include "trans.h" + +#define IS_LEAF (1) +#define IS_DINODE (2) + +#if 1 +#define gfs_dir_hash2offset(h) (((uint64_t)(h)) >> 1) +#define gfs_dir_offset2hash(p) ((uint32_t)(((uint64_t)(p)) << 1)) +#else +#define gfs_dir_hash2offset(h) (((uint64_t)(h))) +#define gfs_dir_offset2hash(p) ((uint32_t)(((uint64_t)(p)))) +#endif + +typedef int (*leaf_call_t) (struct gfs_inode *dip, + uint32_t index, uint32_t len, uint64_t leaf_no, + void *data);
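Because de_rec_len always gives the distance to the next dirent, and a deleted entry's space is folded into its predecessor's de_rec_len, walking a block needs no free-space map. A minimal sketch of such a walker (walk_dirents() is illustrative only; dirent_next() below is the real version, with its consistency checks):

/* Sketch: visit every live dirent in one block using only de_rec_len.
   'offset' is sizeof(struct gfs_dinode) or sizeof(struct gfs_leaf),
   depending on whether the block is a stuffed dinode or a leaf.
   The real dirent_next() also asserts that de_rec_len never runs
   past the end of the block. */
static void walk_dirents(struct buffer_head *bh, unsigned int offset)
{
	char *end = bh->b_data + bh->b_size;
	struct gfs_dirent *dent = (struct gfs_dirent *)(bh->b_data + offset);

	while ((char *)dent < end) {
		if (dent->de_inum.no_formal_ino) {
			/* live entry: the name starts right after the
			   dirent header, (char *)(dent + 1), and runs for
			   de_name_len bytes */
		}
		/* de_rec_len covers the entry plus any absorbed free space,
		   so deleted entries are skipped implicitly */
		dent = (struct gfs_dirent *)((char *)dent +
					     gfs16_to_cpu(dent->de_rec_len));
	}
}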
+ +/** + * gfs_filecmp - Compare two filenames + * @file1: The first filename + * @file2: The second filename + * @len_of_file2: The length of the second file + * + * This routine compares two filenames and returns TRUE if they are equal. + * + * Returns: TRUE (!=0) if the files are the same, otherwise FALSE (0). + */ + +int +gfs_filecmp(struct qstr *file1, char *file2, int len_of_file2) +{ + if (file1->len != len_of_file2) + return FALSE; + if (memcmp(file1->name, file2, file1->len)) + return FALSE; + return TRUE; +} + +/** + * dirent_first - Return the first dirent + * @dip: the directory + * @bh: The buffer + * @dent: Pointer to list of dirents + * + * return first dirent whether bh points to leaf or stuffed dinode + * + * Returns: IS_LEAF or IS_DINODE + */ + +static int +dirent_first(struct gfs_inode *dip, struct buffer_head *bh, + struct gfs_dirent **dent) +{ + struct gfs_meta_header *h = (struct gfs_meta_header *)bh->b_data; + + if (gfs32_to_cpu(h->mh_type) == GFS_METATYPE_LF) { + gfs_meta_check(dip->i_sbd, bh); + *dent = (struct gfs_dirent *)(bh->b_data + sizeof(struct gfs_leaf)); + return IS_LEAF; + } else { + gfs_metatype_check(dip->i_sbd, bh, GFS_METATYPE_DI); + *dent = (struct gfs_dirent *)(bh->b_data + sizeof(struct gfs_dinode)); + return IS_DINODE; + } +} + +/** + * dirent_next - Next dirent + * @dip: the directory + * @bh: The buffer + * @dent: Pointer to list of dirents + * + * Returns: 0 on success, error code otherwise + */ + +static int +dirent_next(struct gfs_inode *dip, struct buffer_head *bh, + struct gfs_dirent **dent) +{ + struct gfs_dirent *tmp, *cur; + char *bh_end; + uint32_t cur_rec_len; + + cur = *dent; + bh_end = bh->b_data + bh->b_size; + + cur_rec_len = gfs16_to_cpu(cur->de_rec_len); + + if ((char *)cur + cur_rec_len >= bh_end) { + GFS_ASSERT_INODE((char *)cur + cur_rec_len == bh_end, dip,); + return -ENOENT; + } + + tmp = (struct gfs_dirent *)((char *)cur + cur_rec_len); + + GFS_ASSERT_INODE((char *)tmp + gfs16_to_cpu(tmp->de_rec_len) <= bh_end, + dip,); + /* Only the first dent could ever have de_ino == 0 */ + GFS_ASSERT_INODE(tmp->de_inum.no_formal_ino, dip,); + + *dent = tmp; + + return 0; +} + +/** + * dirent_del - Delete a dirent + * @dip: The GFS inode + * @bh: The buffer + * @prev: The previous dirent + * @cur: The current dirent + * + */ + +static void +dirent_del(struct gfs_inode *dip, struct buffer_head *bh, + struct gfs_dirent *prev, struct gfs_dirent *cur) +{ + uint32_t cur_rec_len, prev_rec_len; + + GFS_ASSERT_INODE(cur->de_inum.no_formal_ino, dip,); + + gfs_trans_add_bh(dip->i_gl, bh); + + /* If there is no prev entry, this is the first entry in the block. + The de_rec_len is already as big as it needs to be. Just zero + out the inode number and return. */ + + if (!prev) { + cur->de_inum.no_formal_ino = 0; /* No endianness worries */ + return; + } + + /* Combine this dentry with the previous one.
*/ + + prev_rec_len = gfs16_to_cpu(prev->de_rec_len); + cur_rec_len = gfs16_to_cpu(cur->de_rec_len); + + GFS_ASSERT_INODE((char *)prev + prev_rec_len == (char *)cur, dip,); + GFS_ASSERT_INODE((char *)cur + cur_rec_len <= + bh->b_data + bh->b_size, dip,); + + prev_rec_len += cur_rec_len; + prev->de_rec_len = cpu_to_gfs16(prev_rec_len); +} + +/** + * gfs_dirent_alloc - Allocate a directory entry + * @dip: The GFS inode + * @bh: The buffer + * @name_len: The length of the name + * @dent_out: Pointer to list of dirents + * + * Returns: 0 on success, error code otherwise + */ + +int +gfs_dirent_alloc(struct gfs_inode *dip, struct buffer_head *bh, int name_len, + struct gfs_dirent **dent_out) +{ + struct gfs_dirent *dent, *new; + unsigned int rec_len = GFS_DIRENT_SIZE(name_len); + unsigned int entries = 0, offset = 0, x = 0; + int type; + + type = dirent_first(dip, bh, &dent); + + if (type == IS_LEAF) { + struct gfs_leaf *leaf = (struct gfs_leaf *)bh->b_data; + entries = gfs16_to_cpu(leaf->lf_entries); + offset = sizeof(struct gfs_leaf); + } else { + struct gfs_dinode *dinode = (struct gfs_dinode *)bh->b_data; + entries = gfs32_to_cpu(dinode->di_entries); + offset = sizeof(struct gfs_dinode); + } + + if (!entries) { + gfs_trans_add_bh(dip->i_gl, bh); + + dent->de_rec_len = bh->b_size - offset; + dent->de_rec_len = cpu_to_gfs16(dent->de_rec_len); + dent->de_name_len = cpu_to_gfs16(name_len); + + *dent_out = dent; + return 0; + } + + do { + uint32_t cur_rec_len, cur_name_len; + + cur_rec_len = gfs16_to_cpu(dent->de_rec_len); + cur_name_len = gfs16_to_cpu(dent->de_name_len); + + if ((!dent->de_inum.no_formal_ino && cur_rec_len >= rec_len) || + (cur_rec_len >= GFS_DIRENT_SIZE(cur_name_len) + rec_len)) { + gfs_trans_add_bh(dip->i_gl, bh); + + if (dent->de_inum.no_formal_ino) { + new = (struct gfs_dirent *)((char *)dent + + GFS_DIRENT_SIZE(cur_name_len)); + memset(new, 0, sizeof(struct gfs_dirent)); + + new->de_rec_len = cpu_to_gfs16(cur_rec_len - + GFS_DIRENT_SIZE(cur_name_len)); + new->de_name_len = cpu_to_gfs16(name_len); + + dent->de_rec_len = cur_rec_len - gfs16_to_cpu(new->de_rec_len); + dent->de_rec_len = cpu_to_gfs16(dent->de_rec_len); + + *dent_out = new; + return 0; + } + + dent->de_name_len = cpu_to_gfs16(name_len); + + *dent_out = dent; + return 0; + } + + GFS_ASSERT_INODE(x < entries, dip,); + + if (dent->de_inum.no_formal_ino) + x++; + } + while (dirent_next(dip, bh, &dent) == 0); + + return -ENOSPC; +} + +/** + * dirent_fits - See if we can fit an entry in this buffer + * @dip: The GFS inode + * @bh: The buffer + * @name_len: The length of the name + * + * Returns: TRUE if it can fit, FALSE otherwise + */ + +static int +dirent_fits(struct gfs_inode *dip, struct buffer_head *bh, int name_len) +{ + struct gfs_dirent *dent; + unsigned int rec_len = GFS_DIRENT_SIZE(name_len); + unsigned int entries = 0, x = 0; + int type; + + type = dirent_first(dip, bh, &dent); + + if (type == IS_LEAF) { + struct gfs_leaf *leaf = (struct gfs_leaf *)bh->b_data; + entries = gfs16_to_cpu(leaf->lf_entries); + } else { + struct gfs_dinode *dinode = (struct gfs_dinode *)bh->b_data; + entries = gfs32_to_cpu(dinode->di_entries); + } + + if (!entries) + return TRUE; + + do { + uint32_t cur_rec_len, cur_name_len; + + cur_rec_len = gfs16_to_cpu(dent->de_rec_len); + cur_name_len = gfs16_to_cpu(dent->de_name_len); + + if ((!dent->de_inum.no_formal_ino && cur_rec_len >= rec_len) || + (cur_rec_len >= GFS_DIRENT_SIZE(cur_name_len) + rec_len)) + return TRUE; + + GFS_ASSERT_INODE(x < entries, dip,); + + if
(dent->de_inum.no_formal_ino) + x++; + } + while (dirent_next(dip, bh, &dent) == 0); + + return FALSE; +} + +/** + * leaf_search + * @bh: + * @filename: + * @dent_out: + * @dent_prev: + * + * Returns: + */ + +static int +leaf_search(struct gfs_inode *dip, + struct buffer_head *bh, struct qstr *filename, + struct gfs_dirent **dent_out, struct gfs_dirent **dent_prev) +{ + uint32_t hash; + struct gfs_dirent *dent, *prev = NULL; + unsigned int entries = 0, x = 0; + int type; + + type = dirent_first(dip, bh, &dent); + + if (type == IS_LEAF) { + struct gfs_leaf *leaf = (struct gfs_leaf *)bh->b_data; + entries = gfs16_to_cpu(leaf->lf_entries); + } else if (type == IS_DINODE) { + struct gfs_dinode *dinode = (struct gfs_dinode *)bh->b_data; + entries = gfs32_to_cpu(dinode->di_entries); + } + + hash = gfs_dir_hash(filename->name, filename->len); + + do { + if (!dent->de_inum.no_formal_ino) { + prev = dent; + continue; + } + + if (gfs32_to_cpu(dent->de_hash) == hash && + gfs_filecmp(filename, (char *)(dent + 1), + gfs16_to_cpu(dent->de_name_len))) { + *dent_out = dent; + if (dent_prev) + *dent_prev = prev; + + return 0; + } + + GFS_ASSERT_INODE(x < entries, dip,); + x++; + prev = dent; + } + while (dirent_next(dip, bh, &dent) == 0); + + return -ENOENT; +} + +/** + * get_leaf - Get leaf + * @dip: + * @leaf_no: + * @bh_out: + * + * Returns: 0 on success, error code otherwise + */ + +static int +get_leaf(struct gfs_inode *dip, uint64_t leaf_no, struct buffer_head **bhp) +{ + struct gfs_sbd *sdp = dip->i_sbd; + int error; + + error = gfs_dread(sdp, leaf_no, dip->i_gl, DIO_START | DIO_WAIT, bhp); + if (!error) + gfs_metatype_check(sdp, *bhp, GFS_METATYPE_LF); + + return error; +} + +/** + * get_leaf_nr - Get a leaf number associated with the index + * @dip: The GFS inode + * @index: + * @leaf_out: + * + * Returns: 0 on success, error code otherwise + */ + +static int +get_leaf_nr(struct gfs_inode *dip, uint32_t index, uint64_t *leaf_out) +{ + uint64_t leaf_no; + int error; + + error = gfs_internal_read(dip, (char *)&leaf_no, + index * sizeof(uint64_t), + sizeof(uint64_t)); + if (error != sizeof(uint64_t)) + return (error < 0) ? 
error : -EIO; + + *leaf_out = gfs64_to_cpu(leaf_no); + + return 0; +} + +/** + * get_first_leaf - Get first leaf + * @dip: The GFS inode + * @index: + * @bh_out: + * + * Returns: 0 on success, error code otherwise + */ + +static int +get_first_leaf(struct gfs_inode *dip, uint32_t index, + struct buffer_head **bh_out) +{ + uint64_t leaf_no; + int error; + + error = get_leaf_nr(dip, index, &leaf_no); + if (!error) + error = get_leaf(dip, leaf_no, bh_out); + + return error; +} + +/** + * get_next_leaf - Get next leaf + * @dip: The GFS inode + * @bh_in: The buffer + * @bh_out: + * + * Returns: 0 on success, error code otherwise + */ + +static int +get_next_leaf(struct gfs_inode *dip, struct buffer_head *bh_in, + struct buffer_head **bh_out) +{ + struct gfs_leaf *leaf; + int error; + + leaf = (struct gfs_leaf *)bh_in->b_data; + + if (!leaf->lf_next) + error = -ENOENT; + else + error = get_leaf(dip, gfs64_to_cpu(leaf->lf_next), bh_out); + + return error; +} + +/** + * linked_leaf_search - Linked leaf search + * @dip: The GFS inode + * @filename: The filename to search for + * @dent_out: + * @dent_prev: + * @bh_out: + * + * Returns: 0 on success, error code otherwise + */ + +static int +linked_leaf_search(struct gfs_inode *dip, struct qstr *filename, + struct gfs_dirent **dent_out, struct gfs_dirent **dent_prev, + struct buffer_head **bh_out) +{ + struct buffer_head *bh = NULL, *bh_next; + uint32_t hsize, index; + uint32_t hash; + int error; + + hsize = 1 << dip->i_di.di_depth; + GFS_ASSERT_INODE(hsize * sizeof(uint64_t) == dip->i_di.di_size, dip,); + + /* Figure out the address of the leaf node. */ + + hash = gfs_dir_hash(filename->name, filename->len); + index = hash >> (32 - dip->i_di.di_depth); + + error = get_first_leaf(dip, index, &bh_next); + if (error) + return error; + + /* Find the entry */ + + do { + if (bh) + brelse(bh); + + bh = bh_next; + + error = leaf_search(dip, bh, filename, dent_out, dent_prev); + switch (error) { + case 0: + *bh_out = bh; + return 0; + + case -ENOENT: + break; + + default: + brelse(bh); + return error; + } + + error = get_next_leaf(dip, bh, &bh_next); + } + while (!error); + + brelse(bh); + + return error; +} + +/** + * dir_make_exhash - Convert a stuffed directory into an ExHash directory + * @dip: The GFS inode + * + * Returns: 0 on success, error code otherwise + */ + +static int +dir_make_exhash(struct gfs_inode *dip) +{ + struct gfs_sbd *sdp = dip->i_sbd; + struct gfs_dirent *dent; + struct buffer_head *bh, *dibh; + struct gfs_leaf *leaf; + int y; + uint32_t x; + uint64_t *lp, bn; + int error; + + error = gfs_get_inode_buffer(dip, &dibh); + if (error) + return error; + + /* Allocate a new block for the first leaf node */ + + error = gfs_metaalloc(dip, &bn); + if (error) + goto fail; + + /* Turn over a new leaf */ + + error = gfs_dread(sdp, bn, dip->i_gl, DIO_NEW | DIO_START | DIO_WAIT, &bh); + if (error) + goto fail; + + gfs_trans_add_bh(dip->i_gl, bh); + gfs_metatype_set(sdp, bh, GFS_METATYPE_LF, GFS_FORMAT_LF); + gfs_buffer_clear_tail(bh, sizeof(struct gfs_meta_header)); + + /* Fill in the leaf structure */ + + leaf = (struct gfs_leaf *)bh->b_data; + + GFS_ASSERT_INODE(dip->i_di.di_entries < (1 << 16), dip,); + + leaf->lf_dirent_format = cpu_to_gfs32(GFS_FORMAT_DE); + leaf->lf_entries = cpu_to_gfs16(dip->i_di.di_entries); + + /* Copy dirents */ + + gfs_buffer_copy_tail(bh, sizeof(struct gfs_leaf), dibh, + sizeof(struct gfs_dinode)); + + /* Find last entry */ + + x = 0; + dirent_first(dip, bh, &dent); + + do { + if (!dent->de_inum.no_formal_ino) +
continue; + if (++x == dip->i_di.di_entries) + break; + } + while (dirent_next(dip, bh, &dent) == 0); + + /* Adjust the last dirent's record length + (Remember that dent still points to the last entry.) */ + + dent->de_rec_len = gfs16_to_cpu(dent->de_rec_len) + + sizeof(struct gfs_dinode) - + sizeof(struct gfs_leaf); + dent->de_rec_len = cpu_to_gfs16(dent->de_rec_len); + + brelse(bh); + + /* We're done with the new leaf block, now setup the new + hash table. */ + + gfs_trans_add_bh(dip->i_gl, dibh); + gfs_buffer_clear_tail(dibh, sizeof (struct gfs_dinode)); + + lp = (uint64_t *)(dibh->b_data + sizeof(struct gfs_dinode)); + + for (x = sdp->sd_hash_ptrs; x--; lp++) + *lp = cpu_to_gfs64(bn); + + dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2; + dip->i_di.di_blocks++; + dip->i_di.di_flags |= GFS_DIF_EXHASH; + dip->i_di.di_payload_format = 0; + + for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ; + dip->i_di.di_depth = y; + + gfs_dinode_out(&dip->i_di, dibh->b_data); + + brelse(dibh); + + return 0; + + fail: + brelse(dibh); + return error; +} + +/** + * dir_split_leaf - Split a leaf block into two + * @dip: The GFS inode + * @index: + * @leaf_no: + * + * Returns: 0 on success, error code on failure + */ + +static int +dir_split_leaf(struct gfs_inode *dip, uint32_t index, uint64_t leaf_no) +{ + struct gfs_sbd *sdp = dip->i_sbd; + struct buffer_head *nbh, *obh, *dibh; + struct gfs_leaf *nleaf, *oleaf; + struct gfs_dirent *dent, *prev = NULL, *next = NULL, *new; + uint32_t start, len, half_len, divider; + uint64_t bn, *lp; + uint32_t name_len; + int x, moved = FALSE; + int error; + + /* Allocate the new leaf block */ + + error = gfs_metaalloc(dip, &bn); + if (error) + return error; + + /* Get the new leaf block */ + + error = gfs_dread(sdp, bn, dip->i_gl, + DIO_NEW | DIO_START | DIO_WAIT, &nbh); + if (error) + return error; + + gfs_trans_add_bh(dip->i_gl, nbh); + gfs_metatype_set(sdp, nbh, GFS_METATYPE_LF, GFS_FORMAT_LF); + gfs_buffer_clear_tail(nbh, sizeof (struct gfs_meta_header)); + + nleaf = (struct gfs_leaf *)nbh->b_data; + + nleaf->lf_dirent_format = cpu_to_gfs32(GFS_FORMAT_DE); + + /* Get the old leaf block */ + + error = get_leaf(dip, leaf_no, &obh); + if (error) + goto fail; + + gfs_trans_add_bh(dip->i_gl, obh); + + oleaf = (struct gfs_leaf *)obh->b_data; + + /* Compute the start and len of leaf pointers in the hash table. */ + + len = 1 << (dip->i_di.di_depth - gfs16_to_cpu(oleaf->lf_depth)); + GFS_ASSERT_INODE(len != 1, dip,); + half_len = len >> 1; + + start = (index & ~(len - 1)); + + /* Change the pointers. + Don't bother distinguishing stuffed from non-stuffed. + This code is complicated enough already. 
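+ A worked example with made-up numbers: with di_depth == 10 and an + old lf_depth of 8, len = 1 << (10 - 8) = 4 hash-table slots point + at this leaf. The lower half_len = 2 of those slots are repointed + below at the new leaf, and both leaves end up with lf_depth 9.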
*/ + + lp = gmalloc(half_len * sizeof(uint64_t)); + + error = gfs_internal_read(dip, (char *)lp, start * sizeof(uint64_t), + half_len * sizeof(uint64_t)); + if (error != half_len * sizeof(uint64_t)) { + if (error >= 0) + error = -EIO; + goto fail_lpfree; + } + + /* Change the pointers */ + + for (x = 0; x < half_len; x++) + lp[x] = cpu_to_gfs64(bn); + + error = gfs_internal_write(dip, (char *)lp, start * sizeof(uint64_t), + half_len * sizeof(uint64_t)); + if (error != half_len * sizeof(uint64_t)) { + if (error >= 0) + error = -EIO; + goto fail_lpfree; + } + + kfree(lp); + + /* Compute the divider */ + + divider = (start + half_len) << (32 - dip->i_di.di_depth); + + /* Copy the entries */ + + dirent_first(dip, obh, &dent); + + do { + next = dent; + if (dirent_next(dip, obh, &next)) + next = NULL; + + if (dent->de_inum.no_formal_ino && + gfs32_to_cpu(dent->de_hash) < divider) { + name_len = gfs16_to_cpu(dent->de_name_len); + + error = gfs_dirent_alloc(dip, nbh, name_len, &new); + GFS_ASSERT_INODE(!error, dip,); + + new->de_inum = dent->de_inum; /* No endianness worries */ + new->de_hash = dent->de_hash; /* No endianness worries */ + new->de_type = dent->de_type; /* No endianness worries */ + memcpy((char *)(new + 1), (char *)(dent + 1), + name_len); + + nleaf->lf_entries = gfs16_to_cpu(nleaf->lf_entries) + 1; + nleaf->lf_entries = cpu_to_gfs16(nleaf->lf_entries); + + dirent_del(dip, obh, prev, dent); + + GFS_ASSERT_INODE(gfs16_to_cpu(oleaf->lf_entries), dip,); + oleaf->lf_entries = gfs16_to_cpu(oleaf->lf_entries) - 1; + oleaf->lf_entries = cpu_to_gfs16(oleaf->lf_entries); + + if (!prev) + prev = dent; + + moved = TRUE; + } else + prev = dent; + + dent = next; + } + while (dent); + + /* If none of the entries got moved into the new leaf, + artificially fill in the first entry. 
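+ A leaf must always contain at least one dirent header for + dirent_first() and dirent_next() to walk; as noted in + dirent_next(), only the first dirent may have a zero + de_inum.no_formal_ino, so an empty sentinel entry spanning the + block is created.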
*/ + + if (!moved) { + error = gfs_dirent_alloc(dip, nbh, 0, &new); + GFS_ASSERT_INODE(!error, dip,); + new->de_inum.no_formal_ino = 0; + } + + oleaf->lf_depth = gfs16_to_cpu(oleaf->lf_depth) + 1; + oleaf->lf_depth = cpu_to_gfs16(oleaf->lf_depth); + nleaf->lf_depth = oleaf->lf_depth; + + error = gfs_get_inode_buffer(dip, &dibh); + GFS_ASSERT_INODE(!error, dip,); /* Pinned in gfs_internal_write() */ + + dip->i_di.di_blocks++; + + gfs_dinode_out(&dip->i_di, dibh->b_data); + brelse(dibh); + + brelse(obh); + brelse(nbh); + + return 0; + + fail_lpfree: + kfree(lp); + + brelse(obh); + + fail: + brelse(nbh); + return error; +} + +/** + * dir_double_exhash - Double size of ExHash table + * @dip: The GFS dinode + * + * Returns: 0 on success, error code on failure + */ + +static int +dir_double_exhash(struct gfs_inode *dip) +{ + struct gfs_sbd *sdp = dip->i_sbd; + struct buffer_head *dibh; + uint32_t hsize; + uint64_t *buf; + uint64_t *from, *to; + uint64_t block; + int x; + int error = 0; + + hsize = 1 << dip->i_di.di_depth; + GFS_ASSERT_INODE(hsize * sizeof(uint64_t) == dip->i_di.di_size, dip,); + + /* Allocate both the "from" and "to" buffers in one big chunk */ + + buf = gmalloc(3 * sdp->sd_hash_bsize); + + for (block = dip->i_di.di_size >> sdp->sd_hash_bsize_shift; block--;) { + error = gfs_internal_read(dip, (char *)buf, + block * sdp->sd_hash_bsize, + sdp->sd_hash_bsize); + if (error != sdp->sd_hash_bsize) { + if (error >= 0) + error = -EIO; + goto fail; + } + + from = buf; + to = (uint64_t *)((char *)buf + sdp->sd_hash_bsize); + + for (x = sdp->sd_hash_ptrs; x--; from++) { + *to++ = *from; /* No endianness worries */ + *to++ = *from; + } + + error = gfs_internal_write(dip, (char *)buf + sdp->sd_hash_bsize, + block * sdp->sd_sb.sb_bsize, + sdp->sd_sb.sb_bsize); + if (error != sdp->sd_sb.sb_bsize) { + if (error >= 0) + error = -EIO; + goto fail; + } + } + + kfree(buf); + + error = gfs_get_inode_buffer(dip, &dibh); + GFS_ASSERT_INODE(!error, dip,); /* Pinned in gfs_internal_write() */ + + dip->i_di.di_depth++; + + gfs_dinode_out(&dip->i_di, dibh->b_data); + brelse(dibh); + + return 0; + + fail: + kfree(buf); + + return error; +} + +/** + * compare_dents - compare directory entries by hash value + * @a: first dent + * @b: second dent + * + * When comparing the hash entries of @a to @b: + * gt: returns 1 + * lt: returns -1 + * eq: returns 0 + */ + +static int +compare_dents(const void *a, const void *b) +{ + struct gfs_dirent *dent_a, *dent_b; + uint32_t hash_a, hash_b; + int ret = 0; + + dent_a = *(struct gfs_dirent **)a; + hash_a = dent_a->de_hash; + hash_a = gfs32_to_cpu(hash_a); + + dent_b = *(struct gfs_dirent **)b; + hash_b = dent_b->de_hash; + hash_b = gfs32_to_cpu(hash_b); + + if (hash_a > hash_b) + ret = 1; + else if (hash_a < hash_b) + ret = -1; + else { + unsigned int len_a = gfs16_to_cpu(dent_a->de_name_len); + unsigned int len_b = gfs16_to_cpu(dent_b->de_name_len); + + if (len_a > len_b) + ret = 1; + else if (len_a < len_b) + ret = -1; + else + ret = memcmp((char *)(dent_a + 1), + (char *)(dent_b + 1), + len_a); + } + + return ret; +} + +/** + * do_filldir_main - read out directory entries + * @dip: The GFS inode + * @offset: The offset in the file to read from + * @opaque: opaque data to pass to filldir + * @filldir: The function to pass entries to + * @darr: an array of struct gfs_dirent pointers to read + * @entries: the number of entries in darr + * @copied: pointer to int that's non-zero if an entry has been copied out + * + * Jump through some hoops to make sure that if there are
hash collisions, + * they are read out at the beginning of a buffer. We want to minimize + * the possibility that they will fall into different readdir buffers or + * that someone will want to seek to that location. + * + * Returns: 0 on success, -EXXX on failure, >0 on exception from filldir + */ + +static int +do_filldir_main(struct gfs_inode *dip, uint64_t *offset, + void *opaque, gfs_filldir_t filldir, + struct gfs_dirent **darr, uint32_t entries, int *copied) +{ + struct gfs_dirent *dent, *dent_next; + struct gfs_inum inum; + uint64_t off, off_next; + unsigned int x, y; + int run = FALSE; + int error = 0; + + gfs_sort(darr, entries, sizeof(struct gfs_dirent *), compare_dents); + + dent_next = darr[0]; + off_next = gfs32_to_cpu(dent_next->de_hash); + off_next = gfs_dir_hash2offset(off_next); + + for (x = 0, y = 1; x < entries; x++, y++) { + dent = dent_next; + off = off_next; + + if (y < entries) { + dent_next = darr[y]; + off_next = gfs32_to_cpu(dent_next->de_hash); + off_next = gfs_dir_hash2offset(off_next); + + if (off < *offset) + continue; + *offset = off; + + if (off_next == off) { + if (*copied && !run) + return 1; + run = TRUE; + } else + run = FALSE; + } else { + if (off < *offset) + continue; + *offset = off; + } + + gfs_inum_in(&inum, (char *)&dent->de_inum); + + error = filldir(opaque, (char *)(dent + 1), + gfs16_to_cpu(dent->de_name_len), + off, &inum, + gfs16_to_cpu(dent->de_type)); + if (error) + return 1; + + *copied = TRUE; + } + + /* Increment the *offset by one, so the next time we come into the do_filldir fxn, + we get the next entry instead of the last one in the current leaf */ + + (*offset)++; + + return 0; +} + +/** + * do_filldir_single - Read directory entries out of a single block + * @dip: The GFS inode + * @offset: The offset in the file to read from + * @opaque: opaque data to pass to filldir + * @filldir: The function to pass entries to + * @bh: the block + * @entries: the number of entries in the block + * @copied: pointer to int that's non-zero if an entry has been copied out + * + * Returns: 0 on success, -EXXX on failure, >0 on exception from filldir + */ + +static int +do_filldir_single(struct gfs_inode *dip, uint64_t *offset, + void *opaque, gfs_filldir_t filldir, + struct buffer_head *bh, uint32_t entries, int *copied) +{ + struct gfs_dirent **darr; + struct gfs_dirent *de; + unsigned int e = 0; + int error = 0; + + if (!entries) + return 0; + + darr = gmalloc(entries * sizeof(struct gfs_dirent *)); + + dirent_first(dip, bh, &de); + do { + if (!de->de_inum.no_formal_ino) + continue; + darr[e++] = de; + } + while (dirent_next(dip, bh, &de) == 0); + + GFS_ASSERT_INODE(e == entries, dip,); + + error = do_filldir_main(dip, offset, opaque, filldir, darr, + entries, copied); + + kfree(darr); + + return error; +} + +/** + * do_filldir_multi - Read directory entries out of a linked leaf list + * @dip: The GFS inode + * @offset: The offset in the file to read from + * @opaque: opaque data to pass to filldir + * @filldir: The function to pass entries to + * @bh: the first leaf in the list + * @copied: pointer to int that's non-zero if an entry has been copied out + * + * Returns: 0 on success, -EXXX on failure, >0 on exception from filldir + */ + +static int +do_filldir_multi(struct gfs_inode *dip, uint64_t *offset, + void *opaque, gfs_filldir_t filldir, + struct buffer_head *bh, int *copied) +{ + struct buffer_head **larr = NULL; + struct gfs_dirent **darr; + struct gfs_leaf *leaf; + struct buffer_head *tmp_bh; + struct gfs_dirent *de; + unsigned int entries,
e = 0; + unsigned int leaves = 0, l = 0; + unsigned int x; + uint64_t ln; + int error = 0; + + /* Count leaves and entries */ + + leaf = (struct gfs_leaf *)bh->b_data; + entries = gfs16_to_cpu(leaf->lf_entries); + ln = leaf->lf_next; + + while (ln) { + ln = gfs64_to_cpu(ln); + + error = get_leaf(dip, ln, &tmp_bh); + if (error) + return error; + + leaf = (struct gfs_leaf *)tmp_bh->b_data; + if (leaf->lf_entries) { + entries += gfs16_to_cpu(leaf->lf_entries); + leaves++; + } + ln = leaf->lf_next; + + brelse(tmp_bh); + } + + /* Bail out if there's nothing to do */ + + if (!entries) + return 0; + + /* Alloc arrays */ + + if (leaves) + larr = gmalloc(leaves * sizeof(struct buffer_head *)); + + darr = gmalloc(entries * sizeof(struct gfs_dirent *)); + + /* Fill in arrays */ + + leaf = (struct gfs_leaf *)bh->b_data; + if (leaf->lf_entries) { + dirent_first(dip, bh, &de); + do { + if (!de->de_inum.no_formal_ino) + continue; + darr[e++] = de; + } + while (dirent_next(dip, bh, &de) == 0); + } + ln = leaf->lf_next; + + while (ln) { + ln = gfs64_to_cpu(ln); + + error = get_leaf(dip, ln, &tmp_bh); + if (error) + goto out; + + leaf = (struct gfs_leaf *)tmp_bh->b_data; + if (leaf->lf_entries) { + dirent_first(dip, tmp_bh, &de); + do { + if (!de->de_inum.no_formal_ino) + continue; + darr[e++] = de; + } + while (dirent_next(dip, tmp_bh, &de) == 0); + + larr[l++] = tmp_bh; + + ln = leaf->lf_next; + } else { + ln = leaf->lf_next; + brelse(tmp_bh); + } + } + + GFS_ASSERT_INODE(l == leaves, dip,); + GFS_ASSERT_INODE(e == entries, dip,); + + /* Do work */ + + error = do_filldir_main(dip, offset, opaque, filldir, darr, + entries, copied); + + /* Clean up */ + + out: + kfree(darr); + + for (x = 0; x < l; x++) + brelse(larr[x]); + + if (leaves) + kfree(larr); + + return error; +} + +/** + * dir_e_search - + * @dip: The GFS inode + * @filename: + * @inode: + * + * Returns: + */ + +static int +dir_e_search(struct gfs_inode *dip, struct qstr *filename, + struct gfs_inum *inum, unsigned int *type) +{ + struct buffer_head *bh; + struct gfs_dirent *dent; + int error; + + error = linked_leaf_search(dip, filename, &dent, NULL, &bh); + if (error) + return error; + + if (inum) + gfs_inum_in(inum, (char *)&dent->de_inum); + if (type) + *type = gfs16_to_cpu(dent->de_type); + + brelse(bh); + + return 0; +} + +/** + * dir_e_add - + * @dip: The GFS inode + * @filename: + * @inode: + * @type: + * + */ + +static int +dir_e_add(struct gfs_inode *dip, struct qstr *filename, + struct gfs_inum *inum, unsigned int type) +{ + struct gfs_sbd *sdp = dip->i_sbd; + struct buffer_head *bh, *nbh, *dibh; + struct gfs_leaf *leaf, *nleaf; + struct gfs_dirent *dent; + uint32_t hsize, index; + uint32_t hash; + uint64_t leaf_no, bn; + int error; + + restart: + hsize = 1 << dip->i_di.di_depth; + GFS_ASSERT_INODE(hsize * sizeof(uint64_t) == dip->i_di.di_size, dip,); + + /* Figure out the address of the leaf node. */ + + hash = gfs_dir_hash(filename->name, filename->len); + index = hash >> (32 - dip->i_di.di_depth); + + error = get_leaf_nr(dip, index, &leaf_no); + if (error) + return error; + + /* Add entry to the leaf */ + + for (;;) { + error = get_leaf(dip, leaf_no, &bh); + if (error) + return error; + + leaf = (struct gfs_leaf *)bh->b_data; + + if (gfs_dirent_alloc(dip, bh, filename->len, &dent)) { + + if (gfs16_to_cpu(leaf->lf_depth) < dip->i_di.di_depth) { + /* Can we split the leaf? 
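+ The escalation order when a leaf is full: split the leaf while it + is shallower than the hash table; else double the hash table while + di_depth is below GFS_DIR_MAX_DEPTH; else follow lf_next to a + chained leaf; else allocate a fresh leaf and link it in.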
*/ + + brelse(bh); + + error = dir_split_leaf(dip, index, leaf_no); + if (error) + return error; + + goto restart; + + } else if (dip->i_di.di_depth < GFS_DIR_MAX_DEPTH) { + /* Can we double the hash table? */ + + brelse(bh); + + error = dir_double_exhash(dip); + if (error) + return error; + + goto restart; + + } else if (leaf->lf_next) { + /* Can we try the next leaf in the list? */ + leaf_no = gfs64_to_cpu(leaf->lf_next); + brelse(bh); + continue; + + } else { + /* Create a new leaf and add it to the list. */ + + error = gfs_metaalloc(dip, &bn); + if (error) { + brelse(bh); + return error; + } + + error = gfs_dread(sdp, bn, dip->i_gl, + DIO_NEW | DIO_START | DIO_WAIT, + &nbh); + if (error) { + brelse(bh); + return error; + } + + gfs_trans_add_bh(dip->i_gl, nbh); + gfs_metatype_set(sdp, nbh, GFS_METATYPE_LF, + GFS_FORMAT_LF); + gfs_buffer_clear_tail(nbh, + sizeof(struct gfs_meta_header)); + + gfs_trans_add_bh(dip->i_gl, bh); + leaf->lf_next = cpu_to_gfs64(bn); + + nleaf = (struct gfs_leaf *)nbh->b_data; + nleaf->lf_depth = leaf->lf_depth; + nleaf->lf_dirent_format = cpu_to_gfs32(GFS_FORMAT_DE); + + if (gfs_dirent_alloc(dip, nbh, filename->len, &dent)) + GFS_ASSERT_INODE(FALSE, dip,); + + dip->i_di.di_blocks++; + + brelse(bh); + + bh = nbh; + leaf = nleaf; + } + } + + /* If the gfs_dirent_alloc() succeeded, it pinned the "bh". */ + + gfs_inum_out(inum, (char *)&dent->de_inum); + dent->de_hash = cpu_to_gfs32(hash); + dent->de_type = cpu_to_gfs16(type); + memcpy((char *)(dent + 1), filename->name, filename->len); + + leaf->lf_entries = gfs16_to_cpu(leaf->lf_entries) + 1; + leaf->lf_entries = cpu_to_gfs16(leaf->lf_entries); + + brelse(bh); + + error = gfs_get_inode_buffer(dip, &dibh); + if (error) + return error; + + dip->i_di.di_entries++; + dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds(); + + gfs_trans_add_bh(dip->i_gl, dibh); + gfs_dinode_out(&dip->i_di, dibh->b_data); + brelse(dibh); + + return 0; + } + + return -ENOENT; +} + +/** + * dir_e_del - + * @dip: The GFS inode + * @filename: + * + * Returns: + */ + +static int +dir_e_del(struct gfs_inode *dip, struct qstr *filename) +{ + struct buffer_head *bh, *dibh; + struct gfs_dirent *dent, *prev; + struct gfs_leaf *leaf; + unsigned int entries; + int error; + + error = linked_leaf_search(dip, filename, &dent, &prev, &bh); + GFS_ASSERT_INODE(error != -ENOENT, dip,); + if (error) + return error; + + dirent_del(dip, bh, prev, dent); /* Pins bh */ + + leaf = (struct gfs_leaf *)bh->b_data; + entries = gfs16_to_cpu(leaf->lf_entries); + GFS_ASSERT_INODE(entries, dip,); + entries--; + leaf->lf_entries = cpu_to_gfs16(entries); + + brelse(bh); + + error = gfs_get_inode_buffer(dip, &dibh); + if (error) + return error; + + GFS_ASSERT_INODE(dip->i_di.di_entries, dip,); + dip->i_di.di_entries--; + dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds(); + + gfs_trans_add_bh(dip->i_gl, dibh); + gfs_dinode_out(&dip->i_di, dibh->b_data); + brelse(dibh); + + return 0; +} + +/** + * dir_e_read - Reads the entries from a directory into a filldir buffer + * @dip: dinode pointer + * @offset: the hash of the last entry read shifted to the right once + * @opaque: buffer for the filldir function to fill + * @filldir: points to the filldir function to use + * + */ + +static int +dir_e_read(struct gfs_inode *dip, uint64_t *offset, void *opaque, + gfs_filldir_t filldir) +{ + struct gfs_sbd *sdp = dip->i_sbd; + struct buffer_head *bh; + struct gfs_leaf leaf; + uint32_t hsize, len; + uint32_t ht_offset, lp_offset, ht_offset_cur = -1; + uint32_t hash, index; + 
uint64_t *lp; + int copied = FALSE; + int error = 0; + + hsize = 1 << dip->i_di.di_depth; + GFS_ASSERT_INODE(hsize * sizeof(uint64_t) == dip->i_di.di_size, dip,); + + hash = gfs_dir_offset2hash(*offset); + index = hash >> (32 - dip->i_di.di_depth); + + lp = gmalloc(sdp->sd_hash_bsize); + + while (index < hsize) { + lp_offset = index & (sdp->sd_hash_ptrs - 1); + ht_offset = index - lp_offset; + + if (ht_offset_cur != ht_offset) { + error = gfs_internal_read(dip, (char *)lp, + ht_offset * sizeof(uint64_t), + sdp->sd_hash_bsize); + if (error != sdp->sd_hash_bsize) { + if (error >= 0) + error = -EIO; + goto out; + } + ht_offset_cur = ht_offset; + } + + error = get_leaf(dip, gfs64_to_cpu(lp[lp_offset]), &bh); + if (error) + goto out; + + gfs_leaf_in(&leaf, bh->b_data); + + if (leaf.lf_next) + error = do_filldir_multi(dip, offset, + opaque, filldir, + bh, &copied); + else + error = do_filldir_single(dip, offset, + opaque, filldir, + bh, leaf.lf_entries, + &copied); + + brelse(bh); + + if (error) { + if (error > 0) + error = 0; + goto out; + } + + len = 1 << (dip->i_di.di_depth - leaf.lf_depth); + index = (index & ~(len - 1)) + len; + } + + out: + kfree(lp); + + return error; +} + +/** + * dir_e_mvino - + * @dip: The GFS inode + * @filename: + * @new_inode: + * + * Returns: + */ + +static int +dir_e_mvino(struct gfs_inode *dip, struct qstr *filename, + struct gfs_inum *inum, unsigned int new_type) +{ + struct buffer_head *bh, *dibh; + struct gfs_dirent *dent; + int error; + + error = linked_leaf_search(dip, filename, &dent, NULL, &bh); + GFS_ASSERT_INODE(error != -ENOENT, dip,); + if (error) + return error; + + gfs_trans_add_bh(dip->i_gl, bh); + + gfs_inum_out(inum, (char *)&dent->de_inum); + dent->de_type = cpu_to_gfs16(new_type); + + brelse(bh); + + error = gfs_get_inode_buffer(dip, &dibh); + if (error) + return error; + + dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds(); + + gfs_trans_add_bh(dip->i_gl, dibh); + gfs_dinode_out(&dip->i_di, dibh->b_data); + brelse(dibh); + + return 0; +} + +/** + * dir_l_search - + * @dip: The GFS inode + * @filename: + * @inode: + * + * Returns: + */ + +static int +dir_l_search(struct gfs_inode *dip, struct qstr *filename, + struct gfs_inum *inum, unsigned int *type) +{ + struct buffer_head *dibh; + struct gfs_dirent *dent; + int error; + + GFS_ASSERT_INODE(gfs_is_stuffed(dip), dip,); + + error = gfs_get_inode_buffer(dip, &dibh); + if (error) + return error; + + error = leaf_search(dip, dibh, filename, &dent, NULL); + if (!error) { + if (inum) + gfs_inum_in(inum, (char *)&dent->de_inum); + if (type) + *type = gfs16_to_cpu(dent->de_type); + } + + brelse(dibh); + + return error; +} + +/** + * dir_l_add - + * @dip: The GFS inode + * @filename: + * @inode: + * @type: + * + * Returns: + */ + +static int +dir_l_add(struct gfs_inode *dip, struct qstr *filename, + struct gfs_inum *inum, unsigned int type) +{ + struct buffer_head *dibh; + struct gfs_dirent *dent; + int error; + + GFS_ASSERT_INODE(gfs_is_stuffed(dip), dip,); + + error = gfs_get_inode_buffer(dip, &dibh); + if (error) + return error; + + if (gfs_dirent_alloc(dip, dibh, filename->len, &dent)) { + brelse(dibh); + + error = dir_make_exhash(dip); + if (!error) + error = dir_e_add(dip, filename, inum, type); + + return error; + } + + /* gfs_dirent_alloc() pins */ + + gfs_inum_out(inum, (char *)&dent->de_inum); + dent->de_hash = gfs_dir_hash(filename->name, filename->len); + dent->de_hash = cpu_to_gfs32(dent->de_hash); + dent->de_type = cpu_to_gfs16(type); + memcpy((char *)(dent + 1), filename->name, 
filename->len); + + dip->i_di.di_entries++; + dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds(); + + gfs_dinode_out(&dip->i_di, dibh->b_data); + brelse(dibh); + + return 0; +} + +/** + * dir_l_del - + * @dip: The GFS inode + * @filename: + * + * Returns: + */ + +static int +dir_l_del(struct gfs_inode *dip, struct qstr *filename) +{ + struct buffer_head *dibh; + struct gfs_dirent *dent, *prev; + int error; + + GFS_ASSERT_INODE(gfs_is_stuffed(dip), dip,); + + error = gfs_get_inode_buffer(dip, &dibh); + if (error) + return error; + + error = leaf_search(dip, dibh, filename, &dent, &prev); + GFS_ASSERT_INODE(!error, dip,); + + dirent_del(dip, dibh, prev, dent); + + /* dirent_del() pins */ + + GFS_ASSERT_INODE(dip->i_di.di_entries, dip,); + dip->i_di.di_entries--; + + dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds(); + + gfs_dinode_out(&dip->i_di, dibh->b_data); + + brelse(dibh); + + return 0; +} + +/** + * dir_l_read - + * @dip: + * @offset: + * @opaque: + * @filldir: + * + * Returns: + */ + +static int +dir_l_read(struct gfs_inode *dip, uint64_t *offset, void *opaque, + gfs_filldir_t filldir) +{ + struct buffer_head *dibh; + int copied = FALSE; + int error; + + GFS_ASSERT_INODE(gfs_is_stuffed(dip), dip,); + + if (!dip->i_di.di_entries) + return 0; + + error = gfs_get_inode_buffer(dip, &dibh); + if (error) + return error; + + error = do_filldir_single(dip, offset, + opaque, filldir, + dibh, dip->i_di.di_entries, + &copied); + if (error > 0) + error = 0; + + brelse(dibh); + + return error; +} + +/** + * dir_l_mvino - + * @dip: + * @filename: + * @new_inode: + * + * Returns: + */ + +static int +dir_l_mvino(struct gfs_inode *dip, struct qstr *filename, + struct gfs_inum *inum, unsigned int new_type) +{ + struct buffer_head *dibh; + struct gfs_dirent *dent; + int error; + + GFS_ASSERT_INODE(gfs_is_stuffed(dip), dip,); + + error = gfs_get_inode_buffer(dip, &dibh); + if (error) + return error; + + error = leaf_search(dip, dibh, filename, &dent, NULL); + GFS_ASSERT_INODE(!error, dip,); + + gfs_trans_add_bh(dip->i_gl, dibh); + + gfs_inum_out(inum, (char *)&dent->de_inum); + dent->de_type = cpu_to_gfs16(new_type); + + dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds(); + + gfs_dinode_out(&dip->i_di, dibh->b_data); + + brelse(dibh); + + return 0; +} + +/** + * gfs_dir_search - Search a directory + * @dip: The GFS inode + * @filename: + * @inode: + * + * This routine searches a directory for a file or another directory. + * Assumes a glock is held on dip. + * + * Returns: 0 if found, -EXXXX on failure.
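+ * + * Rough usage sketch (glock on @dip already held, error handling + * elided; the dentry here is illustrative): + * + * struct gfs_inum inum; + * unsigned int type; + * + * if (!gfs_dir_search(dip, &dentry->d_name, &inum, &type)) + * ... inum and type now describe the entry ...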
+ */ + +int +gfs_dir_search(struct gfs_inode *dip, struct qstr *filename, + struct gfs_inum *inum, unsigned int *type) +{ + int error; + + GFS_ASSERT_INODE(dip->i_di.di_type == GFS_FILE_DIR, dip,); + + if (dip->i_di.di_flags & GFS_DIF_EXHASH) + error = dir_e_search(dip, filename, inum, type); + else + error = dir_l_search(dip, filename, inum, type); + + return error; +} + +/** + * gfs_dir_add - Add new filename into directory + * @dip: The GFS inode + * @filename: The new name + * @inode: The inode number of the entry + * @type: The type of the entry + * + * Returns: 0 on success, error code on failure + */ + +int +gfs_dir_add(struct gfs_inode *dip, struct qstr *filename, + struct gfs_inum *inum, unsigned int type) +{ + int error; + + GFS_ASSERT_INODE(dip->i_di.di_type == GFS_FILE_DIR, dip,); + + if (dip->i_di.di_flags & GFS_DIF_EXHASH) + error = dir_e_add(dip, filename, inum, type); + else + error = dir_l_add(dip, filename, inum, type); + + return error; +} + +/** + * gfs_dir_del - Delete a directory entry + * @dip: The GFS inode + * @filename: The filename + * + * Returns: 0 on success, error code on failure + */ + +int +gfs_dir_del(struct gfs_inode *dip, struct qstr *filename) +{ + int error; + + GFS_ASSERT_INODE(dip->i_di.di_type == GFS_FILE_DIR, dip,); + + if (dip->i_di.di_flags & GFS_DIF_EXHASH) + error = dir_e_del(dip, filename); + else + error = dir_l_del(dip, filename); + + return error; +} + +/** + * gfs_dir_read - Read entries from a directory + * @dip: The GFS inode + * @offset: + * @opaque: + * @filldir: + * + * Returns: 0 on success, error code otherwise + */ + +int +gfs_dir_read(struct gfs_inode *dip, uint64_t * offset, void *opaque, + gfs_filldir_t filldir) +{ + int error; + + GFS_ASSERT_INODE(dip->i_di.di_type == GFS_FILE_DIR, dip,); + + if (dip->i_di.di_flags & GFS_DIF_EXHASH) + error = dir_e_read(dip, offset, opaque, filldir); + else + error = dir_l_read(dip, offset, opaque, filldir); + + return error; +} + +/** + * gfs_dir_mvino - Change inode number of directory entry + * @dip: The GFS inode + * @filename: + * @new_inode: + * + * This routine changes the inode number of a directory entry. It's used + * by rename to change ".." when a directory is moved. + * Assumes a glock is held on dip.
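+ * (Like the other gfs_dir_* entry points above, it dispatches on the + * GFS_DIF_EXHASH flag: dir_e_mvino() for exhash directories, + * dir_l_mvino() for stuffed ones.)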
+ * + * Returns: 0 on success, -EXXXX on failure + */ + +int +gfs_dir_mvino(struct gfs_inode *dip, struct qstr *filename, + struct gfs_inum *inum, unsigned int new_type) +{ + int error; + + GFS_ASSERT_INODE(dip->i_di.di_type == GFS_FILE_DIR, dip,); + + if (dip->i_di.di_flags & GFS_DIF_EXHASH) + error = dir_e_mvino(dip, filename, inum, new_type); + else + error = dir_l_mvino(dip, filename, inum, new_type); + + return error; +} + +/** + * foreach_leaf - call a function for each leaf in a directory + * @dip: the directory + * @lc: the function to call for each leaf + * @data: private data to pass to it + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +foreach_leaf(struct gfs_inode *dip, leaf_call_t lc, void *data) +{ + struct gfs_sbd *sdp = dip->i_sbd; + struct buffer_head *bh; + struct gfs_leaf leaf; + uint32_t hsize, len; + uint32_t ht_offset, lp_offset, ht_offset_cur = -1; + uint32_t index = 0; + uint64_t *lp; + uint64_t leaf_no; + int error = 0; + + GFS_ASSERT_INODE(dip->i_di.di_flags & GFS_DIF_EXHASH, dip,); + hsize = 1 << dip->i_di.di_depth; + GFS_ASSERT_INODE(hsize * sizeof(uint64_t) == dip->i_di.di_size, dip,); + + lp = gmalloc(sdp->sd_hash_bsize); + + while (index < hsize) { + lp_offset = index & (sdp->sd_hash_ptrs - 1); + ht_offset = index - lp_offset; + + if (ht_offset_cur != ht_offset) { + error = gfs_internal_read(dip, (char *)lp, + ht_offset * sizeof(uint64_t), + sdp->sd_hash_bsize); + if (error != sdp->sd_hash_bsize) { + if (error >= 0) + error = -EIO; + goto out; + } + ht_offset_cur = ht_offset; + } + + leaf_no = gfs64_to_cpu(lp[lp_offset]); + if (leaf_no) { + error = get_leaf(dip, leaf_no, &bh); + if (error) + goto out; + gfs_leaf_in(&leaf, bh->b_data); + brelse(bh); + + len = 1 << (dip->i_di.di_depth - leaf.lf_depth); + + error = lc(dip, index, len, leaf_no, data); + if (error) + goto out; + + index = (index & ~(len - 1)) + len; + } else + index++; + } + + GFS_ASSERT_INODE(index == hsize, dip,); + + out: + kfree(lp); + + return error; +} + +/** + * leaf_free - Deallocate a directory leaf + * @dip: the directory + * @index: the hash table offset in the directory + * @len: the number of pointers to this leaf + * @leaf_no: the leaf number + * @data: not used + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +leaf_free(struct gfs_inode *dip, + uint32_t index, uint32_t len, + uint64_t leaf_no, void *data) +{ + struct gfs_sbd *sdp = dip->i_sbd; + struct gfs_holder ri_gh; + struct gfs_leaf tmp_leaf; + struct gfs_rgrp_list rlist; + struct buffer_head *bh, *dibh; + uint64_t blk; + unsigned int rg_blocks = 0; + char *ht; + unsigned int x, size = len * sizeof(uint64_t); + int error; + + memset(&rlist, 0, sizeof(struct gfs_rgrp_list)); + + ht = gmalloc(size); + memset(ht, 0, size); + + gfs_alloc_get(dip); + + error = gfs_quota_hold_m(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto fail; + + error = gfs_rindex_hold(sdp, &ri_gh); + if (error) + goto fail_qs; + + /* Count the number of leaves */ + + for (blk = leaf_no; blk; blk = tmp_leaf.lf_next) { + error = get_leaf(dip, blk, &bh); + if (error) + goto fail_rlist; + gfs_leaf_in(&tmp_leaf, (bh)->b_data); + brelse(bh); + + gfs_rlist_add(sdp, &rlist, blk); + } + + gfs_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0); + + error = gfs_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs); + if (error) + goto fail_rlist; + + for (x = 0; x < rlist.rl_rgrps; x++) { + struct gfs_rgrpd *rgd; + rgd = gl2rgd(rlist.rl_ghs[x].gh_gl); + rg_blocks += rgd->rd_ri.ri_length; + } + + /* Trans may require: + All the bitmaps that were
reserved. + One block for the dinode. + All the hash blocks that will be changed. + One block for a quota change. */ + + error = gfs_trans_begin(sdp, + rg_blocks + 1 + (DIV_RU(size, sdp->sd_jbsize) + 1), + 1); + if (error) + goto fail_rg_gunlock; + + for (blk = leaf_no; blk; blk = tmp_leaf.lf_next) { + error = get_leaf(dip, blk, &bh); + if (error) + goto fail_end_trans; + gfs_leaf_in(&tmp_leaf, bh->b_data); + brelse(bh); + + gfs_metafree(dip, blk, 1); + + dip->i_di.di_blocks--; + } + + error = gfs_internal_write(dip, ht, index * sizeof(uint64_t), size); + if (error != size) { + if (error >= 0) + error = -EIO; + goto fail_end_trans; + } + + error = gfs_get_inode_buffer(dip, &dibh); + if (error) + goto fail_end_trans; + + gfs_trans_add_bh(dip->i_gl, dibh); + gfs_dinode_out(&dip->i_di, dibh->b_data); + brelse(dibh); + + gfs_trans_end(sdp); + + gfs_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs); + gfs_rlist_free(&rlist); + gfs_glock_dq_uninit(&ri_gh); + gfs_quota_unhold_m(dip); + gfs_alloc_put(dip); + kfree(ht); + + return 0; + + fail_end_trans: + gfs_trans_end(sdp); + + fail_rg_gunlock: + gfs_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs); + + fail_rlist: + gfs_rlist_free(&rlist); + gfs_glock_dq_uninit(&ri_gh); + + fail_qs: + gfs_quota_unhold_m(dip); + + fail: + gfs_alloc_put(dip); + kfree(ht); + + return error; +} + +/** + * gfs_dir_exhash_free - free all the leaf blocks in a directory + * @dip: the directory + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_dir_exhash_free(struct gfs_inode *dip) +{ + struct gfs_sbd *sdp = dip->i_sbd; + struct buffer_head *bh; + int error; + + GFS_ASSERT_INODE(dip->i_di.di_type == GFS_FILE_DIR, dip,); + + error = foreach_leaf(dip, leaf_free, NULL); + if (error) + return error; + + /* Make this a regular file in case we crash. + (We don't want to free these blocks a second time.)
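+ Once leaf_free() has run, the hash table is the only directory + data left; with di_type flipped to GFS_FILE_REG in its own + transaction, a crash after this point leaves an inode that gets + torn down through the regular-file deallocation path rather than + by walking the already-freed leaves again.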
*/ + + error = gfs_trans_begin(sdp, 1, 0); + if (error) + return error; + + error = gfs_get_inode_buffer(dip, &bh); + if (error) + goto fail; + + gfs_trans_add_bh(dip->i_gl, bh); + ((struct gfs_dinode *)bh->b_data)->di_type = cpu_to_gfs16(GFS_FILE_REG); + + brelse(bh); + + gfs_trans_end(sdp); + + return 0; + + fail: + gfs_trans_end(sdp); + return error; +} + +/** + * gfs_diradd_alloc_required - figure out if an entry addition is going to require an allocation + * @dip: the directory being written to + * @filename: the filename that's going to be added + * @alloc_required: the int is set to TRUE if an alloc is required, FALSE otherwise + * + * Returns: 0 on success, -EXXX on error + */ + +int +gfs_diradd_alloc_required(struct gfs_inode *dip, struct qstr *filename, + int *alloc_required) +{ + struct buffer_head *bh = NULL, *bh_next; + uint32_t hsize, hash, index; + int error = 0; + + *alloc_required = FALSE; + + GFS_ASSERT_INODE(dip->i_di.di_type == GFS_FILE_DIR, dip,); + + if (dip->i_di.di_flags & GFS_DIF_EXHASH) { + hsize = 1 << dip->i_di.di_depth; + GFS_ASSERT_INODE(hsize * sizeof(uint64_t) == dip->i_di.di_size, + dip,); + + hash = gfs_dir_hash(filename->name, filename->len); + index = hash >> (32 - dip->i_di.di_depth); + + error = get_first_leaf(dip, index, &bh_next); + if (error) + return error; + + do { + if (bh) + brelse(bh); + + bh = bh_next; + + if (dirent_fits(dip, bh, filename->len)) + break; + + error = get_next_leaf(dip, bh, &bh_next); + if (error == -ENOENT) { + *alloc_required = TRUE; + error = 0; + break; + } + } + while (!error); + + brelse(bh); + } else { + error = gfs_get_inode_buffer(dip, &bh); + if (error) + return error; + + if (!dirent_fits(dip, bh, filename->len)) + *alloc_required = TRUE; + + brelse(bh); + } + + return error; +} + +/** + * do_gdm - copy out one leaf (or list of leaves) + * @dip: the directory + * @index: the hash table offset in the directory + * @len: the number of pointers to this leaf + * @leaf_no: the leaf number + * @data: a pointer to a struct gfs_user_buffer structure + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +do_gdm(struct gfs_inode *dip, uint32_t index, uint32_t len, uint64_t leaf_no, + void *data) +{ + struct gfs_user_buffer *ub = (struct gfs_user_buffer *)data; + struct gfs_leaf leaf; + struct buffer_head *bh; + uint64_t blk; + int error = 0; + + for (blk = leaf_no; blk; blk = leaf.lf_next) { + error = get_leaf(dip, blk, &bh); + if (error) + break; + + gfs_leaf_in(&leaf, bh->b_data); + + error = gfs_add_bh_to_ub(ub, bh); + + brelse(bh); + + if (error) + break; + } + + return error; +} + +/** + * gfs_get_dir_meta - return all the leaf blocks of a directory + * @dip: the directory + * @ub: the structure representing the meta + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_get_dir_meta(struct gfs_inode *dip, struct gfs_user_buffer *ub) +{ + GFS_ASSERT_INODE(dip->i_di.di_type == GFS_FILE_DIR, dip,); + return foreach_leaf(dip, do_gdm, ub); +} diff -urN linux-orig/fs/gfs/dir.h linux-patched/fs/gfs/dir.h --- linux-orig/fs/gfs/dir.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/dir.h 2004-06-30 13:27:49.335712986 -0500 @@ -0,0 +1,55 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __DIR_DOT_H__ +#define __DIR_DOT_H__ + +/** + * gfs_filldir_t - Report a directory entry to the caller of gfs_dir_read() + * @opaque: opaque data used by the function + * @name: the name of the directory entry + * @length: the length of the name + * @offset: the entry's offset in the directory + * @inum: the inode number the entry points to + * @type: the type of inode the entry points to + * + * Returns: 0 on success, 1 if buffer full + */ + +typedef int (*gfs_filldir_t) (void *opaque, + const char *name, unsigned int length, + uint64_t offset, + struct gfs_inum *inum, unsigned int type); + +int gfs_filecmp(struct qstr *file1, char *file2, int len_of_file2); +int gfs_dirent_alloc(struct gfs_inode *dip, struct buffer_head *bh, + int name_len, struct gfs_dirent **dent_out); + +int gfs_dir_search(struct gfs_inode *dip, struct qstr *filename, + struct gfs_inum *inum, unsigned int *type); +int gfs_dir_add(struct gfs_inode *dip, struct qstr *filename, + struct gfs_inum *inum, unsigned int type); +int gfs_dir_del(struct gfs_inode *dip, struct qstr *filename); +int gfs_dir_read(struct gfs_inode *dip, uint64_t * offset, void *opaque, + gfs_filldir_t filldir); +int gfs_dir_mvino(struct gfs_inode *dip, struct qstr *filename, + struct gfs_inum *new_inum, unsigned int new_type); + +int gfs_dir_exhash_free(struct gfs_inode *dip); + +int gfs_diradd_alloc_required(struct gfs_inode *dip, struct qstr *filename, + int *alloc_required); + +int gfs_get_dir_meta(struct gfs_inode *ip, struct gfs_user_buffer *ub); + +#endif /* __DIR_DOT_H__ */ diff -urN linux-orig/fs/gfs/eattr.c linux-patched/fs/gfs/eattr.c --- linux-orig/fs/gfs/eattr.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/eattr.c 2004-06-30 13:27:49.337712522 -0500 @@ -0,0 +1,2340 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
+** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs.h" +#include "acl.h" +#include "dio.h" +#include "eattr.h" +#include "glock.h" +#include "inode.h" +#include "ioctl.h" +#include "quota.h" +#include "rgrp.h" +#include "trans.h" + +#define GFS_EA_REC_LEN(x) gfs32_to_cpu((x)->ea_rec_len) +#define GFS_EA_NAME(x) ((char *)(x) + sizeof(struct gfs_ea_header)) +#define GFS_EA_DATA_PTRS(x) ((uint64_t *)((char *)(x) + sizeof(struct gfs_ea_header) + (((x)->ea_name_len + 7) & ~7))) + +#define GFS_EA_NEXT(x) (struct gfs_ea_header *)((char *)(x) + GFS_EA_REC_LEN(x)) +#define GFS_EA_FREESPACE(x) (struct gfs_ea_header *)((char *)(x) + GFS_EA_SIZE(x)) + +#define GFS_EAREQ_IS_STUFFED(x, y) (((sizeof(struct gfs_ea_header) + (x)->es_data_len + (x)->es_name_len + 7) & ~7) <= y) + +#define GFS_EADATA_NUM_PTRS(x, y) (((x) + (y) - 1) / (y)) + +#define GFS_EA_SIZE(x) ((sizeof(struct gfs_ea_header) + (x)->ea_name_len + (GFS_EA_IS_UNSTUFFED(x)? (8 * (x)->ea_num_ptrs) : GFS_EA_DATA_LEN(x)) + 7) & ~ 7) + +#define GFS_EACMD_VALID(x) ((x) <= GFS_EACMD_REMOVE) + +#define GFS_EA_IS_LAST(x) ((x)->ea_flags & GFS_EAFLAG_LAST) + +#define GFS_EA_STRLEN(x) ((x)->ea_name_len + 1 + (((x)->ea_type == GFS_EATYPE_USR)? 5 : 7)) + +#define GFS_FIRST_EA(x) ((struct gfs_ea_header *) ((x)->b_data + sizeof(struct gfs_meta_header))) + +#define EA_ALLOC 1 +#define EA_DEALLOC 2 + +static struct buffer_head *alloc_eattr_blk(struct gfs_sbd *sdp, + struct gfs_inode *alloc_ip, + struct gfs_inode *ip, + uint64_t * block); + +/** + * can_replace - returns true if ea is large enough to hold the data in + * the request + */ + +static __inline__ int +can_replace(struct gfs_ea_header *ea, struct gfs_easet_io *req, + uint32_t avail_size) +{ + int data_space = + GFS_EA_REC_LEN(ea) - sizeof (struct gfs_ea_header) - + ea->ea_name_len; + + if (GFS_EAREQ_IS_STUFFED(req, avail_size) && !GFS_EA_IS_UNSTUFFED(ea)) + return (req->es_data_len <= data_space); + else + return (GFS_EADATA_NUM_PTRS(req->es_data_len, avail_size) <= + ea->ea_num_ptrs); +} + +/** + * get_req_size - returns the actual number of bytes the request will take up + * (not counting any unstuffed data blocks) + */ + +static __inline__ uint32_t +get_req_size(struct gfs_easet_io *req, uint32_t avail_size) +{ + uint32_t size = + ((sizeof (struct gfs_ea_header) + req->es_data_len + + req->es_name_len + 7) & ~7); + + if (size <= avail_size) + return size; + + return ((sizeof (struct gfs_ea_header) + req->es_name_len + 7) & ~7) + + (8 * GFS_EADATA_NUM_PTRS(req->es_data_len, avail_size)); +} + +/** + * gfs_ea_write_permission - decides if the user has permission to write to + * the ea + * @req: the write request + * @ip: inode of file with the ea + * + * Returns: 0 on success, -EXXX on error + */ + +int +gfs_ea_write_permission(struct gfs_easet_io *req, struct gfs_inode *ip) +{ + struct inode *inode = gfs_iget(ip, NO_CREATE); + int error = 0; + + GFS_ASSERT_INODE(inode, ip,); + + if (req->es_type == GFS_EATYPE_USR) { + if (!S_ISREG(inode->i_mode) && + (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) + error = -EPERM; + else { + error = permission(inode, MAY_WRITE, NULL); + if (error == -EACCES) + error = -EPERM; + } + } else if (req->es_type == GFS_EATYPE_SYS) { + if (IS_ACCESS_ACL(req->es_name, req->es_name_len)) + error = gfs_validate_acl(ip, req->es_data, +
req->es_data_len, 1); + else if (IS_DEFAULT_ACL(req->es_name, req->es_name_len)) + error = gfs_validate_acl(ip, req->es_data, + req->es_data_len, 0); + else { + if (!capable(CAP_SYS_ADMIN)) + error = -EPERM; + } + } else + error = -EOPNOTSUPP; + + iput(inode); + + return error; +} + +/** + * gfs_ea_read_permission - decides if the user has permission to read from + * the ea + * @req: the read request + * @ip: inode of file with the ea + * + * Returns: 0 on success, -EXXX on error + */ + +int +gfs_ea_read_permission(struct gfs_eaget_io *req, struct gfs_inode *ip) +{ + struct inode *inode = gfs_iget(ip, NO_CREATE); + int error = 0; + + GFS_ASSERT_INODE(inode, ip,); + + if (req->eg_type == GFS_EATYPE_USR){ + error = permission(inode, MAY_READ, NULL); + if (error == -EACCES) + error = -EPERM; + } + else if (req->eg_type == GFS_EATYPE_SYS) { + if (IS_ACCESS_ACL(req->eg_name, req->eg_name_len) || + IS_DEFAULT_ACL(req->eg_name, req->eg_name_len)) + error = 0; + else{ + if (!capable(CAP_SYS_ADMIN)) + error = -EPERM; + } + } else + error = -EOPNOTSUPP; + + iput(inode); + + return error; +} + +/** + * gfs_ea_memcpy - gfs memcpy wrapper with a return value + * + */ + +int +gfs_ea_memcpy(void *dest, void *src, unsigned long size) +{ + memcpy(dest, src, size); + return 0; +} + +/** + * gfs_ea_copy_to_user - copy_to_user wrapper + */ + +int +gfs_ea_copy_to_user(void *dest, void *src, unsigned long size) +{ + int error; + error = (copy_to_user(dest, src, size)) ? -EFAULT : 0; + return error; +} + +/** + * Returns: 1 if find_direct_eattr should stop checking (if the eattr was found, + * location will be set) + * 0 if find_eattr should keep on checking + * -EXXX on error + */ +int +find_direct_eattr(struct gfs_inode *ip, uint64_t blkno, char *name, + int name_len, int type, struct gfs_ea_location *location) +{ + int err; + struct buffer_head *bh; + struct gfs_sbd *sdp = ip->i_sbd; + struct gfs_ea_header *curr, *prev = NULL; + + err = gfs_dread(sdp, blkno, ip->i_gl, DIO_START | DIO_WAIT, &bh); + if (err) + goto out; + gfs_metatype_check(sdp, bh, GFS_METATYPE_EA); + curr = + (struct gfs_ea_header *) ((bh)->b_data + + sizeof (struct gfs_meta_header)); + if (curr->ea_type == GFS_EATYPE_UNUSED) { + if (GFS_EA_IS_LAST(curr)) + goto out_drelse; + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,); + prev = curr; + curr = GFS_EA_NEXT(curr); + } + if (type != curr->ea_type && ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) { + if (type == GFS_EATYPE_SYS) + err = 1; + goto out_drelse; + } + while (1) { + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,); + + if (type == curr->ea_type && name_len == curr->ea_name_len && + !memcmp(name, GFS_EA_NAME(curr), name_len)) { + location->bh = bh; + location->ea = curr; + location->prev = prev; + err = 1; + goto out; + } + if (GFS_EA_IS_LAST(curr)) + break; + prev = curr; + curr = GFS_EA_NEXT(curr); + } + + out_drelse: + brelse(bh); + + out: + return err; +} + +/** + * find_eattr - find a matching eattr + * + * Returns: 1 if ea found, 0 if no ea found, -EXXX on error + */ +int +find_eattr(struct gfs_inode *ip, char *name, int name_len, int type, + struct gfs_ea_location *location) +{ + int err; + struct buffer_head *bh; + struct gfs_sbd *sdp = ip->i_sbd; + uint64_t *eablk, *end; + + memset(location, 0, sizeof (struct gfs_ea_location)); + + if (ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) { + err = + gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl, + DIO_START | DIO_WAIT, &bh); + if (err) + goto fail; + gfs_metatype_check(sdp, bh, GFS_METATYPE_IN); + eablk = + (uint64_t *) ((bh)->b_data + sizeof (struct
gfs_indirect)); + end = + eablk + + ((sdp->sd_sb.sb_bsize - sizeof (struct gfs_indirect)) / 8); + while (eablk < end && *eablk) { + err = + find_direct_eattr(ip, gfs64_to_cpu(*eablk), name, + name_len, type, location); + if (err || location->ea) + break; + eablk++; + } + brelse(bh); + if (err < 0) + goto fail; + } else { + err = + find_direct_eattr(ip, ip->i_di.di_eattr, name, name_len, + type, location); + if (err < 0) + goto fail; + } + + return (location->ea != NULL); + + fail: + return err; +} + +static void +make_space(struct gfs_inode *ip, struct buffer_head *bh, uint32_t size, + uint64_t blkno, struct gfs_ea_location *avail) +{ + struct gfs_sbd *sdp = ip->i_sbd; + uint32_t free_size, avail_size; + struct gfs_ea_header *ea, *new_ea; + void *buf; + + free_size = 0; + avail_size = sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header); + ea = GFS_FIRST_EA(bh); + GFS_ASSERT_INODE(GFS_EA_REC_LEN(ea), ip,); + if (ea->ea_type == GFS_EATYPE_UNUSED) { + free_size = GFS_EA_REC_LEN(ea); + ea = GFS_EA_NEXT(ea); + } + while (free_size < size) { + free_size += (GFS_EA_REC_LEN(ea) - GFS_EA_SIZE(ea)); + if (GFS_EA_IS_LAST(ea)) + break; + ea = GFS_EA_NEXT(ea); + } + if (free_size < size) + goto out; + buf = gmalloc(avail_size); + + free_size = avail_size; + ea = GFS_FIRST_EA(bh); + if (ea->ea_type == GFS_EATYPE_UNUSED) + ea = GFS_EA_NEXT(ea); + new_ea = (struct gfs_ea_header *) buf; + new_ea->ea_flags = 0; + new_ea->ea_rec_len = cpu_to_gfs32(size); + new_ea->ea_num_ptrs = 0; + new_ea->ea_type = GFS_EATYPE_UNUSED; + free_size -= size; + new_ea = GFS_EA_NEXT(new_ea); + while (1) { + memcpy(new_ea, ea, GFS_EA_SIZE(ea)); + if (GFS_EA_IS_LAST(ea)) + break; + new_ea->ea_rec_len = cpu_to_gfs32(GFS_EA_SIZE(ea)); + free_size -= GFS_EA_SIZE(ea); + ea = GFS_EA_NEXT(ea); + new_ea = GFS_EA_NEXT(new_ea); + } + new_ea->ea_rec_len = cpu_to_gfs32(free_size); + memcpy(GFS_FIRST_EA(bh), buf, avail_size); + kfree(buf); + avail->ea = GFS_FIRST_EA(bh); + avail->prev = NULL; + avail->bh = bh; + + out: + return; +} + +static int +expand_to_indirect(struct gfs_inode *alloc_ip, struct gfs_inode *ip, + struct buffer_head **bh) +{ + int err; + struct gfs_sbd *sdp = ip->i_sbd; + struct buffer_head *bh1 = NULL, *bh2 = NULL, *indbh = NULL; + uint64_t blkno, *blkptr; + uint32_t free_size, avail_size; + struct gfs_ea_header *prev, *curr, *new_ea = NULL; + + avail_size = sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header); + free_size = avail_size; + ip->i_di.di_flags |= GFS_DIF_EA_INDIRECT; + blkno = ip->i_di.di_eattr; + err = gfs_metaalloc(alloc_ip, &ip->i_di.di_eattr); + if (err) + goto out; + ip->i_di.di_blocks++; + err = gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl, DIO_NEW | DIO_START | + DIO_WAIT, &indbh); + if (err) + goto out; + bh1 = *bh; + *bh = indbh; + gfs_trans_add_bh(ip->i_gl, indbh); + gfs_metatype_set(sdp, indbh, GFS_METATYPE_IN, GFS_FORMAT_IN); + memset((indbh)->b_data + sizeof (struct gfs_meta_header), 0, + sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header)); + blkptr = (uint64_t *) ((indbh)->b_data + sizeof (struct gfs_indirect)); + *blkptr++ = cpu_to_gfs64(blkno); + prev = NULL; + curr = GFS_FIRST_EA(bh1); + while (curr->ea_type != GFS_EATYPE_USR) { + if (GFS_EA_IS_LAST(curr)) + goto out_drelse1; + free_size -= GFS_EA_REC_LEN(curr); + prev = curr; + curr = GFS_EA_NEXT(curr); + } + if (!prev || prev->ea_type == GFS_EATYPE_UNUSED) + goto out_drelse1; + gfs_trans_add_bh(ip->i_gl, bh1); + prev->ea_rec_len = cpu_to_gfs32(GFS_EA_REC_LEN(prev) + free_size); + prev->ea_flags |= GFS_EAFLAG_LAST; + bh2 = 
alloc_eattr_blk(sdp, alloc_ip, ip, &blkno); + if (!bh2) { + err = -EIO; + goto out_drelse1; + } + free_size = avail_size; + new_ea = GFS_FIRST_EA(bh2); + while (1) { + memcpy(new_ea, curr, GFS_EA_SIZE(curr)); + if (GFS_EA_IS_LAST(curr)) + break; + new_ea->ea_rec_len = cpu_to_gfs32(GFS_EA_SIZE(curr)); + free_size -= GFS_EA_SIZE(curr); + curr = GFS_EA_NEXT(curr); + new_ea = GFS_EA_NEXT(new_ea); + } + new_ea->ea_rec_len = cpu_to_gfs32(free_size); + *blkptr = cpu_to_gfs64(blkno); + brelse(bh2); + + out_drelse1: + brelse(bh1); + + out: + return err; +} + +static void +find_direct_sys_space(struct gfs_inode *ip, int size, struct buffer_head *bh, + struct gfs_ea_location *avail) +{ + struct gfs_ea_header *curr, *prev = NULL; + + curr = GFS_FIRST_EA(bh); + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,); + if (curr->ea_type == GFS_EATYPE_UNUSED) { + if (GFS_EA_REC_LEN(curr) >= size) { + avail->ea = curr; + avail->prev = NULL; + avail->bh = bh; + goto out; + } + prev = curr; + curr = GFS_EA_NEXT(curr); + } + while (curr->ea_type == GFS_EATYPE_SYS) { + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,); + if (GFS_EA_REC_LEN(curr) >= GFS_EA_SIZE(curr) + size) { + avail->ea = curr; + avail->prev = prev; + avail->bh = bh; + goto out; + } + if (GFS_EA_IS_LAST(curr)) + break; + prev = curr; + curr = GFS_EA_NEXT(curr); + } + make_space(ip, bh, size, ip->i_di.di_eattr, avail); + + out: + return; +} + +/** + * int find_indirect_space + * + * @space: + * @blktype: returns the type of block GFS_EATYPE_... + * + * returns 0 on success, -EXXX on failure + */ +static int +find_indirect_space(struct gfs_inode *ip, uint64_t blkno, int type, + int size, struct gfs_ea_location *avail, int *blktype) +{ + int err; + struct buffer_head *bh; + struct gfs_sbd *sdp = ip->i_sbd; + struct gfs_ea_header *curr, *prev = NULL; + + err = gfs_dread(sdp, blkno, ip->i_gl, DIO_START | DIO_WAIT, &bh); + if (err) + goto out; + gfs_metatype_check(sdp, bh, GFS_METATYPE_EA); + curr = GFS_FIRST_EA(bh); + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,); + if (curr->ea_type == GFS_EATYPE_UNUSED) { + if (GFS_EA_IS_LAST(curr)) { + avail->ea = curr; + avail->prev = NULL; + avail->bh = bh; + *blktype = GFS_EATYPE_UNUSED; + goto out; + } + prev = curr; + curr = GFS_EA_NEXT(curr); + } + if (type != curr->ea_type) { + *blktype = curr->ea_type; + goto out_drelse; + } else + *blktype = type; + if (prev && GFS_EA_REC_LEN(prev) >= size) { + avail->ea = prev; + avail->prev = NULL; + avail->bh = bh; + goto out; + } + while (1) { + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,); + if (GFS_EA_REC_LEN(curr) >= GFS_EA_SIZE(curr) + size) { + avail->ea = curr; + avail->prev = prev; + avail->bh = bh; + goto out; + } + if (GFS_EA_IS_LAST(curr)) + break; + prev = curr; + curr = GFS_EA_NEXT(curr); + } + + out_drelse: + brelse(bh); + + out: + return err; +} + +static int +find_indirect_sys_space(struct gfs_inode *alloc_ip, struct gfs_inode *ip, + int size, struct buffer_head *bh, + struct gfs_ea_location *avail) +{ + int err = 0; + struct gfs_sbd *sdp = ip->i_sbd; + uint64_t *eablk, *end, *first_usr_blk = NULL; + int blktype; + uint64_t blkno; + + eablk = (uint64_t *) ((bh)->b_data + sizeof (struct gfs_indirect)); + end = + eablk + ((sdp->sd_sb.sb_bsize - sizeof (struct gfs_indirect)) / 8); + + while (eablk < end && *eablk) { + err = + find_indirect_space(ip, gfs64_to_cpu(*eablk), + GFS_EATYPE_SYS, size, avail, &blktype); + if (err) + goto out; + if (blktype == GFS_EATYPE_USR && !first_usr_blk) + first_usr_blk = eablk; + if (avail->ea) { + if (!first_usr_blk) + goto out; + 
gfs_trans_add_bh(ip->i_gl, bh); + blkno = *eablk; + *eablk = *first_usr_blk; + *first_usr_blk = blkno; + goto out; + } + eablk++; + } + if (eablk >= end) { + err = -ENOSPC; + goto out; + } + avail->bh = alloc_eattr_blk(sdp, alloc_ip, ip, &blkno); + if (!avail->bh) { + err = -EIO; + goto out; + } + avail->ea = GFS_FIRST_EA(avail->bh); + avail->prev = NULL; + gfs_trans_add_bh(ip->i_gl, bh); + if (first_usr_blk) { + *eablk = *first_usr_blk; + *first_usr_blk = cpu_to_gfs64(blkno); + } else + *eablk = cpu_to_gfs64(blkno); + + out: + return err; +} + +int +find_sys_space(struct gfs_inode *alloc_ip, struct gfs_inode *ip, int size, + struct gfs_ea_location *avail) +{ + int err; + struct buffer_head *bh; + struct gfs_sbd *sdp = ip->i_sbd; + + err = + gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl, DIO_START | DIO_WAIT, + &bh); + if (err) + goto out; + + if (ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) { + gfs_metatype_check(sdp, bh, GFS_METATYPE_IN); + err = find_indirect_sys_space(alloc_ip, ip, size, bh, avail); + } else { + gfs_metatype_check(sdp, bh, GFS_METATYPE_EA); + find_direct_sys_space(ip, size, bh, avail); + if (!avail->ea) { + err = expand_to_indirect(alloc_ip, ip, &bh); + if (err) + goto out_drelse; + err = + find_indirect_sys_space(alloc_ip, ip, size, bh, + avail); + } + } + + out_drelse: + if (avail->bh != bh) + brelse(bh); + + out: + return err; +} + +static int +get_blk_type(struct gfs_inode *ip, uint64_t blkno, int *blktype) +{ + int err = 0; + struct gfs_sbd *sdp = ip->i_sbd; + struct buffer_head *bh; + struct gfs_ea_header *ea; + + err = gfs_dread(sdp, blkno, ip->i_gl, DIO_START | DIO_WAIT, &bh); + if (err) + goto out; + gfs_metatype_check(sdp, bh, GFS_METATYPE_EA); + ea = GFS_FIRST_EA(bh); + GFS_ASSERT_INODE(GFS_EA_REC_LEN(ea), ip,); + if (ea->ea_type == GFS_EATYPE_UNUSED) { + if (GFS_EA_IS_LAST(ea)) { + *blktype = GFS_EATYPE_UNUSED; + goto out_drelse; + } + ea = GFS_EA_NEXT(ea); + GFS_ASSERT_INODE(GFS_EA_REC_LEN(ea), ip,); + } + *blktype = ea->ea_type; + + out_drelse: + brelse(bh); + + out: + return err; +} + +static void +find_direct_usr_space(struct gfs_inode *ip, int size, struct buffer_head *bh, + struct gfs_ea_location *avail) +{ + struct gfs_ea_header *curr, *prev = NULL; + + curr = GFS_FIRST_EA(bh); + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,); + if (curr->ea_type == GFS_EATYPE_UNUSED) { + if (GFS_EA_IS_LAST(curr)) { + avail->ea = curr; + avail->prev = NULL; + avail->bh = bh; + goto out; + } + prev = curr; + curr = GFS_EA_NEXT(curr); + if (curr->ea_type == GFS_EATYPE_USR + && GFS_EA_REC_LEN(prev) >= size) { + avail->ea = prev; + avail->prev = NULL; + avail->bh = bh; + goto out; + } + } + while (curr->ea_type != GFS_EATYPE_USR) { + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,); + if (GFS_EA_IS_LAST(curr)) + break; + prev = curr; + curr = GFS_EA_NEXT(curr); + } + while (1) { + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,); + if (GFS_EA_REC_LEN(curr) >= GFS_EA_SIZE(curr) + size) { + avail->ea = curr; + avail->prev = prev; + avail->bh = bh; + goto out; + } + if (GFS_EA_IS_LAST(curr)) + break; + prev = curr; + curr = GFS_EA_NEXT(curr); + } + + out: + return; +} + +static int +find_indirect_usr_space(struct gfs_inode *ip, int size, struct buffer_head *bh, + struct gfs_ea_location *avail) +{ + int err = 0; + struct gfs_sbd *sdp = ip->i_sbd; + uint64_t *eablk, *end, *last_sys_blk = NULL, *first_usr_blk = NULL; + int blktype; + uint64_t blkno; + + eablk = (uint64_t *) ((bh)->b_data + sizeof (struct gfs_indirect)); + end = + eablk + ((sdp->sd_sb.sb_bsize - sizeof (struct gfs_indirect)) / 8); 
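+	/*
+	 * The indirect block holds an array of big-endian 64-bit EA block
+	 * pointers after its header, so it has room for
+	 *
+	 *	nslots = (sb_bsize - sizeof(struct gfs_indirect)) / sizeof(uint64_t)
+	 *
+	 * entries (roughly 500 for 4096-byte blocks); a zero pointer
+	 * terminates the used part of the array.
+	 */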
+
+	while (eablk < end && *eablk) {
+		err = find_indirect_space(ip, gfs64_to_cpu(*eablk),
+					  GFS_EATYPE_USR, size, avail, &blktype);
+		if (err)
+			goto out;
+		if (blktype == GFS_EATYPE_SYS)
+			last_sys_blk = eablk;
+		if (blktype == GFS_EATYPE_USR && !first_usr_blk)
+			first_usr_blk = eablk;
+		if (avail->ea) {
+			if (first_usr_blk)
+				goto out;
+			first_usr_blk = eablk + 1;
+			while (first_usr_blk < end && *first_usr_blk) {
+				err = get_blk_type(ip,
+						   gfs64_to_cpu(*first_usr_blk),
+						   &blktype);
+				if (err)
+					goto out;
+				if (blktype == GFS_EATYPE_SYS)
+					last_sys_blk = first_usr_blk;
+				if (blktype == GFS_EATYPE_USR)
+					break;
+				first_usr_blk++;
+			}
+			if (last_sys_blk > eablk) {
+				gfs_trans_add_bh(ip->i_gl, bh);
+				blkno = *eablk;
+				*eablk = *last_sys_blk;
+				*last_sys_blk = blkno;
+			}
+			goto out;
+		}
+		eablk++;
+	}
+
+	if (eablk >= end) {
+		err = -ENOSPC;
+		goto out;
+	}
+	avail->bh = alloc_eattr_blk(sdp, ip, ip, &blkno);
+	if (!avail->bh) {
+		err = -EIO;
+		goto out;
+	}
+	avail->ea = GFS_FIRST_EA(avail->bh);
+	avail->prev = NULL;
+	gfs_trans_add_bh(ip->i_gl, bh);
+	*eablk = cpu_to_gfs64(blkno);
+
+ out:
+	return err;
+}
+
+static int
+find_usr_space(struct gfs_inode *ip, int size, struct gfs_ea_location *avail)
+{
+	int err;
+	struct buffer_head *bh;
+	struct gfs_sbd *sdp = ip->i_sbd;
+
+	err = gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl, DIO_START | DIO_WAIT,
+			&bh);
+	if (err)
+		goto out;
+
+	if (ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) {
+		gfs_metatype_check(sdp, bh, GFS_METATYPE_IN);
+		err = find_indirect_usr_space(ip, size, bh, avail);
+	} else {
+		gfs_metatype_check(sdp, bh, GFS_METATYPE_EA);
+		find_direct_usr_space(ip, size, bh, avail);
+		if (!avail->ea) {
+			err = expand_to_indirect(ip, ip, &bh);
+			if (err)
+				goto out_drelse;
+			err = find_indirect_usr_space(ip, size, bh, avail);
+		}
+	}
+
+ out_drelse:
+	if (avail->bh != bh)
+		brelse(bh);
+
+ out:
+	return err;
+}
+
+static int
+find_space(struct gfs_inode *ip, int size, int type,
+	   struct gfs_ea_location *avail)
+{
+	int err;
+
+	memset(avail, 0, sizeof (struct gfs_ea_location));
+
+	if (type == GFS_EATYPE_SYS)
+		err = find_sys_space(ip, ip, size, avail);
+	else
+		err = find_usr_space(ip, size, avail);
+
+	return err;
+}
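+
+/*
+ * can_replace_in_block checks whether the new version of an EA fits in
+ * the block that already holds the old version.  The space may come from
+ * an unused record, from the slack at the end of an in-use record, or,
+ * for the record being replaced and its predecessor, from the room the
+ * old version gives back once it is deleted.
+ */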
+static int
+can_replace_in_block(struct gfs_inode *ip, int size,
+		     struct gfs_ea_location found, struct gfs_ea_header **space)
+{
+	struct gfs_ea_header *curr, *prev = NULL;
+
+	*space = NULL;
+	curr = GFS_FIRST_EA(found.bh);
+	GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
+	if (curr->ea_type == GFS_EATYPE_UNUSED) {
+		if (GFS_EA_REC_LEN(curr) >= size) {
+			*space = curr;
+			goto out;
+		}
+		prev = curr;
+		curr = GFS_EA_NEXT(curr);
+	}
+	while (1) {
+		GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
+		if (curr == found.ea) {
+			/*
+			 * See if there will be enough space after the old
+			 * version of the eattr is deleted.
+			 */
+			if (prev) {
+				if (prev->ea_type == GFS_EATYPE_UNUSED) {
+					if (GFS_EA_REC_LEN(prev) +
+					    GFS_EA_REC_LEN(curr) >= size) {
+						*space = prev;
+						goto out;
+					}
+				} else if (GFS_EA_REC_LEN(prev) +
+					   GFS_EA_REC_LEN(curr) >=
+					   GFS_EA_SIZE(prev) + size) {
+					*space = prev;
+					goto out;
+				}
+			} else if (GFS_EA_REC_LEN(curr) >= size) {
+				*space = curr;
+				goto out;
+			}
+		} else if (GFS_EA_REC_LEN(curr) >= GFS_EA_SIZE(curr) + size) {
+			*space = curr;
+			goto out;
+		}
+		if (GFS_EA_IS_LAST(curr))
+			break;
+		prev = curr;
+		curr = GFS_EA_NEXT(curr);
+	}
+
+ out:
+	return (*space != NULL);
+}
+
+/**
+ * read_unstuffed - actually copies the unstuffed data into the
+ * request buffer
+ */
+
+int
+read_unstuffed(void *dest, struct gfs_inode *ip, struct gfs_sbd *sdp,
+	       struct gfs_ea_header *ea, uint32_t avail_size,
+	       gfs_ea_copy_fn_t copy_fn)
+{
+	struct buffer_head *bh[66];	/* This is the maximum number of data ptrs possible */
+	int err = 0;
+	int max = GFS_EADATA_NUM_PTRS(GFS_EA_DATA_LEN(ea), avail_size);
+	int i, j, left = GFS_EA_DATA_LEN(ea);
+	char *outptr, *buf;
+	uint64_t *indptr = GFS_EA_DATA_PTRS(ea);
+
+	for (i = 0; i < max; i++) {
+		err = gfs_dread(sdp, gfs64_to_cpu(*indptr), ip->i_gl, DIO_START,
+				&bh[i]);
+		indptr++;
+		if (err) {
+			for (j = 0; j < i; j++)
+				brelse(bh[j]);
+			goto out;
+		}
+	}
+
+	outptr = dest;
+
+	for (i = 0; i < max; i++) {
+		err = gfs_dreread(sdp, bh[i], DIO_WAIT);
+		if (err) {
+			for (j = i; j < max; j++)
+				brelse(bh[j]);
+			goto out;
+		}
+		gfs_metatype_check(sdp, bh[i], GFS_METATYPE_EA);
+		buf = (bh[i])->b_data + sizeof (struct gfs_meta_header);
+		err = copy_fn(outptr, buf,
+			      (avail_size > left) ? left : avail_size);
+		if (err) {
+			for (j = i; j < max; j++)
+				brelse(bh[j]);
+			goto out;
+		}
+		left -= avail_size;
+		outptr += avail_size;
+		brelse(bh[i]);
+	}
+
+ out:
+
+	return err;
+}
+
+/**
+ * get_ea - read the data of the extended attribute named in the request
+ * @sdp: the superblock
+ * @ip: the inode
+ * @req: the request information
+ * @copy_fn: the function that copies the data to the caller's buffer
+ *
+ * Returns: the size of the data on success, -EXXX on failure
+ */
+int
+get_ea(struct gfs_sbd *sdp, struct gfs_inode *ip, struct gfs_eaget_io *req,
+       gfs_ea_copy_fn_t copy_fn)
+{
+	int err;
+	struct gfs_ea_location location;
+	uint32_t avail_size;
+
+	avail_size = sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header);
+
+	err = find_eattr(ip, req->eg_name, req->eg_name_len, req->eg_type,
+			 &location);
+	if (err != 1) {
+		if (err == 0)
+			err = -ENODATA;
+		goto out;
+	}
+
+	if (req->eg_data_len) {
+		if (req->eg_data_len < GFS_EA_DATA_LEN(location.ea))
+			err = -ERANGE;
+		else if (GFS_EA_IS_UNSTUFFED(location.ea))
+			err = read_unstuffed(req->eg_data, ip, sdp, location.ea,
+					     avail_size, copy_fn);
+		else
+			err = copy_fn(req->eg_data, GFS_EA_DATA(location.ea),
+				      GFS_EA_DATA_LEN(location.ea));
+		if (!err)
+			err = GFS_EA_DATA_LEN(location.ea);
+	} else
+		err = GFS_EA_DATA_LEN(location.ea);
+
+	brelse(location.bh);
+
+ out:
+	return err;
+}
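+
+/*
+ * A sketch of the split prep_ea() performs when handed an in-use record
+ * whose rec_len includes enough slack for the new EA:
+ *
+ *	before:	[ ea: header | name | data | .... slack .... ]
+ *		rec_len = old record length
+ *
+ *	after:	[ ea: header | name | data ][ new: unused hdr ]
+ *		rec_len = GFS_EA_SIZE(ea)    rec_len = old slack
+ *
+ * GFS_EAFLAG_LAST moves from the old record to the new one when the old
+ * record used to end the block.
+ */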
+
+/**
+ * prep_ea - carve the room for a new EA out of an existing record
+ * @ea: a record with enough free space for the new EA
+ *
+ * If @ea is an unused record, it is reused in place.  Otherwise the free
+ * space at the end of the record is split off into a new unused record,
+ * and GFS_EAFLAG_LAST is carried over to it if @ea was the last record.
+ *
+ * Returns: a pointer to the header to write the new EA into
+ */
+
+struct gfs_ea_header *
+prep_ea(struct gfs_ea_header *ea)
+{
+	struct gfs_ea_header *new = ea;
+
+	if (ea->ea_type == GFS_EATYPE_UNUSED) {
+		if (GFS_EA_IS_LAST(ea))
+			ea->ea_flags = GFS_EAFLAG_LAST;
+		else
+			ea->ea_flags = 0;
+	} else {
+		new = GFS_EA_FREESPACE(ea);
+		new->ea_rec_len = cpu_to_gfs32(GFS_EA_REC_LEN(ea) -
+					       GFS_EA_SIZE(ea));
+		ea->ea_rec_len = cpu_to_gfs32(GFS_EA_SIZE(ea));
+		if (GFS_EA_IS_LAST(ea)) {
+			ea->ea_flags &= ~GFS_EAFLAG_LAST;
+			new->ea_flags = GFS_EAFLAG_LAST;
+		} else
+			new->ea_flags = 0;
+	}
+
+	return new;
+}
+
+/**
+ * replace_ea - replaces the existing data with the request data
+ */
+int
+replace_ea(struct gfs_sbd *sdp, struct gfs_inode *ip, struct gfs_ea_header *ea,
+	   struct gfs_easet_io *req)
+{
+	int err = 0;
+	int i;
+	uint32_t copy_size, data_left = req->es_data_len;
+	struct buffer_head *bh;
+	uint64_t *datablk = GFS_EA_DATA_PTRS(ea);
+	const char *dataptr = req->es_data;
+	uint32_t avail_size = sdp->sd_sb.sb_bsize -
+		sizeof (struct gfs_meta_header);
+
+	ea->ea_data_len = cpu_to_gfs32(req->es_data_len);
+	if (!GFS_EA_IS_UNSTUFFED(ea))
+		memcpy(GFS_EA_DATA(ea), req->es_data, req->es_data_len);
+	else {
+		for (i = 0; i < ea->ea_num_ptrs && data_left > 0; i++) {
+			err = gfs_dread(sdp, gfs64_to_cpu(*datablk), ip->i_gl,
+					DIO_START | DIO_WAIT, &bh);
+			if (err)
+				goto out;
+			gfs_trans_add_bh(ip->i_gl, bh);
+			gfs_metatype_check(sdp, bh, GFS_METATYPE_EA);
+			copy_size = (data_left > avail_size) ?
+				avail_size : data_left;
+			memcpy((bh)->b_data + sizeof (struct gfs_meta_header),
+			       dataptr, copy_size);
+			dataptr += copy_size;
+			data_left -= copy_size;
+			datablk++;
+			brelse(bh);
+		}
+		GFS_ASSERT_INODE(data_left == 0, ip,
+				 printk("req->es_data_len = %u, ea->ea_num_ptrs = %d\n",
+					req->es_data_len, ea->ea_num_ptrs);
+		);
+	}
+
+ out:
+	return err;
+}
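+
+/*
+ * Unstuffed EA data lives in whole metadata blocks, each carrying
+ * avail_size = sb_bsize - sizeof(struct gfs_meta_header) bytes of
+ * payload, with the block numbers kept as an array of 64-bit pointers
+ * inside the EA record itself.  Roughly:
+ *
+ *	nptrs = GFS_EADATA_NUM_PTRS(es_data_len, avail_size);
+ *	for each of the nptrs new blocks:
+ *		copy min(data_left, avail_size) bytes of the value;
+ *
+ * read_unstuffed() above walks the same pointer array to reassemble the
+ * value on the read side.
+ */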
+
+/**
+ * write_ea - writes the request info to an ea, creating new blocks if
+ * necessary
+ *
+ * @sdp: superblock pointer
+ * @alloc_ip: inode that has the blocks reserved for allocation
+ * @ip: inode that is being modified
+ * @ea: the location of the new ea in a block
+ * @req: the write request
+ *
+ * Note: does not update ea_rec_len or the GFS_EAFLAG_LAST bit of ea_flags
+ *
+ * Returns: 0 on success, -EXXX on error
+ */
+
+int
+write_ea(struct gfs_sbd *sdp, struct gfs_inode *alloc_ip, struct gfs_inode *ip,
+	 struct gfs_ea_header *ea, struct gfs_easet_io *req)
+{
+	int err = 0;
+	uint64_t *blkptr;
+	uint64_t temp;
+	const char *dataptr;
+	uint32_t data_left, copy;
+	uint32_t avail_size = sdp->sd_sb.sb_bsize -
+		sizeof (struct gfs_meta_header);
+	int i;
+	struct buffer_head *bh = NULL;
+
+	ea->ea_data_len = cpu_to_gfs32(req->es_data_len);
+	ea->ea_name_len = req->es_name_len;
+	ea->ea_type = req->es_type;
+	ea->ea_pad = 0;
+
+	memcpy(GFS_EA_NAME(ea), req->es_name, req->es_name_len);
+
+	if (GFS_EAREQ_IS_STUFFED(req, avail_size)) {
+		ea->ea_num_ptrs = 0;
+		memcpy(GFS_EA_DATA(ea), req->es_data, req->es_data_len);
+	} else {
+		blkptr = GFS_EA_DATA_PTRS(ea);
+		dataptr = req->es_data;
+		data_left = req->es_data_len;
+		ea->ea_num_ptrs = GFS_EADATA_NUM_PTRS(req->es_data_len,
+						      avail_size);
+
+		for (i = 0; i < ea->ea_num_ptrs; i++) {
+			if ((bh = alloc_eattr_blk(sdp, alloc_ip, ip,
+						  &temp)) == NULL) {
+				err = -EIO;
+				goto out;
+			}
+			copy = (data_left > avail_size) ?
+				avail_size : data_left;
+			memcpy((bh)->b_data + sizeof (struct gfs_meta_header),
+			       dataptr, copy);
+			*blkptr = cpu_to_gfs64(temp);
+			dataptr += copy;
+			data_left -= copy;
+			blkptr++;
+			brelse(bh);
+		}
+
+		GFS_ASSERT_INODE(!data_left, ip,);
+	}
+
+ out:
+
+	return err;
+}
+
+/**
+ * erase_ea_data_ptrs - deallocate all the unstuffed data blocks pointed to
+ * by the ea records in this block
+ * @sdp: the superblock
+ * @ip: the inode
+ * @dibh: the dinode buffer
+ * @blk: the block to check for data pointers
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+static int
+erase_ea_data_ptrs(struct gfs_sbd *sdp, struct gfs_inode *ip,
+		   struct buffer_head *dibh, uint64_t blk)
+{
+	struct gfs_holder rgd_gh;
+	int i, err = 0;
+	uint64_t *datablk;
+	struct buffer_head *eabh;
+	char *buf;
+	struct gfs_ea_header *ea;
+	struct gfs_rgrpd *rgd = NULL;
+
+	err = gfs_dread(sdp, blk, ip->i_gl, DIO_WAIT | DIO_START, &eabh);
+	if (err)
+		goto fail;
+
+	gfs_metatype_check(sdp, eabh, GFS_METATYPE_EA);
+	buf = (eabh)->b_data + sizeof (struct gfs_meta_header);
+	ea = (struct gfs_ea_header *) buf;
+
+	while (1) {
+		GFS_ASSERT_INODE(GFS_EA_REC_LEN(ea), ip,);
+		if (GFS_EA_IS_UNSTUFFED(ea)) {
+			datablk = GFS_EA_DATA_PTRS(ea);
+			rgd = gfs_blk2rgrpd(sdp, gfs64_to_cpu(*datablk));
+			GFS_ASSERT_INODE(rgd, ip,
+					 printk("block = %" PRIu64 "\n",
+						gfs64_to_cpu(*datablk)););
+			err = gfs_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
+						&rgd_gh);
+			if (err)
+				goto fail_eabh;
+			/* Trans may require:
+			   One block for the RG header.  One block for each ea
+			   data block.  One block for the dinode.  One block
+			   for the current ea block.  One block for a quota
+			   change.
+			   FIXME */
+			err = gfs_trans_begin(sdp, 3 + ea->ea_num_ptrs, 1);
+			if (err)
+				goto fail_glock_rg;
+			gfs_trans_add_bh(ip->i_gl, dibh);
+			for (i = 0; i < ea->ea_num_ptrs; i++, datablk++) {
+				gfs_metafree(ip, gfs64_to_cpu(*datablk), 1);
+				ip->i_di.di_blocks--;
+			}
+			ea->ea_num_ptrs = 0;
+			gfs_trans_add_bh(ip->i_gl, eabh);
+			gfs_dinode_out(&ip->i_di, (dibh)->b_data);
+			gfs_trans_end(sdp);
+			gfs_glock_dq_uninit(&rgd_gh);
+		}
+		if (GFS_EA_IS_LAST(ea))
+			break;
+		ea = GFS_EA_NEXT(ea);
+	}
+
+	brelse(eabh);
+
+	return err;
+
+ fail_glock_rg:
+	gfs_glock_dq_uninit(&rgd_gh);
+
+ fail_eabh:
+	brelse(eabh);
+
+ fail:
+	return err;
+}
+
+/**
+ * gfs_ea_dealloc - deallocate the extended attribute fork
+ * @ip: the inode
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+int
+gfs_ea_dealloc(struct gfs_inode *ip)
+{
+	struct gfs_holder ri_gh, rgd_gh;
+	int err = 0;
+	struct gfs_sbd *sdp = ip->i_sbd;
+	struct buffer_head *dibh, *indbh = NULL;
+	uint64_t *startblk, *eablk, *end, *next;
+	uint64_t temp;
+	int num_blks;
+	struct gfs_rgrpd *rgd = NULL;
+
+	if (!ip->i_di.di_eattr)
+		goto out;
+
+	gfs_alloc_get(ip);
+
+	err = gfs_quota_hold_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
+	if (err)
+		goto out_alloc;
+
+	err = gfs_rindex_hold(sdp, &ri_gh);
+	if (err)
+		goto out_unhold_q;
+
+	err = gfs_get_inode_buffer(ip, &dibh);
+	if (err)
+		goto out_rindex_release;
+
+	if (ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) {
+		err = gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl,
+				DIO_WAIT | DIO_START, &indbh);
+		if (err)
+			goto out_dibh;
+
+		gfs_metatype_check(sdp, indbh, GFS_METATYPE_IN);
+
+		eablk = (uint64_t *) ((indbh)->b_data +
+				      sizeof (struct gfs_indirect));
+		end = eablk + ((sdp->sd_sb.sb_bsize -
+				sizeof (struct gfs_indirect)) / 8);
+
+		while (*eablk && eablk < end) {
+			err = erase_ea_data_ptrs(sdp, ip, dibh,
+						 gfs64_to_cpu(*eablk));
+			if (err)
+				goto out_indbh;
+			eablk++;
+		}
+
+		startblk = eablk - 1;
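+		/*
+		 * Walk the pointer array backwards, batching the EA blocks
+		 * by resource group: each pass swaps every pointer that
+		 * shares startblk's RG down next to it, then frees that
+		 * whole run in one transaction under a single RG glock.
+		 */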
+		end = (uint64_t *) ((indbh)->b_data +
+				    sizeof (struct gfs_indirect));
+
+		while (startblk >= end) {
+			rgd = gfs_blk2rgrpd(sdp, gfs64_to_cpu(*startblk));
+			GFS_ASSERT_INODE(rgd, ip,);
+
+			num_blks = 1;
+			next = eablk = startblk - 1;
+
+			while (eablk >= end) {
+				if (rgd ==
+				    gfs_blk2rgrpd(sdp, gfs64_to_cpu(*eablk))) {
+					if (eablk != next) {
+						temp = *eablk;
+						*eablk = *next;
+						*next = temp;
+					}
+					num_blks++;
+					next--;
+				}
+				eablk--;
+			}
+
+			err = gfs_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
+						&rgd_gh);
+			if (err)
+				goto out_indbh;
+
+			/* Trans may require:
+			   One block for the RG header.  One block for each
+			   block from this resource group.  One block for the
+			   indirect ea block.  One block for the quota
+			   change. */
+
+			err = gfs_trans_begin(sdp, 3 + num_blks, 1);
+			if (err)
+				goto out_gunlock_rg;
+
+			gfs_trans_add_bh(ip->i_gl, dibh);
+
+			while (startblk > next) {
+				gfs_metafree(ip, gfs64_to_cpu(*startblk), 1);
+				ip->i_di.di_blocks--;
+				*startblk = 0;
+				startblk--;
+			}
+
+			gfs_trans_add_bh(ip->i_gl, indbh);
+			gfs_dinode_out(&ip->i_di, (dibh)->b_data);
+
+			gfs_trans_end(sdp);
+
+			gfs_glock_dq_uninit(&rgd_gh);
+		}
+
+		brelse(indbh);
+		indbh = NULL;
+	} else {
+		err = erase_ea_data_ptrs(sdp, ip, dibh, ip->i_di.di_eattr);
+		if (err)
+			goto out_dibh;
+	}
+
+	rgd = gfs_blk2rgrpd(sdp, ip->i_di.di_eattr);
+	GFS_ASSERT_INODE(rgd, ip,
+			 printk("block = %" PRIu64 "\n", ip->i_di.di_eattr);
+	);
+
+	err = gfs_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rgd_gh);
+	if (err)
+		goto out_dibh;
+
+	err = gfs_trans_begin(sdp, 3, 1);
+	if (err)
+		goto out_gunlock_rg;
+
+	gfs_metafree(ip, ip->i_di.di_eattr, 1);
+
+	ip->i_di.di_blocks--;
+	ip->i_di.di_eattr = 0;
+
+	gfs_trans_add_bh(ip->i_gl, dibh);
+	gfs_dinode_out(&ip->i_di, (dibh)->b_data);
+
+	gfs_trans_end(sdp);
+
+ out_gunlock_rg:
+	gfs_glock_dq_uninit(&rgd_gh);
+
+ out_indbh:
+	if (indbh)
+		brelse(indbh);
+
+ out_dibh:
+	brelse(dibh);
+
+ out_rindex_release:
+	gfs_glock_dq_uninit(&ri_gh);
+
+ out_unhold_q:
+	gfs_quota_unhold_m(ip);
+
+ out_alloc:
+	gfs_alloc_put(ip);
+
+ out:
+
+	return err;
+}
+
+/**
+ * remove_ea - remove an EA record from its block
+ * @ip: the inode
+ * @ea: the EA record to remove
+ * @prev: the previous record in the block, or NULL
+ *
+ * Frees any unstuffed data blocks, marks the record unused, and merges
+ * it into the previous record if there is one.
+ */
+
+static void
+remove_ea(struct gfs_inode *ip, struct gfs_ea_header *ea,
+	  struct gfs_ea_header *prev)
+{
+	uint64_t *datablk;
+	int i;
+
+	if (GFS_EA_IS_UNSTUFFED(ea)) {
+		datablk = GFS_EA_DATA_PTRS(ea);
+		for (i = 0; i < ea->ea_num_ptrs; i++, datablk++) {
+			gfs_metafree(ip, gfs64_to_cpu(*datablk), 1);
+			ip->i_di.di_blocks--;
+		}
+	}
+
+	ea->ea_type = GFS_EATYPE_UNUSED;
+	ea->ea_num_ptrs = 0;
+
+	if (prev && prev != ea) {
+		prev->ea_rec_len = cpu_to_gfs32(GFS_EA_REC_LEN(prev) +
+						GFS_EA_REC_LEN(ea));
+		if (GFS_EA_IS_LAST(ea))
+			prev->ea_flags |= GFS_EAFLAG_LAST;
+	}
+}
+
+int
+init_new_inode_eattr(struct gfs_inode *dip, struct gfs_inode *ip,
+		     struct gfs_easet_io *req)
+{
+	int err;
+	struct buffer_head *bh;
+	struct gfs_sbd *sdp = ip->i_sbd;
+	struct gfs_ea_header *ea;
+
+	err = gfs_metaalloc(dip, &ip->i_di.di_eattr);
+	if (err)
+		goto out;
+
+	err = gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl,
+			DIO_NEW | DIO_START | DIO_WAIT, &bh);
+	if (err)
+		goto out;
+
+	gfs_metatype_set(sdp, bh, GFS_METATYPE_EA, GFS_FORMAT_EA);
+
+	ip->i_di.di_blocks++;
+
+	ea = GFS_FIRST_EA(bh);
+	ea->ea_flags = GFS_EAFLAG_LAST;
+	ea->ea_rec_len = cpu_to_gfs32(sdp->sd_sb.sb_bsize -
+				      sizeof (struct gfs_meta_header));
+	ea->ea_num_ptrs = 0;
+	ea->ea_type = GFS_EATYPE_UNUSED;
+	err =
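+	/*
+	 * write_ea() fills in the name, type, and data (allocating unstuffed
+	 * data blocks from @dip's reservation if the value doesn't fit in
+	 * the block), but it leaves ea_rec_len and GFS_EAFLAG_LAST alone;
+	 * the header above was already set up to span the whole new block.
+	 */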
write_ea(sdp, dip, ip, ea, req); + if (err) + goto out_drelse; + + gfs_trans_add_bh(ip->i_gl, bh); + + out_drelse: + brelse(bh); + + out: + return err; +} + +int +do_init_eattr(struct gfs_sbd *sdp, struct gfs_inode *ip, + struct gfs_easet_io *req) +{ + int err; + struct buffer_head *bh; + struct gfs_ea_header *ea; + + bh = alloc_eattr_blk(sdp, ip, ip, &ip->i_di.di_eattr); + if (bh) { + ea = GFS_FIRST_EA(bh); + err = write_ea(sdp, ip, ip, ea, req); + brelse(bh); + } else + err = -EIO; + + return err; +} + +/** + * init_eattr - initializes a new eattr block + */ + +static int +init_eattr(struct gfs_sbd *sdp, struct gfs_inode *ip, struct gfs_easet_io *req) +{ + int err = 0; + struct gfs_alloc *al; + uint32_t ea_metablks; + struct buffer_head *dibh; + struct posix_acl *acl = NULL; + uint32_t avail_size = + sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header); + + ea_metablks = + GFS_EAREQ_IS_STUFFED(req, + avail_size) ? 1 : (1 + + GFS_EADATA_NUM_PTRS(req-> + es_data_len, + avail_size)); + + if (IS_ACCESS_ACL(req->es_name, req->es_name_len)){ + acl = posix_acl_from_xattr(req->es_data, req->es_data_len); + if (IS_ERR(acl)) { + err = PTR_ERR(acl); + goto out; + } + } + + al = gfs_alloc_get(ip); + + err = gfs_quota_lock_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (err) + goto out_alloc; + + al->al_requested_meta = ea_metablks; + + err = gfs_inplace_reserve(ip); + if (err) + goto out_gunlock_q; + + err = gfs_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid); + if (err) + goto out_ipres; + + err = gfs_get_inode_buffer(ip, &dibh); + if (err) + goto out_ipres; + + /* Trans may require: + A modified dinode, multiple EA metadata blocks, and all blocks for a RG + bitmap */ + + err = + gfs_trans_begin(sdp, + 1 + ea_metablks + al->al_rgd->rd_ri.ri_length, 1); + if (err) + goto out_dibh; + + err = do_init_eattr(sdp, ip, req); + if (err) + goto out_end_trans; + + if (acl) + gfs_acl_set_mode(ip, acl); + + gfs_trans_add_bh(ip->i_gl, dibh); + gfs_dinode_out(&ip->i_di, (dibh)->b_data); + + out_end_trans: + gfs_trans_end(sdp); + + out_dibh: + brelse(dibh); + + out_ipres: + gfs_inplace_release(ip); + + out_gunlock_q: + gfs_quota_unlock_m(ip); + + out_alloc: + gfs_alloc_put(ip); + posix_acl_release(acl); + + out: + return err; +} + +/** + * alloc_eattr_blk - allocates a new block for extended attributes. 
+ * @sdp: A pointer to the superblock
+ * @alloc_ip: A pointer to the inode that has reserved the blocks for
+ * allocation
+ * @ip: A pointer to the inode that's getting extended attributes
+ * @block: the block allocated
+ *
+ * Returns: the buffer head on success, NULL on failure
+ */
+
+static struct buffer_head *
+alloc_eattr_blk(struct gfs_sbd *sdp, struct gfs_inode *alloc_ip,
+		struct gfs_inode *ip, uint64_t * block)
+{
+	int err = 0;
+	struct buffer_head *bh = NULL;
+	struct gfs_ea_header *ea;
+
+	err = gfs_metaalloc(alloc_ip, block);
+	if (err)
+		goto out;
+
+	err = gfs_dread(sdp, *block, ip->i_gl, DIO_NEW | DIO_START | DIO_WAIT,
+			&bh);
+	if (err)
+		goto out;
+
+	gfs_metatype_set(sdp, bh, GFS_METATYPE_EA, GFS_FORMAT_EA);
+
+	ip->i_di.di_blocks++;
+
+	ea = GFS_FIRST_EA(bh);
+	ea->ea_flags = GFS_EAFLAG_LAST;
+	ea->ea_rec_len = cpu_to_gfs32(sdp->sd_sb.sb_bsize -
+				      sizeof (struct gfs_meta_header));
+	ea->ea_num_ptrs = 0;
+	ea->ea_type = GFS_EATYPE_UNUSED;
+
+	gfs_trans_add_bh(ip->i_gl, bh);
+
+ out:
+
+	return bh;
+}
+
+/**
+ * list_direct_ea - append the names of the EAs in one block to the list
+ * @sdp: the superblock
+ * @ip: the inode
+ * @bh: the EA block
+ * @req: the request information
+ * @copy_fn: the function that copies the names to the caller's buffer
+ * @size: the running size of the name list, updated as names are added
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+static int
+list_direct_ea(struct gfs_sbd *sdp, struct gfs_inode *ip,
+	       struct buffer_head *bh, struct gfs_eaget_io *req,
+	       gfs_ea_copy_fn_t copy_fn, uint32_t * size)
+{
+	int err = 0;
+	struct gfs_ea_header *ea;
+	char buf[256];
+	char *ptr;
+
+	gfs_metatype_check(sdp, bh, GFS_METATYPE_EA);
+
+	ea = (struct gfs_ea_header *) ((bh)->b_data +
+				       sizeof (struct gfs_meta_header));
+	if (ea->ea_type == GFS_EATYPE_UNUSED) {
+		if (GFS_EA_IS_LAST(ea))
+			goto out;
+		else
+			ea = GFS_EA_NEXT(ea);
+	}
+
+	while (1) {
+		GFS_ASSERT_INODE(GFS_EA_REC_LEN(ea), ip,);
+
+		if (req->eg_data_len) {
+			if (*size > req->eg_data_len) {
+				err = -ERANGE;
+				break;
+			}
+			ptr = buf;
+
+			GFS_ASSERT_INODE(GFS_EATYPE_VALID(ea->ea_type), ip,);
+			if (ea->ea_type == GFS_EATYPE_USR) {
+				memcpy(ptr, "user.", 5);
+				ptr += 5;
+			} else {
+				memcpy(ptr, "system.", 7);
+				ptr += 7;
+			}
+			memcpy(ptr, GFS_EA_NAME(ea), ea->ea_name_len);
+			ptr += ea->ea_name_len;
+			*ptr = 0;
+			err = copy_fn(req->eg_data + *size, buf,
+				      GFS_EA_STRLEN(ea));
+			if (err)
+				break;
+		}
+
+		*size = *size + GFS_EA_STRLEN(ea);
+
+		if (GFS_EA_IS_LAST(ea))
+			break;
+		ea = GFS_EA_NEXT(ea);
+	}
+
+ out:
+
+	return err;
+}
+
+/**
+ * list_ea - list the names of all of the inode's extended attributes
+ * @sdp: the superblock
+ * @ip: the inode
+ * @req: the request information
+ * @copy_fn: the function that copies the names to the caller's buffer
+ *
+ * Returns: the size of the name list on success, -EXXX on failure
+ */
+
+static int
+list_ea(struct gfs_sbd *sdp, struct gfs_inode *ip, struct gfs_eaget_io *req,
+	gfs_ea_copy_fn_t copy_fn)
+{
+	int err;
+	struct buffer_head *bh, *eabh;
+	uint64_t *eablk, *end;
+	uint32_t size = 0;
+
+	err = gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl, DIO_START | DIO_WAIT,
+			&bh);
+	if (err)
+		goto out;
+
+	if (ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) {
+		gfs_metatype_check(sdp, bh, GFS_METATYPE_IN);
+		eablk = (uint64_t *) ((bh)->b_data +
+				      sizeof (struct gfs_indirect));
+		end = eablk + ((sdp->sd_sb.sb_bsize -
+				sizeof (struct gfs_indirect)) / 8);
+
+		while (*eablk && eablk < end) {
+			err = gfs_dread(sdp, gfs64_to_cpu(*eablk), ip->i_gl,
+					DIO_START | DIO_WAIT, &eabh);
+			if (err)
+				goto out_drelse;
+			err = list_direct_ea(sdp, ip, eabh, req, copy_fn,
+					     &size);
+			brelse(eabh);
+			if (err)
+				goto out_drelse;
+			eablk++;
+		}
+	} else {
+		err = list_direct_ea(sdp, ip, bh, req, copy_fn, &size);
+		if (err)
+			goto out_drelse;
+	}
+
+	if (!err)
+		err = size;
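+	/*
+	 * The list handed back to the caller is a sequence of
+	 * NUL-terminated names, each carrying its namespace prefix, e.g.
+	 *
+	 *	"user.foo\0system.posix_acl_access\0user.bar\0"
+	 *
+	 * GFS_EA_STRLEN accounts for the prefix, the name, and the NUL
+	 * when list_direct_ea() updates the running size.
+	 */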
+
+ out_drelse:
+	brelse(bh);
+
+ out:
+
+	return err;
+}
+
+/**
+ * gfs_get_eattr - read an extended attribute, or a list of ea names
+ * @sdp: pointer to the superblock
+ * @ip: pointer to the inode for the target file
+ * @req: the request information
+ * @copy_fn: the function to use to do the actual copying
+ *
+ * Returns: actual size of data on success, -EXXX on error
+ */
+int
+gfs_get_eattr(struct gfs_sbd *sdp, struct gfs_inode *ip,
+	      struct gfs_eaget_io *req, gfs_ea_copy_fn_t copy_fn)
+{
+	struct gfs_holder i_gh;
+	int err;
+
+	if (req->eg_name) {
+		err = gfs_ea_read_permission(req, ip);
+		if (err)
+			goto out;
+	}
+
+	/* This is a read, so a shared hold on the glock is enough. */
+
+	err = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
+	if (err)
+		goto out;
+
+	if (ip->i_di.di_eattr == 0) {
+		if (!req->eg_name) {
+			if (!req->eg_data_len && req->eg_len) {
+				uint32_t no_data = 0;
+
+				err = copy_fn(req->eg_len, &no_data,
+					      sizeof (uint32_t));
+			}
+		} else
+			err = -ENODATA;
+
+		goto out_gunlock;
+	}
+
+	if (req->eg_name)
+		err = get_ea(sdp, ip, req, copy_fn);
+	else
+		err = list_ea(sdp, ip, req, copy_fn);
+
+ out_gunlock:
+	gfs_glock_dq_uninit(&i_gh);
+
+ out:
+
+	return err;
+}
+
+static int
+do_set_ea(struct gfs_sbd *sdp, struct gfs_inode *ip, struct gfs_easet_io *req,
+	  struct gfs_ea_location location)
+{
+	int err = 0;
+	int req_size;
+	uint32_t avail_size = sdp->sd_sb.sb_bsize -
+		sizeof (struct gfs_meta_header);
+	struct gfs_ea_location space;
+
+	req_size = get_req_size(req, avail_size);
+
+	if (location.ea) {
+		struct gfs_ea_header *new_space;
+		if (req->es_cmd == GFS_EACMD_REMOVE) {
+			remove_ea(ip, location.ea, location.prev);
+			gfs_trans_add_bh(ip->i_gl, location.bh);
+			goto out;
+		}
+		if (can_replace(location.ea, req, avail_size)) {
+			err = replace_ea(sdp, ip, location.ea, req);
+			if (!err)
+				gfs_trans_add_bh(ip->i_gl, location.bh);
+			goto out;
+		}
+		/*
+		 * This part is kind of confusing.  If the inode has direct
+		 * EAs, then adding another EA can't run it out of space, so
+		 * it is safe to delete the old EA before looking for space.
+		 * If the inode has indirect EAs, there may not be enough
+		 * space left, so first you check for space and then you
+		 * delete the EA.
+		 */
+		if ((ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) == 0) {
+			remove_ea(ip, location.ea, location.prev);
+			err = find_space(ip, req_size, req->es_type, &space);
+			if (err)
+				goto out;
+			new_space = prep_ea(space.ea);
+			err = write_ea(sdp, ip, ip, new_space, req);
+			if (!err) {
+				gfs_trans_add_bh(ip->i_gl, location.bh);
+				gfs_trans_add_bh(ip->i_gl, space.bh);
+			}
+			brelse(space.bh);
+			goto out;
+		}
+		if (can_replace_in_block(ip, req_size, location, &new_space)) {
+			remove_ea(ip, location.ea, location.prev);
+			new_space = prep_ea(new_space);
+			err = write_ea(sdp, ip, ip, new_space, req);
+			if (!err)
+				gfs_trans_add_bh(ip->i_gl, location.bh);
+			goto out;
+		}
+		err = find_space(ip, req_size, req->es_type, &space);
+		if (err)
+			/* A non-I/O error can be returned here: if there is
+			 * no space left, this can be -ENOSPC.  So no buffer
+			 * may have been added to the transaction yet.
+ */ + goto out; + remove_ea(ip, location.ea, location.prev); + new_space = prep_ea(space.ea); + err = write_ea(sdp, ip, ip, new_space, req); + if (!err) { + gfs_trans_add_bh(ip->i_gl, location.bh); + gfs_trans_add_bh(ip->i_gl, space.bh); + } + brelse(space.bh); + goto out; + } + err = find_space(ip, req_size, req->es_type, &space); + if (err) + /* you can also get -ENOSPC here */ + goto out; + space.ea = prep_ea(space.ea); + err = write_ea(sdp, ip, ip, space.ea, req); + if (!err) + gfs_trans_add_bh(ip->i_gl, space.bh); + brelse(space.bh); + + out: + return err; +} + +static int +set_ea(struct gfs_sbd *sdp, struct gfs_inode *ip, struct gfs_easet_io *req, + struct gfs_ea_location location) +{ + int err; + struct gfs_alloc *al; + struct gfs_rgrpd *rgd = NULL; + struct buffer_head *dibh; + uint32_t avail_size = + sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header); + int unstuffed_ea_blks = 0; + struct gfs_holder ri_gh, rgd_gh; + struct posix_acl *acl = NULL; + + if (IS_ACCESS_ACL(req->es_name, req->es_name_len) && req->es_data){ + acl = posix_acl_from_xattr(req->es_data, req->es_data_len); + if (IS_ERR(acl)) { + err = PTR_ERR(acl); + goto out; + } + } + + err = gfs_get_inode_buffer(ip, &dibh); + if (err) + goto out_acl; + al = gfs_alloc_get(ip); + + err = gfs_quota_lock_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (err) + goto out_alloc; + + /* + * worst case, you need to switch from direct to indirect, which can + * take up to 3 new blocks, and you need to create enough unstuffed data + * blocks to hold all the data + */ + al->al_requested_meta = 3 + GFS_EADATA_NUM_PTRS(req->es_data_len, avail_size); + + err = gfs_inplace_reserve(ip); + if (err) + goto out_lock_quota; + + err = gfs_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid); + if (err) + goto out_reserve; + + if (location.ea && GFS_EA_IS_UNSTUFFED(location.ea)) { + /* + * If there is an EA, we might need to delete it. + * Since all unstuffed data blocks are added at the same time, + * they are all from the same resource group. + */ + err = gfs_rindex_hold(sdp, &ri_gh); + if (err) + goto out_reserve; + rgd = + gfs_blk2rgrpd(sdp, + gfs64_to_cpu(*GFS_EA_DATA_PTRS(location.ea))); + GFS_ASSERT_INODE(rgd, ip, + printk("block = %" PRIu64 "\n", + gfs64_to_cpu(*GFS_EA_DATA_PTRS + (location.ea))); + ); + err = + gfs_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rgd_gh); + if (err) + goto out_rindex; + unstuffed_ea_blks = location.ea->ea_num_ptrs; + } + + /* + * The transaction may require: + * Modifying the dinode block, Modifying the indirect ea block, + * modifying an ea block, all the allocation blocks, all the blocks for + * a RG bitmap, the RG header block, a RG block for each unstuffed data + * block you might be deleting. 
+	 */
+	err = gfs_trans_begin(sdp,
+			      4 + al->al_requested_meta +
+			      al->al_rgd->rd_ri.ri_length + unstuffed_ea_blks,
+			      1);
+	if (err)
+		goto out_lock_rg;
+
+	err = do_set_ea(sdp, ip, req, location);
+
+	if (!err) {
+		if (acl)
+			gfs_acl_set_mode(ip, acl);
+		gfs_trans_add_bh(ip->i_gl, dibh);
+		gfs_dinode_out(&ip->i_di, (dibh)->b_data);
+	}
+
+	gfs_trans_end(sdp);
+
+ out_lock_rg:
+	if (rgd)
+		gfs_glock_dq_uninit(&rgd_gh);
+
+ out_rindex:
+	if (rgd)
+		gfs_glock_dq_uninit(&ri_gh);
+
+ out_reserve:
+	gfs_inplace_release(ip);
+
+ out_lock_quota:
+	gfs_quota_unlock_m(ip);
+
+ out_alloc:
+	gfs_alloc_put(ip);
+	brelse(dibh);
+
+ out_acl:
+	posix_acl_release(acl);
+
+ out:
+	return err;
+}
+
+/**
+ * gfs_set_eattr - sets (or creates or replaces) an extended attribute
+ * @sdp: pointer to the superblock
+ * @ip: pointer to the inode of the target file
+ * @req: request information
+ *
+ * Returns: 0 on success, -EXXX on error
+ */
+int
+gfs_set_eattr(struct gfs_sbd *sdp, struct gfs_inode *ip,
+	      struct gfs_easet_io *req)
+{
+	struct gfs_holder i_gh;
+	int err;
+	uint32_t req_size;
+	uint32_t avail_size = sdp->sd_sb.sb_bsize -
+		sizeof (struct gfs_meta_header);
+	struct gfs_ea_location location;
+
+	if (!GFS_EACMD_VALID(req->es_cmd)) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (strlen(req->es_name) == 0) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = gfs_ea_write_permission(req, ip);
+	if (err)
+		goto out;
+
+	if ((req_size = get_req_size(req, avail_size)) > avail_size) {
+		/* This can only happen with 512 byte blocks */
+		err = -ERANGE;
+		goto out;
+	}
+	err = gfs_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
+	if (err)
+		goto out;
+
+	if (ip->i_di.di_eattr == 0) {
+		if (req->es_cmd == GFS_EACMD_REPLACE ||
+		    req->es_cmd == GFS_EACMD_REMOVE) {
+			err = -ENODATA;
+			goto out_gunlock;
+		}
+		err = init_eattr(sdp, ip, req);
+		goto out_gunlock;
+	}
+
+	err = find_eattr(ip, req->es_name, req->es_name_len, req->es_type,
+			 &location);
+	if (err < 0)
+		goto out_gunlock;
+	if (err == 0 && (req->es_cmd == GFS_EACMD_REPLACE ||
+			 req->es_cmd == GFS_EACMD_REMOVE)) {
+		err = -ENODATA;
+		goto out_relse;
+	}
+	err = set_ea(sdp, ip, req, location);
+
+ out_relse:
+	if (location.bh)
+		brelse(location.bh);
+
+ out_gunlock:
+	gfs_glock_dq_uninit(&i_gh);
+
+ out:
+	return err;
+}
+
+/**
+ * gfs_set_eattr_ioctl - creates, modifies, or removes an extended attribute
+ * @sdp: pointer to the superblock
+ * @ip: a pointer to the gfs inode for the file
+ * @arg: a pointer to a struct gfs_easet_io with the request
+ *
+ * Notes: ioctl wrapper for gfs_set_eattr
+ * Returns: 0 on success, -EXXX on error
+ */
+
+int
+gfs_set_eattr_ioctl(struct gfs_sbd *sdp, struct gfs_inode *ip, void *arg)
+{
+	struct gfs_easet_io req;
+	int err = 0;
+	char *name = NULL;
+	char *data = NULL;
+
+	if (copy_from_user(&req, arg, sizeof (struct gfs_easet_io))) {
+		err = -EFAULT;
+		goto out;
+	}
+
+	name = gmalloc(req.es_name_len);
+
+	if (req.es_data) {
+		data = gmalloc(req.es_data_len);
+
+		if (copy_from_user(data, req.es_data, req.es_data_len)) {
+			err = -EFAULT;
+			goto out_free;
+		}
+	}
+	if (copy_from_user(name, req.es_name, req.es_name_len)) {
+		err = -EFAULT;
+		goto out_free;
+	}
+	req.es_data = data;
+	req.es_name = name;
+	err = gfs_set_eattr(sdp, ip, &req);
+
+ out_free:
+	kfree(name);
+	if (data)
+		kfree(data);
+
+ out:
+	return err;
+}
+
+/**
+ * gfs_get_eattr_ioctl - gets the value for the requested attribute name,
+ * or a list of all the extended attribute names.
+ * @sdp: pointer to the superblock
+ * @ip: a pointer to the inode for the file
+ * @arg: a pointer to the struct gfs_eaget_io struct holding the request
+ *
+ * Notes: ioctl wrapper for the gfs_get_eattr function
+ * Returns: 0 on success, -EXXX on error.
+ */
+
+int
+gfs_get_eattr_ioctl(struct gfs_sbd *sdp, struct gfs_inode *ip, void *arg)
+{
+	struct gfs_eaget_io req;
+	int result = 0;
+	char *name = NULL;
+	uint32_t size;
+
+	if (copy_from_user(&req, arg, sizeof (struct gfs_eaget_io))) {
+		result = -EFAULT;
+		goto out;
+	}
+
+	if (req.eg_name) {
+		name = gmalloc(req.eg_name_len);
+
+		if (copy_from_user(name, req.eg_name, req.eg_name_len)) {
+			result = -EFAULT;
+			goto out_free;
+		}
+		req.eg_name = name;
+	}
+	result = gfs_get_eattr(sdp, ip, &req, gfs_ea_copy_to_user);
+
+ out_free:
+	if (name)
+		kfree(name);
+
+	if (result >= 0) {
+		size = result;
+		result = gfs_ea_copy_to_user(req.eg_len, &size,
+					     sizeof(uint32_t));
+	}
+
+ out:
+
+	return result;
+}
+
+/**
+ * gfs_get_direct_eattr_meta - copy one EA block, and the unstuffed data
+ * blocks it points to, into a user buffer
+ * @ip: the inode
+ * @ub: the structure representing the user buffer to copy to
+ * @blk: the EA block
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+static int
+gfs_get_direct_eattr_meta(struct gfs_inode *ip, struct gfs_user_buffer *ub,
+			  uint64_t blk)
+{
+	struct gfs_sbd *sdp = ip->i_sbd;
+	struct buffer_head *databh, *bh;
+	struct gfs_ea_header *ea;
+	uint64_t *datablk;
+	unsigned int i;
+	int error;
+
+	error = gfs_dread(sdp, blk, ip->i_gl, DIO_START | DIO_WAIT, &bh);
+	if (error)
+		goto out;
+
+	error = gfs_add_bh_to_ub(ub, bh);
+	if (error)
+		goto out_relse;
+
+	ea = (struct gfs_ea_header *) ((bh)->b_data +
+				       sizeof (struct gfs_meta_header));
+	for (;;) {
+		GFS_ASSERT_INODE(GFS_EA_REC_LEN(ea), ip,);
+
+		datablk = GFS_EA_DATA_PTRS(ea);
+
+		for (i = 0; i < ea->ea_num_ptrs; i++) {
+			error = gfs_dread(sdp, gfs64_to_cpu(*datablk), ip->i_gl,
+					  DIO_START | DIO_WAIT, &databh);
+			if (error)
+				goto out_relse;
+
+			error = gfs_add_bh_to_ub(ub, databh);
+
+			brelse(databh);
+
+			if (error)
+				goto out_relse;
+
+			datablk++;
+		}
+
+		if (GFS_EA_IS_LAST(ea))
+			break;
+		ea = GFS_EA_NEXT(ea);
+	}
+
+ out_relse:
+	brelse(bh);
+
+ out:
+
+	return error;
+}
+
+/**
+ * gfs_get_eattr_meta - return all the eattr blocks of a file
+ * @ip: the inode
+ * @ub: the structure representing the user buffer to copy to
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+int
+gfs_get_eattr_meta(struct gfs_inode *ip, struct gfs_user_buffer *ub)
+{
+	struct gfs_sbd *sdp = ip->i_sbd;
+	struct buffer_head *bh;
+	int error;
+	uint64_t *eablk, *end;
+
+	if (ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) {
+		error = gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl,
+				  DIO_WAIT | DIO_START, &bh);
+		if (error)
+			goto out;
+
+		error = gfs_add_bh_to_ub(ub, bh);
+
+		eablk = (uint64_t *) ((bh)->b_data +
+				      sizeof (struct gfs_indirect));
+		end = eablk + ((sdp->sd_sb.sb_bsize -
+				sizeof (struct gfs_indirect)) / 8);
+
+		while (*eablk && eablk < end) {
+			error = gfs_get_direct_eattr_meta(ip, ub,
+							  gfs64_to_cpu(*eablk));
+			if (error) {
+				brelse(bh);
+				goto out;
+			}
+			eablk++;
+		}
+		brelse(bh);
+	} else
+		error = gfs_get_direct_eattr_meta(ip, ub, ip->i_di.di_eattr);
+
+ out:
+
+	return error;
+}
diff -urN linux-orig/fs/gfs/eattr.h linux-patched/fs/gfs/eattr.h
--- linux-orig/fs/gfs/eattr.h	1969-12-31 18:00:00.000000000 -0600
+++ linux-patched/fs/gfs/eattr.h	2004-06-30 13:27:49.338712290 -0500
@@ -0,0 +1,90 @@
+/******************************************************************************
+*******************************************************************************
+**
+** Copyright
(C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __EATTR_DOT_H__ +#define __EATTR_DOT_H__ + +#define GFS_EA_MAY_WRITE 1 +#define GFS_EA_MAY_READ 2 + +#define GFS_EA_DATA_LEN(x) gfs32_to_cpu((x)->ea_data_len) +#define GFS_EA_IS_UNSTUFFED(x) ((x)->ea_num_ptrs) +#define GFS_EA_DATA(x) ((char *)(x) + sizeof(struct gfs_ea_header) + (x)->ea_name_len) + +struct gfs_ea_location { + struct buffer_head *bh; + struct gfs_ea_header *ea; + struct gfs_ea_header *prev; +}; + +#define GFS_POSIX_ACL_ACCESS "posix_acl_access" +#define GFS_POSIX_ACL_ACCESS_LEN 16 +#define GFS_POSIX_ACL_DEFAULT "posix_acl_default" +#define GFS_POSIX_ACL_DEFAULT_LEN 17 + +#define IS_ACCESS_ACL(name, len) \ + ((len) == GFS_POSIX_ACL_ACCESS_LEN && \ + !memcmp(GFS_POSIX_ACL_ACCESS, (name), (len))) + +#define IS_DEFAULT_ACL(name, len) \ + ((len) == GFS_POSIX_ACL_DEFAULT_LEN && \ + !memcmp(GFS_POSIX_ACL_DEFAULT, (name), (len))) + +#define GFS_MAX_EA_ACL_BLKS 66 /* 65 for unstuffed data blocks, 1 for the ea + itself */ + +typedef int (*gfs_ea_copy_fn_t) (void *dest, void *src, unsigned long size); + +int gfs_ea_memcpy(void *dest, void *src, unsigned long size); +int gfs_ea_copy_to_user(void *dest, void *src, unsigned long size); + +int find_sys_space(struct gfs_inode *alloc_ip, struct gfs_inode *ip, int size, + struct gfs_ea_location *avail); + +struct gfs_ea_header *prep_ea(struct gfs_ea_header *ea); + +int write_ea(struct gfs_sbd *sdp, struct gfs_inode *alloc_ip, + struct gfs_inode *ip, struct gfs_ea_header *ea, + struct gfs_easet_io *req); + +int gfs_get_eattr(struct gfs_sbd *sdp, struct gfs_inode *ip, + struct gfs_eaget_io *req, gfs_ea_copy_fn_t copy_fn); +int gfs_set_eattr(struct gfs_sbd *sdp, struct gfs_inode *ip, + struct gfs_easet_io *req); + +int gfs_set_eattr_ioctl(struct gfs_sbd *sdp, struct gfs_inode *ip, void *arg); +int gfs_get_eattr_ioctl(struct gfs_sbd *sdp, struct gfs_inode *ip, void *arg); + +int gfs_ea_dealloc(struct gfs_inode *ip); + +int gfs_get_eattr_meta(struct gfs_inode *ip, struct gfs_user_buffer *ub); + +int replace_ea(struct gfs_sbd *sdp, struct gfs_inode *ip, + struct gfs_ea_header *ea, struct gfs_easet_io *req); + +int find_eattr(struct gfs_inode *ip, char *name, int name_len, int type, + struct gfs_ea_location *location); + +int read_unstuffed(void *dest, struct gfs_inode *ip, struct gfs_sbd *sdp, + struct gfs_ea_header *ea, uint32_t avail_size, + gfs_ea_copy_fn_t copy_fn); + +int get_ea(struct gfs_sbd *sdp, struct gfs_inode *ip, struct gfs_eaget_io *req, + gfs_ea_copy_fn_t copy_fn); + +int init_new_inode_eattr(struct gfs_inode *dip, struct gfs_inode *ip, + struct gfs_easet_io *req); + +int gfs_ea_read_permission(struct gfs_eaget_io *req, struct gfs_inode *ip); + +#endif /* __EATTR_DOT_H__ */ diff -urN linux-orig/fs/gfs/file.c linux-patched/fs/gfs/file.c --- linux-orig/fs/gfs/file.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/file.c 2004-06-30 13:27:49.339712058 -0500 @@ -0,0 +1,382 @@ +/****************************************************************************** +******************************************************************************* +** +** 
Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs.h" +#include "bmap.h" +#include "dio.h" +#include "file.h" +#include "inode.h" +#include "trans.h" + +/** + * gfs_copy2mem - Trivial copy function for gfs_readi() + * @bh: The buffer to copy from, or NULL meaning zero the buffer + * @buf: The buffer to copy/zero + * @offset: The offset in the buffer to copy from + * @size: The amount of data to copy/zero + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_copy2mem(struct buffer_head *bh, void **buf, unsigned int offset, + unsigned int size) +{ + char **p = (char **)buf; + + if (bh) + memcpy(*p, bh->b_data + offset, size); + else + memset(*p, 0, size); + + *p += size; + + return 0; +} + +/** + * gfs_copy2user - Copy data to user space + * @bh: The buffer + * @buf: The destination of the data + * @offset: The offset into the buffer + * @size: The amount of data to copy + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_copy2user(struct buffer_head *bh, void **buf, + unsigned int offset, unsigned int size) +{ + char **p = (char **)buf; + int error; + + if (bh) + error = copy_to_user(*p, bh->b_data + offset, size); + else + error = clear_user(*p, size); + + if (error) + error = -EFAULT; + else + *p += size; + + return error; +} + +/** + * gfs_readi - Read a file + * @ip: The GFS Inode + * @buf: The buffer to place result into + * @offset: File offset to begin reading from + * @size: Amount of data to transfer + * @copy_fn: Function to actually perform the copy + * + * The @copy_fn only copies a maximum of a single block at once so + * we are safe calling it with int arguments. It is done so that + * we don't needlessly put 64bit arguments on the stack and it + * also makes the code in the @copy_fn nicer too. 
+ * + * Returns: The amount of data actually copied or the error + */ + +int +gfs_readi(struct gfs_inode *ip, void *buf, + uint64_t offset, unsigned int size, + read_copy_fn_t copy_fn) +{ + struct gfs_sbd *sdp = ip->i_sbd; + struct buffer_head *bh; + uint64_t lblock, dblock; + unsigned int o; + uint32_t extlen = 0; + unsigned int amount; + int not_new = 0; + int journaled = gfs_is_jdata(ip); + int copied = 0; + int error = 0; + + if (offset >= ip->i_di.di_size) + return 0; + + if ((offset + size) > ip->i_di.di_size) + size = ip->i_di.di_size - offset; + + if (!size) + return 0; + + if (journaled) { + lblock = offset; + o = do_div(lblock, sdp->sd_jbsize); + } else { + lblock = offset >> sdp->sd_sb.sb_bsize_shift; + o = offset & (sdp->sd_sb.sb_bsize - 1); + } + + if (gfs_is_stuffed(ip)) + o += sizeof(struct gfs_dinode); + else if (journaled) + o += sizeof(struct gfs_meta_header); + + while (copied < size) { + amount = size - copied; + if (amount > sdp->sd_sb.sb_bsize - o) + amount = sdp->sd_sb.sb_bsize - o; + + if (!extlen) { + error = gfs_block_map(ip, lblock, ¬_new, + &dblock, &extlen); + if (error) + goto fail; + } + + if (extlen > 1) + gfs_start_ra(ip->i_gl, dblock, extlen); + + if (dblock) { + error = gfs_get_data_buffer(ip, dblock, not_new, &bh); + if (error) + goto fail; + + dblock++; + extlen--; + } else + bh = NULL; + + error = copy_fn(bh, &buf, o, amount); + if (bh) + brelse(bh); + if (error) + goto fail; + + copied += amount; + lblock++; + + o = (journaled) ? sizeof(struct gfs_meta_header) : 0; + } + + return copied; + + fail: + return (copied) ? copied : error; +} + +/** + * gfs_copy_from_mem - Trivial copy function for gfs_writei() + * @ip: The file to write to + * @bh: The buffer to copy to or clear + * @buf: The buffer to copy from + * @offset: The offset in the buffer to write to + * @size: The amount of data to write + * @new: Flag indicating that remaining space in the buffer should be zeroed + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_copy_from_mem(struct gfs_inode *ip, struct buffer_head *bh, void **buf, + unsigned int offset, unsigned int size, int new) +{ + char **p = (char **)buf; + int error = 0; + + if (bh->b_blocknr == ip->i_num.no_addr) { + GFS_ASSERT_INODE(!new, ip,); + gfs_trans_add_bh(ip->i_gl, bh); + memcpy(bh->b_data + offset, *p, size); + } else if (gfs_is_jdata(ip)) { + gfs_trans_add_bh(ip->i_gl, bh); + memcpy(bh->b_data + offset, *p, size); + if (new) + gfs_buffer_clear_ends(bh, offset, size, TRUE); + } else { + memcpy(bh->b_data + offset, *p, size); + if (new) + gfs_buffer_clear_ends(bh, offset, size, FALSE); + error = gfs_dwrite(ip->i_sbd, bh, DIO_DIRTY); + } + + if (!error) + *p += size; + + return error; +} + +/** + * gfs_copy_from_user - Copy bytes from user space for gfs_writei() + * @ip: The file to write to + * @bh: The buffer to copy to or clear + * @buf: The buffer to copy from + * @offset: The offset in the buffer to write to + * @size: The amount of data to write + * @new: Flag indicating that remaining space in the buffer should be zeroed + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_copy_from_user(struct gfs_inode *ip, struct buffer_head *bh, void **buf, + unsigned int offset, unsigned int size, int new) +{ + char **p = (char **)buf; + int error = 0; + + if (bh->b_blocknr == ip->i_num.no_addr) { + GFS_ASSERT_INODE(!new, ip,); + gfs_trans_add_bh(ip->i_gl, bh); + if (copy_from_user(bh->b_data + offset, *p, size)) + error = -EFAULT; + } else if (gfs_is_jdata(ip)) { + gfs_trans_add_bh(ip->i_gl, bh); + if 
(copy_from_user(bh->b_data + offset, *p, size)) + error = -EFAULT; + if (new) { + gfs_buffer_clear_ends(bh, offset, size, TRUE); + if (error) + memset(bh->b_data + offset, 0, size); + } + } else { + if (copy_from_user(bh->b_data + offset, *p, size)) + error = -EFAULT; + if (error) { + if (new) + gfs_buffer_clear(bh); + gfs_dwrite(ip->i_sbd, bh, DIO_DIRTY); + } else { + if (new) + gfs_buffer_clear_ends(bh, offset, size, FALSE); + error = gfs_dwrite(ip->i_sbd, bh, DIO_DIRTY); + } + } + + if (!error) + *p += size; + + return error; +} + +/** + * gfs_writei - Write bytes to a file + * @ip: The GFS inode + * @buf: The buffer containing information to be written + * @offset: The file offset to start writing at + * @size: The amount of data to write + * @copy_fn: Function to do the actual copying + * + * Returns: The number of bytes correctly written or error code + */ + +int +gfs_writei(struct gfs_inode *ip, void *buf, + uint64_t offset, unsigned int size, + write_copy_fn_t copy_fn) +{ + struct gfs_sbd *sdp = ip->i_sbd; + struct buffer_head *dibh, *bh; + uint64_t lblock, dblock; + unsigned int o; + uint32_t extlen = 0; + unsigned int amount; + int new; + int journaled = gfs_is_jdata(ip); + const uint64_t start = offset; + int copied = 0; + int error = 0; + + if (!size) + return 0; + + if (gfs_is_stuffed(ip) && + ((start + size) > (sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode)))) { + error = gfs_unstuff_dinode(ip, gfs_unstuffer_async, NULL); + if (error) + return error; + } + + if (journaled) { + lblock = offset; + o = do_div(lblock, sdp->sd_jbsize); + } else { + lblock = offset >> sdp->sd_sb.sb_bsize_shift; + o = offset & (sdp->sd_sb.sb_bsize - 1); + } + + if (gfs_is_stuffed(ip)) + o += sizeof(struct gfs_dinode); + else if (journaled) + o += sizeof(struct gfs_meta_header); + + while (copied < size) { + amount = size - copied; + if (amount > sdp->sd_sb.sb_bsize - o) + amount = sdp->sd_sb.sb_bsize - o; + + if (!extlen) { + new = TRUE; + error = gfs_block_map(ip, lblock, &new, &dblock, &extlen); + if (error) + goto fail; + GFS_ASSERT_INODE(dblock, ip,); + } + + if (journaled && extlen > 1) + gfs_start_ra(ip->i_gl, dblock, extlen); + + error = gfs_get_data_buffer(ip, dblock, + (amount == sdp->sd_sb.sb_bsize) ? TRUE : new, + &bh); + if (error) + goto fail; + + error = copy_fn(ip, bh, &buf, o, amount, new); + brelse(bh); + if (error) + goto fail; + + copied += amount; + lblock++; + dblock++; + extlen--; + + o = (journaled) ? sizeof(struct gfs_meta_header) : 0; + } + + out: + error = gfs_get_inode_buffer(ip, &dibh); + if (error) + return error; + + if (ip->i_di.di_size < start + copied) + ip->i_di.di_size = start + copied; + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + + gfs_trans_add_bh(ip->i_gl, dibh); + gfs_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + + return copied; + + fail: + if (copied) + goto out; + return error; +} diff -urN linux-orig/fs/gfs/file.h linux-patched/fs/gfs/file.h --- linux-orig/fs/gfs/file.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/file.h 2004-06-30 13:27:49.339712058 -0500 @@ -0,0 +1,51 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. 
+** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __FILE_DOT_H__ +#define __FILE_DOT_H__ + +typedef int (*read_copy_fn_t) (struct buffer_head * bh, void **buf, + unsigned int offset, unsigned int size); +typedef int (*write_copy_fn_t) (struct gfs_inode * ip, struct buffer_head * bh, + void **buf, unsigned int offset, + unsigned int size, int new); + +int gfs_copy2mem(struct buffer_head *bh, void **buf, + unsigned int offset, unsigned int size); +int gfs_copy2user(struct buffer_head *bh, void **buf, + unsigned int offset, unsigned int size); +int gfs_readi(struct gfs_inode *ip, void *buf, uint64_t offset, + unsigned int size, read_copy_fn_t copy_fn); + +int gfs_copy_from_mem(struct gfs_inode *ip, struct buffer_head *bh, void **buf, + unsigned int offset, unsigned int size, int new); +int gfs_copy_from_user(struct gfs_inode *ip, struct buffer_head *bh, void **buf, + unsigned int offset, unsigned int size, int new); +int gfs_writei(struct gfs_inode *ip, void *buf, uint64_t offset, + unsigned int size, write_copy_fn_t copy_fn); + +static __inline__ int +gfs_internal_read(struct gfs_inode *ip, char *buf, uint64_t offset, + unsigned int size) +{ + return gfs_readi(ip, buf, offset, size, gfs_copy2mem); +} + +static __inline__ int +gfs_internal_write(struct gfs_inode *ip, char *buf, uint64_t offset, + unsigned int size) +{ + return gfs_writei(ip, buf, offset, size, gfs_copy_from_mem); +} + +#endif /* __FILE_DOT_H__ */ diff -urN linux-orig/fs/gfs/fixed_div64.h linux-patched/fs/gfs/fixed_div64.h --- linux-orig/fs/gfs/fixed_div64.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/fixed_div64.h 2004-06-30 13:27:49.339712058 -0500 @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + * + * Additional munging: + * Copyright (C) 2004 Red Hat, Inc. All rights reserved. 
+ */ + +#ifndef __FIXED_DIV64_DOT_H__ +#define __FIXED_DIV64_DOT_H__ + +#include + +#if defined __i386__ +/* For ia32 we need to pull some tricks to get past various versions + * of the compiler which do not like us using do_div in the middle + * of large functions. + */ +static inline __u32 fixed_div64_do_div(void *a, __u32 b, int n) +{ + __u32 mod; + + switch (n) { + case 4: + mod = *(__u32 *)a % b; + *(__u32 *)a = *(__u32 *)a / b; + return mod; + case 8: + { + unsigned long __upper, __low, __high, __mod; + __u64 c = *(__u64 *)a; + __upper = __high = c >> 32; + __low = c; + if (__high) { + __upper = __high % (b); + __high = __high / (b); + } + asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper)); + asm("":"=A" (c):"a" (__low),"d" (__high)); + *(__u64 *)a = c; + return __mod; + } + } + + /* NOTREACHED */ + return 0; +} + +/* Side effect free 64 bit mod operation */ +static inline __u32 fixed_div64_do_mod(void *a, __u32 b, int n) +{ + switch (n) { + case 4: + return *(__u32 *)a % b; + case 8: + { + unsigned long __upper, __low, __high, __mod; + __u64 c = *(__u64 *)a; + __upper = __high = c >> 32; + __low = c; + if (__high) { + __upper = __high % (b); + __high = __high / (b); + } + asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper)); + asm("":"=A" (c):"a" (__low),"d" (__high)); + return __mod; + } + } + + /* NOTREACHED */ + return 0; +} +#else +static inline __u32 fixed_div64_do_div(void *a, __u32 b, int n) +{ + __u32 mod; + + switch (n) { + case 4: + mod = *(__u32 *)a % b; + *(__u32 *)a = *(__u32 *)a / b; + return mod; + case 8: + mod = do_div(*(__u64 *)a, b); + return mod; + } + + /* NOTREACHED */ + return 0; +} + +/* Side effect free 64 bit mod operation */ +static inline __u32 fixed_div64_do_mod(void *a, __u32 b, int n) +{ + switch (n) { + case 4: + return *(__u32 *)a % b; + case 8: + { + __u64 c = *(__u64 *)a; + return do_div(c, b); + } + } + + /* NOTREACHED */ + return 0; +} +#endif + +#undef do_div +#define do_div(a, b) fixed_div64_do_div(&(a), (b), sizeof(a)) +#define do_mod(a, b) fixed_div64_do_mod(&(a), (b), sizeof(a)) + +#endif /* __FIXED_DIV64_DOT_H__ */ diff -urN linux-orig/fs/gfs/flock.c linux-patched/fs/gfs/flock.c --- linux-orig/fs/gfs/flock.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/flock.c 2004-06-30 13:27:49.339712058 -0500 @@ -0,0 +1,98 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
+** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include "gfs.h" +#include "flock.h" +#include "glock.h" +#include "glops.h" + +/** + * gfs_flock - Acquire a flock on a file + * @fp: the file + * @ex: exclusive lock + * @wait: wait for lock + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_flock(struct gfs_file *fp, int ex, int wait) +{ + struct gfs_holder *fl_gh = &fp->f_fl_gh; + struct gfs_inode *ip = fp->f_inode; + struct gfs_sbd *sdp = ip->i_sbd; + struct gfs_glock *gl; + int error = 0; + + down(&fp->f_fl_lock); + + if (fl_gh->gh_gl) { + gfs_glock_dq_uninit(fl_gh); + error = -EDEADLK; + goto out; + } + + error = gfs_glock_get(sdp, + ip->i_num.no_formal_ino, &gfs_flock_glops, + CREATE, &gl); + if (error) + goto out; + + gfs_holder_init(gl, (ex) ? LM_ST_EXCLUSIVE : LM_ST_SHARED, + ((wait) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE, + fl_gh); + fl_gh->gh_owner = NULL; + + gfs_glock_put(gl); + + error = gfs_glock_nq(fl_gh); + if (error) { + gfs_holder_uninit(fl_gh); + if (error == GLR_TRYFAILED) { + GFS_ASSERT_INODE(!wait, ip,); + error = -EAGAIN; + } + } + + out: + up(&fp->f_fl_lock); + + return error; +} + +/** + * gfs_funlock - Release a flock on a file + * @fp: the file + * + */ + +int +gfs_funlock(struct gfs_file *fp) +{ + struct gfs_holder *fl_gh = &fp->f_fl_gh; + + down(&fp->f_fl_lock); + if (fl_gh->gh_gl) + gfs_glock_dq_uninit(fl_gh); + up(&fp->f_fl_lock); + + return 0; +} diff -urN linux-orig/fs/gfs/flock.h linux-patched/fs/gfs/flock.h --- linux-orig/fs/gfs/flock.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/flock.h 2004-06-30 13:27:49.339712058 -0500 @@ -0,0 +1,20 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __FLOCK_DOT_H__ +#define __FLOCK_DOT_H__ + +int gfs_flock(struct gfs_file *fp, int ex, int wait); +int gfs_funlock(struct gfs_file *fp); + +#endif /* __FLOCK_DOT_H__ */ diff -urN linux-orig/fs/gfs/format.h linux-patched/fs/gfs/format.h --- linux-orig/fs/gfs/format.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/format.h 2004-06-30 13:27:49.340711826 -0500 @@ -0,0 +1,30 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
+** +******************************************************************************* +******************************************************************************/ + +#ifndef __FORMAT_DOT_H__ +#define __FORMAT_DOT_H__ + +static const uint32_t gfs_old_fs_formats[] = { + 1308, + 1307, + 1306, + 1305, + 0 +}; + +static const uint32_t gfs_old_multihost_formats[] = { + 1400, + 0 +}; + +#endif /* __FORMAT_DOT_H__ */ diff -urN linux-orig/fs/gfs/gfs.h linux-patched/fs/gfs/gfs.h --- linux-orig/fs/gfs/gfs.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/gfs.h 2004-06-30 13:27:49.340711826 -0500 @@ -0,0 +1,130 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __GFS_DOT_H__ +#define __GFS_DOT_H__ + +#define GFS_RELEASE_NAME "" + +#include +#include +#include + +#include "fixed_div64.h" +#include "lvb.h" +#include "incore.h" +#include "util.h" + +#ifndef TRUE +#define TRUE (1) +#endif + +#ifndef FALSE +#define FALSE (0) +#endif + +#define NO_CREATE (0) +#define CREATE (1) + +#if (BITS_PER_LONG == 64) +#define PRIu64 "lu" +#define PRId64 "ld" +#define PRIo64 "lo" +#define PRIx64 "lx" +#define PRIX64 "lX" +#define SCNu64 "lu" +#define SCNd64 "ld" +#define SCNo64 "lo" +#define SCNx64 "lx" +#define SCNX64 "lX" +#else +#define PRIu64 "Lu" +#define PRId64 "Ld" +#define PRIo64 "Lo" +#define PRIx64 "Lx" +#define PRIX64 "LX" +#define SCNu64 "Lu" +#define SCNd64 "Ld" +#define SCNo64 "Lo" +#define SCNx64 "Lx" +#define SCNX64 "LX" +#endif + +/* Divide x by y. Round up if there is a remainder. 
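+   For example, DIV_RU(7, 3) evaluates to 3 and DIV_RU(6, 3) to 2.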
*/ +#define DIV_RU(x, y) (((x) + (y) - 1) / (y)) + +#define GFS_FAST_NAME_SIZE (8) + +#define vfs2sdp(sb) ((struct gfs_sbd *)(sb)->s_fs_info) +#define vn2ip(inode) ((struct gfs_inode *)(inode)->u.generic_ip) +#define vf2fp(file) ((struct gfs_file *)(file)->private_data) +#define bh2bd(bh) ((struct gfs_bufdata *)(bh)->b_private) +#define current_transaction ((struct gfs_trans *)(current->journal_info)) + +#define gl2ip(gl) ((struct gfs_inode *)(gl)->gl_object) +#define gl2rgd(gl) ((struct gfs_rgrpd *)(gl)->gl_object) +#define gl2gl(gl) ((struct gfs_glock *)(gl)->gl_object) + +#define gfs_meta_check(sdp, bh) \ +do \ +{ \ + uint32_t meta_check_magic = ((struct gfs_meta_header *)(bh)->b_data)->mh_magic; \ + meta_check_magic = gfs32_to_cpu(meta_check_magic); \ + GFS_ASSERT_SBD(meta_check_magic == GFS_MAGIC, (sdp), \ + struct gfs_meta_header meta_check_mh; \ + printk("Bad metadata at %"PRIu64"\n", (uint64_t)(bh)->b_blocknr); \ + gfs_meta_header_in(&meta_check_mh, (bh)->b_data); \ + gfs_meta_header_print(&meta_check_mh);); \ +} \ +while (0) + +#define gfs_metatype_check(sdp, bh, type) \ +do \ +{ \ + uint32_t metatype_check_magic = ((struct gfs_meta_header *)(bh)->b_data)->mh_magic; \ + uint32_t metatype_check_type = ((struct gfs_meta_header *)(bh)->b_data)->mh_type; \ + metatype_check_magic = gfs32_to_cpu(metatype_check_magic); \ + metatype_check_type = gfs32_to_cpu(metatype_check_type); \ + GFS_ASSERT_SBD(metatype_check_magic == GFS_MAGIC && \ + metatype_check_type == (type), (sdp), \ + struct gfs_meta_header metatype_check_mh; \ + printk("Bad metadata at %"PRIu64", should be %u\n", (uint64_t)(bh)->b_blocknr, (type)); \ + gfs_meta_header_in(&metatype_check_mh, (bh)->b_data); \ + gfs_meta_header_print(&metatype_check_mh);); \ +} \ +while (0) + +#define gfs_metatype_set(sdp, bh, type, format) \ +do \ +{ \ + gfs_meta_check((sdp), (bh)); \ + ((struct gfs_meta_header *)(bh)->b_data)->mh_type = cpu_to_gfs32((type)); \ + ((struct gfs_meta_header *)(bh)->b_data)->mh_format = cpu_to_gfs32((format)); \ +} \ +while (0) + +#define gfs_sprintf(fmt, args...) \ +do { \ + if (buf) { \ + if (*count + 256 > size) { \ + error = -ENOMEM; \ + goto out; \ + } \ + *count += snprintf(buf + *count, 256, fmt, ##args); \ + } \ + else \ + printk(fmt, ##args); \ +} \ +while (0) + +#endif /* __GFS_DOT_H__ */ diff -urN linux-orig/fs/gfs/glock.c linux-patched/fs/gfs/glock.c --- linux-orig/fs/gfs/glock.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/glock.c 2004-06-30 13:27:49.341711594 -0500 @@ -0,0 +1,2524 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
+**
+*******************************************************************************
+******************************************************************************/
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "gfs.h"
+#include "dio.h"
+#include "glock.h"
+#include "glops.h"
+#include "inode.h"
+#include "lops.h"
+#include "quota.h"
+#include "recovery.h"
+
+/* Must be kept in sync with the beginning of struct gfs_glock */
+struct glock_plug {
+	struct list_head gl_list;
+	unsigned long gl_flags;
+};
+
+typedef void (*glock_examiner) (struct gfs_glock * gl);
+
+/**
+ * relaxed_state_ok - is a requested lock compatible with the current lock mode?
+ * @actual: the current state of the lock
+ * @requested: the lock state that was requested by the caller
+ * @flags: the modifier flags passed in by the caller
+ *
+ * Returns: TRUE if the locks are compatible, FALSE otherwise
+ */
+
+static __inline__ int
+relaxed_state_ok(unsigned int actual, unsigned requested, int flags)
+{
+	if (actual == requested)
+		return TRUE;
+
+	if (flags & GL_EXACT)
+		return FALSE;
+
+	if (actual == LM_ST_EXCLUSIVE && requested == LM_ST_SHARED)
+		return TRUE;
+
+	if (actual != LM_ST_UNLOCKED && (flags & LM_FLAG_ANY))
+		return TRUE;
+
+	return FALSE;
+}
+
+/**
+ * gl_hash() - Turn glock number into hash bucket number
+ * @name: The lock name
+ *
+ * Returns: The number of the corresponding hash bucket
+ */
+
+static unsigned int
+gl_hash(struct lm_lockname *name)
+{
+	unsigned int h;
+
+	h = gfs_hash(&name->ln_number, sizeof(uint64_t));
+	h = gfs_hash_more(&name->ln_type, sizeof(unsigned int), h);
+	h &= GFS_GL_HASH_MASK;
+
+	return h;
+}
+
+/**
+ * glock_hold() - increment reference count on glock
+ * @gl: The glock to hold
+ *
+ */
+
+static __inline__ void
+glock_hold(struct gfs_glock *gl)
+{
+	atomic_inc(&gl->gl_count);
+}
+
+/**
+ * glock_put() - Decrement reference count on glock
+ * @gl: The glock to put
+ *
+ */
+
+static __inline__ void
+glock_put(struct gfs_glock *gl)
+{
+	if (atomic_read(&gl->gl_count) == 1)
+		gfs_glock_schedule_for_reclaim(gl);
+	atomic_dec(&gl->gl_count);
+	GFS_ASSERT_GLOCK(atomic_read(&gl->gl_count) >= 0, gl,);
+}
+
+/**
+ * queue_empty - check to see if a glock's queue is empty
+ * @gl: the glock
+ * @head: the head of the queue to check
+ *
+ * Returns: TRUE if the queue is empty
+ */
+
+static __inline__ int
+queue_empty(struct gfs_glock *gl, struct list_head *head)
+{
+	int empty;
+	spin_lock(&gl->gl_spin);
+	empty = list_empty(head);
+	spin_unlock(&gl->gl_spin);
+	return empty;
+}
+
+/**
+ * search_bucket() - Find struct gfs_glock by lock number
+ * @bucket: the bucket to search
+ * @name: The lock name
+ *
+ * Returns: NULL, or the struct gfs_glock with the requested number
+ */
+
+static struct gfs_glock *
+search_bucket(struct gfs_gl_hash_bucket *bucket, struct lm_lockname *name)
+{
+	struct list_head *tmp, *head;
+	struct gfs_glock *gl;
+
+	for (head = &bucket->hb_list, tmp = head->next;
+	     tmp != head;
+	     tmp = tmp->next) {
+		gl = list_entry(tmp, struct gfs_glock, gl_list);
+
+		if (test_bit(GLF_PLUG, &gl->gl_flags))
+			continue;
+		if (!lm_name_equal(&gl->gl_name, name))
+			continue;
+
+		glock_hold(gl);
+
+		return gl;
+	}
+
+	return NULL;
+}
+
+/**
+ * gfs_glock_find() - Find glock by lock number
+ * @sdp: The GFS superblock
+ * @name: The lock name
+ *
+ * Figure out what bucket the lock is in, acquire the read lock on
+ * it and call search_bucket().
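+ *
+ * Note that search_bucket() takes a reference on any glock it returns,
+ * so the caller of gfs_glock_find() must put the glock when done with it.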
+ * + * Returns: NULL, or the struct gfs_glock with the requested number + */ + +struct gfs_glock * +gfs_glock_find(struct gfs_sbd *sdp, struct lm_lockname *name) +{ + struct gfs_gl_hash_bucket *bucket = &sdp->sd_gl_hash[gl_hash(name)]; + struct gfs_glock *gl; + + read_lock(&bucket->hb_lock); + gl = search_bucket(bucket, name); + read_unlock(&bucket->hb_lock); + + return gl; +} + +/** + * glock_free() - Perform a few checks and then release struct gfs_glock + * @gl: The glock to release + * + */ + +static void +glock_free(struct gfs_glock *gl) +{ + struct gfs_sbd *sdp = gl->gl_sbd; + struct inode *aspace = gl->gl_aspace; + + GFS_ASSERT_GLOCK(list_empty(&gl->gl_list), gl,); + GFS_ASSERT_GLOCK(atomic_read(&gl->gl_count) == 1, gl,); + GFS_ASSERT_GLOCK(list_empty(&gl->gl_holders), gl,); + GFS_ASSERT_GLOCK(list_empty(&gl->gl_waiters1), gl,); + GFS_ASSERT_GLOCK(list_empty(&gl->gl_waiters2), gl,); + GFS_ASSERT_GLOCK(gl->gl_state == LM_ST_UNLOCKED, gl,); + GFS_ASSERT_GLOCK(!gl->gl_object, gl,); + GFS_ASSERT_GLOCK(!gl->gl_lvb, gl,); + GFS_ASSERT_GLOCK(list_empty(&gl->gl_reclaim), gl,); + + sdp->sd_lockstruct.ls_ops->lm_put_lock(gl->gl_lock); + + if (aspace) + gfs_aspace_put(aspace); + + kmem_cache_free(gfs_glock_cachep, gl); + + atomic_dec(&sdp->sd_glock_count); +} + +/** + * gfs_glock_get() - Get a glock, or create one if one doesn't exist + * @sdp: The GFS superblock + * @number: the lock number + * @glops: The glock_operations to use + * @create: If FALSE, don't create the glock if it doesn't exist + * @glp: the glock is returned here + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_glock_get(struct gfs_sbd *sdp, + uint64_t number, struct gfs_glock_operations *glops, + int create, struct gfs_glock **glp) +{ + struct lm_lockname name; + struct gfs_glock *gl, *tmp; + struct gfs_gl_hash_bucket *bucket; + int error; + + name.ln_number = number; + name.ln_type = glops->go_type; + bucket = &sdp->sd_gl_hash[gl_hash(&name)]; + + read_lock(&bucket->hb_lock); + gl = search_bucket(bucket, &name); + read_unlock(&bucket->hb_lock); + + if (gl || !create) { + *glp = gl; + return 0; + } + + gl = kmem_cache_alloc(gfs_glock_cachep, GFP_KERNEL); + if (!gl) + return -ENOMEM; + + memset(gl, 0, sizeof(struct gfs_glock)); + + INIT_LIST_HEAD(&gl->gl_list); + gl->gl_name = name; + atomic_set(&gl->gl_count, 1); + + spin_lock_init(&gl->gl_spin); + + gl->gl_state = LM_ST_UNLOCKED; + INIT_LIST_HEAD(&gl->gl_holders); + INIT_LIST_HEAD(&gl->gl_waiters1); + INIT_LIST_HEAD(&gl->gl_waiters2); + + gl->gl_ops = glops; + + INIT_LE(&gl->gl_new_le, &gfs_glock_lops); + INIT_LE(&gl->gl_incore_le, &gfs_glock_lops); + + gl->gl_bucket = bucket; + INIT_LIST_HEAD(&gl->gl_reclaim); + + gl->gl_sbd = sdp; + + INIT_LIST_HEAD(&gl->gl_dirty_buffers); + INIT_LIST_HEAD(&gl->gl_ail_bufs); + + if (glops == &gfs_inode_glops || + glops == &gfs_rgrp_glops || + glops == &gfs_meta_glops) { + gl->gl_aspace = gfs_aspace_get(sdp); + if (!gl->gl_aspace) { + error = -ENOMEM; + goto fail; + } + } + + error = sdp->sd_lockstruct.ls_ops->lm_get_lock(sdp->sd_lockstruct.ls_lockspace, + &name, + &gl->gl_lock); + if (error) + goto fail_aspace; + + atomic_inc(&sdp->sd_glock_count); + + write_lock(&bucket->hb_lock); + tmp = search_bucket(bucket, &name); + if (tmp) { + write_unlock(&bucket->hb_lock); + glock_free(gl); + gl = tmp; + } else { + list_add_tail(&gl->gl_list, &bucket->hb_list); + write_unlock(&bucket->hb_lock); + } + + *glp = gl; + + return 0; + + fail_aspace: + if (gl->gl_aspace) + gfs_aspace_put(gl->gl_aspace); + + fail: + 
kmem_cache_free(gfs_glock_cachep, gl); + + return error; +} + +/** + * gfs_glock_hold() - As glock_hold(), but suitable for exporting + * @gl: The glock to hold + * + */ + +void +gfs_glock_hold(struct gfs_glock *gl) +{ + GFS_ASSERT_GLOCK(atomic_read(&gl->gl_count) > 0, gl,); + glock_hold(gl); +} + +/** + * gfs_glock_put() - As glock_put(), but suitable for exporting + * @gl: The glock to put + * + */ + +void +gfs_glock_put(struct gfs_glock *gl) +{ + glock_put(gl); +} + +/** + * gfs_holder_init - initialize a struct gfs_holder in the default way + * @gl: the glock + * @state: the state we're requesting + * @flags: the modifier flags + * @gh: the holder structure + * + */ + +void +gfs_holder_init(struct gfs_glock *gl, unsigned int state, int flags, + struct gfs_holder *gh) +{ + memset(gh, 0, sizeof(struct gfs_holder)); + + INIT_LIST_HEAD(&gh->gh_list); + gh->gh_gl = gl; + gh->gh_owner = current; + gh->gh_state = state; + gh->gh_flags = flags; + + if (gh->gh_state == LM_ST_EXCLUSIVE) + gh->gh_flags |= GL_LOCAL_EXCL; + + init_completion(&gh->gh_wait); + + glock_hold(gl); +} + +/** + * gfs_holder_reinit - reinitialize a struct gfs_holder so we can requeue it + * @state: the state we're requesting + * @flags: the modifier flags + * @gh: the holder structure + * + * Don't mess with the glock. + * + */ + +void +gfs_holder_reinit(unsigned int state, int flags, struct gfs_holder *gh) +{ + int alloced; + + GFS_ASSERT_GLOCK(list_empty(&gh->gh_list), gh->gh_gl,); + + gh->gh_state = state; + gh->gh_flags = flags; + + if (gh->gh_state == LM_ST_EXCLUSIVE) + gh->gh_flags |= GL_LOCAL_EXCL; + + alloced = test_bit(HIF_ALLOCED, &gh->gh_iflags); + memset(&gh->gh_iflags, 0, sizeof(unsigned long)); + if (alloced) + set_bit(HIF_ALLOCED, &gh->gh_iflags); +} + +/** + * gfs_holder_uninit - uninitialize a holder structure (drop reference on glock) + * @gh: the holder structure + * + */ + +void +gfs_holder_uninit(struct gfs_holder *gh) +{ + struct gfs_glock *gl = gh->gh_gl; + + GFS_ASSERT_GLOCK(list_empty(&gh->gh_list), gl,); + gh->gh_gl = NULL; + + glock_put(gl); +} + +/** + * gfs_holder_get - get a struct gfs_holder structure + * @gl: the glock + * @state: the state we're requesting + * @flags: the modifier flags + * + * Figure out how big an impact this function has. 
Either:
+ * 1) Replace it with a cache of structures hanging off the struct gfs_sbd
+ * 2) Get rid of it and call gmalloc() directly
+ * 3) Leave it like it is
+ *
+ * Returns: the holder structure
+ */
+
+struct gfs_holder *
+gfs_holder_get(struct gfs_glock *gl, unsigned int state, int flags)
+{
+	struct gfs_holder *gh;
+
+	gh = gmalloc(sizeof(struct gfs_holder));
+	gfs_holder_init(gl, state, flags, gh);
+	set_bit(HIF_ALLOCED, &gh->gh_iflags);
+
+	return gh;
+}
+
+/**
+ * gfs_holder_put - get rid of a struct gfs_holder structure
+ * @gh: the holder structure
+ *
+ */
+
+void
+gfs_holder_put(struct gfs_holder *gh)
+{
+	GFS_ASSERT_GLOCK(test_bit(HIF_ALLOCED, &gh->gh_iflags), gh->gh_gl,);
+	gfs_holder_uninit(gh);
+	kfree(gh);
+}
+
+/**
+ * handle_recurse - put other holder structures (marked recursive) into the holders list
+ * @gh: the holder structure
+ *
+ */
+
+static void
+handle_recurse(struct gfs_holder *gh)
+{
+	struct gfs_glock *gl = gh->gh_gl;
+	struct list_head *tmp, *head, *next;
+	struct gfs_holder *tmp_gh;
+	int found = FALSE;
+
+	GFS_ASSERT_GLOCK(gh->gh_owner, gl,);
+
+	for (head = &gl->gl_waiters2, tmp = head->next, next = tmp->next;
+	     tmp != head;
+	     tmp = next, next = tmp->next) {
+		tmp_gh = list_entry(tmp, struct gfs_holder, gh_list);
+		if (tmp_gh->gh_owner != gh->gh_owner)
+			continue;
+
+		GFS_ASSERT_GLOCK(test_bit(HIF_RECURSE, &tmp_gh->gh_iflags),
+				 gl,);
+
+		list_move_tail(&tmp_gh->gh_list, &gl->gl_holders);
+		tmp_gh->gh_error = 0;
+		set_bit(HIF_HOLDER, &tmp_gh->gh_iflags);
+
+		complete(&tmp_gh->gh_wait);
+
+		found = TRUE;
+	}
+
+	GFS_ASSERT_GLOCK(found, gl,);
+}
+
+/**
+ * do_unrecurse - a recursive holder was just dropped off the waiters2 list
+ * @gh: the holder
+ *
+ * If there is only one other recursive holder, clear its HIF_RECURSE bit.
+ * If there is more than one, leave them alone.
+ *
+ */
+
+static void
+do_unrecurse(struct gfs_holder *gh)
+{
+	struct gfs_glock *gl = gh->gh_gl;
+	struct list_head *tmp, *head;
+	struct gfs_holder *tmp_gh, *last_gh = NULL;
+	int found = FALSE;
+
+	GFS_ASSERT_GLOCK(gh->gh_owner, gl,);
+
+	for (head = &gl->gl_waiters2, tmp = head->next;
+	     tmp != head;
+	     tmp = tmp->next) {
+		tmp_gh = list_entry(tmp, struct gfs_holder, gh_list);
+		if (tmp_gh->gh_owner != gh->gh_owner)
+			continue;
+
+		GFS_ASSERT_GLOCK(test_bit(HIF_RECURSE, &tmp_gh->gh_iflags),
+				 gl,);
+
+		if (found)
+			return;
+
+		found = TRUE;
+		last_gh = tmp_gh;
+	}
+
+	GFS_ASSERT_GLOCK(found, gl,);
+	clear_bit(HIF_RECURSE, &last_gh->gh_iflags);
+}
+
+/**
+ * rq_mutex - process a mutex request in the queue
+ * @gh: the glock holder
+ *
+ * Returns: TRUE if the queue is blocked, FALSE otherwise
+ */
+
+static int
+rq_mutex(struct gfs_holder *gh)
+{
+	struct gfs_glock *gl = gh->gh_gl;
+
+	list_del_init(&gh->gh_list);
+	/* gh->gh_error never examined. */
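+	/* Mutex requests are granted unconditionally, so there is no
+	   error for the waiter to examine. */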
+	set_bit(GLF_LOCK, &gl->gl_flags);
+	complete(&gh->gh_wait);
+
+	return TRUE;
+}
+
+/**
+ * rq_promote - process a promote request in the queue
+ * @gh: the glock holder
+ * @promote_ok: It's ok to ask the LM to do promotes on a sync lock module
+ *
+ * Returns: TRUE if the queue is blocked, FALSE otherwise
+ */
+
+static int
+rq_promote(struct gfs_holder *gh, int promote_ok)
+{
+	struct gfs_glock *gl = gh->gh_gl;
+	struct gfs_sbd *sdp = gl->gl_sbd;
+	struct gfs_glock_operations *glops = gl->gl_ops;
+	int recurse;
+
+	if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
+		if (list_empty(&gl->gl_holders)) {
+			if (promote_ok || GFS_ASYNC_LM(sdp)) {
+				gl->gl_req_gh = gh;
+				set_bit(GLF_LOCK, &gl->gl_flags);
+				spin_unlock(&gl->gl_spin);
+
+				if (atomic_read(&sdp->sd_reclaim_count) >
+				    sdp->sd_tune.gt_reclaim_limit &&
+				    !(gh->gh_flags & LM_FLAG_PRIORITY)) {
+					gfs_reclaim_glock(sdp);
+					gfs_reclaim_glock(sdp);
+				}
+
+				glops->go_xmote_th(gl, gh->gh_state,
+						   gh->gh_flags);
+
+				spin_lock(&gl->gl_spin);
+			} else
+				if (!test_and_set_bit(HIF_WAKEUP, &gh->gh_iflags))
+					complete(&gh->gh_wait);
+		}
+		return TRUE;
+	}
+
+	if (list_empty(&gl->gl_holders)) {
+		set_bit(HIF_FIRST, &gh->gh_iflags);
+		set_bit(GLF_LOCK, &gl->gl_flags);
+		recurse = FALSE;
+	} else {
+		struct gfs_holder *next_gh;
+		if (gh->gh_flags & GL_LOCAL_EXCL)
+			return TRUE;
+		next_gh = list_entry(gl->gl_holders.next, struct gfs_holder, gh_list);
+		if (next_gh->gh_flags & GL_LOCAL_EXCL)
+			return TRUE;
+		recurse = test_bit(HIF_RECURSE, &gh->gh_iflags);
+	}
+
+	list_move_tail(&gh->gh_list, &gl->gl_holders);
+	gh->gh_error = 0;
+	set_bit(HIF_HOLDER, &gh->gh_iflags);
+
+	if (recurse)
+		handle_recurse(gh);
+
+	complete(&gh->gh_wait);
+
+	return FALSE;
+}
+
+/**
+ * rq_demote - process a demote request in the queue
+ * @gh: the glock holder
+ *
+ * Returns: TRUE if the queue is blocked, FALSE otherwise
+ */
+
+static int
+rq_demote(struct gfs_holder *gh)
+{
+	struct gfs_glock *gl = gh->gh_gl;
+	struct gfs_glock_operations *glops = gl->gl_ops;
+
+	if (!list_empty(&gl->gl_holders))
+		return TRUE;
+
+	if (gl->gl_state == gh->gh_state || gl->gl_state == LM_ST_UNLOCKED) {
+		list_del_init(&gh->gh_list);
+		gh->gh_error = 0;
+		spin_unlock(&gl->gl_spin);
+		if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
+			gfs_holder_put(gh);
+		else
+			complete(&gh->gh_wait);
+		spin_lock(&gl->gl_spin);
+	} else {
+		gl->gl_req_gh = gh;
+		set_bit(GLF_LOCK, &gl->gl_flags);
+		spin_unlock(&gl->gl_spin);
+
+		if (gh->gh_state == LM_ST_UNLOCKED ||
+		    gl->gl_state != LM_ST_EXCLUSIVE)
+			glops->go_drop_th(gl);
+		else
+			glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags);
+
+		spin_lock(&gl->gl_spin);
+	}
+
+	return FALSE;
+}
+
+/**
+ * run_queue - process holder structures on a glock
+ * @gl: the glock
+ * @promote_ok: It's ok to ask the LM to do promotes on a sync lock module
+ *
+ */
+
+static void
+run_queue(struct gfs_glock *gl, int promote_ok)
+{
+	struct gfs_holder *gh;
+	int blocked;
+
+	for (;;) {
+		if (test_bit(GLF_LOCK, &gl->gl_flags))
+			break;
+
+		if (!list_empty(&gl->gl_waiters1)) {
+			gh = list_entry(gl->gl_waiters1.next,
+					struct gfs_holder, gh_list);
+
+			if (test_bit(HIF_MUTEX, &gh->gh_iflags))
+				blocked = rq_mutex(gh);
+			else
+				GFS_ASSERT_GLOCK(FALSE, gl,);
+
+		} else if (!list_empty(&gl->gl_waiters2)) {
+			gh = list_entry(gl->gl_waiters2.next,
+					struct gfs_holder, gh_list);
+
+			if (test_bit(HIF_PROMOTE, &gh->gh_iflags))
+				blocked = rq_promote(gh, promote_ok);
+			else if (test_bit(HIF_DEMOTE, &gh->gh_iflags))
+				blocked = rq_demote(gh);
+			else
+				GFS_ASSERT_GLOCK(FALSE, gl,);
+
+		} else
+			break;
+
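+		/* A blocked request at the head of the queue stalls
+		   all requests queued behind it. */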
+		if (blocked)
+			break;
+	}
+}
+
+/**
+ * lock_on_glock - acquire a local lock on a glock
+ * @gl: the glock
+ *
+ */
+
+static void
+lock_on_glock(struct gfs_glock *gl)
+{
+	struct gfs_holder gh;
+
+	gfs_holder_init(gl, 0, 0, &gh);
+	set_bit(HIF_MUTEX, &gh.gh_iflags);
+
+	spin_lock(&gl->gl_spin);
+	if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
+		list_add_tail(&gh.gh_list, &gl->gl_waiters1);
+	else
+		complete(&gh.gh_wait);
+	spin_unlock(&gl->gl_spin);
+
+	wait_for_completion(&gh.gh_wait);
+	gfs_holder_uninit(&gh);
+}
+
+/**
+ * trylock_on_glock - try to acquire a local lock on a glock
+ * @gl: the glock
+ *
+ * Returns: TRUE if the glock is acquired
+ */
+
+static int
+trylock_on_glock(struct gfs_glock *gl)
+{
+	int acquired = TRUE;
+
+	spin_lock(&gl->gl_spin);
+	if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
+		acquired = FALSE;
+	spin_unlock(&gl->gl_spin);
+
+	return acquired;
+}
+
+/**
+ * unlock_on_glock - release a local lock on a glock
+ * @gl: the glock
+ *
+ */
+
+static void
+unlock_on_glock(struct gfs_glock *gl)
+{
+	spin_lock(&gl->gl_spin);
+	clear_bit(GLF_LOCK, &gl->gl_flags);
+	run_queue(gl, FALSE);
+	spin_unlock(&gl->gl_spin);
+}
+
+/**
+ * handle_callback - add a demote request to a lock's queue
+ * @gl: the glock
+ * @state: the state the callback is asking us to change to
+ *
+ */
+
+static void
+handle_callback(struct gfs_glock *gl, unsigned int state)
+{
+	struct list_head *tmp, *head;
+	struct gfs_holder *gh, *new_gh = NULL;
+
+	GFS_ASSERT_GLOCK(state != LM_ST_EXCLUSIVE, gl,);
+
+ restart:
+	spin_lock(&gl->gl_spin);
+
+	for (head = &gl->gl_waiters2, tmp = head->next;
+	     tmp != head;
+	     tmp = tmp->next) {
+		gh = list_entry(tmp, struct gfs_holder, gh_list);
+		if (test_bit(HIF_DEMOTE, &gh->gh_iflags) &&
+		    gl->gl_req_gh != gh) {
+			if (gh->gh_state != state)
+				gh->gh_state = LM_ST_UNLOCKED;
+			goto out;
+		}
+	}
+
+	if (new_gh) {
+		list_add(&new_gh->gh_list, &gl->gl_waiters2);
+		new_gh = NULL;
+	} else {
+		spin_unlock(&gl->gl_spin);
+
+		new_gh = gfs_holder_get(gl, state, LM_FLAG_TRY);
+		set_bit(HIF_DEMOTE, &new_gh->gh_iflags);
+		set_bit(HIF_DEALLOC, &new_gh->gh_iflags);
+		new_gh->gh_owner = NULL;
+
+		goto restart;
+	}
+
+ out:
+	spin_unlock(&gl->gl_spin);
+
+	if (new_gh)
+		gfs_holder_put(new_gh);
+}
+
+/**
+ * state_change - record that the glock is now in a different state
+ * @gl: the glock
+ * @new_state: the new state
+ *
+ */
+
+static void
+state_change(struct gfs_glock *gl, unsigned int new_state)
+{
+	struct gfs_sbd *sdp = gl->gl_sbd;
+	int held1, held2;
+
+	held1 = (gl->gl_state != LM_ST_UNLOCKED);
+	held2 = (new_state != LM_ST_UNLOCKED);
+
+	if (held1 != held2) {
+		if (held2) {
+			atomic_inc(&sdp->sd_glock_held_count);
+			glock_hold(gl);
+		} else {
+			atomic_dec(&sdp->sd_glock_held_count);
+			glock_put(gl);
+		}
+	}
+
+	gl->gl_state = new_state;
+}
+
+/**
+ * xmote_bh - Called after the lock module is done acquiring a lock
+ * @gl: The glock in question
+ * @ret: the int returned from the lock module
+ *
+ */
+
+static void
+xmote_bh(struct gfs_glock *gl, unsigned int ret)
+{
+	struct gfs_glock_operations *glops = gl->gl_ops;
+	struct gfs_holder *gh = gl->gl_req_gh;
+	int prev_state = gl->gl_state;
+	int op_done = TRUE;
+
+	GFS_ASSERT_GLOCK(test_bit(GLF_LOCK, &gl->gl_flags), gl,);
+	GFS_ASSERT_GLOCK(queue_empty(gl, &gl->gl_holders), gl,);
+	GFS_ASSERT_GLOCK(!(ret & LM_OUT_ASYNC), gl,);
+
+	state_change(gl, ret & LM_OUT_ST_MASK);
+
+	if (ret & LM_OUT_NEED_E)
+		handle_callback(gl, LM_ST_UNLOCKED);
+	else if (ret & LM_OUT_NEED_D)
+		handle_callback(gl, LM_ST_DEFERRED);
+	else if (ret & LM_OUT_NEED_S)
+		handle_callback(gl, LM_ST_SHARED);
+
+	if (ret & LM_OUT_LVB_INVALID)
+		set_bit(GLF_LVB_INVALID, &gl->gl_flags);
+
+	if (prev_state != LM_ST_UNLOCKED && !(ret & LM_OUT_CACHEABLE)) {
+		if (glops->go_inval)
+			glops->go_inval(gl, DIO_METADATA | DIO_DATA);
+	} else if (gl->gl_state == LM_ST_DEFERRED) {
+		/* We might not want to do this here.
+		   Look at moving to the inode glops. */
+		if (glops->go_inval)
+			glops->go_inval(gl, DIO_DATA);
+	}
+
+	/* Deal with each possible exit condition */
+
+	if (!gh)
+		gl->gl_stamp = jiffies;
+
+	else if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) {
+		spin_lock(&gl->gl_spin);
+		list_del_init(&gh->gh_list);
+		if (gl->gl_state == gh->gh_state ||
+		    gl->gl_state == LM_ST_UNLOCKED)
+			gh->gh_error = 0;
+		else
+			gh->gh_error = GLR_TRYFAILED;
+		spin_unlock(&gl->gl_spin);
+
+		if (ret & LM_OUT_CANCELED)
+			handle_callback(gl, LM_ST_UNLOCKED); /* Lame */
+
+	} else if (ret & LM_OUT_CANCELED) {
+		spin_lock(&gl->gl_spin);
+		list_del_init(&gh->gh_list);
+		gh->gh_error = GLR_CANCELED;
+		if (test_bit(HIF_RECURSE, &gh->gh_iflags))
+			do_unrecurse(gh);
+		spin_unlock(&gl->gl_spin);
+
+	} else if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
+		spin_lock(&gl->gl_spin);
+		list_move_tail(&gh->gh_list, &gl->gl_holders);
+		gh->gh_error = 0;
+		set_bit(HIF_HOLDER, &gh->gh_iflags);
+		spin_unlock(&gl->gl_spin);
+
+		set_bit(HIF_FIRST, &gh->gh_iflags);
+
+		op_done = FALSE;
+
+	} else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
+		spin_lock(&gl->gl_spin);
+		list_del_init(&gh->gh_list);
+		gh->gh_error = GLR_TRYFAILED;
+		if (test_bit(HIF_RECURSE, &gh->gh_iflags))
+			do_unrecurse(gh);
+		spin_unlock(&gl->gl_spin);
+
+	} else
+		GFS_ASSERT_GLOCK(FALSE, gl,);
+
+	if (glops->go_xmote_bh)
+		glops->go_xmote_bh(gl);
+
+	if (op_done) {
+		spin_lock(&gl->gl_spin);
+		gl->gl_req_gh = NULL;
+		gl->gl_req_bh = NULL;
+		clear_bit(GLF_LOCK, &gl->gl_flags);
+		run_queue(gl, FALSE);
+		spin_unlock(&gl->gl_spin);
+	}
+
+	glock_put(gl);
+
+	if (gh) {
+		if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
+			gfs_holder_put(gh);
+		else
+			complete(&gh->gh_wait);
+	}
+}
+
+/**
+ * gfs_glock_xmote_th - Call into the lock module to acquire a glock
+ * @gl: The glock in question
+ * @state: the requested state
+ * @flags: modifier flags to the lock call
+ *
+ */
+
+void
+gfs_glock_xmote_th(struct gfs_glock *gl, unsigned int state, int flags)
+{
+	struct gfs_sbd *sdp = gl->gl_sbd;
+	struct gfs_glock_operations *glops = gl->gl_ops;
+	int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB |
+				 LM_FLAG_NOEXP | LM_FLAG_ANY |
+				 LM_FLAG_PRIORITY);
+	unsigned int lck_ret;
+
+	GFS_ASSERT_GLOCK(test_bit(GLF_LOCK, &gl->gl_flags), gl,);
+	GFS_ASSERT_GLOCK(queue_empty(gl, &gl->gl_holders), gl,);
+	GFS_ASSERT_GLOCK(state != LM_ST_UNLOCKED, gl,);
+	GFS_ASSERT_GLOCK(state != gl->gl_state, gl,);
+
+	if (gl->gl_state == LM_ST_EXCLUSIVE) {
+		if (glops->go_sync)
+			glops->go_sync(gl, DIO_METADATA | DIO_DATA);
+	}
+
+	glock_hold(gl);
+	gl->gl_req_bh = xmote_bh;
+
+	atomic_inc(&sdp->sd_lm_lock_calls);
+
+	lck_ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl->gl_lock,
+						     gl->gl_state,
+						     state, lck_flags);
+
+	if (lck_ret & LM_OUT_ASYNC)
+		GFS_ASSERT_GLOCK(lck_ret == LM_OUT_ASYNC, gl,);
+	else
+		xmote_bh(gl, lck_ret);
+}
+
+/**
+ * drop_bh - Called after a lock module unlock completes
+ * @gl: the glock
+ * @ret: the return status
+ *
+ * Wakes up the process waiting on the struct gfs_holder (if any) and
+ * drops the reference on the glock that the top half took out.
+ *
+ */
+
+static void
+drop_bh(struct gfs_glock *gl, unsigned
int ret) +{ + struct gfs_glock_operations *glops = gl->gl_ops; + struct gfs_holder *gh = gl->gl_req_gh; + + clear_bit(GLF_PREFETCH, &gl->gl_flags); + + GFS_ASSERT_GLOCK(test_bit(GLF_LOCK, &gl->gl_flags), gl,); + GFS_ASSERT_GLOCK(queue_empty(gl, &gl->gl_holders), gl,); + GFS_ASSERT_GLOCK(!ret, gl,); + + state_change(gl, LM_ST_UNLOCKED); + + if (glops->go_inval) + glops->go_inval(gl, DIO_METADATA | DIO_DATA); + + if (gh) { + spin_lock(&gl->gl_spin); + list_del_init(&gh->gh_list); + gh->gh_error = 0; + spin_unlock(&gl->gl_spin); + } + + if (glops->go_drop_bh) + glops->go_drop_bh(gl); + + spin_lock(&gl->gl_spin); + gl->gl_req_gh = NULL; + gl->gl_req_bh = NULL; + clear_bit(GLF_LOCK, &gl->gl_flags); + run_queue(gl, FALSE); + spin_unlock(&gl->gl_spin); + + glock_put(gl); + + if (gh) { + if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) + gfs_holder_put(gh); + else + complete(&gh->gh_wait); + } +} + +/** + * gfs_glock_drop_th - call into the lock module to unlock a lock + * @gl: the glock + * + */ + +void +gfs_glock_drop_th(struct gfs_glock *gl) +{ + struct gfs_sbd *sdp = gl->gl_sbd; + struct gfs_glock_operations *glops = gl->gl_ops; + unsigned int ret; + + GFS_ASSERT_GLOCK(test_bit(GLF_LOCK, &gl->gl_flags), gl,); + GFS_ASSERT_GLOCK(queue_empty(gl, &gl->gl_holders), gl,); + GFS_ASSERT_GLOCK(gl->gl_state != LM_ST_UNLOCKED, gl,); + + if (gl->gl_state == LM_ST_EXCLUSIVE) { + if (glops->go_sync) + glops->go_sync(gl, DIO_METADATA | DIO_DATA); + } + + glock_hold(gl); + gl->gl_req_bh = drop_bh; + + atomic_inc(&sdp->sd_lm_unlock_calls); + + ret = sdp->sd_lockstruct.ls_ops->lm_unlock(gl->gl_lock, gl->gl_state); + + if (!ret) + drop_bh(gl, ret); + else + GFS_ASSERT_GLOCK(ret == LM_OUT_ASYNC, gl,); +} + +/** + * handle_cancels - cancel requests for locks stuck waiting on an expire flag + * @gh: the LM_FLAG_NOEXP holder waiting to acquire the lock + * + */ + +static void +handle_cancels(struct gfs_holder *gh) +{ + struct gfs_glock *gl = gh->gh_gl; + + spin_lock(&gl->gl_spin); + + while (gl->gl_req_gh != gh && + !test_bit(HIF_HOLDER, &gh->gh_iflags) && + !test_bit(HIF_WAKEUP, &gh->gh_iflags) && + !list_empty(&gh->gh_list)) { + if (gl->gl_req_bh) { + spin_unlock(&gl->gl_spin); + gl->gl_sbd->sd_lockstruct.ls_ops->lm_cancel(gl->gl_lock); + yield(); + spin_lock(&gl->gl_spin); + } else { + spin_unlock(&gl->gl_spin); + yield(); + spin_lock(&gl->gl_spin); + } + } + + spin_unlock(&gl->gl_spin); +} + +/** + * glock_wait_internal - wait on a glock acquisition + * @gh: the glock holder + * + * Returns: 0 on success + */ + +static int +glock_wait_internal(struct gfs_holder *gh) +{ + struct gfs_glock *gl = gh->gh_gl; + struct gfs_glock_operations *glops = gl->gl_ops; + int error = 0; + + if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) { + spin_lock(&gl->gl_spin); + if (gl->gl_req_gh != gh && + !test_bit(HIF_HOLDER, &gh->gh_iflags) && + !test_bit(HIF_WAKEUP, &gh->gh_iflags) && + !list_empty(&gh->gh_list)) { + list_del_init(&gh->gh_list); + gh->gh_error = GLR_TRYFAILED; + if (test_bit(HIF_RECURSE, &gh->gh_iflags)) + do_unrecurse(gh); + run_queue(gl, FALSE); + spin_unlock(&gl->gl_spin); + return GLR_TRYFAILED; + } + spin_unlock(&gl->gl_spin); + } + + if (gh->gh_flags & LM_FLAG_NOEXP) + handle_cancels(gh); + + for (;;) { + wait_for_completion(&gh->gh_wait); + + spin_lock(&gl->gl_spin); + if (test_and_clear_bit(HIF_WAKEUP, &gh->gh_iflags)) { + run_queue(gl, TRUE); + spin_unlock(&gl->gl_spin); + } else { + spin_unlock(&gl->gl_spin); + break; + } + } + + if (gh->gh_error) + return gh->gh_error; + + 
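+	/* The request was granted; we must now be on the holders list
+	   in a state compatible with the one we asked for. */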
GFS_ASSERT_GLOCK(test_bit(HIF_HOLDER, &gh->gh_iflags), gl,); + GFS_ASSERT_GLOCK(relaxed_state_ok(gl->gl_state, gh->gh_state, + gh->gh_flags), gl,); + + if (test_bit(HIF_FIRST, &gh->gh_iflags)) { + GFS_ASSERT_GLOCK(test_bit(GLF_LOCK, &gl->gl_flags), gl,); + + if (glops->go_lock) { + error = glops->go_lock(gl, gh->gh_flags); + if (error) { + spin_lock(&gl->gl_spin); + list_del_init(&gh->gh_list); + gh->gh_error = error; + if (test_and_clear_bit(HIF_RECURSE, &gh->gh_iflags)) + do_unrecurse(gh); + spin_unlock(&gl->gl_spin); + } + } + + spin_lock(&gl->gl_spin); + gl->gl_req_gh = NULL; + gl->gl_req_bh = NULL; + clear_bit(GLF_LOCK, &gl->gl_flags); + if (test_bit(HIF_RECURSE, &gh->gh_iflags)) + handle_recurse(gh); + run_queue(gl, FALSE); + spin_unlock(&gl->gl_spin); + } + + return error; +} + +/** + * add_to_queue - Add a holder to the wait queue (but look for recursion) + * @gh: the holder structure + * + */ + +static void +add_to_queue(struct gfs_holder *gh) +{ + struct gfs_glock *gl = gh->gh_gl; + struct list_head *tmp, *head; + struct gfs_holder *tmp_gh; + + if (gh->gh_owner) { + for (head = &gl->gl_holders, tmp = head->next; + tmp != head; + tmp = tmp->next) { + tmp_gh = list_entry(tmp, struct gfs_holder, gh_list); + if (tmp_gh->gh_owner == gh->gh_owner) { + GFS_ASSERT_GLOCK((gh->gh_flags & LM_FLAG_ANY) || + !(tmp_gh->gh_flags & LM_FLAG_ANY), + gl,); + GFS_ASSERT_GLOCK((tmp_gh->gh_flags & GL_LOCAL_EXCL) || + !(gh->gh_flags & GL_LOCAL_EXCL), + gl,); + GFS_ASSERT_GLOCK(relaxed_state_ok(gl->gl_state, + gh->gh_state, + gh->gh_flags), + gl,); + + list_add_tail(&gh->gh_list, &gl->gl_holders); + set_bit(HIF_HOLDER, &gh->gh_iflags); + + gh->gh_error = 0; + complete(&gh->gh_wait); + + return; + } + } + + for (head = &gl->gl_waiters2, tmp = head->next; + tmp != head; + tmp = tmp->next) { + tmp_gh = list_entry(tmp, struct gfs_holder, gh_list); + if (tmp_gh->gh_owner == gh->gh_owner) { + GFS_ASSERT_GLOCK(test_bit(HIF_PROMOTE, + &tmp_gh->gh_iflags), + gl,); + GFS_ASSERT_GLOCK((gh->gh_flags & LM_FLAG_ANY) || + !(tmp_gh->gh_flags & LM_FLAG_ANY), + gl,); + GFS_ASSERT_GLOCK((tmp_gh->gh_flags & GL_LOCAL_EXCL) || + !(gh->gh_flags & GL_LOCAL_EXCL), + gl,); + GFS_ASSERT_GLOCK(relaxed_state_ok(tmp_gh->gh_state, + gh->gh_state, + gh->gh_flags), + gl,); + + set_bit(HIF_RECURSE, &gh->gh_iflags); + set_bit(HIF_RECURSE, &tmp_gh->gh_iflags); + + list_add_tail(&gh->gh_list, &gl->gl_waiters2); + + return; + } + } + } + + if (gh->gh_flags & LM_FLAG_PRIORITY) + list_add(&gh->gh_list, &gl->gl_waiters2); + else + list_add_tail(&gh->gh_list, &gl->gl_waiters2); +} + +/** + * gfs_glock_nq - enqueue a struct gfs_holder onto a glock (acquire a glock) + * @gh: the holder structure + * + * if (gh->gh_flags & GL_ASYNC), this never returns an error + * + * Returns: 0, GLR_TRYFAILED, or -EXXX on failure + */ + +int +gfs_glock_nq(struct gfs_holder *gh) +{ + struct gfs_glock *gl = gh->gh_gl; + struct gfs_sbd *sdp = gl->gl_sbd; + int error = 0; + + GFS_ASSERT_GLOCK(list_empty(&gh->gh_list), gl,); + GFS_ASSERT_GLOCK(gh->gh_state != LM_ST_UNLOCKED, gl,); + GFS_ASSERT_GLOCK((gh->gh_flags & (LM_FLAG_ANY | GL_EXACT)) != + (LM_FLAG_ANY | GL_EXACT), gl,); + GFS_ASSERT_GLOCK(GFS_ASYNC_LM(sdp) || + !(gh->gh_flags & GL_ASYNC), gl,); + + atomic_inc(&sdp->sd_glock_nq_calls); + + restart: + set_bit(HIF_PROMOTE, &gh->gh_iflags); + + spin_lock(&gl->gl_spin); + add_to_queue(gh); + run_queue(gl, TRUE); + spin_unlock(&gl->gl_spin); + + if (!(gh->gh_flags & GL_ASYNC)) { + error = glock_wait_internal(gh); + if (error == GLR_CANCELED) { + current->state = 
TASK_UNINTERRUPTIBLE; + schedule_timeout(HZ); + goto restart; + } + } + + clear_bit(GLF_PREFETCH, &gl->gl_flags); + + return error; +} + +/** + * gfs_glock_poll - poll to see if an async request has been completed + * @gh: the holder + * + * Returns: TRUE if the request is ready to be gfs_glock_wait()ed on + */ + +int +gfs_glock_poll(struct gfs_holder *gh) +{ + struct gfs_glock *gl = gh->gh_gl; + int ready = FALSE; + + GFS_ASSERT_GLOCK(gh->gh_flags & GL_ASYNC, gl,); + GFS_ASSERT_GLOCK(!test_bit(HIF_WAKEUP, &gh->gh_iflags), gl,); + + spin_lock(&gl->gl_spin); + + if (test_bit(HIF_HOLDER, &gh->gh_iflags)) + ready = TRUE; + else if (list_empty(&gh->gh_list)) { + if (gh->gh_error == GLR_CANCELED) { + spin_unlock(&gl->gl_spin); + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(HZ); + gfs_glock_nq(gh); + return FALSE; + } else + ready = TRUE; + } + + spin_unlock(&gl->gl_spin); + + return ready; +} + +/** + * gfs_glock_wait - wait for a lock acquisition that ended in a GLR_ASYNC + * @gh: the holder structure + * + * Returns: 0, GLR_TRYFAILED, or -EXXX on failure + */ + +int +gfs_glock_wait(struct gfs_holder *gh) +{ + struct gfs_glock *gl = gh->gh_gl; + int error; + + GFS_ASSERT_GLOCK(gh->gh_flags & GL_ASYNC, gl,); + GFS_ASSERT_GLOCK(!test_bit(HIF_WAKEUP, &gh->gh_iflags), gl,); + + error = glock_wait_internal(gh); + if (error == GLR_CANCELED) { + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(HZ); + gh->gh_flags &= ~GL_ASYNC; + error = gfs_glock_nq(gh); + } + + return error; +} + +/** + * gfs_glock_dq - dequeue a struct gfs_holder from a glock (release a glock) + * @gh: the glock holder + * + */ + +void +gfs_glock_dq(struct gfs_holder *gh) +{ + struct gfs_glock *gl = gh->gh_gl; + struct gfs_glock_operations *glops = gl->gl_ops; + + GFS_ASSERT_GLOCK(!queue_empty(gl, &gh->gh_list), gl,); + GFS_ASSERT_GLOCK(test_bit(HIF_HOLDER, &gh->gh_iflags), gl,); + + atomic_inc(&gl->gl_sbd->sd_glock_dq_calls); + + if (gh->gh_flags & GL_SYNC) + set_bit(GLF_SYNC, &gl->gl_flags); + if (gh->gh_flags & GL_NOCACHE) + handle_callback(gl, LM_ST_UNLOCKED); + + lock_on_glock(gl); + + spin_lock(&gl->gl_spin); + list_del_init(&gh->gh_list); + if (list_empty(&gl->gl_holders)) { + spin_unlock(&gl->gl_spin); + + if (glops->go_unlock) + glops->go_unlock(gl, gh->gh_flags); + + if (test_bit(GLF_SYNC, &gl->gl_flags)) { + if (glops->go_sync) + glops->go_sync(gl, + DIO_METADATA | + DIO_DATA | + DIO_INVISIBLE); + } + + gl->gl_stamp = jiffies; + + spin_lock(&gl->gl_spin); + } + + clear_bit(GLF_LOCK, &gl->gl_flags); + run_queue(gl, FALSE); + spin_unlock(&gl->gl_spin); +} + +/** + * gfs_glock_prefetch - Try to prefetch a glock + * @gl: the glock + * @state: the state to prefetch in + * @flags: flags passed to go_xmote_th() + * + */ + +void +gfs_glock_prefetch(struct gfs_glock *gl, unsigned int state, int flags) +{ + struct gfs_glock_operations *glops = gl->gl_ops; + + GFS_ASSERT_GLOCK(atomic_read(&gl->gl_count) > 0, gl,); + GFS_ASSERT_GLOCK(state != LM_ST_UNLOCKED, gl,); + GFS_ASSERT_GLOCK((flags & (LM_FLAG_ANY | GL_EXACT)) != + (LM_FLAG_ANY | GL_EXACT), gl,); + + spin_lock(&gl->gl_spin); + + if (test_bit(GLF_LOCK, &gl->gl_flags) || + !list_empty(&gl->gl_holders) || + !list_empty(&gl->gl_waiters1) || + !list_empty(&gl->gl_waiters2) || + relaxed_state_ok(gl->gl_state, state, flags)) { + spin_unlock(&gl->gl_spin); + return; + } + + set_bit(GLF_PREFETCH, &gl->gl_flags); + + GFS_ASSERT_GLOCK(!gl->gl_req_gh, gl,); + set_bit(GLF_LOCK, &gl->gl_flags); + spin_unlock(&gl->gl_spin); + + glops->go_xmote_th(gl, state, flags); + + 
atomic_inc(&gl->gl_sbd->sd_glock_prefetch_calls);
+}
+
+/**
+ * gfs_glock_force_drop - Force a glock to be uncached
+ * @gl: the glock
+ *
+ */
+
+void
+gfs_glock_force_drop(struct gfs_glock *gl)
+{
+	struct gfs_holder gh;
+
+	gfs_holder_init(gl, LM_ST_UNLOCKED, 0, &gh);
+	set_bit(HIF_DEMOTE, &gh.gh_iflags);
+	gh.gh_owner = NULL;
+
+	spin_lock(&gl->gl_spin);
+	list_add(&gh.gh_list, &gl->gl_waiters2);
+	run_queue(gl, FALSE);
+	spin_unlock(&gl->gl_spin);
+
+	wait_for_completion(&gh.gh_wait);
+	gfs_holder_uninit(&gh);
+}
+
+/**
+ * gfs_glock_nq_init - initialize a holder and enqueue it on a glock
+ * @gl: the glock
+ * @state: the state we're requesting
+ * @flags: the modifier flags
+ * @gh: the holder structure
+ *
+ * Returns: 0, GLR_*, or -EXXX
+ */
+
+int
+gfs_glock_nq_init(struct gfs_glock *gl, unsigned int state, int flags,
+		  struct gfs_holder *gh)
+{
+	int error;
+
+	gfs_holder_init(gl, state, flags, gh);
+
+	error = gfs_glock_nq(gh);
+	if (error)
+		gfs_holder_uninit(gh);
+
+	return error;
+}
+
+/**
+ * gfs_glock_dq_uninit - dequeue a holder from a glock and uninitialize it
+ * @gh: the holder structure
+ *
+ */
+
+void
+gfs_glock_dq_uninit(struct gfs_holder *gh)
+{
+	gfs_glock_dq(gh);
+	gfs_holder_uninit(gh);
+}
+
+/**
+ * gfs_glock_nq_num - acquire a glock based on lock number
+ * @sdp: the filesystem
+ * @number: the lock number
+ * @glops: the glock operations for the type of glock
+ * @state: the state to acquire the glock in
+ * @flags: modifier flags for the acquisition
+ * @gh: the struct gfs_holder
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+int
+gfs_glock_nq_num(struct gfs_sbd *sdp,
+		 uint64_t number, struct gfs_glock_operations *glops,
+		 unsigned int state, int flags, struct gfs_holder *gh)
+{
+	struct gfs_glock *gl;
+	int error;
+
+	error = gfs_glock_get(sdp, number, glops, CREATE, &gl);
+	if (!error) {
+		error = gfs_glock_nq_init(gl, state, flags, gh);
+		glock_put(gl);
+	}
+
+	return error;
+}
+
+/**
+ * glock_compare - Compare two struct gfs_glock structures for sorting
+ * @arg_a: the first structure
+ * @arg_b: the second structure
+ *
+ * Returns: 1 if @arg_a sorts after @arg_b, -1 if it sorts before, 0 otherwise
+ */
+
+static int
+glock_compare(const void *arg_a, const void *arg_b)
+{
+	struct gfs_holder *gh_a = *(struct gfs_holder **)arg_a;
+	struct gfs_holder *gh_b = *(struct gfs_holder **)arg_b;
+	struct lm_lockname *a = &gh_a->gh_gl->gl_name;
+	struct lm_lockname *b = &gh_b->gh_gl->gl_name;
+	int ret = 0;
+
+	if (a->ln_number > b->ln_number)
+		ret = 1;
+	else if (a->ln_number < b->ln_number)
+		ret = -1;
+	else {
+		if (gh_a->gh_state == LM_ST_SHARED &&
+		    gh_b->gh_state == LM_ST_EXCLUSIVE)
+			ret = 1;
+		else if (!(gh_a->gh_flags & GL_LOCAL_EXCL) &&
+			 (gh_b->gh_flags & GL_LOCAL_EXCL))
+			ret = 1;
+	}
+
+	return ret;
+}
+
+/**
+ * nq_m_sync - synchronously acquire more than one glock in deadlock-free order
+ * @num_gh: the number of structures
+ * @ghs: an array of struct gfs_holder structures
+ *
+ * Returns: 0 on success (all glocks acquired), -EXXX on failure (no glocks acquired)
+ */
+
+static int
+nq_m_sync(unsigned int num_gh, struct gfs_holder *ghs)
+{
+	struct gfs_holder *p[num_gh];
+	unsigned int x;
+	int error = 0;
+
+	for (x = 0; x < num_gh; x++)
+		p[x] = &ghs[x];
+
+	gfs_sort(p, num_gh, sizeof(struct gfs_holder *), glock_compare);
+
+	for (x = 0; x < num_gh; x++) {
+		p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
+
+		error = gfs_glock_nq(p[x]);
+		if (error) {
+			while (x--)
+				gfs_glock_dq(p[x]);
+			break;
+		}
+	}
+
+	return error;
+}
+
+/**
+ * gfs_glock_nq_m - acquire multiple glocks
+ * @num_gh: the number of structures
+ * @ghs: an array of struct gfs_holder structures
+ *
+ * Figure out how big an impact this function has.  Either:
+ * 1) Replace this code with code that calls gfs_glock_prefetch()
+ * 2) Forget async stuff and just call nq_m_sync()
+ * 3) Leave it like it is
+ *
+ * Returns: 0 on success (all glocks acquired), -EXXX on failure (no glocks acquired)
+ */
+
+int
+gfs_glock_nq_m(unsigned int num_gh, struct gfs_holder *ghs)
+{
+	int e[num_gh];
+	unsigned int x;
+	int borked = FALSE, serious = 0;
+	int error = 0;
+
+	GFS_ASSERT(num_gh,);
+
+	if (num_gh == 1) {
+		ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
+		error = gfs_glock_nq(ghs);
+		return error;
+	}
+
+	if (!GFS_ASYNC_LM(ghs->gh_gl->gl_sbd)) {
+		error = nq_m_sync(num_gh, ghs);
+		return error;
+	}
+
+	for (x = 0; x < num_gh; x++) {
+		ghs[x].gh_flags |= LM_FLAG_TRY | GL_ASYNC;
+		gfs_glock_nq(&ghs[x]);
+	}
+
+	for (x = 0; x < num_gh; x++) {
+		error = e[x] = glock_wait_internal(&ghs[x]);
+		if (error) {
+			borked = TRUE;
+			if (error != GLR_TRYFAILED && error != GLR_CANCELED)
+				serious = error;
+		}
+	}
+
+	if (!borked)
+		return 0;
+
+	for (x = 0; x < num_gh; x++)
+		if (!e[x])
+			gfs_glock_dq(&ghs[x]);
+
+	if (serious)
+		error = serious;
+	else {
+		for (x = 0; x < num_gh; x++)
+			gfs_holder_reinit(ghs[x].gh_state, ghs[x].gh_flags,
+					  &ghs[x]);
+		error = nq_m_sync(num_gh, ghs);
+	}
+
+	return error;
+}
+
+/**
+ * gfs_glock_dq_m - release multiple glocks
+ * @num_gh: the number of structures
+ * @ghs: an array of struct gfs_holder structures
+ *
+ */
+
+void
+gfs_glock_dq_m(unsigned int num_gh, struct gfs_holder *ghs)
+{
+	unsigned int x;
+
+	for (x = 0; x < num_gh; x++)
+		gfs_glock_dq(&ghs[x]);
+}
+
+/**
+ * gfs_glock_prefetch_num - prefetch a glock based on lock number
+ * @sdp: the filesystem
+ * @number: the lock number
+ * @glops: the glock operations for the type of glock
+ * @state: the state to acquire the glock in
+ * @flags: modifier flags for the acquisition
+ *
+ */
+
+void
+gfs_glock_prefetch_num(struct gfs_sbd *sdp,
+		       uint64_t number, struct gfs_glock_operations *glops,
+		       unsigned int state, int flags)
+{
+	struct gfs_glock *gl;
+	int error;
+
+	if (atomic_read(&sdp->sd_reclaim_count) < sdp->sd_tune.gt_reclaim_limit) {
+		error = gfs_glock_get(sdp, number, glops, CREATE, &gl);
+		if (!error) {
+			gfs_glock_prefetch(gl, state, flags);
+			glock_put(gl);
+		}
+	}
+}
+
+/**
+ * gfs_lvb_hold - attach a LVB to a glock
+ * @gl: The glock in question
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+int
+gfs_lvb_hold(struct gfs_glock *gl)
+{
+	int error = 0;
+
+	GFS_ASSERT_GLOCK(atomic_read(&gl->gl_count) > 0, gl,);
+
+	lock_on_glock(gl);
+
+	atomic_inc(&gl->gl_lvb_count);
+	if (atomic_read(&gl->gl_lvb_count) == 1) {
+		glock_hold(gl);
+		GFS_ASSERT_GLOCK(!gl->gl_lvb, gl,);
+		error = gl->gl_sbd->sd_lockstruct.ls_ops->lm_hold_lvb(gl->gl_lock,
+								      &gl->gl_lvb);
+		if (error) {
+			glock_put(gl);
+			atomic_dec(&gl->gl_lvb_count);
+		}
+	}
+
+	unlock_on_glock(gl);
+
+	return error;
+}
+
+/**
+ * gfs_lvb_unhold - detach a LVB from a glock
+ * @gl: The glock in question
+ *
+ */
+
+void
+gfs_lvb_unhold(struct gfs_glock *gl)
+{
+	glock_hold(gl);
+
+	lock_on_glock(gl);
+
+	GFS_ASSERT_GLOCK(atomic_read(&gl->gl_lvb_count), gl,);
+	if (atomic_dec_and_test(&gl->gl_lvb_count)) {
+		GFS_ASSERT_GLOCK(gl->gl_lvb, gl,);
+		gl->gl_sbd->sd_lockstruct.ls_ops->lm_unhold_lvb(gl->gl_lock,
+								gl->gl_lvb);
+		gl->gl_lvb = NULL;
+		glock_put(gl);
+	}
+
+	unlock_on_glock(gl);
+
+	glock_put(gl);
+}
+
+/**
+ * gfs_lvb_sync - sync a LVB
+ * @gl: The glock in question
+ *
+ */
+
+void
+gfs_lvb_sync(struct gfs_glock *gl) +{ + GFS_ASSERT_GLOCK(atomic_read(&gl->gl_lvb_count), gl,); + + lock_on_glock(gl); + + GFS_ASSERT_GLOCK(gfs_glock_is_held_excl(gl), gl,); + gl->gl_sbd->sd_lockstruct.ls_ops->lm_sync_lvb(gl->gl_lock, gl->gl_lvb); + + unlock_on_glock(gl); +} + +/** + * gfs_glock_cb - Callback used by locking module + * @fsdata: Pointer to the superblock + * @type: Type of callback + * @data: Type dependent data pointer + * + * Called by the locking module when it wants to tell us something. + * Either we need to drop a lock or another client expired. + */ + +void +gfs_glock_cb(lm_fsdata_t * fsdata, unsigned int type, void *data) +{ + struct gfs_sbd *sdp = (struct gfs_sbd *)fsdata; + struct gfs_glock *gl; + struct lm_lockname *name = NULL; + unsigned int state = 0; + struct lm_async_cb *async; + unsigned int journal; + + atomic_inc(&sdp->sd_lm_callbacks); + + switch (type) { + case LM_CB_NEED_E: + name = (struct lm_lockname *)data; + state = LM_ST_UNLOCKED; + break; + + case LM_CB_NEED_D: + name = (struct lm_lockname *)data; + state = LM_ST_DEFERRED; + break; + + case LM_CB_NEED_S: + name = (struct lm_lockname *)data; + state = LM_ST_SHARED; + break; + + case LM_CB_ASYNC: + async = (struct lm_async_cb *)data; + + gl = gfs_glock_find(sdp, &async->lc_name); + GFS_ASSERT_SBD(gl, sdp,); + GFS_ASSERT_GLOCK(gl->gl_req_bh, gl,); + gl->gl_req_bh(gl, async->lc_ret); + glock_put(gl); + + break; + + case LM_CB_NEED_RECOVERY: + journal = *(unsigned int *)data; + + gfs_add_dirty_j(sdp, journal); + + if (test_bit(SDF_RECOVERD_RUN, &sdp->sd_flags)) + wake_up_process(sdp->sd_recoverd_process); + + break; + + case LM_CB_DROPLOCKS: + gfs_gl_hash_clear(sdp, FALSE); + gfs_quota_scan(sdp); + break; + + default: + GFS_ASSERT_SBD(FALSE, sdp, + printk("type = %u\n", type);); + break; + } + + if (name) { + gl = gfs_glock_find(sdp, name); + if (gl) { + if (gl->gl_ops->go_callback) + gl->gl_ops->go_callback(gl, state); + handle_callback(gl, state); + spin_lock(&gl->gl_spin); + run_queue(gl, FALSE); + spin_unlock(&gl->gl_spin); + glock_put(gl); + } + } +} + +/** + * gfs_try_toss_inode - try to remove a particular inode from GFS' cache + * sdp: the filesystem + * inum: the inode number + * + */ + +void +gfs_try_toss_inode(struct gfs_sbd *sdp, struct gfs_inum *inum) +{ + struct gfs_glock *gl; + struct gfs_inode *ip; + int error; + + error = gfs_glock_get(sdp, + inum->no_formal_ino, &gfs_inode_glops, + NO_CREATE, &gl); + if (error || !gl) + return; + + if (!trylock_on_glock(gl)) + goto out; + + if (!queue_empty(gl, &gl->gl_holders)) + goto out_unlock; + + ip = gl2ip(gl); + if (!ip) + goto out_unlock; + + if (atomic_read(&ip->i_count)) + goto out_unlock; + + gfs_inode_destroy(ip); + + out_unlock: + unlock_on_glock(gl); + + out: + glock_put(gl); +} + +/** + * gfs_iopen_go_callback - Try to kick the inode/vnode associated with an iopen glock from memory + * @io_gl: the iopen glock + * @state: the state into which the glock should be put + * + */ + +void +gfs_iopen_go_callback(struct gfs_glock *io_gl, unsigned int state) +{ + struct gfs_glock *i_gl; + struct gfs_inode *ip; + + if (state != LM_ST_UNLOCKED) + return; + + spin_lock(&io_gl->gl_spin); + i_gl = gl2gl(io_gl); + if (i_gl) { + glock_hold(i_gl); + spin_unlock(&io_gl->gl_spin); + } else { + spin_unlock(&io_gl->gl_spin); + return; + } + + if (trylock_on_glock(i_gl)) { + if (queue_empty(i_gl, &i_gl->gl_holders)) { + ip = gl2ip(i_gl); + if (ip) { + gfs_try_toss_vnode(ip); + unlock_on_glock(i_gl); + gfs_glock_schedule_for_reclaim(i_gl); + goto out; + } + } 
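+		/* The inode glock is still in use or has no inode attached;
+		   just release the local lock. */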
+		unlock_on_glock(i_gl);
+	}
+
+ out:
+	glock_put(i_gl);
+}
+
+/**
+ * demote_ok - check to see if it's ok to unlock a glock
+ * @gl: the glock
+ *
+ * Returns: TRUE if it's ok
+ */
+
+static int
+demote_ok(struct gfs_glock *gl)
+{
+	struct gfs_sbd *sdp = gl->gl_sbd;
+	struct gfs_glock_operations *glops = gl->gl_ops;
+	int demote = TRUE;
+
+	if (test_bit(GLF_STICKY, &gl->gl_flags))
+		demote = FALSE;
+	else if (test_bit(GLF_PREFETCH, &gl->gl_flags))
+		demote = time_after_eq(jiffies,
+				       gl->gl_stamp +
+				       sdp->sd_tune.gt_prefetch_secs * HZ);
+	else if (glops->go_demote_ok)
+		demote = glops->go_demote_ok(gl);
+
+	return demote;
+}
+
+/**
+ * gfs_glock_schedule_for_reclaim - Add a glock to the reclaim list
+ * @gl: the glock
+ *
+ */
+
+void
+gfs_glock_schedule_for_reclaim(struct gfs_glock *gl)
+{
+	struct gfs_sbd *sdp = gl->gl_sbd;
+
+	spin_lock(&sdp->sd_reclaim_lock);
+	if (list_empty(&gl->gl_reclaim)) {
+		glock_hold(gl);
+		list_add(&gl->gl_reclaim, &sdp->sd_reclaim_list);
+		atomic_inc(&sdp->sd_reclaim_count);
+	}
+	spin_unlock(&sdp->sd_reclaim_lock);
+
+	wake_up(&sdp->sd_reclaim_wchan);
+}
+
+/**
+ * gfs_reclaim_glock - process a glock on the reclaim list
+ * @sdp: the filesystem
+ *
+ */
+
+void
+gfs_reclaim_glock(struct gfs_sbd *sdp)
+{
+	struct gfs_glock *gl;
+	struct gfs_gl_hash_bucket *bucket;
+
+	spin_lock(&sdp->sd_reclaim_lock);
+
+	if (list_empty(&sdp->sd_reclaim_list)) {
+		spin_unlock(&sdp->sd_reclaim_lock);
+		return;
+	}
+
+	gl = list_entry(sdp->sd_reclaim_list.next,
+			struct gfs_glock, gl_reclaim);
+	list_del_init(&gl->gl_reclaim);
+
+	spin_unlock(&sdp->sd_reclaim_lock);
+
+	atomic_dec(&sdp->sd_reclaim_count);
+	atomic_inc(&sdp->sd_reclaimed);
+
+	if (trylock_on_glock(gl)) {
+		if (queue_empty(gl, &gl->gl_holders)) {
+			if (gl->gl_ops == &gfs_inode_glops) {
+				struct gfs_inode *ip = gl2ip(gl);
+				if (ip && !atomic_read(&ip->i_count))
+					gfs_inode_destroy(ip);
+			}
+			if (gl->gl_state != LM_ST_UNLOCKED &&
+			    demote_ok(gl))
+				handle_callback(gl, LM_ST_UNLOCKED);
+		}
+		unlock_on_glock(gl);
+	}
+
+	bucket = gl->gl_bucket;
+
+	write_lock(&bucket->hb_lock);
+	if (atomic_read(&gl->gl_count) == 1) {
+		list_del_init(&gl->gl_list);
+		write_unlock(&bucket->hb_lock);
+		glock_free(gl);
+	} else {
+		write_unlock(&bucket->hb_lock);
+		glock_put(gl);
+	}
+}
+
+/**
+ * examine_bucket - Call a function for each glock in a hash bucket
+ * @examiner: the function
+ * @sdp: the filesystem
+ * @bucket: the bucket
+ *
+ * Returns: TRUE if the bucket has entries
+ */
+
+static int
+examine_bucket(glock_examiner examiner,
+	       struct gfs_sbd *sdp, struct gfs_gl_hash_bucket *bucket)
+{
+	struct glock_plug plug;
+	struct list_head *tmp;
+	struct gfs_glock *gl;
+	int entries;
+
+	memset(&plug.gl_flags, 0, sizeof(unsigned long));
+	set_bit(GLF_PLUG, &plug.gl_flags);
+
+	write_lock(&bucket->hb_lock);
+	list_add(&plug.gl_list, &bucket->hb_list);
+	write_unlock(&bucket->hb_lock);
+
+	for (;;) {
+		write_lock(&bucket->hb_lock);
+
+		for (;;) {
+			tmp = plug.gl_list.next;
+			if (tmp == &bucket->hb_list) {
+				list_del(&plug.gl_list);
+				entries = !list_empty(&bucket->hb_list);
+				write_unlock(&bucket->hb_lock);
+				return entries;
+			}
+			gl = list_entry(tmp, struct gfs_glock, gl_list);
+
+			list_move(&plug.gl_list, &gl->gl_list);
+
+			if (test_bit(GLF_PLUG, &gl->gl_flags))
+				continue;
+
+			glock_hold(gl);
+
+			break;
+		}
+
+		write_unlock(&bucket->hb_lock);
+
+		examiner(gl);
+	}
+}
+
+/**
+ * scan_glock - look at a glock and see if we can do stuff to it
+ * @gl: the glock to look at
+ *
+ */
+
+static void
+scan_glock(struct gfs_glock *gl)
+{
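+	/* Called via examine_bucket() with a reference held on the glock;
+	   the reference is dropped at "out" below. */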
+	if (trylock_on_glock(gl)) {
+		if (queue_empty(gl, &gl->gl_holders)) {
+			if (gl->gl_ops == &gfs_inode_glops) {
+				struct gfs_inode *ip = gl2ip(gl);
+				if (ip && !atomic_read(&ip->i_count)) {
+					unlock_on_glock(gl);
+					gfs_glock_schedule_for_reclaim(gl);
+					goto out;
+				}
+			}
+			if (gl->gl_state != LM_ST_UNLOCKED &&
+			    demote_ok(gl)) {
+				unlock_on_glock(gl);
+				gfs_glock_schedule_for_reclaim(gl);
+				goto out;
+			}
+		}
+
+		unlock_on_glock(gl);
+	}
+
+ out:
+	glock_put(gl);
+}
+
+/**
+ * gfs_scand_internal - Look for glocks and inodes to toss from memory
+ * @sdp: the filesystem
+ *
+ */
+
+void
+gfs_scand_internal(struct gfs_sbd *sdp)
+{
+	unsigned int x;
+
+	for (x = 0; x < GFS_GL_HASH_SIZE; x++) {
+		examine_bucket(scan_glock, sdp, &sdp->sd_gl_hash[x]);
+		cond_resched();
+	}
+}
+
+/**
+ * clear_glock - look at a glock and see if we can do stuff to it
+ * @gl: the glock to look at
+ *
+ */
+
+static void
+clear_glock(struct gfs_glock *gl)
+{
+	struct gfs_sbd *sdp = gl->gl_sbd;
+	struct gfs_gl_hash_bucket *bucket = gl->gl_bucket;
+
+	spin_lock(&sdp->sd_reclaim_lock);
+	if (!list_empty(&gl->gl_reclaim)) {
+		list_del_init(&gl->gl_reclaim);
+		atomic_dec(&sdp->sd_reclaim_count);
+		glock_put(gl);
+	}
+	spin_unlock(&sdp->sd_reclaim_lock);
+
+	if (trylock_on_glock(gl)) {
+		if (queue_empty(gl, &gl->gl_holders)) {
+			if (gl->gl_ops == &gfs_inode_glops) {
+				struct gfs_inode *ip = gl2ip(gl);
+				if (ip && !atomic_read(&ip->i_count))
+					gfs_inode_destroy(ip);
+			}
+			if (gl->gl_state != LM_ST_UNLOCKED)
+				handle_callback(gl, LM_ST_UNLOCKED);
+		}
+
+		unlock_on_glock(gl);
+	}
+
+	write_lock(&bucket->hb_lock);
+	if (atomic_read(&gl->gl_count) == 1) {
+		list_del_init(&gl->gl_list);
+		write_unlock(&bucket->hb_lock);
+		glock_free(gl);
+	} else {
+		write_unlock(&bucket->hb_lock);
+		glock_put(gl);
+	}
+}
+
+/**
+ * gfs_gl_hash_clear - Empty out the glock hash table
+ * @sdp: the filesystem
+ * @wait: wait until it's all gone
+ *
+ */
+
+void
+gfs_gl_hash_clear(struct gfs_sbd *sdp, int wait)
+{
+	unsigned long t;
+	unsigned int x;
+	int cont;
+
+	t = jiffies;
+
+	for (;;) {
+		cont = FALSE;
+
+		for (x = 0; x < GFS_GL_HASH_SIZE; x++)
+			if (examine_bucket(clear_glock, sdp, &sdp->sd_gl_hash[x]))
+				cont = TRUE;
+
+		if (!wait || !cont)
+			break;
+
+		if (time_after_eq(jiffies, t + sdp->sd_tune.gt_stall_secs * HZ)) {
+			printk("GFS: fsid=%s: Unmount seems to be stalled. Dumping lock state...\n",
+			       sdp->sd_fsname);
+			gfs_dump_lockstate(sdp, NULL);
+			t = jiffies;
+		}
+
+		invalidate_inodes(sdp->sd_vfs);
+		yield();
+	}
+}
+
+/*
+ * Diagnostic routines to help debug distributed deadlock
+ */
+
+/**
+ * dump_holder - print information about a glock holder
+ * @str: a string naming the type of holder
+ * @gh: the glock holder
+ * @buf: the buffer
+ * @size: the size of the buffer
+ * @count: where we are in the buffer
+ *
+ * Returns: 0 on success, -ENOBUFS when we run out of space
+ */
+
+static int
+dump_holder(char *str, struct gfs_holder *gh,
+	    char *buf, unsigned int size, unsigned int *count)
+{
+	unsigned int x;
+	int error = 0;
+
+	gfs_sprintf("  %s\n", str);
+	gfs_sprintf("    owner = %ld\n",
+		    (gh->gh_owner) ?
(long)gh->gh_owner->pid : -1); + gfs_sprintf(" gh_state = %u\n", gh->gh_state); + gfs_sprintf(" gh_flags ="); + for (x = 0; x < 32; x++) + if (gh->gh_flags & (1 << x)) + gfs_sprintf(" %u", x); + gfs_sprintf(" \n"); + gfs_sprintf(" error = %d\n", gh->gh_error); + gfs_sprintf(" gh_iflags ="); + for (x = 0; x < 32; x++) + if (test_bit(x, &gh->gh_iflags)) + gfs_sprintf(" %u", x); + gfs_sprintf(" \n"); + + out: + return error; +} + +/** + * dump_inode - print information about an inode + * @ip: the inode + * @buf: the buffer + * @size: the size of the buffer + * @count: where we are in the buffer + * + * Returns: 0 on success, -ENOBUFS when we run out of space + */ + +static int +dump_inode(struct gfs_inode *ip, + char *buf, unsigned int size, unsigned int *count) +{ + unsigned int x; + int error = 0; + + gfs_sprintf(" Inode:\n"); + gfs_sprintf(" num = %" PRIu64 "/%" PRIu64 "\n", + ip->i_num.no_formal_ino, ip->i_num.no_addr); + gfs_sprintf(" type = %u\n", ip->i_di.di_type); + gfs_sprintf(" i_count = %d\n", atomic_read(&ip->i_count)); + gfs_sprintf(" i_flags ="); + for (x = 0; x < 32; x++) + if (test_bit(x, &ip->i_flags)) + gfs_sprintf(" %u", x); + gfs_sprintf(" \n"); + gfs_sprintf(" vnode = %s\n", (ip->i_vnode) ? "yes" : "no"); + + out: + return error; +} + +/** + * dump_glock - print information about a glock + * @gl: the glock + * @buf: the buffer + * @size: the size of the buffer + * @count: where we are in the buffer + * + * Returns: 0 on success, -ENOBUFS when we run out of space + */ + +static int +dump_glock(struct gfs_glock *gl, + char *buf, unsigned int size, unsigned int *count) +{ + struct list_head *head, *tmp; + struct gfs_holder *gh; + unsigned int x; + int error = 0; + + spin_lock(&gl->gl_spin); + + gfs_sprintf("Glock (%u, %" PRIu64 ")\n", + gl->gl_name.ln_type, + gl->gl_name.ln_number); + gfs_sprintf(" gl_flags ="); + for (x = 0; x < 32; x++) + if (test_bit(x, &gl->gl_flags)) + gfs_sprintf(" %u", x); + gfs_sprintf(" \n"); + gfs_sprintf(" gl_count = %d\n", atomic_read(&gl->gl_count)); + gfs_sprintf(" gl_state = %u\n", gl->gl_state); + gfs_sprintf(" lvb_count = %d\n", atomic_read(&gl->gl_lvb_count)); + gfs_sprintf(" object = %s\n", (gl->gl_object) ? "yes" : "no"); + if (gl->gl_aspace) + gfs_sprintf(" aspace = %lu\n", + gl->gl_aspace->i_mapping->nrpages); + else + gfs_sprintf(" aspace = no\n"); + gfs_sprintf(" reclaim = %s\n", + (list_empty(&gl->gl_reclaim)) ? 
"no" : "yes"); + if (gl->gl_req_gh) { + error = dump_holder("Request", gl->gl_req_gh, buf, size, count); + if (error) + goto out; + } + for (head = &gl->gl_holders, tmp = head->next; + tmp != head; + tmp = tmp->next) { + gh = list_entry(tmp, struct gfs_holder, gh_list); + error = dump_holder("Holder", gh, buf, size, count); + if (error) + goto out; + } + for (head = &gl->gl_waiters1, tmp = head->next; + tmp != head; + tmp = tmp->next) { + gh = list_entry(tmp, struct gfs_holder, gh_list); + error = dump_holder("Waiter1", gh, buf, size, count); + if (error) + goto out; + } + for (head = &gl->gl_waiters2, tmp = head->next; + tmp != head; + tmp = tmp->next) { + gh = list_entry(tmp, struct gfs_holder, gh_list); + error = dump_holder("Waiter2", gh, buf, size, count); + if (error) + goto out; + } + if (gl->gl_ops == &gfs_inode_glops && gl2ip(gl)) { + if (!test_bit(GLF_LOCK, &gl->gl_flags) && + list_empty(&gl->gl_holders)) { + error = dump_inode(gl2ip(gl), buf, size, count); + if (error) + goto out; + } else + gfs_sprintf(" Inode: busy\n"); + } + + out: + spin_unlock(&gl->gl_spin); + + return error; +} + +/** + * gfs_dump_lockstate - print out the current lockstate + * @sdp: the filesystem + * @ub: the buffer to copy the information into + * + * If @ub is NULL, dump the lockstate to the console. + * + */ + +int +gfs_dump_lockstate(struct gfs_sbd *sdp, struct gfs_user_buffer *ub) +{ + struct gfs_gl_hash_bucket *bucket; + struct list_head *tmp, *head; + struct gfs_glock *gl; + char *buf = NULL; + unsigned int size = sdp->sd_tune.gt_lockdump_size; + unsigned int x, count; + int error = 0; + + if (ub) { + buf = kmalloc(size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + } + + for (x = 0; x < GFS_GL_HASH_SIZE; x++) { + bucket = &sdp->sd_gl_hash[x]; + count = 0; + + read_lock(&bucket->hb_lock); + + for (head = &bucket->hb_list, tmp = head->next; + tmp != head; + tmp = tmp->next) { + gl = list_entry(tmp, struct gfs_glock, gl_list); + + if (test_bit(GLF_PLUG, &gl->gl_flags)) + continue; + + error = dump_glock(gl, buf, size, &count); + if (error) + break; + } + + read_unlock(&bucket->hb_lock); + + if (error) + break; + + if (ub) { + if (ub->ub_count + count > ub->ub_size) { + error = -ENOMEM; + break; + } + if (copy_to_user(ub->ub_data + ub->ub_count, buf, count)) { + error = -EFAULT; + break; + } + ub->ub_count += count; + } + } + + if (ub) + kfree(buf); + + return error; +} diff -urN linux-orig/fs/gfs/glock.h linux-patched/fs/gfs/glock.h --- linux-orig/fs/gfs/glock.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/glock.h 2004-06-30 13:27:49.342711362 -0500 @@ -0,0 +1,134 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
+** +******************************************************************************* +******************************************************************************/ + +#ifndef __GFS_GLOCK_DOT_H__ +#define __GFS_GLOCK_DOT_H__ + +/* +#define LM_FLAG_TRY (0x00000001) +#define LM_FLAG_TRY_1CB (0x00000002) +#define LM_FLAG_NOEXP (0x00000004) +#define LM_FLAG_ANY (0x00000008) +#define LM_FLAG_PRIORITY (0x00000010) +*/ +#define GL_LOCAL_EXCL (0x00000020) +#define GL_ASYNC (0x00000040) +#define GL_EXACT (0x00000080) +#define GL_SKIP (0x00000100) +#define GL_ATIME (0x00000200) +#define GL_NOCACHE (0x00000400) +#define GL_SYNC (0x00000800) + +#define GLR_TRYFAILED (13) +#define GLR_CANCELED (14) + +static __inline__ int +gfs_glock_is_locked_by_me(struct gfs_glock *gl) +{ + struct list_head *tmp, *head; + struct gfs_holder *gh; + int locked = FALSE; + + spin_lock(&gl->gl_spin); + for (head = &gl->gl_holders, tmp = head->next; + tmp != head; + tmp = tmp->next) { + gh = list_entry(tmp, struct gfs_holder, gh_list); + if (gh->gh_owner == current) { + locked = TRUE; + break; + } + } + spin_unlock(&gl->gl_spin); + + return locked; +} +static __inline__ int +gfs_glock_is_held_excl(struct gfs_glock *gl) +{ + return (gl->gl_state == LM_ST_EXCLUSIVE); +} +static __inline__ int +gfs_glock_is_held_dfrd(struct gfs_glock *gl) +{ + return (gl->gl_state == LM_ST_DEFERRED); +} +static __inline__ int +gfs_glock_is_held_shrd(struct gfs_glock *gl) +{ + return (gl->gl_state == LM_ST_SHARED); +} + +#define GFS_ASYNC_LM(sdp) ((sdp)->sd_lockstruct.ls_flags & LM_LSFLAG_ASYNC) + +struct gfs_glock *gfs_glock_find(struct gfs_sbd *sdp, + struct lm_lockname *name); +int gfs_glock_get(struct gfs_sbd *sdp, + uint64_t number, struct gfs_glock_operations *glops, + int create, struct gfs_glock **glp); +void gfs_glock_hold(struct gfs_glock *gl); +void gfs_glock_put(struct gfs_glock *gl); + +void gfs_holder_init(struct gfs_glock *gl, unsigned int state, int flags, + struct gfs_holder *gh); +void gfs_holder_reinit(unsigned int state, int flags, struct gfs_holder *gh); +void gfs_holder_uninit(struct gfs_holder *gh); +struct gfs_holder *gfs_holder_get(struct gfs_glock *gl, unsigned int state, + int flags); +void gfs_holder_put(struct gfs_holder *gh); + +void gfs_glock_xmote_th(struct gfs_glock *gl, unsigned int state, int flags); +void gfs_glock_drop_th(struct gfs_glock *gl); + +int gfs_glock_nq(struct gfs_holder *gh); +int gfs_glock_poll(struct gfs_holder *gh); +int gfs_glock_wait(struct gfs_holder *gh); +void gfs_glock_dq(struct gfs_holder *gh); + +void gfs_glock_prefetch(struct gfs_glock *gl, unsigned int state, int flags); +void gfs_glock_force_drop(struct gfs_glock *gl); + +int gfs_glock_nq_init(struct gfs_glock *gl, unsigned int state, int flags, + struct gfs_holder *gh); +void gfs_glock_dq_uninit(struct gfs_holder *gh); +int gfs_glock_nq_num(struct gfs_sbd *sdp, + uint64_t number, struct gfs_glock_operations *glops, + unsigned int state, int flags, struct gfs_holder *gh); + +int gfs_glock_nq_m(unsigned int num_gh, struct gfs_holder *ghs); +void gfs_glock_dq_m(unsigned int num_gh, struct gfs_holder *ghs); + +void gfs_glock_prefetch_num(struct gfs_sbd *sdp, + uint64_t number, struct gfs_glock_operations *glops, + unsigned int state, int flags); + +/* Lock Value Block functions */ + +int gfs_lvb_hold(struct gfs_glock *gl); +void gfs_lvb_unhold(struct gfs_glock *gl); +void gfs_lvb_sync(struct gfs_glock *gl); + +void gfs_glock_cb(lm_fsdata_t * fsdata, unsigned int type, void *data); + +void gfs_try_toss_inode(struct gfs_sbd *sdp, struct 
gfs_inum *inum);
+void gfs_iopen_go_callback(struct gfs_glock *gl, unsigned int state);
+
+void gfs_glock_schedule_for_reclaim(struct gfs_glock *gl);
+void gfs_reclaim_glock(struct gfs_sbd *sdp);
+
+void gfs_scand_internal(struct gfs_sbd *sdp);
+void gfs_gl_hash_clear(struct gfs_sbd *sdp, int wait);
+
+int gfs_dump_lockstate(struct gfs_sbd *sdp, struct gfs_user_buffer *ub);
+
+#endif /* __GFS_GLOCK_DOT_H__ */
diff -urN linux-orig/fs/gfs/glops.c linux-patched/fs/gfs/glops.c
--- linux-orig/fs/gfs/glops.c	1969-12-31 18:00:00.000000000 -0600
+++ linux-patched/fs/gfs/glops.c	2004-06-30 13:27:49.342711362 -0500
@@ -0,0 +1,526 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <asm/semaphore.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+
+#include "gfs.h"
+#include "dio.h"
+#include "glock.h"
+#include "glops.h"
+#include "inode.h"
+#include "log.h"
+#include "page.h"
+#include "recovery.h"
+#include "rgrp.h"
+
+/**
+ * meta_go_sync - sync out the metadata for this glock
+ * @gl: the glock
+ * @flags: DIO_*
+ *
+ */
+
+static void
+meta_go_sync(struct gfs_glock *gl, int flags)
+{
+	if (!(flags & DIO_METADATA))
+		return;
+
+	if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
+		gfs_log_flush_glock(gl);
+		gfs_sync_buf(gl, flags | DIO_START | DIO_WAIT | DIO_CHECK);
+	}
+
+	clear_bit(GLF_DIRTY, &gl->gl_flags);
+	clear_bit(GLF_SYNC, &gl->gl_flags);
+}
+
+/**
+ * meta_go_inval - invalidate the metadata for this glock
+ * @gl: the glock
+ * @flags:
+ *
+ */
+
+static void
+meta_go_inval(struct gfs_glock *gl, int flags)
+{
+	if (!(flags & DIO_METADATA))
+		return;
+
+	gfs_inval_buf(gl);
+	gl->gl_vn++;
+}
+
+/**
+ * meta_go_demote_ok - check to see if it's ok to unlock a glock
+ * @gl: the glock
+ *
+ * Returns: TRUE if it's ok
+ */
+
+static int
+meta_go_demote_ok(struct gfs_glock *gl)
+{
+	return (gl->gl_aspace->i_mapping->nrpages) ?
FALSE : TRUE;
+}
+
+/**
+ * inode_go_xmote_th - promote/demote a glock
+ * @gl: the glock
+ * @state: the requested state
+ * @flags: the flags passed into gfs_glock()
+ *
+ */
+
+static void
+inode_go_xmote_th(struct gfs_glock *gl, unsigned int state, int flags)
+{
+	if (gl->gl_state != LM_ST_UNLOCKED)
+		gfs_inval_pte(gl);
+	gfs_glock_xmote_th(gl, state, flags);
+}
+
+/**
+ * inode_go_xmote_bh - promote/demote a glock
+ * @gl: the glock
+ *
+ * This will be really broken when (no_formal_ino != no_addr)
+ *
+ */
+
+static void
+inode_go_xmote_bh(struct gfs_glock *gl)
+{
+	struct gfs_sbd *sdp = gl->gl_sbd;
+	struct gfs_holder *gh = gl->gl_req_gh;
+	struct buffer_head *bh;
+	int error;
+
+	if (gl->gl_state != LM_ST_UNLOCKED &&
+	    (!gh || !(gh->gh_flags & GL_SKIP))) {
+		error = gfs_dread(sdp, gl->gl_name.ln_number, gl, DIO_START, &bh);
+		if (!error)
+			brelse(bh);
+	}
+}
+
+/**
+ * inode_go_drop_th - unlock a glock
+ * @gl: the glock
+ *
+ */
+
+static void
+inode_go_drop_th(struct gfs_glock *gl)
+{
+	gfs_inval_pte(gl);
+	gfs_glock_drop_th(gl);
+}
+
+/**
+ * inode_go_sync - Sync the dirty data for an inode glock
+ * @gl: the glock
+ * @flags:
+ *
+ */
+
+static void
+inode_go_sync(struct gfs_glock *gl, int flags)
+{
+	int meta = (flags & DIO_METADATA);
+	int data = (flags & DIO_DATA);
+
+	if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
+		if (meta && data) {
+			gfs_sync_page(gl, flags | DIO_START);
+			gfs_log_flush_glock(gl);
+			gfs_sync_buf(gl, flags | DIO_START | DIO_WAIT | DIO_CHECK);
+			gfs_sync_page(gl, flags | DIO_WAIT | DIO_CHECK);
+		} else if (meta) {
+			gfs_log_flush_glock(gl);
+			gfs_sync_buf(gl, flags | DIO_START | DIO_WAIT | DIO_CHECK);
+		} else if (data)
+			gfs_sync_page(gl, flags | DIO_START | DIO_WAIT | DIO_CHECK);
+	}
+
+	if (meta && data) {
+		if (!(flags & DIO_INVISIBLE))
+			clear_bit(GLF_DIRTY, &gl->gl_flags);
+		clear_bit(GLF_SYNC, &gl->gl_flags);
+	}
+}
+
+/**
+ * inode_go_inval - prepare an inode glock to be released
+ * @gl: the glock
+ * @flags:
+ *
+ */
+
+static void
+inode_go_inval(struct gfs_glock *gl, int flags)
+{
+	int meta = (flags & DIO_METADATA);
+	int data = (flags & DIO_DATA);
+
+	if (meta) {
+		gfs_inval_buf(gl);
+		gl->gl_vn++;
+	}
+	if (data)
+		gfs_inval_page(gl);
+}
+
+/**
+ * inode_go_demote_ok - check to see if it's ok to unlock a glock
+ * @gl: the glock
+ *
+ * Returns: TRUE if it's ok
+ */
+
+static int
+inode_go_demote_ok(struct gfs_glock *gl)
+{
+	struct gfs_sbd *sdp = gl->gl_sbd;
+	int demote = FALSE;
+
+	if (!gl2ip(gl) && !gl->gl_aspace->i_mapping->nrpages)
+		demote = TRUE;
+	else if (!sdp->sd_args.ar_localcaching &&
+		 time_after_eq(jiffies, gl->gl_stamp + sdp->sd_tune.gt_demote_secs * HZ))
+		demote = TRUE;
+
+	return demote;
+}
+
+/**
+ * inode_go_lock - operation done after an inode lock is locked by a process
+ * @gl: the glock
+ * @flags: the flags passed into gfs_glock()
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+static int
+inode_go_lock(struct gfs_glock *gl, int flags)
+{
+	struct gfs_inode *ip = gl2ip(gl);
+	int error = 0;
+
+	if (ip && ip->i_vn != gl->gl_vn) {
+		error = gfs_copyin_dinode(ip);
+		if (!error)
+			gfs_inode_attr_in(ip);
+	}
+
+	return error;
+}
+
+/**
+ * inode_go_unlock - operation done before an inode lock is unlocked by a process
+ * @gl: the glock
+ * @flags: the flags passed into gfs_gunlock()
+ *
+ */
+
+static void
+inode_go_unlock(struct gfs_glock *gl, int flags)
+{
+	struct gfs_inode *ip = gl2ip(gl);
+
+	if (ip && test_bit(GLF_DIRTY, &gl->gl_flags))
+		gfs_inode_attr_in(ip);
+
+	if (ip)
+		gfs_flush_meta_cache(ip);
+}
+
+/**
+
* rgrp_go_xmote_th - promote/demote a glock + * @gl: the glock + * @state: the requested state + * @flags: the flags passed into gfs_glock() + * + */ + +static void +rgrp_go_xmote_th(struct gfs_glock *gl, unsigned int state, int flags) +{ + struct gfs_rgrpd *rgd = gl2rgd(gl); + + GFS_ASSERT_GLOCK(rgd && gl->gl_lvb, gl,); + + gfs_mhc_zap(rgd); + gfs_depend_sync(rgd); + gfs_glock_xmote_th(gl, state, flags); +} + +/** + * rgrp_go_drop_th - unlock a glock + * @gl: the glock + * + */ + +static void +rgrp_go_drop_th(struct gfs_glock *gl) +{ + struct gfs_rgrpd *rgd = gl2rgd(gl); + + GFS_ASSERT_GLOCK(rgd && gl->gl_lvb, gl,); + + gfs_mhc_zap(rgd); + gfs_depend_sync(rgd); + gfs_glock_drop_th(gl); +} + +/** + * rgrp_go_demote_ok - check to see if it's ok to unlock a glock + * @gl: the glock + * + * Returns: TRUE if it's ok + */ + +static int +rgrp_go_demote_ok(struct gfs_glock *gl) +{ + struct gfs_rgrpd *rgd = gl2rgd(gl); + int demote = TRUE; + + if (gl->gl_aspace->i_mapping->nrpages) + demote = FALSE; + else if (rgd && !list_empty(&rgd->rd_mhc)) /* Don't bother with lock here */ + demote = FALSE; + + return demote; +} + +/** + * rgrp_go_lock - operation done after an rgrp lock is locked by a process + * @gl: the glock + * @flags: the flags passed into gfs_glock() + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +rgrp_go_lock(struct gfs_glock *gl, int flags) +{ + struct gfs_rgrpd *rgd = gl2rgd(gl); + int error = 0; + + GFS_ASSERT_GLOCK(rgd && gl->gl_lvb, gl,); + + if (!(flags & GL_SKIP)) + error = gfs_rgrp_read(rgd); + + return error; +} + +/** + * rgrp_go_unlock - operation done before an rgrp lock is unlocked by a process + * @gl: the glock + * @flags: the flags passed into gfs_gunlock() + * + */ + +static void +rgrp_go_unlock(struct gfs_glock *gl, int flags) +{ + struct gfs_rgrpd *rgd = gl2rgd(gl); + + GFS_ASSERT_GLOCK(rgd && gl->gl_lvb, gl,); + + if (!(flags & GL_SKIP)) { + gfs_rgrp_relse(rgd); + if (test_bit(GLF_DIRTY, &gl->gl_flags)) + gfs_rgrp_lvb_fill(rgd); + } +} + +/** + * trans_go_xmote_th - promote/demote a metadata glock + * @gl: the glock + * @state: the requested state + * @flags: the flags passed into gfs_glock() + * + */ + +static void +trans_go_xmote_th(struct gfs_glock *gl, unsigned int state, int flags) +{ + struct gfs_sbd *sdp = gl->gl_sbd; + int error; + + if (gl->gl_state != LM_ST_UNLOCKED && + test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { + gfs_sync_meta(sdp); + + error = gfs_log_shutdown(sdp); + if (error) + gfs_io_error(sdp); + } + + gfs_glock_xmote_th(gl, state, flags); +} + +/** + * trans_go_xmote_bh - promote/demote a metadata glock + * @gl: the glock + * + */ + +static void +trans_go_xmote_bh(struct gfs_glock *gl) +{ + struct gfs_sbd *sdp = gl->gl_sbd; + struct gfs_glock *j_gl = sdp->sd_journal_gh.gh_gl; + struct gfs_log_header head; + int error; + + if (gl->gl_state != LM_ST_UNLOCKED && + test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { + j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA); + + error = gfs_find_jhead(sdp, &sdp->sd_jdesc, j_gl, &head); + GFS_ASSERT_SBD(!error, sdp,); /* FixMe!!! */ + GFS_ASSERT_SBD(head.lh_flags & GFS_LOG_HEAD_UNMOUNT, sdp,); + + /* Initialize some head of the log stuff */ + sdp->sd_sequence = head.lh_sequence; + sdp->sd_log_head = head.lh_first + 1; + } +} + +/** + * trans_go_drop_th - prepare the transaction glock to be released + * @gl: the glock + * + * We want to sync the device even with localcaching. Remember + * that localcaching journal replay only marks buffers dirty. 
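+ * gfs_log_shutdown() leaves an unmount header in the journal, and
+ * trans_go_xmote_bh() on the next holder asserts that the header is
+ * present before reinitializing the incore log.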
+ */ + +static void +trans_go_drop_th(struct gfs_glock *gl) +{ + struct gfs_sbd *sdp = gl->gl_sbd; + int error; + + if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { + gfs_sync_meta(sdp); + + error = gfs_log_shutdown(sdp); + if (error) + gfs_io_error(sdp); + } + + gfs_glock_drop_th(gl); +} + +/** + * nondisk_go_demote_ok - check to see if it's ok to unlock a glock + * @gl: the glock + * + * Returns: TRUE if it's ok + */ + +static int +nondisk_go_demote_ok(struct gfs_glock *gl) +{ + return FALSE; +} + +/** + * quota_go_demote_ok - check to see if it's ok to unlock a glock + * @gl: the glock + * + * Returns: TRUE if it's ok + */ + +static int +quota_go_demote_ok(struct gfs_glock *gl) +{ + return !atomic_read(&gl->gl_lvb_count); +} + +struct gfs_glock_operations gfs_meta_glops = { + .go_xmote_th = gfs_glock_xmote_th, + .go_drop_th = gfs_glock_drop_th, + .go_sync = meta_go_sync, + .go_inval = meta_go_inval, + .go_demote_ok = meta_go_demote_ok, + .go_type = LM_TYPE_META +}; + +struct gfs_glock_operations gfs_inode_glops = { + .go_xmote_th = inode_go_xmote_th, + .go_xmote_bh = inode_go_xmote_bh, + .go_drop_th = inode_go_drop_th, + .go_sync = inode_go_sync, + .go_inval = inode_go_inval, + .go_demote_ok = inode_go_demote_ok, + .go_lock = inode_go_lock, + .go_unlock = inode_go_unlock, + .go_type = LM_TYPE_INODE +}; + +struct gfs_glock_operations gfs_rgrp_glops = { + .go_xmote_th = rgrp_go_xmote_th, + .go_drop_th = rgrp_go_drop_th, + .go_sync = meta_go_sync, + .go_inval = meta_go_inval, + .go_demote_ok = rgrp_go_demote_ok, + .go_lock = rgrp_go_lock, + .go_unlock = rgrp_go_unlock, + .go_type = LM_TYPE_RGRP +}; + +struct gfs_glock_operations gfs_trans_glops = { + .go_xmote_th = trans_go_xmote_th, + .go_xmote_bh = trans_go_xmote_bh, + .go_drop_th = trans_go_drop_th, + .go_type = LM_TYPE_NONDISK +}; + +struct gfs_glock_operations gfs_iopen_glops = { + .go_xmote_th = gfs_glock_xmote_th, + .go_drop_th = gfs_glock_drop_th, + .go_callback = gfs_iopen_go_callback, + .go_type = LM_TYPE_IOPEN +}; + +struct gfs_glock_operations gfs_flock_glops = { + .go_xmote_th = gfs_glock_xmote_th, + .go_drop_th = gfs_glock_drop_th, + .go_type = LM_TYPE_FLOCK +}; + +struct gfs_glock_operations gfs_nondisk_glops = { + .go_xmote_th = gfs_glock_xmote_th, + .go_drop_th = gfs_glock_drop_th, + .go_demote_ok = nondisk_go_demote_ok, + .go_type = LM_TYPE_NONDISK +}; + +struct gfs_glock_operations gfs_quota_glops = { + .go_xmote_th = gfs_glock_xmote_th, + .go_drop_th = gfs_glock_drop_th, + .go_demote_ok = quota_go_demote_ok, + .go_type = LM_TYPE_QUOTA +}; diff -urN linux-orig/fs/gfs/glops.h linux-patched/fs/gfs/glops.h --- linux-orig/fs/gfs/glops.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/glops.h 2004-06-30 13:27:49.343711130 -0500 @@ -0,0 +1,26 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
+** +******************************************************************************* +******************************************************************************/ + +#ifndef __GLOPS_DOT_H__ +#define __GLOPS_DOT_H__ + +extern struct gfs_glock_operations gfs_meta_glops; +extern struct gfs_glock_operations gfs_inode_glops; +extern struct gfs_glock_operations gfs_rgrp_glops; +extern struct gfs_glock_operations gfs_trans_glops; +extern struct gfs_glock_operations gfs_iopen_glops; +extern struct gfs_glock_operations gfs_flock_glops; +extern struct gfs_glock_operations gfs_nondisk_glops; +extern struct gfs_glock_operations gfs_quota_glops; + +#endif /* __GLOPS_DOT_H__ */ diff -urN linux-orig/fs/gfs/incore.h linux-patched/fs/gfs/incore.h --- linux-orig/fs/gfs/incore.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/incore.h 2004-06-30 13:27:49.343711130 -0500 @@ -0,0 +1,726 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __INCORE_DOT_H__ +#define __INCORE_DOT_H__ + +#define DIO_NEW (0x00000001) +#define DIO_FORCE (0x00000002) +#define DIO_CLEAN (0x00000004) +#define DIO_DIRTY (0x00000008) +#define DIO_START (0x00000010) +#define DIO_WAIT (0x00000020) +#define DIO_METADATA (0x00000040) +#define DIO_DATA (0x00000080) +#define DIO_INVISIBLE (0x00000100) +#define DIO_CHECK (0x00000200) +#define DIO_ALL (0x00000400) + +/* Structure prototypes */ + +struct gfs_log_operations; +struct gfs_log_element; +struct gfs_meta_header_cache; +struct gfs_depend; +struct gfs_bitmap; +struct gfs_rgrpd; +struct gfs_bufdata; +struct gfs_glock_operations; +struct gfs_holder; +struct gfs_glock; +struct gfs_alloc; +struct gfs_inode; +struct gfs_file; +struct gfs_unlinked; +struct gfs_quota_le; +struct gfs_quota_data; +struct gfs_log_buf; +struct gfs_trans; +struct gfs_gl_hash_bucket; +struct gfs_sbd; + +typedef void (*gfs_glop_bh_t) (struct gfs_glock * gl, unsigned int ret); + +/* + * Structure of operations that are associated with each + * type of element in the log. 
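+ * The operations fall into three groups: those applied to an
+ * individual log element, those applied to a whole class of
+ * elements within a transaction, and those run at recovery time.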
+ */
+
+struct gfs_log_operations {
+	/* Operations specific to a given log element */
+
+	void (*lo_add) (struct gfs_sbd * sdp, struct gfs_log_element * le);
+	void (*lo_trans_end) (struct gfs_sbd * sdp,
+			      struct gfs_log_element * le);
+	void (*lo_print) (struct gfs_sbd * sdp, struct gfs_log_element * le,
+			  unsigned int where);
+	struct gfs_trans *(*lo_overlap_trans) (struct gfs_sbd * sdp,
+					       struct gfs_log_element * le);
+	void (*lo_incore_commit) (struct gfs_sbd * sdp, struct gfs_trans * tr,
+				  struct gfs_log_element * le);
+	void (*lo_add_to_ail) (struct gfs_sbd * sdp,
+			       struct gfs_log_element * le);
+	void (*lo_clean_dump) (struct gfs_sbd * sdp,
+			       struct gfs_log_element * le);
+
+	/* Operations specific to a class of log elements */
+
+	void (*lo_trans_size) (struct gfs_sbd * sdp, struct gfs_trans * tr,
+			       unsigned int *mblks, unsigned int *eblks,
+			       unsigned int *blocks, unsigned int *bmem);
+	void (*lo_trans_combine) (struct gfs_sbd * sdp, struct gfs_trans * tr,
+				  struct gfs_trans * new_tr);
+	void (*lo_build_bhlist) (struct gfs_sbd * sdp, struct gfs_trans * tr);
+	void (*lo_dump_size) (struct gfs_sbd * sdp, unsigned int *elements,
+			      unsigned int *blocks, unsigned int *bmem);
+	void (*lo_build_dump) (struct gfs_sbd * sdp, struct gfs_trans * tr);
+
+	/* Operations that happen at recovery time */
+
+	void (*lo_before_scan) (struct gfs_sbd * sdp, unsigned int jid,
+				struct gfs_log_header * head,
+				unsigned int pass);
+	int (*lo_scan_elements) (struct gfs_sbd * sdp,
+				 struct gfs_jindex * jdesc,
+				 struct gfs_glock * gl, uint64_t start,
+				 struct gfs_log_descriptor * desc,
+				 unsigned int pass);
+	void (*lo_after_scan) (struct gfs_sbd * sdp, unsigned int jid,
+			       unsigned int pass);
+
+	char *lo_name;
+};
+
+/*
+ * Structure that gets added to struct gfs_trans->tr_elements.  They
+ * make up the "stuff" in each transaction.
+ */
+
+struct gfs_log_element {
+	struct gfs_log_operations *le_ops;
+
+	struct gfs_trans *le_trans;
+	struct list_head le_list;
+};
+
+struct gfs_meta_header_cache {
+	struct list_head mc_list_hash;
+	struct list_head mc_list_single;
+	struct list_head mc_list_rgd;
+
+	uint64_t mc_block;
+	struct gfs_meta_header mc_mh;
+};
+
+struct gfs_depend {
+	struct list_head gd_list_hash;
+	struct list_head gd_list_rgd;
+
+	struct gfs_rgrpd *gd_rgd;
+	uint64_t gd_formal_ino;
+	unsigned long gd_time;
+};
+
+/*
+ * Structure containing information about the allocation bitmaps.
+ * There is one of these for each fs block that the bitmap for
+ * the resource group header covers.
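+ * (bi_offset exists because each buffer begins with a header:
+ * the resource group header in the first block, a meta header in
+ * the blocks after it; the usable bitmap bytes start past it.)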
+ */
+
+struct gfs_bitmap {
+	uint32_t bi_offset;	/* The offset in the buffer of the first byte */
+	uint32_t bi_start;	/* The position of the first byte in this block */
+	uint32_t bi_len;	/* The number of bytes in this block */
+};
+
+/*
+ * Structure containing information about Resource Groups
+ */
+
+struct gfs_rgrpd {
+	struct list_head rd_list;	/* Link with superblock */
+	struct list_head rd_list_mru;
+	struct list_head rd_recent;	/* Recently used rgrps */
+
+	struct gfs_glock *rd_gl;	/* Glock for rgrp */
+
+	unsigned long rd_flags;
+
+	struct gfs_rindex rd_ri;	/* Resource Index structure */
+	struct gfs_rgrp rd_rg;	/* Resource Group structure */
+	uint64_t rd_rg_vn;
+
+	struct gfs_bitmap *rd_bits;
+	struct buffer_head **rd_bh;
+
+	uint32_t rd_last_alloc_data;
+	uint32_t rd_last_alloc_meta;
+
+	struct list_head rd_mhc;
+	struct list_head rd_depend;
+
+	struct gfs_sbd *rd_sbd;
+};
+
+/*
+ * Per-buffer data
+ */
+
+struct gfs_bufdata {
+	struct buffer_head *bd_bh;	/* struct buffer_head which this struct belongs to */
+	struct gfs_glock *bd_gl;	/* Pointer to Glock struct for this bh */
+
+	struct gfs_log_element bd_new_le;
+	struct gfs_log_element bd_incore_le;
+
+	char *bd_frozen;
+	struct semaphore bd_lock;
+
+	unsigned int bd_pinned;	/* Pin count */
+	struct list_head bd_ail_tr_list;	/* List of buffers hanging off tr_ail_bufs */
+	struct list_head bd_ail_gl_list;	/* List of buffers hanging off gl_ail_bufs */
+};
+
+/*
+ * Glock operations
+ */
+
+struct gfs_glock_operations {
+	void (*go_xmote_th) (struct gfs_glock * gl, unsigned int state,
+			     int flags);
+	void (*go_xmote_bh) (struct gfs_glock * gl);
+	void (*go_drop_th) (struct gfs_glock * gl);
+	void (*go_drop_bh) (struct gfs_glock * gl);
+	void (*go_sync) (struct gfs_glock * gl, int flags);
+	void (*go_inval) (struct gfs_glock * gl, int flags);
+	int (*go_demote_ok) (struct gfs_glock * gl);
+	int (*go_lock) (struct gfs_glock * gl, int flags);
+	void (*go_unlock) (struct gfs_glock * gl, int flags);
+	void (*go_callback) (struct gfs_glock * gl, unsigned int state);
+	int go_type;
+};
+
+/* Actions */
+#define HIF_MUTEX		(0)
+#define HIF_PROMOTE		(1)
+#define HIF_DEMOTE		(2)
+
+/* States */
+#define HIF_ALLOCED		(3)
+#define HIF_DEALLOC		(4)
+#define HIF_HOLDER		(5)
+#define HIF_FIRST		(6)
+#define HIF_WAKEUP		(7)
+#define HIF_RECURSE		(8)
+
+struct gfs_holder {
+	struct list_head gh_list;
+
+	struct gfs_glock *gh_gl;
+	struct task_struct *gh_owner;
+	unsigned int gh_state;
+	int gh_flags;
+
+	int gh_error;
+	unsigned long gh_iflags;
+	struct completion gh_wait;
+};
+
+/*
+ * Glock Structure
+ */
+
+#define GLF_PLUG		(0)
+#define GLF_LOCK		(1)
+#define GLF_STICKY		(2)
+#define GLF_PREFETCH		(3)
+#define GLF_SYNC		(4)
+#define GLF_DIRTY		(5)
+#define GLF_LVB_INVALID		(6)
+
+struct gfs_glock {
+	struct list_head gl_list;
+	unsigned long gl_flags;
+	struct lm_lockname gl_name;
+	atomic_t gl_count;
+
+	spinlock_t gl_spin;
+
+	unsigned int gl_state;
+	struct list_head gl_holders;
+	struct list_head gl_waiters1;	/* HIF_MUTEX */
+	struct list_head gl_waiters2;	/* HIF_DEMOTE, HIF_PROMOTE */
+
+	struct gfs_glock_operations *gl_ops;
+
+	struct gfs_holder *gl_req_gh;
+	gfs_glop_bh_t gl_req_bh;
+
+	lm_lock_t *gl_lock;
+	char *gl_lvb;
+	atomic_t gl_lvb_count;
+
+	uint64_t gl_vn;
+	unsigned long gl_stamp;
+	void *gl_object;
+
+	struct gfs_log_element gl_new_le;
+	struct gfs_log_element gl_incore_le;
+
+	struct gfs_gl_hash_bucket *gl_bucket;
+	struct list_head gl_reclaim;
+
+	struct gfs_sbd *gl_sbd;
+
+	struct inode *gl_aspace;
+	struct list_head gl_dirty_buffers;
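+	/* Buffers from committed transactions that still await their
+	   in-place writeback; these hang off bd_ail_gl_list in
+	   struct gfs_bufdata (the AIL) */
+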
struct list_head gl_ail_bufs; +}; + +/* + * In-Place Reservation structure + */ + +struct gfs_alloc { + /* Quota stuff */ + + unsigned int al_qd_num; + struct gfs_quota_data *al_qd[4]; + struct gfs_holder al_qd_ghs[4]; + + /* Filled in by the caller to gfs_inplace_reserve() */ + + uint32_t al_requested_di; + uint32_t al_requested_meta; + uint32_t al_requested_data; + + /* Filled in by gfs_inplace_reserve() */ + + char *al_file; + unsigned int al_line; + struct gfs_holder al_ri_gh; + struct gfs_holder al_rgd_gh; + struct gfs_rgrpd *al_rgd; + uint32_t al_reserved_meta; + uint32_t al_reserved_data; + + /* Filled in by gfs_blkalloc() */ + + uint32_t al_alloced_di; + uint32_t al_alloced_meta; + uint32_t al_alloced_data; + + /* Dinode allocation crap */ + + struct gfs_unlinked *al_ul; +}; + +/* + * Incore inode structure + */ + +#define GIF_QD_LOCKED (0) +#define GIF_PAGED (1) +#define GIF_SW_PAGED (2) + +struct gfs_inode { + struct gfs_inum i_num; + + atomic_t i_count; + unsigned long i_flags; + + uint64_t i_vn; + struct gfs_dinode i_di; + + struct gfs_glock *i_gl; + struct gfs_sbd *i_sbd; + struct inode *i_vnode; + + struct gfs_holder i_iopen_gh; + + struct gfs_alloc *i_alloc; + uint64_t i_last_rg_alloc; + + struct task_struct *i_creat_task; + pid_t i_creat_pid; + + spinlock_t i_lock; + struct buffer_head *i_cache[GFS_MAX_META_HEIGHT]; +}; + +/* + * GFS per-fd structure + */ + +#define GFF_DID_DIRECT_ALLOC (0) + +struct gfs_file { + unsigned long f_flags; + + struct semaphore f_fl_lock; + struct gfs_holder f_fl_gh; + + struct gfs_inode *f_inode; + struct file *f_vfile; +}; + +/* + * Unlinked inode log entry + */ + +#define ULF_NEW_UL (0) +#define ULF_INCORE_UL (1) +#define ULF_IC_LIST (2) +#define ULF_OD_LIST (3) +#define ULF_LOCK (4) + +struct gfs_unlinked { + struct list_head ul_list; + unsigned int ul_count; + + struct gfs_inum ul_inum; + unsigned long ul_flags; + + struct gfs_log_element ul_new_le; + struct gfs_log_element ul_incore_le; + struct gfs_log_element ul_ondisk_le; +}; + +/* + * Quota log element + */ + +struct gfs_quota_le { + struct gfs_log_element ql_le; + + struct gfs_quota_data *ql_data; + struct list_head ql_data_list; + + int64_t ql_change; +}; + +#define QDF_USER (0) +#define QDF_OD_LIST (1) +#define QDF_LOCK (2) + +struct gfs_quota_data { + struct list_head qd_list; + unsigned int qd_count; + + uint32_t qd_id; + unsigned long qd_flags; + + struct list_head qd_le_list; + + int64_t qd_change_new; + int64_t qd_change_ic; + int64_t qd_change_od; + int64_t qd_change_sync; + + struct gfs_quota_le qd_ondisk_ql; + uint64_t qd_sync_gen; + + struct gfs_glock *qd_gl; + struct gfs_quota_lvb qd_qb; + + unsigned long qd_last_warn; +}; + +struct gfs_log_buf { + struct list_head lb_list; + + struct buffer_head lb_bh; + struct buffer_head *lb_unlock; +}; + +/* + * Transaction structures + */ + +#define TRF_LOG_DUMP (0x00000001) + +struct gfs_trans { + struct list_head tr_list; + + /* Initial creation stuff */ + + char *tr_file; + unsigned int tr_line; + + unsigned int tr_mblks_asked; /* Number of log blocks asked to be reserved */ + unsigned int tr_eblks_asked; + unsigned int tr_seg_reserved; /* Number of segments reserved */ + + struct gfs_holder *tr_t_gh; + + /* Stuff filled in during creation */ + + unsigned int tr_flags; + struct list_head tr_elements; + + /* Stuff modified during the commit */ + + unsigned int tr_num_free_bufs; + struct list_head tr_free_bufs; + unsigned int tr_num_free_bmem; + struct list_head tr_free_bmem; + + uint64_t tr_log_head; /* The current log head */ + 
uint64_t tr_first_head;	/* First header block */
+
+	struct list_head tr_bufs;	/* List of buffers going to the log */
+
+	/* Stuff that's part of the AIL */
+
+	struct list_head tr_ail_bufs;
+
+	/* Private data for different log element types */
+
+	unsigned int tr_num_gl;
+	unsigned int tr_num_buf;
+	unsigned int tr_num_iul;
+	unsigned int tr_num_ida;
+	unsigned int tr_num_q;
+};
+
+/*
+ * One bucket of the glock hash table.
+ */
+
+struct gfs_gl_hash_bucket {
+	rwlock_t hb_lock;
+	struct list_head hb_list;
+} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
+
+/*
+ * Super Block Data Structure (One per filesystem)
+ */
+
+#define SDF_JOURNAL_LIVE	(0)
+#define SDF_SCAND_RUN		(1)
+#define SDF_GLOCKD_RUN		(2)
+#define SDF_RECOVERD_RUN	(3)
+#define SDF_LOGD_RUN		(4)
+#define SDF_QUOTAD_RUN		(5)
+#define SDF_INODED_RUN		(6)
+#define SDF_NOATIME		(7)
+#define SDF_ROFS		(8)
+#define SDF_NEED_LOG_DUMP	(9)
+#define SDF_FOUND_UL_DUMP	(10)
+#define SDF_FOUND_Q_DUMP	(11)
+#define SDF_IN_LOG_DUMP		(12)
+
+#define GFS_GL_HASH_SHIFT	(13)
+#define GFS_GL_HASH_SIZE	(1 << GFS_GL_HASH_SHIFT)
+#define GFS_GL_HASH_MASK	(GFS_GL_HASH_SIZE - 1)
+
+#define GFS_MHC_HASH_SHIFT	(10)
+#define GFS_MHC_HASH_SIZE	(1 << GFS_MHC_HASH_SHIFT)
+#define GFS_MHC_HASH_MASK	(GFS_MHC_HASH_SIZE - 1)
+
+#define GFS_DEPEND_HASH_SHIFT	(10)
+#define GFS_DEPEND_HASH_SIZE	(1 << GFS_DEPEND_HASH_SHIFT)
+#define GFS_DEPEND_HASH_MASK	(GFS_DEPEND_HASH_SIZE - 1)
+
+struct gfs_sbd {
+	struct gfs_sb sd_sb;	/* Super Block */
+
+	struct super_block *sd_vfs;	/* FS's device independent sb */
+
+	struct gfs_args sd_args;
+	unsigned long sd_flags;
+
+	struct gfs_tune sd_tune;	/* FS tuning structure */
+
+	/* Resource group stuff */
+
+	struct gfs_inode *sd_riinode;	/* rindex inode */
+	uint64_t sd_riinode_vn;	/* Version number of the resource index inode */
+
+	struct list_head sd_rglist;	/* List of resource groups */
+	struct semaphore sd_rindex_lock;
+
+	struct list_head sd_rg_mru_list;	/* List of resource groups in MRU order */
+	spinlock_t sd_rg_mru_lock;	/* Lock for MRU list */
+	struct list_head sd_rg_recent;	/* Recently used rgrps */
+	spinlock_t sd_rg_recent_lock;
+	struct gfs_rgrpd *sd_rg_forward;	/* Next new rgrp to try for allocation */
+	spinlock_t sd_rg_forward_lock;
+
+	unsigned int sd_rgcount;	/* Count of resource groups */
+
+	/* Constants computed on mount */
+
+	uint32_t sd_fsb2bb;
+	uint32_t sd_fsb2bb_shift;	/* Shift FS Block numbers to the left by
+					   this to get buffer cache blocks */
+	uint32_t sd_diptrs;	/* Number of pointers in a dinode */
+	uint32_t sd_inptrs;	/* Number of pointers in an indirect block */
+	uint32_t sd_jbsize;	/* Size of a journaled data block */
+	uint32_t sd_hash_bsize;	/* sizeof(exhash block) */
+	uint32_t sd_hash_bsize_shift;
+	uint32_t sd_hash_ptrs;	/* Number of pointers in a hash block */
+	uint32_t sd_max_dirres;	/* Maximum space needed to add a directory entry */
+	uint32_t sd_max_height;	/* Maximum height of a file's metadata tree */
+	uint64_t sd_heightsize[GFS_MAX_META_HEIGHT];
+	uint32_t sd_max_jheight;	/* Maximum height of a journaled file's metadata tree */
+	uint64_t sd_jheightsize[GFS_MAX_META_HEIGHT];
+
+	/* Lock Stuff */
+
+	struct gfs_gl_hash_bucket sd_gl_hash[GFS_GL_HASH_SIZE];
+
+	struct list_head sd_reclaim_list;
+	spinlock_t sd_reclaim_lock;
+	wait_queue_head_t sd_reclaim_wchan;
+	atomic_t sd_reclaim_count;
+
+	struct lm_lockstruct sd_lockstruct;
+
+	struct list_head sd_mhc[GFS_MHC_HASH_SIZE];
+	struct list_head sd_mhc_single;
+	spinlock_t sd_mhc_lock;
+	atomic_t sd_mhc_count;
+
+	struct list_head sd_depend[GFS_DEPEND_HASH_SIZE];
+	spinlock_t sd_depend_lock;
+	atomic_t sd_depend_count;
+
+	struct gfs_holder sd_live_gh;
+
+	struct gfs_holder sd_freeze_gh;
+	struct semaphore sd_freeze_lock;
+	unsigned int sd_freeze_count;
+
+	/* Inode Stuff */
+
+	struct gfs_inode *sd_rooti;	/* FS's root inode */
+
+	struct gfs_glock *sd_rename_gl;	/* rename glock */
+
+	/* Daemon stuff */
+
+	struct task_struct *sd_scand_process;
+	unsigned int sd_glockd_num;
+	struct task_struct *sd_recoverd_process;
+	struct task_struct *sd_logd_process;
+	struct task_struct *sd_quotad_process;
+	struct task_struct *sd_inoded_process;
+
+	struct semaphore sd_thread_lock;
+	struct completion sd_thread_completion;
+
+	/* Log stuff */
+
+	struct gfs_glock *sd_trans_gl;	/* transaction glock */
+
+	struct gfs_inode *sd_jiinode;	/* jindex inode */
+	uint64_t sd_jiinode_vn;	/* Version number of the journal index inode */
+
+	unsigned int sd_journals;	/* Number of journals in the FS */
+	struct gfs_jindex *sd_jindex;	/* Array of Jindex structures describing this FS's journals */
+	struct semaphore sd_jindex_lock;
+	unsigned long sd_jindex_refresh_time;
+
+	struct gfs_jindex sd_jdesc;	/* Jindex structure describing this machine's journal */
+	struct gfs_holder sd_journal_gh;	/* the glock for this machine's journal */
+
+	uint64_t sd_sequence;	/* Assigned to xactions in order they commit */
+	uint64_t sd_log_head;	/* Block number of next journal write */
+	uint64_t sd_log_wrap;
+
+	spinlock_t sd_log_seg_lock;
+	unsigned int sd_log_seg_free;	/* Free segments in the log */
+	struct list_head sd_log_seg_list;
+	wait_queue_head_t sd_log_seg_wait;
+
+	struct list_head sd_log_ail;	/* struct gfs_trans structures that form the Active Items List
+					   "next" is the head, "prev" is the tail */
+
+	struct list_head sd_log_incore;	/* transactions that have been committed incore (but not ondisk)
+					   "next" is the newest, "prev" is the oldest */
+	unsigned int sd_log_buffers;	/* Number of buffers in the incore log */
+
+	struct semaphore sd_log_lock;	/* Lock for access to log values */
+
+	uint64_t sd_log_dump_last;
+	uint64_t sd_log_dump_last_wrap;
+
+	/* unlinked crap */
+
+	struct list_head sd_unlinked_list;
+	spinlock_t sd_unlinked_lock;
+
+	atomic_t sd_unlinked_ic_count;
+	atomic_t sd_unlinked_od_count;
+
+	/* quota crap */
+
+	struct list_head sd_quota_list;
+	spinlock_t sd_quota_lock;
+
+	atomic_t sd_quota_count;
+	atomic_t sd_quota_od_count;
+
+	struct gfs_inode *sd_qinode;
+
+	uint64_t sd_quota_sync_gen;
+	unsigned long sd_quota_sync_time;
+
+	/* license crap */
+
+	struct gfs_inode *sd_linode;
+
+	/* Recovery stuff */
+
+	struct list_head sd_dirty_j;
+	spinlock_t sd_dirty_j_lock;
+
+	unsigned int sd_recovery_replays;
+	unsigned int sd_recovery_skips;
+	unsigned int sd_recovery_sames;
+
+	/* Counters */
+
+	atomic_t sd_glock_count;
+	atomic_t sd_glock_held_count;
+	atomic_t sd_inode_count;
+	atomic_t sd_bufdata_count;
+	atomic_t sd_fh2dentry_misses;
+	atomic_t sd_reclaimed;
+	atomic_t sd_glock_nq_calls;
+	atomic_t sd_glock_dq_calls;
+	atomic_t sd_glock_prefetch_calls;
+	atomic_t sd_lm_lock_calls;
+	atomic_t sd_lm_unlock_calls;
+	atomic_t sd_lm_callbacks;
+	atomic_t sd_ops_address;
+	atomic_t sd_ops_dentry;
+	atomic_t sd_ops_export;
+	atomic_t sd_ops_file;
+	atomic_t sd_ops_inode;
+	atomic_t sd_ops_super;
+	atomic_t sd_ops_vm;
+
+	char sd_fsname[256];
+
+	/* Debugging crud */
+
+	unsigned long sd_last_readdirplus;
+	unsigned long sd_last_unlocked_aop;
+
+	spinlock_t sd_ail_lock;
+	struct list_head sd_recovery_bufs;
+};
+
+#endif /* __INCORE_DOT_H__ */
diff -urN linux-orig/fs/gfs/inode.c linux-patched/fs/gfs/inode.c
--- linux-orig/fs/gfs/inode.c	1969-12-31 18:00:00.000000000 -0600
+++ linux-patched/fs/gfs/inode.c	2004-06-30 13:27:49.343711130 -0500
@@ -0,0 +1,1993 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <asm/semaphore.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/posix_acl.h>
+
+#include "gfs.h"
+#include "acl.h"
+#include "bmap.h"
+#include "dio.h"
+#include "dir.h"
+#include "eattr.h"
+#include "glock.h"
+#include "glops.h"
+#include "inode.h"
+#include "log.h"
+#include "ops_address.h"
+#include "ops_file.h"
+#include "ops_inode.h"
+#include "quota.h"
+#include "rgrp.h"
+#include "trans.h"
+#include "unlinked.h"
+
+/**
+ * inode_attr_in - Copy attributes from the dinode into the VFS inode
+ * @ip: The GFS inode
+ *
+ */
+
+static void
+inode_attr_in(struct gfs_inode *ip, struct inode *ino)
+{
+	unsigned int mode;
+
+	ino->i_ino = ip->i_num.no_formal_ino;
+
+	switch (ip->i_di.di_type) {
+	case GFS_FILE_REG:
+		mode = S_IFREG;
+		ino->i_rdev = 0;
+		break;
+	case GFS_FILE_DIR:
+		mode = S_IFDIR;
+		ino->i_rdev = 0;
+		break;
+	case GFS_FILE_LNK:
+		mode = S_IFLNK;
+		ino->i_rdev = 0;
+		break;
+	case GFS_FILE_BLK:
+		mode = S_IFBLK;
+		ino->i_rdev = MKDEV(ip->i_di.di_major, ip->i_di.di_minor);
+		break;
+	case GFS_FILE_CHR:
+		mode = S_IFCHR;
+		ino->i_rdev = MKDEV(ip->i_di.di_major, ip->i_di.di_minor);
+		break;
+	case GFS_FILE_FIFO:
+		mode = S_IFIFO;
+		ino->i_rdev = 0;
+		break;
+	case GFS_FILE_SOCK:
+		mode = S_IFSOCK;
+		ino->i_rdev = 0;
+		break;
+	default:
+		GFS_ASSERT_INODE(FALSE, ip,
+				 printk("type = %u\n", ip->i_di.di_type););
+		break;
+	};
+
+	ino->i_mode = mode | (ip->i_di.di_mode & S_IALLUGO);
+	ino->i_nlink = ip->i_di.di_nlink;
+	ino->i_uid = ip->i_di.di_uid;
+	ino->i_gid = ip->i_di.di_gid;
+	i_size_write(ino, ip->i_di.di_size);
+	ino->i_atime.tv_sec = ip->i_di.di_atime;
+	ino->i_mtime.tv_sec = ip->i_di.di_mtime;
+	ino->i_ctime.tv_sec = ip->i_di.di_ctime;
+	ino->i_atime.tv_nsec = ino->i_mtime.tv_nsec = ino->i_ctime.tv_nsec = 0;
+	ino->i_blksize = PAGE_SIZE;
+	ino->i_blocks = ip->i_di.di_blocks <<
+		(ip->i_sbd->sd_sb.sb_bsize_shift - GFS_BASIC_BLOCK_SHIFT);
+	ino->i_generation = ip->i_di.di_header.mh_incarn;
+}
+
+/**
+ * gfs_inode_attr_in - Copy attributes from the dinode into the VFS inode
+ * @ip: The GFS inode
+ *
+ */
+
+void
+gfs_inode_attr_in(struct gfs_inode *ip)
+{
+	struct inode *inode;
+
+	inode = gfs_iget(ip, NO_CREATE);
+	if (inode) {
+		inode_attr_in(ip, inode);
+		iput(inode);
+	}
+
+}
+
+/**
+ * gfs_inode_attr_out - Copy attributes from VFS inode into the dinode
+ * @ip: The GFS inode
+ *
+ * Only copy out the attributes that we want the VFS layer
+ * to be able to modify.
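+ * That means the mode bits, ownership, and timestamps; fields like
+ * size and link count are only changed through GFS itself.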
+ */ + +void +gfs_inode_attr_out(struct gfs_inode *ip) +{ + struct inode *inode; + + inode = gfs_iget(ip, NO_CREATE); + if (inode) { + ip->i_di.di_mode = inode->i_mode & S_IALLUGO; + ip->i_di.di_uid = inode->i_uid; + ip->i_di.di_gid = inode->i_gid; + ip->i_di.di_atime = inode->i_atime.tv_sec; + ip->i_di.di_mtime = inode->i_mtime.tv_sec; + ip->i_di.di_ctime = inode->i_ctime.tv_sec; + iput(inode); + } +} + +/** + * gfs_iget - Get/Create a struct inode for a struct gfs_inode + * @ip: the struct gfs_inode to get the struct inode for + * + * Returns: An inode + */ + +struct inode * +gfs_iget(struct gfs_inode *ip, int create) +{ + struct inode *inode = NULL, *tmp; + + spin_lock(&ip->i_lock); + if (ip->i_vnode) + inode = igrab(ip->i_vnode); + spin_unlock(&ip->i_lock); + + if (inode || !create) + return inode; + + tmp = new_inode(ip->i_sbd->sd_vfs); + if (!tmp) + return NULL; + + inode_attr_in(ip, tmp); + + if (ip->i_di.di_type == GFS_FILE_REG) { + tmp->i_op = &gfs_file_iops; + tmp->i_fop = &gfs_file_fops; + tmp->i_mapping->a_ops = &gfs_file_aops; + } else if (ip->i_di.di_type == GFS_FILE_DIR) { + tmp->i_op = &gfs_dir_iops; + tmp->i_fop = &gfs_dir_fops; + } else if (ip->i_di.di_type == GFS_FILE_LNK) { + tmp->i_op = &gfs_symlink_iops; + } else { + tmp->i_op = &gfs_dev_iops; + init_special_inode(tmp, tmp->i_mode, tmp->i_rdev); + } + + vn2ip(tmp) = NULL; + + for (;;) { + spin_lock(&ip->i_lock); + if (!ip->i_vnode) + break; + inode = igrab(ip->i_vnode); + spin_unlock(&ip->i_lock); + + if (inode) { + iput(tmp); + return inode; + } + yield(); + } + + inode = tmp; + + gfs_inode_hold(ip); + ip->i_vnode = inode; + vn2ip(inode) = ip; + + spin_unlock(&ip->i_lock); + + insert_inode_hash(inode); + + return inode; +} + +/** + * gfs_copyin_dinode - Refresh the incore copy of the dinode + * @ip: The GFS inode + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_copyin_dinode(struct gfs_inode *ip) +{ + struct buffer_head *dibh; + int error; + + error = gfs_get_inode_buffer(ip, &dibh); + if (error) + return error; + + gfs_metatype_check(ip->i_sbd, dibh, GFS_METATYPE_DI); + gfs_dinode_in(&ip->i_di, dibh->b_data); + + brelse(dibh); + + GFS_ASSERT_INODE(ip->i_num.no_formal_ino == + ip->i_di.di_num.no_formal_ino, ip, + gfs_dinode_print(&ip->i_di);); + + /* Handle a moved inode */ + + if (ip->i_num.no_addr != ip->i_di.di_num.no_addr) { + /* Not implemented yet */ + GFS_ASSERT_INODE(FALSE, ip,); + } + + ip->i_vn = ip->i_gl->gl_vn; + + return 0; +} + +/** + * inode_create - create a struct gfs_inode + * @i_gl: The glock covering the inode + * @inum: The inode number + * @io_gl: the iopen glock, or NULL + * @io_state: the state the iopen glock should be acquire in + * @ipp: pointer to put the returned inode in + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +inode_create(struct gfs_glock *i_gl, struct gfs_inum *inum, + struct gfs_glock *io_gl, unsigned int io_state, + struct gfs_inode **ipp) +{ + struct gfs_sbd *sdp = i_gl->gl_sbd; + struct gfs_inode *ip; + int error = 0; + + RETRY_MALLOC(ip = kmem_cache_alloc(gfs_inode_cachep, GFP_KERNEL), ip); + memset(ip, 0, sizeof(struct gfs_inode)); + + ip->i_num = *inum; + + atomic_set(&ip->i_count, 1); + + ip->i_gl = i_gl; + ip->i_sbd = sdp; + + spin_lock_init(&ip->i_lock); + + error = gfs_glock_nq_init(io_gl, + io_state, GL_LOCAL_EXCL | GL_EXACT, + &ip->i_iopen_gh); + if (error) + goto fail; + + ip->i_iopen_gh.gh_owner = NULL; + + spin_lock(&io_gl->gl_spin); + gfs_glock_hold(i_gl); + gl2gl(io_gl) = i_gl; + spin_unlock(&io_gl->gl_spin); + + error = 
gfs_copyin_dinode(ip); + if (error) + goto fail_iopen; + + gfs_glock_hold(i_gl); + gl2ip(i_gl) = ip; + + atomic_inc(&sdp->sd_inode_count); + + *ipp = ip; + + return 0; + + fail_iopen: + spin_lock(&io_gl->gl_spin); + gl2gl(io_gl) = NULL; + gfs_glock_put(i_gl); + spin_unlock(&io_gl->gl_spin); + + gfs_glock_dq_uninit(&ip->i_iopen_gh); + + fail: + gfs_flush_meta_cache(ip); + kmem_cache_free(gfs_inode_cachep, ip); + *ipp = NULL; + + return error; +} + +/** + * gfs_inode_get - Get an inode given its number + * @i_gl: The glock covering the inode + * @inum: The inode number + * @create: Flag to say if we are allowed to create a new struct gfs_inode + * @ipp: pointer to put the returned inode in + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_inode_get(struct gfs_glock *i_gl, struct gfs_inum *inum, int create, + struct gfs_inode **ipp) +{ + struct gfs_glock *io_gl; + int error = 0; + + *ipp = gl2ip(i_gl); + if (*ipp) { + atomic_inc(&(*ipp)->i_count); + GFS_ASSERT_INODE((*ipp)->i_num.no_formal_ino == + inum->no_formal_ino, + (*ipp),); + } else if (create) { + error = gfs_glock_get(i_gl->gl_sbd, + inum->no_addr, &gfs_iopen_glops, + CREATE, &io_gl); + if (!error) { + error = inode_create(i_gl, inum, io_gl, + LM_ST_SHARED, ipp); + gfs_glock_put(io_gl); + } + } + + return error; +} + +/** + * gfs_inode_hold - hold a struct gfs_inode structure + * @ip: The GFS inode + * + */ + +void +gfs_inode_hold(struct gfs_inode *ip) +{ + GFS_ASSERT_INODE(atomic_read(&ip->i_count), ip,); + atomic_inc(&ip->i_count); +} + +/** + * gfs_inode_put - put a struct gfs_inode structure + * @ip: The GFS inode + * + */ + +void +gfs_inode_put(struct gfs_inode *ip) +{ + atomic_dec(&ip->i_count); + GFS_ASSERT_INODE(atomic_read(&ip->i_count) >= 0, ip,); +} + +/** + * gfs_inode_destroy - Destroy an inode structure with no references on it + * @ip: The GFS inode + * + * This function must be called with a glock held on the inode. 
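+ * It drops the iopen glock taken in inode_create() and releases the
+ * reference that glock held on the inode glock.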
+ *
+ */
+
+void
+gfs_inode_destroy(struct gfs_inode *ip)
+{
+	struct gfs_sbd *sdp = ip->i_sbd;
+	struct gfs_glock *io_gl = ip->i_iopen_gh.gh_gl;
+	struct gfs_glock *i_gl = ip->i_gl;
+
+	GFS_ASSERT_INODE(!atomic_read(&ip->i_count), ip,);
+	GFS_ASSERT_INODE(gl2gl(io_gl) == i_gl, ip,);
+
+	spin_lock(&io_gl->gl_spin);
+	gl2gl(io_gl) = NULL;
+	gfs_glock_put(i_gl);
+	spin_unlock(&io_gl->gl_spin);
+
+	gfs_glock_dq_uninit(&ip->i_iopen_gh);
+
+	gfs_flush_meta_cache(ip);
+	kmem_cache_free(gfs_inode_cachep, ip);
+
+	gl2ip(i_gl) = NULL;
+	gfs_glock_put(i_gl);
+
+	atomic_dec(&sdp->sd_inode_count);
+}
+
+/**
+ * dinode_mark_unused -
+ * @ip:
+ *
+ * Returns: errno
+ */
+
+static int
+dinode_mark_unused(struct gfs_inode *ip)
+{
+	struct buffer_head *dibh;
+	struct gfs_dinode *di;
+	uint32_t incarn;
+	uint64_t ctime;
+	uint32_t flags;
+	int error;
+
+	error = gfs_get_inode_buffer(ip, &dibh);
+	if (error)
+		return error;
+
+	di = (struct gfs_dinode *)dibh->b_data;
+
+	gfs_trans_add_bh(ip->i_gl, dibh);
+
+	incarn = gfs32_to_cpu(di->di_header.mh_incarn) + 1;
+	di->di_header.mh_incarn = cpu_to_gfs32(incarn);
+
+	ctime = get_seconds();
+	di->di_ctime = cpu_to_gfs64(ctime);
+
+	flags = (gfs32_to_cpu(di->di_flags)) | GFS_DIF_UNUSED;
+	di->di_flags = cpu_to_gfs32(flags);
+
+	brelse(dibh);
+
+	return 0;
+}
+
+/**
+ * dinode_dealloc - Deallocate a dinode
+ * @ip: The GFS inode
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+static int
+dinode_dealloc(struct gfs_inode *ip)
+{
+	struct gfs_sbd *sdp = ip->i_sbd;
+	struct gfs_rgrpd *rgd;
+	struct gfs_holder ri_gh, rgd_gh;
+	int error;
+
+	gfs_alloc_get(ip);
+
+	error = gfs_quota_hold_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
+	if (error)
+		goto fail;
+
+	error = gfs_rindex_hold(sdp, &ri_gh);
+	if (error)
+		goto fail_qs;
+
+	rgd = gfs_blk2rgrpd(sdp, ip->i_num.no_addr);
+	GFS_ASSERT_INODE(rgd, ip,);
+
+	error = gfs_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rgd_gh);
+	if (error)
+		goto fail_rindex_relse;
+
+	GFS_ASSERT_INODE(ip->i_di.di_blocks == 1, ip,
+			 gfs_dinode_print(&ip->i_di););
+
+	/* Trans may require:
+	   One block for the RG header.
+	   One block for the dinode bit.
+	   One block for the dinode.
+	   We also need a block for the unlinked change.
+	   One block for the quota change.
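+	   That is three journaled metadata blocks plus two auxiliary
+	   changes, matching the gfs_trans_begin(sdp, 3, 2)
+	   reservation below.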
+	*/
+
+	error = gfs_trans_begin(sdp, 3, 2);
+	if (error)
+		goto fail_rg_gunlock;
+
+	error = dinode_mark_unused(ip);
+	if (error)
+		goto fail_end_trans;
+
+	gfs_difree(rgd, ip);
+
+	gfs_trans_add_unlinked(sdp, GFS_LOG_DESC_IDA, &ip->i_num);
+	clear_bit(GLF_STICKY, &ip->i_gl->gl_flags);
+
+	gfs_trans_end(sdp);
+
+	gfs_glock_dq_uninit(&rgd_gh);
+	gfs_glock_dq_uninit(&ri_gh);
+
+	gfs_quota_unhold_m(ip);
+	gfs_alloc_put(ip);
+
+	return 0;
+
+ fail_end_trans:
+	gfs_trans_end(sdp);
+
+ fail_rg_gunlock:
+	gfs_glock_dq_uninit(&rgd_gh);
+
+ fail_rindex_relse:
+	gfs_glock_dq_uninit(&ri_gh);
+
+ fail_qs:
+	gfs_quota_unhold_m(ip);
+
+ fail:
+	gfs_alloc_put(ip);
+
+	return error;
+}
+
+/**
+ * inode_dealloc - Deallocate an inode
+ * @sdp: the filesystem
+ * @inum: the inode number to deallocate
+ * @io_gh: a holder for the iopen glock for this inode
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+static int
+inode_dealloc(struct gfs_sbd *sdp, struct gfs_inum *inum,
+	      struct gfs_holder *io_gh)
+{
+	struct gfs_inode *ip;
+	struct gfs_holder i_gh;
+	int error;
+
+	error = gfs_glock_nq_num(sdp,
+				 inum->no_formal_ino, &gfs_inode_glops,
+				 LM_ST_EXCLUSIVE, 0, &i_gh);
+	if (error)
+		return error;
+
+	/* We reacquire the iopen lock here to avoid a race with the NFS server
+	   calling gfs_read_inode() with the inode number of an inode we're in
+	   the process of deallocating.  And we can't keep our hold on the lock
+	   from try_dealloc_inode() for deadlock reasons. */
+
+	gfs_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY, io_gh);
+	error = gfs_glock_nq(io_gh);
+	switch (error) {
+	case 0:
+		break;
+	case GLR_TRYFAILED:
+		error = 0;
+		goto fail;
+	default:
+		GFS_ASSERT_SBD(error < 0, sdp,);
+		goto fail;
+	}
+
+	GFS_ASSERT_GLOCK(!gl2ip(i_gh.gh_gl), i_gh.gh_gl,);
+	error = inode_create(i_gh.gh_gl, inum, io_gh->gh_gl, LM_ST_EXCLUSIVE,
+			     &ip);
+
+	gfs_glock_dq(io_gh);
+
+	if (error)
+		goto fail;
+
+	GFS_ASSERT_INODE(!ip->i_di.di_nlink, ip,
+			 gfs_dinode_print(&ip->i_di););
+	GFS_ASSERT_INODE(atomic_read(&ip->i_count) == 1, ip,);
+	GFS_ASSERT_INODE(!ip->i_vnode, ip,);
+
+	if (ip->i_di.di_type == GFS_FILE_DIR &&
+	    (ip->i_di.di_flags & GFS_DIF_EXHASH)) {
+		error = gfs_dir_exhash_free(ip);
+		if (error)
+			goto fail_iput;
+	}
+
+	if (ip->i_di.di_eattr) {
+		error = gfs_ea_dealloc(ip);
+		if (error)
+			goto fail_iput;
+	}
+
+	error = gfs_shrink(ip, 0, NULL);
+	if (error)
+		goto fail_iput;
+
+	error = dinode_dealloc(ip);
+	if (error)
+		goto fail_iput;
+
+	gfs_inode_put(ip);
+	gfs_inode_destroy(ip);
+
+	gfs_glock_dq_uninit(&i_gh);
+
+	return 0;
+
+ fail_iput:
+	gfs_inode_put(ip);
+	gfs_inode_destroy(ip);
+
+ fail:
+	gfs_glock_dq_uninit(&i_gh);
+
+	return error;
+}
+
+/**
+ * inode_dealloc_init - Try to deallocate an inode and all its blocks
+ * @sdp: the filesystem
+ *
+ * Returns: 0 on success, -errno on error, 1 on busy
+ */
+
+static int
+inode_dealloc_init(struct gfs_sbd *sdp, struct gfs_inum *inum)
+{
+	struct gfs_holder io_gh;
+	int error = 0;
+
+	gfs_try_toss_inode(sdp, inum);
+
+	error = gfs_glock_nq_num(sdp,
+				 inum->no_addr, &gfs_iopen_glops,
+				 LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB, &io_gh);
+	switch (error) {
+	case 0:
+		break;
+	case GLR_TRYFAILED:
+		return 1;
+	default:
+		GFS_ASSERT_SBD(error < 0, sdp,);
+		return error;
+	}
+
+	gfs_glock_dq(&io_gh);
+	error = inode_dealloc(sdp, inum, &io_gh);
+	gfs_holder_uninit(&io_gh);
+
+	return error;
+}
+
+/**
+ * inode_dealloc_uninit - dealloc an uninitialized inode
+ * @sdp: the filesystem
+ *
+ * Returns: 0 on success, -errno on error, 1 on busy
+ */
+
+static int
+inode_dealloc_uninit(struct gfs_sbd *sdp, struct gfs_inum *inum)
+{
+	struct gfs_rgrpd *rgd;
+	struct gfs_holder ri_gh, rgd_gh;
+	int error;
+
+	error = gfs_rindex_hold(sdp, &ri_gh);
+	if (error)
+		return error;
+
+	rgd = gfs_blk2rgrpd(sdp, inum->no_addr);
+	GFS_ASSERT_SBD(rgd, sdp,);
+
+	error = gfs_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rgd_gh);
+	if (error)
+		goto fail;
+
+	/* Trans may require:
+	   One block for the RG header.
+	   One block for the dinode bit.
+	   We also need a block for the unlinked change. */
+
+	error = gfs_trans_begin(sdp, 2, 1);
+	if (error)
+		goto fail_gunlock;
+
+	gfs_difree_uninit(rgd, inum->no_addr);
+	gfs_trans_add_unlinked(sdp, GFS_LOG_DESC_IDA, inum);
+
+	gfs_trans_end(sdp);
+
+	gfs_glock_dq_uninit(&rgd_gh);
+	gfs_glock_dq_uninit(&ri_gh);
+
+	return 0;
+
+ fail_gunlock:
+	gfs_glock_dq_uninit(&rgd_gh);
+
+ fail:
+	gfs_glock_dq_uninit(&ri_gh);
+
+	return error;
+}
+
+/**
+ * gfs_inode_dealloc - Grab an unlinked inode off the list and try to free it.
+ * @sdp: the filesystem
+ *
+ * Returns: 0 on success, -errno on error, 1 on busy
+ */
+
+int
+gfs_inode_dealloc(struct gfs_sbd *sdp, struct gfs_inum *inum)
+{
+	if (inum->no_formal_ino)
+		return inode_dealloc_init(sdp, inum);
+	else
+		return inode_dealloc_uninit(sdp, inum);
+}
+
+/**
+ * gfs_change_nlink - Change nlink count on inode
+ * @ip: The GFS inode
+ * @diff: The change in the nlink count required
+ *
+ * Returns: 0 on success, -EXXXX on failure.
+ */
+
+int
+gfs_change_nlink(struct gfs_inode *ip, int diff)
+{
+	struct buffer_head *dibh;
+	uint32_t nlink;
+	int error;
+
+	nlink = ip->i_di.di_nlink + diff;
+
+	if (diff < 0)
+		GFS_ASSERT_INODE(nlink < ip->i_di.di_nlink, ip,
+				 gfs_dinode_print(&ip->i_di););
+
+	error = gfs_get_inode_buffer(ip, &dibh);
+	if (error)
+		return error;
+
+	ip->i_di.di_nlink = nlink;
+	ip->i_di.di_ctime = get_seconds();
+
+	gfs_trans_add_bh(ip->i_gl, dibh);
+	gfs_dinode_out(&ip->i_di, dibh->b_data);
+	brelse(dibh);
+
+	return 0;
+}
+
+/**
+ * gfs_lookupi - Look up a filename in a directory and return its inode
+ * @d_gh: An initialized holder for the directory glock
+ * @name: The name of the inode to look for
+ * @is_root: If TRUE, ignore the caller's permissions
+ * @i_gh: An uninitialized holder for the new inode glock
+ *
+ * Returns: 0 on success, -EXXXX on failure
+ */
+
+int
+gfs_lookupi(struct gfs_holder *d_gh, struct qstr *name,
+	    int is_root, struct gfs_holder *i_gh)
+{
+	struct gfs_inode *dip = gl2ip(d_gh->gh_gl);
+	struct gfs_sbd *sdp = dip->i_sbd;
+	struct gfs_glock *gl;
+	struct gfs_inode *ip;
+	struct gfs_inum inum, inum2;
+	unsigned int type;
+	int error;
+
+	i_gh->gh_gl = NULL;
+
+	if (!name->len || name->len > GFS_FNAMESIZE)
+		return -ENAMETOOLONG;
+
+	if (gfs_filecmp(name, ".", 1)) {
+		gfs_holder_reinit(LM_ST_SHARED, 0, d_gh);
+		error = gfs_glock_nq(d_gh);
+		if (!error) {
+			error = gfs_glock_nq_init(dip->i_gl,
+						  LM_ST_SHARED, 0,
+						  i_gh);
+			GFS_ASSERT_INODE(!error, dip,);
+			gfs_inode_hold(dip);
+		}
+
+		return error;
+	}
+
+	if (gfs_glock_is_locked_by_me(d_gh->gh_gl))
+		bitch_about(sdp, &sdp->sd_last_readdirplus,
+			    "readdirplus-type behavior");
+
+	gfs_holder_reinit(LM_ST_SHARED, 0, d_gh);
+	error = gfs_glock_nq(d_gh);
+	if (error)
+		return error;
+
+	if (!is_root) {
+		struct inode *dir = gfs_iget(dip, NO_CREATE);
+		if (dir) {
+			error = permission(dir, MAY_EXEC, NULL);
+			iput(dir);
+			if (error) {
+				gfs_glock_dq(d_gh);
+				return error;
+			}
+		}
+	}
+
+	error = gfs_dir_search(dip, name, &inum, &type);
+	if (error) {
+		gfs_glock_dq(d_gh);
+		if (error == -ENOENT)
error = 0; + return error; + } + + restart: + error = gfs_glock_get(sdp, inum.no_formal_ino, &gfs_inode_glops, + CREATE, &gl); + if (error) { + gfs_glock_dq(d_gh); + return error; + } + + /* Acquire the second lock */ + + if (gl->gl_name.ln_number < dip->i_gl->gl_name.ln_number) { + gfs_glock_dq(d_gh); + + error = gfs_glock_nq_init(gl, LM_ST_SHARED, + LM_FLAG_ANY | GL_LOCAL_EXCL, + i_gh); + if (error) + goto out; + + gfs_holder_reinit(LM_ST_SHARED, 0, d_gh); + error = gfs_glock_nq(d_gh); + if (error) { + gfs_glock_dq_uninit(i_gh); + goto out; + } + + if (!is_root) { + struct inode *dir = gfs_iget(dip, NO_CREATE); + if (dir) { + error = permission(dir, MAY_EXEC, NULL); + iput(dir); + if (error) { + gfs_glock_dq(d_gh); + gfs_glock_dq_uninit(i_gh); + goto out; + } + } + } + + error = gfs_dir_search(dip, name, &inum2, &type); + if (error) { + gfs_glock_dq(d_gh); + gfs_glock_dq_uninit(i_gh); + if (error == -ENOENT) + error = 0; + goto out; + } + + if (!gfs_inum_equal(&inum, &inum2)) { + gfs_glock_dq_uninit(i_gh); + gfs_glock_put(gl); + inum = inum2; + goto restart; + } + } else { + error = gfs_glock_nq_init(gl, LM_ST_SHARED, + LM_FLAG_ANY | GL_LOCAL_EXCL, + i_gh); + if (error) { + gfs_glock_dq(d_gh); + goto out; + } + } + + error = gfs_inode_get(gl, &inum, CREATE, &ip); + if (error) { + gfs_glock_dq(d_gh); + gfs_glock_dq_uninit(i_gh); + } + GFS_ASSERT_INODE(ip->i_di.di_type == type, ip,); + + out: + gfs_glock_put(gl); + + return error; +} + +/** + * create_ok - + * @dip: + * @name: + * @type: + * + * Returns: errno + */ + +static int +create_ok(struct gfs_inode *dip, struct qstr *name, unsigned int type) +{ + int error; + + { + struct inode *dir = gfs_iget(dip, NO_CREATE); + if (dir) { + error = permission(dir, MAY_WRITE | MAY_EXEC, NULL); + iput(dir); + if (error) + return error; + } + } + + /* Don't create entries in an unlinked directory */ + + if (!dip->i_di.di_nlink) + return -EPERM; + + error = gfs_dir_search(dip, name, NULL, NULL); + switch (error) { + case -ENOENT: + error = 0; + break; + case 0: + return -EEXIST; + default: + return error; + } + + if (dip->i_di.di_entries == (uint32_t)-1) + return -EFBIG; + if (type == GFS_FILE_DIR && dip->i_di.di_nlink == (uint32_t)-1) + return -EMLINK; + + return 0; +} + +/** + * dinode_alloc - + * @dip: + * @ul: + * + * Returns: errno + */ + +static int +dinode_alloc(struct gfs_inode *dip, struct gfs_unlinked **ul) +{ + struct gfs_sbd *sdp = dip->i_sbd; + struct gfs_alloc *al; + struct gfs_inum inum; + int error; + + al = gfs_alloc_get(dip); + + al->al_requested_di = 1; + + error = gfs_inplace_reserve(dip); + if (error) + goto out; + + error = gfs_trans_begin(sdp, al->al_rgd->rd_ri.ri_length, 1); + if (error) + goto out_inplace; + + inum.no_formal_ino = 0; + error = gfs_dialloc(dip, &inum.no_addr); + if (error) + goto out_end_trans; + + *ul = gfs_trans_add_unlinked(sdp, GFS_LOG_DESC_IUL, &inum); + gfs_unlinked_lock(sdp, *ul); + + gfs_trans_add_gl(dip->i_gl); + + out_end_trans: + gfs_trans_end(sdp); + + out_inplace: + gfs_inplace_release(dip); + + out: + gfs_alloc_put(dip); + + return error; +} + +/** + * pick_formal_ino - Pick a formal inode number for a given inode + * @sdp: the filesystem + * @inum: the inode number structure + * + */ + +static void +pick_formal_ino(struct gfs_sbd *sdp, struct gfs_inum *inum) +{ + /* This won't always be true */ + inum->no_formal_ino = inum->no_addr; +} + +/** + * make_dinode - Fill in a new dinode structure + * @dip: the directory this inode is being created in + * @gl: The glock covering the new inode + * @inum: 
the inode number + * @type: the file type + * @mode: the file permissions + * @uid: + * @gid: + * + */ + +static int +make_dinode(struct gfs_inode *dip, + struct gfs_glock *gl, struct gfs_inum *inum, + unsigned int type, unsigned int mode, + unsigned int uid, unsigned int gid) +{ + struct gfs_sbd *sdp = dip->i_sbd; + struct gfs_dinode di; + struct buffer_head *dibh; + struct gfs_rgrpd *rgd; + int error; + + error = gfs_dread(sdp, inum->no_addr, gl, + DIO_NEW | DIO_START | DIO_WAIT, + &dibh); + if (error) + return error; + + gfs_trans_add_bh(gl, dibh); + gfs_metatype_set(sdp, dibh, GFS_METATYPE_DI, GFS_FORMAT_DI); + gfs_buffer_clear_tail(dibh, sizeof(struct gfs_dinode)); + + memset(&di, 0, sizeof(struct gfs_dinode)); + + gfs_meta_header_in(&di.di_header, dibh->b_data); + + di.di_num = *inum; + + di.di_mode = mode & S_IALLUGO; + di.di_uid = uid; + di.di_gid = gid; + di.di_nlink = 1; + di.di_blocks = 1; + di.di_atime = di.di_mtime = di.di_ctime = get_seconds(); + + rgd = gfs_blk2rgrpd(sdp, inum->no_addr); + GFS_ASSERT_SBD(rgd, sdp, + printk("block = %"PRIu64"\n", inum->no_addr);); + + di.di_rgrp = rgd->rd_ri.ri_addr; + di.di_goal_rgrp = di.di_rgrp; + di.di_goal_dblk = di.di_goal_mblk = inum->no_addr - rgd->rd_ri.ri_data1; + + if (type == GFS_FILE_REG) { + if ((dip->i_di.di_flags & GFS_DIF_INHERIT_JDATA) || + sdp->sd_tune.gt_new_files_jdata) + di.di_flags |= GFS_DIF_JDATA; + if ((dip->i_di.di_flags & GFS_DIF_INHERIT_DIRECTIO) || + sdp->sd_tune.gt_new_files_directio) + di.di_flags |= GFS_DIF_DIRECTIO; + } else if (type == GFS_FILE_DIR) { + di.di_flags |= (dip->i_di.di_flags & GFS_DIF_INHERIT_DIRECTIO); + di.di_flags |= (dip->i_di.di_flags & GFS_DIF_INHERIT_JDATA); + } + + di.di_type = type; + + gfs_dinode_out(&di, dibh->b_data); + brelse(dibh); + + return 0; +} + +/** + * inode_init_and_link - + * @dip: + * @name: + * @inum: + * @gl: + * @type: + * @mode: + * + * Returns: errno + */ + +static int +inode_init_and_link(struct gfs_inode *dip, struct qstr *name, + struct gfs_inum *inum, struct gfs_glock *gl, + unsigned int type, unsigned int mode) +{ + struct gfs_sbd *sdp = dip->i_sbd; + struct posix_acl *acl = NULL; + struct gfs_alloc *al; + struct gfs_inode *ip; + unsigned int gid; + int alloc_required; + int error; + + error = gfs_setup_new_acl(dip, type, &mode, &acl); + if (error) + return error; + + if (dip->i_di.di_mode & S_ISGID) { + if (type == GFS_FILE_DIR) + mode |= S_ISGID; + gid = dip->i_di.di_gid; + } + else + gid = current->fsgid; + + al = gfs_alloc_get(dip); + + error = gfs_quota_lock_m(dip, + current->fsuid, + gid); + if (error) + goto fail; + + error = gfs_quota_check(dip, current->fsuid, gid); + if (error) + goto fail_gunlock_q; + + if (acl) + alloc_required = TRUE; + else { + error = gfs_diradd_alloc_required(dip, name, &alloc_required); + if (error) + goto fail_gunlock_q; + } + + if (alloc_required) { + error = gfs_quota_check(dip, dip->i_di.di_uid, dip->i_di.di_gid); + if (error) + goto fail_gunlock_q; + + al->al_requested_meta = sdp->sd_max_dirres + GFS_MAX_EA_ACL_BLKS; + + error = gfs_inplace_reserve(dip); + if (error) + goto fail_gunlock_q; + + /* Trans may require: + blocks for two dinodes, the directory blocks necessary for + a new entry, RG bitmap blocks for an allocation, + and one block for a quota change and + one block for an unlinked tag. 
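/*
 * Aside (illustrative sketch, not part of the patch): every function in
 * this file unwinds errors the same way; resources are acquired top-down,
 * and a chain of goto labels releases, in reverse order, exactly what was
 * acquired before the failure point.  A minimal stand-alone model of the
 * idiom follows; acquire_a()/acquire_b() and the release helpers are
 * invented names.
 */

#include <stdio.h>

static int acquire_a(void) { return 0; }	/* 0 on success */
static int acquire_b(void) { return -1; }	/* simulate a failure */
static void release_a(void) { puts("release_a"); }
static void release_b(void) { puts("release_b"); }

static int do_op(void)
{
	int error;

	error = acquire_a();
	if (error)
		goto fail;

	error = acquire_b();
	if (error)
		goto fail_a;	/* only A is held at this point */

	/* ... the real work would go here ... */

	release_b();
	release_a();
	return 0;

 fail_a:
	release_a();
 fail:
	return error;
}

int main(void)
{
	return do_op() ? 1 : 0;
}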
*/ + + error = gfs_trans_begin(sdp, + 2 + sdp->sd_max_dirres + + al->al_rgd->rd_ri.ri_length + + GFS_MAX_EA_ACL_BLKS, 2); + if (error) + goto fail_inplace; + } else { + /* Trans may require: + blocks for two dinodes, a leaf block, + and one block for a quota change and + one block for an unlinked tag. */ + + error = gfs_trans_begin(sdp, 3, 2); + if (error) + goto fail_gunlock_q; + } + + error = gfs_dir_add(dip, name, inum, type); + if (error) + goto fail_end_trans; + + error = make_dinode(dip, gl, inum, type, mode, current->fsuid, gid); + if (error) + goto fail_end_trans; + + al->al_ul = gfs_trans_add_unlinked(sdp, GFS_LOG_DESC_IDA, + &(struct gfs_inum){0, inum->no_addr}); + gfs_trans_add_quota(sdp, +1, current->fsuid, gid); + + /* Gfs_inode_get() can't fail here. But then again, it shouldn't be + here (it should be in gfs_createi()). Gfs_init_acl() has no + business needing a memory-resident inode. */ + + gfs_inode_get(gl, inum, CREATE, &ip); + + if (acl) { + error = gfs_init_acl(dip, ip, type, acl); + GFS_ASSERT(!error, ); /* Sigh. */ + } + + return 0; + + fail_end_trans: + gfs_trans_end(sdp); + + fail_inplace: + if (alloc_required) + gfs_inplace_release(dip); + + fail_gunlock_q: + gfs_quota_unlock_m(dip); + + fail: + gfs_alloc_put(dip); + if (acl) + posix_acl_release(acl); + + return error; +} + +/** + * gfs_createi - Create a new inode + * @d_gh: An initialized holder for the directory glock + * @name: The name of the new file + * @type: The type of dinode (GFS_FILE_REG, GFS_FILE_DIR, GFS_FILE_LNK, ...) + * @mode: the permissions on the new inode + * @i_gh: An uninitialized holder for the new inode glock + * + * If the return value is 0, the glocks on both the directory and the new + * file are held. A transaction has been started and an inplace reservation + * is held, as well. 
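/*
 * Aside (sketch, not part of the patch): gfs_lookupi() above and
 * gfs_createi() below follow the same deadlock-avoidance rule; inode
 * glocks are always acquired in ascending lock-number order.  If the lock
 * we want sorts below the one we already hold, we drop ours, take both in
 * order, and then re-validate (re-run the directory search or
 * create_ok()), because the world may have changed while nothing was
 * held.  A minimal model of the ordering rule, with invented types:
 */

#include <stdint.h>
#include <pthread.h>

struct numbered_lock {
	uint64_t number;		/* cf. gl_name.ln_number */
	pthread_mutex_t mutex;
};

/* Take two locks in one canonical order so all concurrent tasks agree. */
static void lock_pair(struct numbered_lock *a, struct numbered_lock *b)
{
	if (a->number > b->number) {
		struct numbered_lock *t = a;
		a = b;
		b = t;
	}
	pthread_mutex_lock(&a->mutex);	/* lower number first */
	pthread_mutex_lock(&b->mutex);
}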
+ * + * Returns: 0 on success, -EXXXX on failure + */ + +int +gfs_createi(struct gfs_holder *d_gh, struct qstr *name, + unsigned int type, unsigned int mode, + struct gfs_holder *i_gh) +{ + struct gfs_inode *dip = gl2ip(d_gh->gh_gl); + struct gfs_sbd *sdp = dip->i_sbd; + struct gfs_unlinked *ul; + struct gfs_inum inum; + struct gfs_holder io_gh; + int error; + + if (!name->len || name->len > GFS_FNAMESIZE) + return -ENAMETOOLONG; + + gfs_holder_reinit(LM_ST_EXCLUSIVE, 0, d_gh); + error = gfs_glock_nq(d_gh); + if (error) + return error; + + error = create_ok(dip, name, type); + if (error) + goto fail; + + error = dinode_alloc(dip, &ul); + if (error) + goto fail; + + inum.no_addr = ul->ul_inum.no_addr; + pick_formal_ino(sdp, &inum); + + if (inum.no_formal_ino < dip->i_num.no_formal_ino) { + gfs_glock_dq(d_gh); + + error = gfs_glock_nq_num(sdp, + inum.no_formal_ino, &gfs_inode_glops, + LM_ST_EXCLUSIVE, GL_SKIP, i_gh); + if (error) { + gfs_unlinked_unlock(sdp, ul); + return error; + } + + gfs_holder_reinit(LM_ST_EXCLUSIVE, 0, d_gh); + error = gfs_glock_nq(d_gh); + if (error) { + gfs_glock_dq_uninit(i_gh); + gfs_unlinked_unlock(sdp, ul); + return error; + } + + error = create_ok(dip, name, type); + if (error) + goto fail_gunlock_i; + } else { + error = gfs_glock_nq_num(sdp, + inum.no_formal_ino, &gfs_inode_glops, + LM_ST_EXCLUSIVE, GL_SKIP, i_gh); + if (error) + goto fail_ul; + } + + error = gfs_glock_nq_num(sdp, + inum.no_addr, &gfs_iopen_glops, + LM_ST_SHARED, GL_LOCAL_EXCL | GL_EXACT, + &io_gh); + if (error) + goto fail_gunlock_i; + + error = inode_init_and_link(dip, name, &inum, i_gh->gh_gl, type, mode); + if (error) + goto fail_gunlock_io; + + gfs_glock_dq_uninit(&io_gh); + + return 0; + + fail_gunlock_io: + gfs_glock_dq_uninit(&io_gh); + + fail_gunlock_i: + gfs_glock_dq_uninit(i_gh); + + fail_ul: + gfs_unlinked_unlock(sdp, ul); + + fail: + gfs_glock_dq(d_gh); + + return error; +} + +/** + * gfs_unlinki - Unlink a file + * @dip: The inode of the directory + * @name: The name of the file to be unlinked + * @ip: The inode of the file to be removed + * + * Assumes Glocks on both dip and ip are held. + * + * Returns: 0 on success, -EXXXX on failure + */ + +int +gfs_unlinki(struct gfs_inode *dip, struct qstr *name, struct gfs_inode *ip) +{ + struct gfs_sbd *sdp = dip->i_sbd; + int error; + + error = gfs_dir_del(dip, name); + if (error) + return error; + + error = gfs_change_nlink(ip, -1); + if (error) + return error; + + /* If this inode is being unlinked from the directory structure, + we need to mark that in the log so that it isn't lost during + a crash. 
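/*
 * Aside (sketch, not part of the patch): gfs_unlinki() below only *tags*
 * the inode; the nlink drop and the unlinked-list record land in one
 * transaction, and the blocks are actually reclaimed later by
 * gfs_inode_dealloc() (or by journal replay after a crash).  A rough
 * stand-alone model of that two-phase shape; the types are invented and
 * are not the on-disk format.
 */

#include <stdint.h>
#include <stdio.h>

struct unlinked_tag {
	uint64_t no_formal_ino;
	uint64_t no_addr;
	int logged;
};

/* Phase 1: runs inside the unlink transaction. */
static void tag_unlinked(struct unlinked_tag *t, uint64_t fino, uint64_t addr)
{
	t->no_formal_ino = fino;
	t->no_addr = addr;
	t->logged = 1;	/* journaled with the nlink change, so a crash
			   between unlink and dealloc can't leak blocks */
}

/* Phase 2: runs later, or during recovery. */
static void dealloc_tagged(struct unlinked_tag *t)
{
	if (t->logged) {
		printf("freeing dinode %llu\n", (unsigned long long)t->no_addr);
		t->logged = 0;
	}
}

int main(void)
{
	struct unlinked_tag t;

	tag_unlinked(&t, 1, 4242);
	dealloc_tagged(&t);
	return 0;
}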
*/ + + if (!ip->i_di.di_nlink) { + gfs_trans_add_unlinked(sdp, GFS_LOG_DESC_IUL, &ip->i_num); + set_bit(GLF_STICKY, &ip->i_gl->gl_flags); + } + + return 0; +} + +/** + * gfs_rmdiri - Remove a directory + * @dip: The parent directory of the directory to be removed + * @name: The name of the directory to be removed + * @ip: The GFS inode of the directory to be removed + * + * Assumes Glocks on dip and ip are held + * + * Returns: 0 on success, -EXXXX on failure + */ + +int +gfs_rmdiri(struct gfs_inode *dip, struct qstr *name, struct gfs_inode *ip) +{ + struct gfs_sbd *sdp = dip->i_sbd; + struct qstr dotname; + int error; + + GFS_ASSERT_INODE(ip->i_di.di_entries == 2, ip, + gfs_dinode_print(&ip->i_di);); + + error = gfs_dir_del(dip, name); + if (error) + return error; + + error = gfs_change_nlink(dip, -1); + if (error) + return error; + + dotname.len = 1; + dotname.name = "."; + error = gfs_dir_del(ip, &dotname); + if (error) + return error; + + dotname.len = 2; + dotname.name = ".."; + error = gfs_dir_del(ip, &dotname); + if (error) + return error; + + error = gfs_change_nlink(ip, -2); + if (error) + return error; + + /* This inode is being unlinked from the directory structure and + we need to mark that in the log so that it isn't lost during + a crash. */ + + gfs_trans_add_unlinked(sdp, GFS_LOG_DESC_IUL, &ip->i_num); + set_bit(GLF_STICKY, &ip->i_gl->gl_flags); + + return 0; +} + +/* + * gfs_revalidate - check to see that a inode is still in a directory + * @dip: the directory + * @name: the name of the file + * @ip: the inode + * + * Assumes that the lock on (at least) @dip is held. + * + * Returns: 0 if the parent/child relationship is correct, -ENOENT if it isn't + */ + +int +gfs_revalidate(struct gfs_inode *dip, struct qstr *name, struct gfs_inode *ip) +{ + struct gfs_inum inum; + unsigned int type; + int error; + + error = gfs_dir_search(dip, name, &inum, &type); + if (!error) { + if (inum.no_formal_ino == ip->i_num.no_formal_ino) + GFS_ASSERT_INODE(ip->i_di.di_type == type, ip,); + else + error = -ENOENT; + } + + return error; +} + +/* + * gfs_ok_to_move - check if it's ok to move a directory to another directory + * @this: move this + * @to: to here + * + * Follow @to back to the root and make sure we don't encounter @this + * Assumes we already hold the rename lock. + * + * Returns: 0 if it's ok to move, -EXXX if it isn't + */ + +int +gfs_ok_to_move(struct gfs_inode *this, struct gfs_inode *to) +{ + struct gfs_sbd *sdp = this->i_sbd; + struct gfs_inode *tmp; + struct gfs_holder to_gh, tmp_gh; + struct qstr dotdot; + int error = 0; + + memset(&dotdot, 0, sizeof (struct qstr)); + dotdot.name = ".."; + dotdot.len = 2; + + gfs_inode_hold(to); + + for (;;) { + if (to == this) { + error = -EINVAL; + break; + } + if (to == sdp->sd_rooti) { + error = 0; + break; + } + + gfs_holder_init(to->i_gl, 0, 0, &to_gh); + + error = gfs_lookupi(&to_gh, &dotdot, TRUE, &tmp_gh); + if (error) { + gfs_holder_uninit(&to_gh); + break; + } + if (!tmp_gh.gh_gl) { + gfs_holder_uninit(&to_gh); + error = -ENOENT; + break; + } + + tmp = gl2ip(tmp_gh.gh_gl); + + gfs_glock_dq_uninit(&to_gh); + gfs_glock_dq_uninit(&tmp_gh); + + gfs_inode_put(to); + to = tmp; + } + + gfs_inode_put(to); + + return error; +} + +/** + * gfs_readlinki - return the contents of a symlink + * @ip: the symlink's inode + * @buf: a pointer to the buffer to be filled + * @len: a pointer to the length of @buf + * + * If @buf is too small, a piece of memory is gmalloc()ed and needs + * to be freed by the caller. 
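/*
 * Aside (sketch of the calling convention only): the gfs_readlinki()
 * contract described above puts the free responsibility on the caller,
 * but only when the buffer was replaced.  The externs are stubs and
 * read_link_example() is a hypothetical caller, error handling trimmed.
 */

#include <stddef.h>

struct gfs_inode;	/* opaque here */
extern int gfs_readlinki(struct gfs_inode *ip, char **buf, unsigned int *len);
extern void kfree(const void *objp);

static int read_link_example(struct gfs_inode *ip)
{
	char array[64], *buf = array;
	unsigned int len = sizeof(array);
	int error;

	error = gfs_readlinki(ip, &buf, &len);
	if (!error && buf != array)
		kfree(buf);	/* free only a substituted buffer */
	return error;
}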
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+int
+gfs_readlinki(struct gfs_inode *ip, char **buf, unsigned int *len)
+{
+	struct gfs_holder i_gh;
+	struct buffer_head *dibh;
+	unsigned int x;
+	int error;
+
+	gfs_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
+	error = gfs_glock_nq_atime(&i_gh);
+	if (error) {
+		gfs_holder_uninit(&i_gh);
+		return error;
+	}
+
+	GFS_ASSERT_INODE(ip->i_di.di_size, ip,);
+
+	error = gfs_get_inode_buffer(ip, &dibh);
+	if (error)
+		goto out;
+
+	x = ip->i_di.di_size + 1;
+	if (x > *len)
+		*buf = gmalloc(x);
+
+	memcpy(*buf, dibh->b_data + sizeof(struct gfs_dinode), x);
+	*len = x;
+
+	brelse(dibh);
+
+ out:
+	gfs_glock_dq_uninit(&i_gh);
+
+	return error;
+}
+
+/**
+ * gfs_glock_nq_atime - Acquire the glock and conditionally update the atime on an inode
+ * @gh: the holder to acquire
+ *
+ * Tests the atime for gfs_read(), gfs_readdir(), and gfs_test_mmap().
+ * The atime is updated only if the difference between the current time and
+ * the inode's current atime is greater than an interval specified at mount.
+ *
+ * Returns: 0 on success, -EXXX on error
+ */
+
+int
+gfs_glock_nq_atime(struct gfs_holder *gh)
+{
+	struct gfs_glock *gl = gh->gh_gl;
+	struct gfs_sbd *sdp = gl->gl_sbd;
+	struct gfs_inode *ip;
+	int64_t curtime, quantum = sdp->sd_tune.gt_atime_quantum;
+	unsigned int state;
+	int flags;
+	int error;
+
+	GFS_ASSERT_GLOCK(gh->gh_flags & GL_ATIME, gl,);
+	GFS_ASSERT_GLOCK(!(gh->gh_flags & GL_ASYNC), gl,);
+	GFS_ASSERT_GLOCK(gl->gl_ops == &gfs_inode_glops, gl,);
+
+	ip = gl2ip(gl);
+	GFS_ASSERT_GLOCK(ip, gl,);
+
+	state = gh->gh_state;
+	flags = gh->gh_flags;
+
+	error = gfs_glock_nq(gh);
+	if (error)
+		return error;
+
+	if (test_bit(SDF_NOATIME, &sdp->sd_flags) ||
+	    test_bit(SDF_ROFS, &sdp->sd_flags))
+		return 0;
+
+	curtime = get_seconds();
+	if (curtime - ip->i_di.di_atime >= quantum) {
+		int was_exclusive = (gl->gl_state == LM_ST_EXCLUSIVE);
+
+		gfs_glock_dq(gh);
+		gfs_holder_reinit(LM_ST_EXCLUSIVE,
+				  gh->gh_flags & ~LM_FLAG_ANY,
+				  gh);
+		error = gfs_glock_nq(gh);
+		if (error)
+			return error;
+
+		/* Verify this hasn't been updated while we were
+		   trying to get exclusive lock.
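/*
 * Aside (stand-alone sketch, not part of the patch): the atime path
 * above is the classic drop-upgrade-recheck pattern.  A shared lock
 * can't be upgraded in place, so it is dropped, re-taken exclusively,
 * and the triggering condition is tested again, since another holder
 * may have done the update in the unlocked window.  Modeled with a
 * pthread rwlock; needs_update()/do_update() are invented.
 */

#include <pthread.h>

static pthread_rwlock_t lk = PTHREAD_RWLOCK_INITIALIZER;
static int dirty = 1;

static int needs_update(void) { return dirty; }
static void do_update(void) { dirty = 0; }

static void read_path(void)
{
	pthread_rwlock_rdlock(&lk);
	if (needs_update()) {
		pthread_rwlock_unlock(&lk);	/* can't upgrade in place */
		pthread_rwlock_wrlock(&lk);
		if (needs_update())		/* re-check after the gap */
			do_update();
	}
	pthread_rwlock_unlock(&lk);
}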
*/ + + curtime = get_seconds(); + if (curtime - ip->i_di.di_atime >= quantum) { + struct buffer_head *dibh; + + error = gfs_trans_begin(sdp, 1, 0); + if (error == -EROFS) + return 0; + if (error) + goto fail; + + error = gfs_get_inode_buffer(ip, &dibh); + if (error) + goto fail_end_trans; + + ip->i_di.di_atime = curtime; + + gfs_trans_add_bh(ip->i_gl, dibh); + gfs_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + + gfs_trans_end(sdp); + } + + if (!was_exclusive) { + gfs_glock_dq(gh); + flags &= ~LM_FLAG_ANY; + flags |= GL_EXACT; + gfs_holder_reinit(state, flags, gh); + error = gfs_glock_nq(gh); + return error; + } + } + + return 0; + + fail_end_trans: + gfs_trans_end(sdp); + + fail: + gfs_glock_dq(gh); + + return error; +} + +/** + * glock_compare_atime - Compare two struct gfs_glock structures for sorting + * @arg_a: the first structure + * @arg_b: the second structure + * + */ + +static int +glock_compare_atime(const void *arg_a, const void *arg_b) +{ + struct gfs_holder *gh_a = *(struct gfs_holder **)arg_a; + struct gfs_holder *gh_b = *(struct gfs_holder **)arg_b; + struct lm_lockname *a = &gh_a->gh_gl->gl_name; + struct lm_lockname *b = &gh_b->gh_gl->gl_name; + int ret = 0; + + if (a->ln_number > b->ln_number) + ret = 1; + else if (a->ln_number < b->ln_number) + ret = -1; + else { + if (gh_a->gh_state == LM_ST_SHARED && + gh_b->gh_state == LM_ST_EXCLUSIVE) + ret = 1; + else if (gh_a->gh_state == LM_ST_SHARED && + (gh_b->gh_flags & GL_ATIME)) + ret = 1; + } + + return ret; +} + +/** + * gfs_glock_nq_m_atime - acquire multiple glocks where one may need an atime update + * @num_gh: the number of structures + * @ghs: an array of struct gfs_holder structures + * + * Returns: 0 on success (all glocks acquired), -EXXX on failure (no glocks acquired) + */ + +int +gfs_glock_nq_m_atime(unsigned int num_gh, struct gfs_holder *ghs) +{ + struct gfs_holder *p[num_gh]; + unsigned int x; + int error = 0; + + GFS_ASSERT(num_gh,); + + if (num_gh == 1) { + ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC); + if (ghs->gh_flags & GL_ATIME) + error = gfs_glock_nq_atime(ghs); + else + error = gfs_glock_nq(ghs); + return error; + } + + for (x = 0; x < num_gh; x++) + p[x] = &ghs[x]; + + gfs_sort(p, num_gh, sizeof(struct gfs_holder *), glock_compare_atime); + + for (x = 0; x < num_gh; x++) { + p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC); + + if (p[x]->gh_flags & GL_ATIME) + error = gfs_glock_nq_atime(p[x]); + else + error = gfs_glock_nq(p[x]); + + if (error) { + while (x--) + gfs_glock_dq(p[x]); + break; + } + } + + return error; +} + +/** + * gfs_try_toss_vnode - See if we can toss a vnode from memory + * @ip: the inode + * + * Returns: TRUE if the vnode was tossed + */ + +void +gfs_try_toss_vnode(struct gfs_inode *ip) +{ + struct inode *inode; + + inode = gfs_iget(ip, NO_CREATE); + if (!inode) + return; + + d_prune_aliases(inode); + + if (ip->i_di.di_type == GFS_FILE_DIR) { + struct list_head *head = &inode->i_dentry; + struct dentry *d = NULL; + + spin_lock(&dcache_lock); + if (list_empty(head)) + spin_unlock(&dcache_lock); + else { + d = list_entry(head->next, struct dentry, d_alias); + dget_locked(d); + spin_unlock(&dcache_lock); + + if (have_submounts(d)) + dput(d); + else { + shrink_dcache_parent(d); + dput(d); + d_prune_aliases(inode); + } + } + } + + inode->i_nlink = 0; + iput(inode); +} + +/** + * iah_make_jdata - + * @gl: + * @inum: + * + */ + +static void +iah_make_jdata(struct gfs_glock *gl, struct gfs_inum *inum) +{ + struct buffer_head *bh; + struct gfs_dinode *di; + uint32_t flags; + int error; + 
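/*
 * Aside (sketch, not part of the patch): gfs_glock_nq_m_atime() above
 * generalizes the lock-ordering rule to N locks; sort the requests by
 * lock number, acquire in order, and on failure release the ones already
 * held, in reverse.  (The real comparator also breaks number ties by
 * lock mode.)  Stand-alone model with invented types:
 */

#include <stdlib.h>
#include <stdint.h>

struct req { uint64_t number; };

static int cmp_req(const void *a, const void *b)
{
	const struct req *ra = *(const struct req *const *)a;
	const struct req *rb = *(const struct req *const *)b;

	return (ra->number > rb->number) - (ra->number < rb->number);
}

static int try_lock(struct req *r) { (void)r; return 0; }
static void unlock(struct req *r) { (void)r; }

static int lock_many(struct req **p, unsigned int n)
{
	unsigned int x;
	int error = 0;

	qsort(p, n, sizeof(*p), cmp_req);	/* global order first */
	for (x = 0; x < n; x++) {
		error = try_lock(p[x]);
		if (error) {
			while (x--)
				unlock(p[x]);	/* back out in reverse */
			break;
		}
	}
	return error;
}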
+ error = gfs_dread(gl->gl_sbd, inum->no_addr, gl, DIO_START | DIO_WAIT, &bh); + GFS_ASSERT_GLOCK(!error, gl,); /* Already pinned */ + + di = (struct gfs_dinode *)bh->b_data; + + flags = di->di_flags; + flags = gfs32_to_cpu(flags) | GFS_DIF_JDATA; + di->di_flags = cpu_to_gfs32(flags); + + brelse(bh); +} + +/** + * iah_super_update - + * @sdp: + * + * Returns: errno + */ + +static int +iah_super_update(struct gfs_sbd *sdp) +{ + struct gfs_glock *gl; + struct buffer_head *bh; + int error; + + error = gfs_glock_get(sdp, + GFS_SB_LOCK, &gfs_meta_glops, + NO_CREATE, &gl); + GFS_ASSERT_SBD(!error && gl, sdp,); /* This should already be held. */ + + error = gfs_dread(sdp, + GFS_SB_ADDR >> sdp->sd_fsb2bb_shift, gl, + DIO_START | DIO_WAIT, &bh); + if (!error) { + gfs_trans_add_bh(gl, bh); + gfs_sb_out(&sdp->sd_sb, bh->b_data); + brelse(bh); + } + + gfs_glock_put(gl); + + return error; +} + +/** + * inode_alloc_hidden - + * @sdp: + * @inum: + * + * Returns: errno + */ + +static int +inode_alloc_hidden(struct gfs_sbd *sdp, struct gfs_inum *inum) +{ + struct gfs_inode *dip = sdp->sd_rooti; + struct gfs_holder d_gh, i_gh; + struct gfs_unlinked *ul; + int error; + + error = gfs_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, &d_gh); + if (error) + return error; + + error = dinode_alloc(dip, &ul); + if (error) + goto fail; + + inum->no_addr = ul->ul_inum.no_addr; + pick_formal_ino(sdp, inum); + + /* Don't worry about deadlock ordering here. We're the first + mounter and still under the mount lock (i.e. there is no + contention). */ + + error = gfs_glock_nq_num(sdp, + inum->no_formal_ino, &gfs_inode_glops, + LM_ST_EXCLUSIVE, GL_SKIP, &i_gh); + if (error) + goto fail_ul; + + gfs_alloc_get(dip); + + error = gfs_quota_hold_m(dip, 0, 0); + if (error) + goto fail_al; + + /* Trans may require: + The new inode, the superblock, + and one block for a quota change and + one block for an unlinked tag. 
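/*
 * Aside (sketch, not part of the patch): iah_make_jdata() above edits an
 * on-disk field in place; load the disk-order value, convert to CPU
 * order, set the flag, convert back (the gfs32_to_cpu()/cpu_to_gfs32()
 * helpers hide the byte order).  Stand-alone model below; the helper
 * names and flag value are invented, and big-endian disk order is
 * assumed purely for illustration.
 */

#include <stdint.h>

#define DIF_JDATA 0x00000001u	/* invented value */

static uint32_t disk32_to_cpu(uint32_t v)
{
	const uint8_t *p = (const uint8_t *)&v;

	return (uint32_t)p[0] << 24 | (uint32_t)p[1] << 16 |
	       (uint32_t)p[2] << 8  | (uint32_t)p[3];
}

static uint32_t cpu_to_disk32(uint32_t v)
{
	uint32_t out;
	uint8_t *p = (uint8_t *)&out;

	p[0] = v >> 24; p[1] = v >> 16; p[2] = v >> 8; p[3] = v;
	return out;
}

/* Works the same on any host endianness. */
static void set_disk_flag(uint32_t *disk_flags)
{
	uint32_t flags = disk32_to_cpu(*disk_flags);

	flags |= DIF_JDATA;
	*disk_flags = cpu_to_disk32(flags);
}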
*/ + + error = gfs_trans_begin(sdp, 2, 2); + if (error) + goto fail_unhold; + + error = make_dinode(dip, i_gh.gh_gl, inum, GFS_FILE_REG, 0600, 0, 0); + if (error) + goto fail_end_trans; + + iah_make_jdata(i_gh.gh_gl, inum); + + error = iah_super_update(sdp); + if (error) + goto fail_end_trans; + + gfs_trans_add_unlinked(sdp, GFS_LOG_DESC_IDA, + &(struct gfs_inum){0, inum->no_addr}); + gfs_trans_add_quota(sdp, +1, 0, 0); + gfs_trans_add_gl(dip->i_gl); + + gfs_trans_end(sdp); + gfs_quota_unhold_m(dip); + gfs_alloc_put(dip); + + gfs_glock_dq_uninit(&i_gh); + gfs_glock_dq_uninit(&d_gh); + + gfs_unlinked_unlock(sdp, ul); + + gfs_log_flush(sdp); + + return 0; + + fail_end_trans: + gfs_trans_end(sdp); + + fail_unhold: + gfs_quota_unhold_m(dip); + + fail_al: + gfs_alloc_put(dip); + gfs_glock_dq_uninit(&i_gh); + + fail_ul: + gfs_unlinked_unlock(sdp, ul); + + fail: + gfs_glock_dq_uninit(&d_gh); + + return error; +} + +/** + * gfs_alloc_qinode - allocate a quota inode + * @sdp: The GFS superblock + * + * Returns: 0 on success, error code otherwise + */ + +int +gfs_alloc_qinode(struct gfs_sbd *sdp) +{ + return inode_alloc_hidden(sdp, &sdp->sd_sb.sb_quota_di); +} + +/** + * gfs_alloc_linode - allocate a license inode + * @sdp: The GFS superblock + * + * Returns: 0 on success, error code otherwise + */ + +int +gfs_alloc_linode(struct gfs_sbd *sdp) +{ + return inode_alloc_hidden(sdp, &sdp->sd_sb.sb_license_di); +} diff -urN linux-orig/fs/gfs/inode.h linux-patched/fs/gfs/inode.h --- linux-orig/fs/gfs/inode.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/inode.h 2004-06-30 13:27:49.344710898 -0500 @@ -0,0 +1,68 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
+** +******************************************************************************* +******************************************************************************/ + +#ifndef __INODE_DOT_H__ +#define __INODE_DOT_H__ + +void gfs_inode_attr_in(struct gfs_inode *ip); +void gfs_inode_attr_out(struct gfs_inode *ip); +struct inode *gfs_iget(struct gfs_inode *ip, int create); + +int gfs_copyin_dinode(struct gfs_inode *ip); + +int gfs_inode_get(struct gfs_glock *i_gl, struct gfs_inum *inum, int create, + struct gfs_inode **ipp); +void gfs_inode_hold(struct gfs_inode *ip); +void gfs_inode_put(struct gfs_inode *ip); +void gfs_inode_destroy(struct gfs_inode *ip); + +int gfs_inode_dealloc(struct gfs_sbd *sdp, struct gfs_inum *inum); + +int gfs_change_nlink(struct gfs_inode *ip, int diff); +int gfs_lookupi(struct gfs_holder *d_gh, struct qstr *name, + int is_root, struct gfs_holder *i_gh); +int gfs_createi(struct gfs_holder *d_gh, struct qstr *name, + unsigned int type, unsigned int mode, + struct gfs_holder *i_gh); +int gfs_unlinki(struct gfs_inode *dip, struct qstr *name, struct gfs_inode *ip); +int gfs_rmdiri(struct gfs_inode *dip, struct qstr *name, struct gfs_inode *ip); +int gfs_revalidate(struct gfs_inode *dip, struct qstr *name, + struct gfs_inode *ip); +int gfs_ok_to_move(struct gfs_inode *this, struct gfs_inode *to); +int gfs_readlinki(struct gfs_inode *ip, char **buf, unsigned int *len); + +int gfs_glock_nq_atime(struct gfs_holder *gh); +int gfs_glock_nq_m_atime(unsigned int num_gh, struct gfs_holder *ghs); + +void gfs_try_toss_vnode(struct gfs_inode *ip); + +/* Backwards compatibility functions */ + +int gfs_alloc_qinode(struct gfs_sbd *sdp); +int gfs_alloc_linode(struct gfs_sbd *sdp); + +/* Inlines */ + +static __inline__ int +gfs_is_stuffed(struct gfs_inode *ip) +{ + return !ip->i_di.di_height; +} + +static __inline__ int +gfs_is_jdata(struct gfs_inode *ip) +{ + return ip->i_di.di_flags & GFS_DIF_JDATA; +} + +#endif /* __INODE_DOT_H__ */ diff -urN linux-orig/fs/gfs/ioctl.c linux-patched/fs/gfs/ioctl.c --- linux-orig/fs/gfs/ioctl.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/ioctl.c 2004-06-30 13:27:49.345710666 -0500 @@ -0,0 +1,983 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
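/*
 * Aside (sketch, not part of the patch): gfs_add_bh_to_ub() below defines
 * the record format used by the metadata-dump ioctls; each record is a
 * 64-bit block number followed by the raw block, appended at ub_count,
 * with an up-front bounds check so a short user buffer fails cleanly
 * instead of truncating a record.  A userspace-side model of the same
 * packing, with invented types:
 */

#include <stdint.h>
#include <string.h>
#include <errno.h>

struct user_buffer {
	char *data;
	unsigned int size;	/* capacity */
	unsigned int count;	/* bytes used so far */
};

static int add_record(struct user_buffer *ub, uint64_t blkno,
		      const void *blk, unsigned int blksize)
{
	if (ub->count + sizeof(blkno) + blksize > ub->size)
		return -ENOMEM;			/* never split a record */

	memcpy(ub->data + ub->count, &blkno, sizeof(blkno));
	ub->count += sizeof(blkno);
	memcpy(ub->data + ub->count, blk, blksize);
	ub->count += blksize;
	return 0;
}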
+** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs.h" +#include "bmap.h" +#include "dio.h" +#include "dir.h" +#include "eattr.h" +#include "file.h" +#include "glock.h" +#include "glops.h" +#include "inode.h" +#include "ioctl.h" +#include "quota.h" +#include "rgrp.h" +#include "super.h" +#include "trans.h" + +/** + * gfs_add_bh_to_ub - copy a buffer up to user space + * @ub: the structure representing where to copy + * @bh: the buffer + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_add_bh_to_ub(struct gfs_user_buffer *ub, struct buffer_head *bh) +{ + uint64_t blkno = bh->b_blocknr; + + if (ub->ub_count + sizeof(uint64_t) + bh->b_size > ub->ub_size) + return -ENOMEM; + + if (copy_to_user(ub->ub_data + ub->ub_count, + &blkno, + sizeof(uint64_t))) + return -EFAULT; + ub->ub_count += sizeof(uint64_t); + + if (copy_to_user(ub->ub_data + ub->ub_count, + bh->b_data, + bh->b_size)) + return -EFAULT; + ub->ub_count += bh->b_size; + + return 0; +} + +/** + * get_meta - Read out all the metadata for a file + * @ip: the file + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +get_meta(struct gfs_inode *ip, void *arg) +{ + struct gfs_holder i_gh; + struct gfs_user_buffer ub; + int error; + + if (copy_from_user(&ub, arg, sizeof(struct gfs_user_buffer))) + return -EFAULT; + ub.ub_count = 0; + + error = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); + if (error) + return error; + + error = gfs_get_file_meta(ip, &ub); + if (error) + goto out; + + if (ip->i_di.di_type == GFS_FILE_DIR && + (ip->i_di.di_flags & GFS_DIF_EXHASH)) { + error = gfs_get_dir_meta(ip, &ub); + if (error) + goto out; + } + + if (ip->i_di.di_eattr) { + error = gfs_get_eattr_meta(ip, &ub); + if (error) + goto out; + } + + if (copy_to_user(arg, &ub, sizeof(struct gfs_user_buffer))) + error = -EFAULT; + + out: + gfs_glock_dq_uninit(&i_gh); + + return error; +} + +/** + * file_stat - return the struct gfs_dinode of a file to user space + * @ip: the inode + * @arg: where to copy to + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +file_stat(struct gfs_inode *ip, void *arg) +{ + struct gfs_holder i_gh; + struct gfs_dinode di; + int error; + + error = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); + if (error) + return error; + + memcpy(&di, &ip->i_di, sizeof(struct gfs_dinode)); + + gfs_glock_dq_uninit(&i_gh); + + if (copy_to_user(arg, &di, sizeof(struct gfs_dinode))) + return -EFAULT; + + return 0; +} + +/** + * do_get_super - Dump the superblock into a buffer + * @sb: The superblock + * @ptr: The buffer pointer + * + * Returns: 0 or error code + */ + +static int +do_get_super(struct gfs_sbd *sdp, void *arg) +{ + struct gfs_sb *sb; + struct gfs_holder sb_gh; + struct buffer_head *bh; + int error; + + sb = gmalloc(sizeof(struct gfs_sb)); + + error = gfs_glock_nq_num(sdp, + GFS_SB_LOCK, &gfs_meta_glops, + LM_ST_SHARED, 0, &sb_gh); + if (error) + goto out; + + error = gfs_dread(sdp, GFS_SB_ADDR >> sdp->sd_fsb2bb_shift, sb_gh.gh_gl, + DIO_START | DIO_WAIT, &bh); + if (error) { + gfs_glock_dq_uninit(&sb_gh); + goto out; + } + + gfs_sb_in(sb, bh->b_data); + brelse(bh); + + gfs_glock_dq_uninit(&sb_gh); + + if (copy_to_user(arg, sb, sizeof(struct gfs_sb))) + error = -EFAULT; + + out: + kfree(sb); + + return error; +} + +/** + * jt2ip - convert the file type 
in a jio struct to the right hidden ip + * @sdp: the filesystem + * @jt: the gfs_jio_structure + * + * Returns: The inode structure for the correct hidden file + */ + +static struct gfs_inode * +jt2ip(struct gfs_sbd *sdp, struct gfs_jio *jt) +{ + struct gfs_inode *ip = NULL; + + switch (jt->jio_file) { + case GFS_HIDDEN_JINDEX: + ip = sdp->sd_jiinode; + break; + + case GFS_HIDDEN_RINDEX: + ip = sdp->sd_riinode; + break; + + case GFS_HIDDEN_QUOTA: + ip = sdp->sd_qinode; + break; + + case GFS_HIDDEN_LICENSE: + ip = sdp->sd_linode; + break; + } + + return ip; +} + +/** + * jread_ioctl - Read from a journaled data file via ioctl + * @sdp: the filesystem + * @arg: The argument from ioctl + * + * Returns: Amount of data copied or error + */ + +static int +jread_ioctl(struct gfs_sbd *sdp, void *arg) +{ + struct gfs_jio jt; + struct gfs_inode *ip; + struct gfs_holder i_gh; + int error; + + if (copy_from_user(&jt, arg, sizeof(struct gfs_jio))) + return -EFAULT; + + ip = jt2ip(sdp, &jt); + if (!ip) + return -EINVAL; + + GFS_ASSERT_INODE(gfs_is_jdata(ip), ip,); + + if (!access_ok(VERIFY_WRITE, jt.jio_data, jt.jio_size)) + return -EFAULT; + + error = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); + if (error) + return error; + + error = gfs_readi(ip, jt.jio_data, jt.jio_offset, jt.jio_size, + gfs_copy2user); + + gfs_glock_dq_uninit(&i_gh); + + if (error < 0) + return error; + jt.jio_count = error; + + if (copy_to_user(arg, &jt, sizeof(struct gfs_jio))) + return -EFAULT; + + return 0; +} + +/** + * jwrite_ioctl - Write to a journaled file via ioctl + * @sdp: the filesystem + * @arg: The argument from ioctl + * + * Returns: Amount of data copied or error + */ + +static int +jwrite_ioctl(struct gfs_sbd *sdp, void *arg) +{ + struct gfs_jio jt; + struct gfs_inode *ip; + struct gfs_alloc *al = NULL; + struct gfs_holder i_gh; + unsigned int data_blocks, ind_blocks; + int alloc_required; + int error; + + if (copy_from_user(&jt, arg, sizeof(struct gfs_jio))) + return -EFAULT; + + ip = jt2ip(sdp, &jt); + if (!ip) + return -EINVAL; + + GFS_ASSERT_INODE(gfs_is_jdata(ip), ip,); + + if (!access_ok(VERIFY_READ, jt.jio_data, jt.jio_size)) + return -EFAULT; + + gfs_write_calc_reserv(ip, jt.jio_size, &data_blocks, &ind_blocks); + + error = gfs_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, + LM_FLAG_PRIORITY | GL_SYNC, &i_gh); + if (error) + return error; + + error = gfs_write_alloc_required(ip, jt.jio_offset, jt.jio_size, + &alloc_required); + if (error) + goto out; + + if (alloc_required) { + al = gfs_alloc_get(ip); + + error = gfs_quota_hold_m(ip, NO_QUOTA_CHANGE, + NO_QUOTA_CHANGE); + if (error) + goto out_alloc; + + al->al_requested_meta = ind_blocks + data_blocks; + + error = gfs_inplace_reserve(ip); + if (error) + goto out_qs; + + /* Trans may require: + All blocks for a RG bitmap, all the "data" blocks, whatever + indirect blocks we need, a modified dinode, and a quota change */ + + error = gfs_trans_begin(sdp, + 1 + al->al_rgd->rd_ri.ri_length + + ind_blocks + data_blocks, 1); + if (error) + goto out_relse; + } else { + /* Trans may require: + All the "data" blocks and a modified dinode. 
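/*
 * Aside (sketch, not part of the patch): the two "Trans may require"
 * comments in jwrite_ioctl() size the transaction differently depending
 * on whether the write has to allocate new blocks.  A rough stand-alone
 * model of the arithmetic; all inputs are invented example values.
 */

#include <stdio.h>

static unsigned int
write_trans_blocks(unsigned int data_blocks, unsigned int ind_blocks,
		   unsigned int rg_length, int alloc_required)
{
	if (alloc_required)	/* RG bitmaps + indirect + data + dinode */
		return 1 + rg_length + ind_blocks + data_blocks;

	return 1 + data_blocks;	/* dinode + data only */
}

int main(void)
{
	/* e.g. 16 data blocks, 1 indirect block, 2 bitmap blocks */
	printf("%u\n", write_trans_blocks(16, 1, 2, 1));	/* -> 20 */
	printf("%u\n", write_trans_blocks(16, 0, 0, 0));	/* -> 17 */
	return 0;
}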
*/
+
+		error = gfs_trans_begin(sdp, 1 + data_blocks, 0);
+		if (error)
+			goto out_relse;
+	}
+
+	error = gfs_writei(ip, jt.jio_data, jt.jio_offset, jt.jio_size,
+			   gfs_copy_from_user);
+	if (error >= 0) {
+		jt.jio_count = error;
+		error = 0;
+	}
+
+	gfs_trans_end(sdp);
+
+ out_relse:
+	if (alloc_required) {
+		GFS_ASSERT_INODE(error || al->al_alloced_meta, ip,);
+		gfs_inplace_release(ip);
+	}
+
+ out_qs:
+	if (alloc_required)
+		gfs_quota_unhold_m(ip);
+
+ out_alloc:
+	if (alloc_required)
+		gfs_alloc_put(ip);
+
+ out:
+	ip->i_gl->gl_vn++;
+	gfs_glock_dq_uninit(&i_gh);
+
+	if (!error && copy_to_user(arg, &jt, sizeof(struct gfs_jio)))
+		return -EFAULT;
+
+	return error;
+}
+
+/**
+ * jstat_ioctl - Stat a journaled file via ioctl
+ * @sdp: the filesystem
+ * @arg: The argument from ioctl
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+static int
+jstat_ioctl(struct gfs_sbd *sdp, void *arg)
+{
+	struct gfs_jio jt;
+	struct gfs_inode *ip;
+	struct gfs_holder i_gh;
+	int error;
+
+	if (copy_from_user(&jt, arg, sizeof(struct gfs_jio)))
+		return -EFAULT;
+
+	ip = jt2ip(sdp, &jt);
+	if (!ip)
+		return -EINVAL;
+
+	if (jt.jio_size < sizeof(struct gfs_dinode))
+		return -EINVAL;
+
+	error = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
+	if (error)
+		return error;
+
+	error = copy_to_user(jt.jio_data, &ip->i_di, sizeof(struct gfs_dinode));
+
+	gfs_glock_dq_uninit(&i_gh);
+
+	if (error)
+		return -EFAULT;
+
+	return 0;
+}
+
+/**
+ * jtrunc_ioctl - Truncate a journaled file via ioctl
+ * @sdp: the filesystem
+ * @arg: The argument from ioctl
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+static int
+jtrunc_ioctl(struct gfs_sbd *sdp, void *arg)
+{
+	struct gfs_jio jt;
+	struct gfs_inode *ip;
+	struct gfs_holder i_gh;
+	int error;
+
+	if (copy_from_user(&jt, arg, sizeof(struct gfs_jio)))
+		return -EFAULT;
+
+	ip = jt2ip(sdp, &jt);
+	if (!ip)
+		return -EINVAL;
+
+	error = gfs_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SYNC, &i_gh);
+	if (error)
+		return error;
+
+	error = gfs_truncatei(ip, jt.jio_offset, NULL);
+
+	ip->i_gl->gl_vn++;
+	gfs_glock_dq_uninit(&i_gh);
+
+	return error;
+}
+
+/**
+ * lock_dump - copy out info about the GFS lock space
+ * @sdp: the filesystem
+ * @arg: a pointer to a struct gfs_user_buffer in user space
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+static int
+lock_dump(struct gfs_sbd *sdp, void *arg)
+{
+	struct gfs_user_buffer ub;
+	int error;
+
+	if (copy_from_user(&ub, arg, sizeof(struct gfs_user_buffer)))
+		return -EFAULT;
+	ub.ub_count = 0;
+
+	error = gfs_dump_lockstate(sdp, &ub);
+	if (error)
+		return error;
+
+	if (copy_to_user(arg, &ub, sizeof(struct gfs_user_buffer)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/**
+ * stat_gfs_ioctl - Do a GFS-specific statfs
+ * @sdp: the filesystem
+ * @arg: the struct gfs_usage structure
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+static int
+stat_gfs_ioctl(struct gfs_sbd *sdp, void *arg)
+{
+	struct gfs_usage *u;
+	int error;
+
+	u = gmalloc(sizeof(struct gfs_usage));
+
+	error = gfs_stat_gfs(sdp, u, TRUE);
+	if (!error && copy_to_user(arg, u, sizeof(struct gfs_usage)))
+		error = -EFAULT;	/* fall through so u is freed */
+
+	kfree(u);
+
+	return error;
+}
+
+/**
+ * reclaim_ioctl - ioctl called to perform metadata reclamation
+ * @sdp: the filesystem
+ * @arg: a pointer to a struct gfs_reclaim_stats in user space
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+static int
+reclaim_ioctl(struct gfs_sbd *sdp, void *arg)
+{
+	struct gfs_reclaim_stats stats;
+	int error;
+
+	memset(&stats, 0, sizeof(struct
gfs_reclaim_stats)); + + error = gfs_reclaim_metadata(sdp, &stats); + if (error) + return error; + + if (copy_to_user(arg, &stats, sizeof(struct gfs_reclaim_stats))) + return -EFAULT; + + return 0; +} + +/** + * get_tune - pass the current tuneable parameters up to user space + * @sdp: the filesystem + * @arg: a pointer to a struct gfs_tune in user space + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +get_tune(struct gfs_sbd *sdp, void *arg) +{ + if (copy_to_user(arg, &sdp->sd_tune, sizeof(struct gfs_tune))) + return -EFAULT; + + return 0; +} + +/** + * set_tune - replace the current tuneable parameters with a set from user space + * @sdp: the filesystem + * @arg: a pointer to a struct gfs_tune in user space + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +set_tune(struct gfs_sbd *sdp, void *arg) +{ + struct gfs_tune *gt; + int error = 0; + + gt = gmalloc(sizeof(struct gfs_tune)); + + if (copy_from_user(gt, arg, sizeof(struct gfs_tune))) + error = -EFAULT; + else { + if (gt->gt_tune_version != GFS_TUNE_VERSION) { + printk("GFS: fsid=%s: invalid version of tuneable parameters\n", + sdp->sd_fsname); + error = -EINVAL; + } else + memcpy(&sdp->sd_tune, gt, sizeof(struct gfs_tune)); + } + + kfree(gt); + + return error; +} + +/** + * gfs_set_flag - set/clear a flag on an inode + * @ip: the inode + * @cmd: GFS_SET_FLAG or GFS_CLEAR_FLAG + * @arg: the flag to change (in user space) + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +gfs_set_flag(struct gfs_inode *ip, unsigned int cmd, void *arg) +{ + struct gfs_sbd *sdp = ip->i_sbd; + struct gfs_holder i_gh; + struct buffer_head *dibh; + uint32_t flag; + int error; + + if (copy_from_user(&flag, arg, sizeof(uint32_t))) + return -EFAULT; + + error = gfs_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); + if (error) + return error; + + error = -EACCES; + if (ip->i_di.di_uid != current->fsuid && !capable(CAP_FOWNER)) + goto out; + + error = -EINVAL; + + switch (flag) { + case GFS_DIF_EXHASH: + case GFS_DIF_UNUSED: + case GFS_DIF_EA_INDIRECT: + goto out; + + case GFS_DIF_JDATA: + if (ip->i_di.di_type != GFS_FILE_REG || ip->i_di.di_size) + goto out; + break; + + case GFS_DIF_DIRECTIO: + if (ip->i_di.di_type != GFS_FILE_REG) + goto out; + break; + + case GFS_DIF_IMMUTABLE: + case GFS_DIF_APPENDONLY: + case GFS_DIF_NOATIME: + case GFS_DIF_SYNC: + /* FixMe!!! 
*/ + error = -ENOSYS; + goto out; + + case GFS_DIF_INHERIT_DIRECTIO: + case GFS_DIF_INHERIT_JDATA: + if (ip->i_di.di_type != GFS_FILE_DIR) + goto out; + break; + + default: + goto out; + } + + error = gfs_trans_begin(sdp, 1, 0); + if (error) + goto out; + + error = gfs_get_inode_buffer(ip, &dibh); + if (error) + goto out_trans_end; + + if (cmd == GFS_SET_FLAG) + ip->i_di.di_flags |= flag; + else + ip->i_di.di_flags &= ~flag; + + gfs_trans_add_bh(ip->i_gl, dibh); + gfs_dinode_out(&ip->i_di, dibh->b_data); + + brelse(dibh); + + out_trans_end: + gfs_trans_end(sdp); + + out: + gfs_glock_dq_uninit(&i_gh); + + return error; +} + +/** + * handle_roll - Read a atomic_t as an unsigned int + * @a: a counter + * + * if @a is negative, reset it to zero + * + * Returns: the value of the counter + */ + +static unsigned int +handle_roll(atomic_t *a) +{ + int x = atomic_read(a); + if (x < 0) { + atomic_set(a, 0); + return 0; + } + return (unsigned int)x; +} + +/** + * fill_counters - Write a FS' counters into a buffer + * @sdp: the filesystem + * @buf: the buffer + * @size: the size of the buffer + * @count: where we are in the buffer + * + * Returns: errno + */ + +static int +fill_counters(struct gfs_sbd *sdp, + char *buf, unsigned int size, unsigned int *count) +{ + int error = 0; + + gfs_sprintf("sd_glock_count:locks::%d\n", + atomic_read(&sdp->sd_glock_count)); + gfs_sprintf("sd_glock_held_count:locks held::%d\n", + atomic_read(&sdp->sd_glock_held_count)); + gfs_sprintf("sd_inode_count:incore inodes::%d\n", + atomic_read(&sdp->sd_inode_count)); + gfs_sprintf("sd_bufdata_count:metadata buffers::%d\n", + atomic_read(&sdp->sd_bufdata_count)); + gfs_sprintf("sd_unlinked_ic_count:unlinked inodes::%d\n", + atomic_read(&sdp->sd_unlinked_ic_count)); + gfs_sprintf("sd_quota_count:quota IDs::%d\n", + atomic_read(&sdp->sd_quota_count)); + gfs_sprintf("sd_log_buffers:incore log buffers::%u\n", + sdp->sd_log_buffers); + gfs_sprintf("sd_log_seg_free:log segments free::%u\n", + sdp->sd_log_seg_free); + gfs_sprintf("ji_nsegment:log segments total::%u\n", + sdp->sd_jdesc.ji_nsegment); + gfs_sprintf("sd_mhc_count:meta header cache entries::%d\n", + atomic_read(&sdp->sd_mhc_count)); + gfs_sprintf("sd_depend_count:glock dependencies::%d\n", + atomic_read(&sdp->sd_depend_count)); + gfs_sprintf("sd_reclaim_count:glocks on reclaim list::%d\n", + atomic_read(&sdp->sd_reclaim_count)); + gfs_sprintf("sd_log_wrap:log wraps::%"PRIu64"\n", + sdp->sd_log_wrap); + gfs_sprintf("sd_fh2dentry_misses:fh2dentry misses:diff:%u\n", + handle_roll(&sdp->sd_fh2dentry_misses)); + gfs_sprintf("sd_reclaimed:glocks reclaimed:diff:%u\n", + handle_roll(&sdp->sd_reclaimed)); + gfs_sprintf("sd_glock_nq_calls:glock nq calls:diff:%u\n", + handle_roll(&sdp->sd_glock_nq_calls)); + gfs_sprintf("sd_glock_dq_calls:glock dq calls:diff:%u\n", + handle_roll(&sdp->sd_glock_dq_calls)); + gfs_sprintf("sd_glock_prefetch_calls:glock prefetch calls:diff:%u\n", + handle_roll(&sdp->sd_glock_prefetch_calls)); + gfs_sprintf("sd_lm_lock_calls:lm_lock calls:diff:%u\n", + handle_roll(&sdp->sd_lm_lock_calls)); + gfs_sprintf("sd_lm_unlock_calls:lm_unlock calls:diff:%u\n", + handle_roll(&sdp->sd_lm_unlock_calls)); + gfs_sprintf("sd_lm_callbacks:lm callbacks:diff:%u\n", + handle_roll(&sdp->sd_lm_callbacks)); + gfs_sprintf("sd_ops_address:address operations:diff:%u\n", + handle_roll(&sdp->sd_ops_address)); + gfs_sprintf("sd_ops_dentry:dentry operations:diff:%u\n", + handle_roll(&sdp->sd_ops_dentry)); + gfs_sprintf("sd_ops_export:export operations:diff:%u\n", + 
handle_roll(&sdp->sd_ops_export)); + gfs_sprintf("sd_ops_file:file operations:diff:%u\n", + handle_roll(&sdp->sd_ops_file)); + gfs_sprintf("sd_ops_inode:inode operations:diff:%u\n", + handle_roll(&sdp->sd_ops_inode)); + gfs_sprintf("sd_ops_super:super operations:diff:%u\n", + handle_roll(&sdp->sd_ops_super)); + gfs_sprintf("sd_ops_vm:vm operations:diff:%u\n", + handle_roll(&sdp->sd_ops_vm)); + + out: + return error; +} + +/** + * get_counters - return usage counters to user space + * @sdp: the filesystem + * @arg: the counter structure to fill + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +get_counters(struct gfs_sbd *sdp, void *arg) +{ + struct gfs_user_buffer ub; + unsigned int size = sdp->sd_tune.gt_lockdump_size; + char *buf; + int error; + + if (copy_from_user(&ub, arg, sizeof(struct gfs_user_buffer))) + return -EFAULT; + ub.ub_count = 0; + + if (size > ub.ub_size) + size = ub.ub_size; + + buf = kmalloc(size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + error = fill_counters(sdp, buf, size, &ub.ub_count); + if (!error) { + if (copy_to_user(ub.ub_data, buf, ub.ub_count) || + copy_to_user(arg, &ub, sizeof(struct gfs_user_buffer))) + error = -EFAULT; + } + + kfree(buf); + + return error; +} + +/** + * gfs_ioctli - filesystem independent ioctl function + * @ip: the inode the ioctl was on + * @cmd: the ioctl number + * @arg: the argument (still in user space) + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_ioctli(struct gfs_inode *ip, unsigned int cmd, void *arg) +{ + struct gfs_sbd *sdp = ip->i_sbd; + int error = 0; + + switch (cmd) { + case GFS_GET_META: + error = get_meta(ip, arg); + break; + + case GFS_FILE_STAT: + error = file_stat(ip, arg); + break; + + case GFS_SHRINK: + if (capable(CAP_SYS_ADMIN)) + gfs_gl_hash_clear(sdp, FALSE); + else + error = -EACCES; + break; + + case GFS_GET_ARGS: + if (copy_to_user(arg, &sdp->sd_args, + sizeof(struct gfs_args))) + error = -EFAULT; + break; + + case GFS_GET_LOCKSTRUCT: + if (copy_to_user(arg, &sdp->sd_lockstruct, + sizeof(struct lm_lockstruct))) + error = -EFAULT; + break; + + case GFS_GET_SUPER: + error = do_get_super(sdp, arg); + break; + + case GFS_JREAD: + if (capable(CAP_SYS_ADMIN)) + error = jread_ioctl(sdp, arg); + else + error = -EACCES; + break; + + case GFS_JWRITE: + if (capable(CAP_SYS_ADMIN)) + error = jwrite_ioctl(sdp, arg); + else + error = -EACCES; + break; + + case GFS_JSTAT: + error = jstat_ioctl(sdp, arg); + break; + + case GFS_JTRUNC: + if (capable(CAP_SYS_ADMIN)) + error = jtrunc_ioctl(sdp, arg); + else + error = -EACCES; + break; + + case GFS_LOCK_DUMP: + if (capable(CAP_SYS_ADMIN)) + error = lock_dump(sdp, arg); + else + error = -EACCES; + break; + + case GFS_STATGFS: + error = stat_gfs_ioctl(sdp, arg); + break; + + case GFS_FREEZE: + if (capable(CAP_SYS_ADMIN)) + error = gfs_freeze_fs(sdp); + else + error = -EACCES; + break; + + case GFS_UNFREEZE: + if (capable(CAP_SYS_ADMIN)) + gfs_unfreeze_fs(sdp); + else + error = -EACCES; + break; + + case GFS_RECLAIM_METADATA: + if (capable(CAP_SYS_ADMIN)) + error = reclaim_ioctl(sdp, arg); + else + error = -EACCES; + break; + + case GFS_QUOTA_SYNC: + if (capable(CAP_SYS_ADMIN)) + error = gfs_quota_sync(sdp); + else + error = -EACCES; + break; + + case GFS_QUOTA_REFRESH: + if (capable(CAP_SYS_ADMIN)) + error = gfs_quota_refresh(sdp, arg); + else + error = -EACCES; + break; + + case GFS_QUOTA_READ: + /* Permissions handled later */ + error = gfs_quota_read(sdp, arg); + break; + + case GFS_GET_TUNE: + error = get_tune(sdp, arg); + break; + + 
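/*
 * Aside (sketch, not part of the patch): every privileged command in
 * gfs_ioctli() repeats the same capable(CAP_SYS_ADMIN) test before
 * dispatch.  The switch works; the same policy can also be expressed as
 * data.  A stand-alone sketch of the table-driven alternative, with
 * invented names:
 */

#include <stddef.h>
#include <errno.h>

struct ioctl_op {
	unsigned int cmd;
	int admin_only;			/* needs CAP_SYS_ADMIN */
	int (*fn)(void *arg);
};

static int dispatch(const struct ioctl_op *tbl, size_t n,
		    unsigned int cmd, void *arg, int is_admin)
{
	size_t i;

	for (i = 0; i < n; i++) {
		if (tbl[i].cmd != cmd)
			continue;
		if (tbl[i].admin_only && !is_admin)
			return -EACCES;	/* check before calling */
		return tbl[i].fn(arg);
	}
	return -ENOTTY;			/* unknown ioctl */
}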
case GFS_SET_TUNE: + if (capable(CAP_SYS_ADMIN)) + error = set_tune(sdp, arg); + else + error = -EACCES; + break; + + case GFS_EATTR_GET: + /* Permissions handled later */ + error = gfs_get_eattr_ioctl(sdp, ip, arg); + break; + + case GFS_EATTR_SET: + /* Permissions handled later */ + error = gfs_set_eattr_ioctl(sdp, ip, arg); + break; + + case GFS_WHERE_ARE_YOU: + { + unsigned int x = GFS_MAGIC; + if (copy_to_user(arg, &x, sizeof(unsigned int))) + error = -EFAULT; + } + break; + + case GFS_SET_FLAG: + case GFS_CLEAR_FLAG: + /* Permissions handled later */ + error = gfs_set_flag(ip, cmd, arg); + break; + + case GFS_GET_COUNTERS: + error = get_counters(sdp, arg); + break; + + case GFS_FILE_FLUSH: + gfs_glock_force_drop(ip->i_gl); + break; + + default: + error = -ENOTTY; + break; + } + + return error; +} diff -urN linux-orig/fs/gfs/ioctl.h linux-patched/fs/gfs/ioctl.h --- linux-orig/fs/gfs/ioctl.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/ioctl.h 2004-06-30 13:27:49.345710666 -0500 @@ -0,0 +1,21 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __IOCTL_DOT_H__ +#define __IOCTL_DOT_H__ + +int gfs_add_bh_to_ub(struct gfs_user_buffer *ub, struct buffer_head *bh); + +int gfs_ioctli(struct gfs_inode *ip, unsigned int cmd, void *arg); + +#endif /* __IOCTL_DOT_H__ */ diff -urN linux-orig/fs/gfs/locking.c linux-patched/fs/gfs/locking.c --- linux-orig/fs/gfs/locking.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/locking.c 2004-06-30 13:27:49.345710666 -0500 @@ -0,0 +1,114 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
+**
+*******************************************************************************
+******************************************************************************/
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "gfs.h"
+#include "dio.h"
+#include "glock.h"
+#include "locking.h"
+#include "super.h"
+
+/**
+ * gfs_mount_lockproto - mount a locking protocol
+ * @sdp: the filesystem
+ * @silent: if TRUE, don't complain if the FS isn't a GFS filesystem
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+int
+gfs_mount_lockproto(struct gfs_sbd *sdp, int silent)
+{
+	struct gfs_sb *sb = NULL;
+	struct buffer_head *bh;
+	char *proto, *table, *p = NULL;
+	int error = 0;
+
+	proto = sdp->sd_args.ar_lockproto;
+	table = sdp->sd_args.ar_locktable;
+
+	/* Try to autodetect */
+
+	if (!proto[0] || !table[0]) {
+		error = gfs_dread(sdp, GFS_SB_ADDR >> sdp->sd_fsb2bb_shift, NULL,
+				  DIO_FORCE | DIO_START | DIO_WAIT, &bh);
+		if (error)
+			goto out;
+
+		sb = gmalloc(sizeof(struct gfs_sb));
+		gfs_sb_in(sb, bh->b_data);
+		brelse(bh);
+
+		error = gfs_check_sb(sdp, sb, silent);
+		if (error)
+			goto out;
+
+		if (!proto[0])
+			proto = sb->sb_lockproto;
+
+		if (!table[0])
+			table = sb->sb_locktable;
+	}
+
+	error = lm_mount(proto, table, sdp->sd_args.ar_hostdata,
+			 gfs_glock_cb, sdp,
+			 GFS_MIN_LVB_SIZE, &sdp->sd_lockstruct);
+	if (error) {
+		printk("GFS: can't mount proto = %s, table = %s, hostdata = %s\n",
+		       proto, table, sdp->sd_args.ar_hostdata);
+		goto out;
+	}
+
+	GFS_ASSERT_SBD(sdp->sd_lockstruct.ls_lockspace, sdp,);
+	GFS_ASSERT_SBD(sdp->sd_lockstruct.ls_ops, sdp,);
+	GFS_ASSERT_SBD(sdp->sd_lockstruct.ls_lvb_size >= GFS_MIN_LVB_SIZE,
+		       sdp,);
+
+	if (!*table) {
+		table = p = gmalloc(sizeof(sdp->sd_vfs->s_id) + 1);
+		strncpy(table, sdp->sd_vfs->s_id, sizeof(sdp->sd_vfs->s_id));
+		table[sizeof(sdp->sd_vfs->s_id)] = 0;
+	}
+
+	snprintf(sdp->sd_fsname, 256, "%s.%u", table,
+		 sdp->sd_lockstruct.ls_jid);
+
+	if (p)
+		kfree(p);
+
+ out:
+	if (sb)
+		kfree(sb);
+
+	return error;
+}
+
+/**
+ * gfs_unmount_lockproto - Unmount lock protocol
+ * @sdp: The GFS superblock
+ *
+ */
+
+void
+gfs_unmount_lockproto(struct gfs_sbd *sdp)
+{
+	lm_unmount(&sdp->sd_lockstruct);
+}
diff -urN linux-orig/fs/gfs/locking.h linux-patched/fs/gfs/locking.h
--- linux-orig/fs/gfs/locking.h	1969-12-31 18:00:00.000000000 -0600
+++ linux-patched/fs/gfs/locking.h	2004-06-30 13:27:49.345710666 -0500
@@ -0,0 +1,20 @@
+/******************************************************************************
+*******************************************************************************
+**
+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+**
+** This copyrighted material is made available to anyone wishing to use,
+** modify, copy, or redistribute it subject to the terms and conditions
+** of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#ifndef __LOCKING_DOT_H__
+#define __LOCKING_DOT_H__
+
+int gfs_mount_lockproto(struct gfs_sbd *sdp, int silent);
+void gfs_unmount_lockproto(struct gfs_sbd *sdp);
+
+#endif /* __LOCKING_DOT_H__ */
diff -urN linux-orig/fs/gfs/log.c linux-patched/fs/gfs/log.c
--- linux-orig/fs/gfs/log.c	1969-12-31 18:00:00.000000000 -0600
+++ linux-patched/fs/gfs/log.c	2004-06-30 13:27:49.346710434 -0500
@@ -0,0 +1,1315 @@
+/******************************************************************************
+*******************************************************************************
+**
+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+**
+** This copyrighted material is made available to anyone wishing to use,
+** modify, copy, or redistribute it subject to the terms and conditions
+** of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+/*
+   What rolls down stairs
+   Alone or in pairs
+   Rolls over your neighbor's dog.
+   What's great for a snack
+   And fits on your back
+   It's log, log, log!
+   It's lo-og, lo-og,
+   It's big, it's heavy, it's wood.
+   It's lo-og, lo-og,
+   It's better than bad, it's good.
+   Everyone wants a log,
+   You're gonna love it, log
+   Come on and get your log,
+   Everyone needs a log...
+   LOG... FROM BLAMMO!
+
+   -- The Ren and Stimpy Show
+*/
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "gfs.h"
+#include "dio.h"
+#include "log.h"
+#include "lops.h"
+
+/**
+ * gfs_struct2blk - compute the number of log descriptor blocks needed
+ * @sdp: the filesystem
+ * @nstruct: the number of structures
+ * @ssize: the size of the structures
+ *
+ * Compute the number of log descriptor blocks needed to hold a certain number
+ * of structures of a certain size.
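/*
 * Aside (sketch plus worked example, not part of the patch):
 * gfs_struct2blk() below packs nstruct entries of ssize bytes into log
 * blocks, where only the first block loses space to the
 * gfs_log_descriptor header.  Stand-alone model; the block and header
 * sizes are invented example values.
 */

#include <stdio.h>

#define DIV_RU(x, y) (((x) + (y) - 1) / (y))	/* round-up division */

static unsigned int
struct2blk(unsigned int bsize, unsigned int hdr,
	   unsigned int nstruct, unsigned int ssize)
{
	unsigned int blks = 1;
	unsigned int first = (bsize - hdr) / ssize;	/* fit in block 1 */

	if (nstruct > first)
		blks += DIV_RU(nstruct - first, bsize / ssize);
	return blks;
}

int main(void)
{
	/* 4096-byte blocks, 64-byte descriptor, 24-byte entries:
	   block 1 holds 168 entries, later blocks hold 170 each. */
	printf("%u\n", struct2blk(4096, 64, 100, 24));	/* -> 1 */
	printf("%u\n", struct2blk(4096, 64, 400, 24));	/* -> 3 */
	return 0;
}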
+ * + * Returns: the number of blocks needed + */ + +unsigned int +gfs_struct2blk(struct gfs_sbd *sdp, unsigned int nstruct, unsigned int ssize) +{ + unsigned int blks; + unsigned int first, second; + + blks = 1; + first = (sdp->sd_sb.sb_bsize - sizeof(struct gfs_log_descriptor)) / ssize; + + if (nstruct > first) { + second = sdp->sd_sb.sb_bsize / ssize; + blks += DIV_RU(nstruct - first, second); + } + + return blks; +} + +/** + * gfs_blk2seg - Convert number of blocks into number of segments + * @sdp: The GFS superblock + * @blocks: The number of blocks + * + * Returns: The number of journal segments + */ + +unsigned int +gfs_blk2seg(struct gfs_sbd *sdp, unsigned int blocks) +{ + return DIV_RU(blocks, sdp->sd_sb.sb_seg_size - 1); +} + +/** + * log_distance - Compute distance between two journal blocks + * @sdp: The GFS superblock + * @newer: The most recent journal block of the pair + * @older: The older journal block of the pair + * + * Compute the distance (in the journal direction) between two + * blocks in the journal + * + * Returns: the distance in blocks + */ + +static __inline__ unsigned int +log_distance(struct gfs_sbd *sdp, uint64_t newer, uint64_t older) +{ + int64_t dist; + + dist = newer - older; + if (dist < 0) + dist += sdp->sd_jdesc.ji_nsegment * sdp->sd_sb.sb_seg_size; + + return dist; +} + +/** + * log_incr_head - Increment journal head + * @sdp: The GFS superblock + * @head: the variable holding the head of the journal + * + * Increment journal head by one. + * At the end of the journal, wrap head back to the start. + * + */ + +static __inline__ void +log_incr_head(struct gfs_sbd *sdp, uint64_t * head) +{ + struct gfs_jindex *jdesc = &sdp->sd_jdesc; + + if (++*head == + jdesc->ji_addr + jdesc->ji_nsegment * sdp->sd_sb.sb_seg_size) + *head = jdesc->ji_addr; +} + +/** + * gfs_ail_start - Start I/O on the AIL + * @sdp: the filesystem + * @flags: + * + */ + +void +gfs_ail_start(struct gfs_sbd *sdp, int flags) +{ + struct list_head *head = &sdp->sd_log_ail; + struct list_head *first, *tmp; + struct gfs_trans *first_tr, *tr; + + gfs_log_lock(sdp); + + if (list_empty(head)) { + gfs_log_unlock(sdp); + return; + } + + first = head->prev; + first_tr = list_entry(first, struct gfs_trans, tr_list); + gfs_ail_start_trans(sdp, first_tr); + + if (flags & DIO_ALL) + first_tr = NULL; + + for (tmp = first->prev; tmp != head; tmp = tmp->prev) { + if (first_tr && gfs_ail_empty_trans(sdp, first_tr)) + break; + + tr = list_entry(tmp, struct gfs_trans, tr_list); + gfs_ail_start_trans(sdp, tr); + } + + gfs_log_unlock(sdp); +} + +/** + * current_tail - Find block number of current log tail + * @sdp: The GFS superblock + * + * Find the block number of the current tail of the log. + * Assumes that the log lock is held. 
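/*
 * Aside (stand-alone sketch, not part of the patch): log_distance()
 * above is ordinary circular-buffer arithmetic; a negative raw
 * difference means the newer position wrapped past the end of the
 * journal, so one full journal length is added back.
 */

#include <stdint.h>
#include <stdio.h>

static uint64_t
circ_distance(uint64_t newer, uint64_t older, uint64_t journal_blocks)
{
	int64_t dist = (int64_t)(newer - older);

	if (dist < 0)
		dist += journal_blocks;	/* newer wrapped past the end */
	return (uint64_t)dist;
}

int main(void)
{
	/* 1000-block journal, head wrapped to block 5, tail at 990 */
	printf("%llu\n",
	       (unsigned long long)circ_distance(5, 990, 1000));	/* 15 */
	return 0;
}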
+ * + * Returns: The tail's block number + */ + +static uint64_t +current_tail(struct gfs_sbd *sdp) +{ + struct gfs_trans *tr; + uint64_t tail; + + if (list_empty(&sdp->sd_log_ail)) { + tail = sdp->sd_log_head; + + if (!gfs_log_is_header(sdp, tail)) { + tail--; + GFS_ASSERT_SBD(gfs_log_is_header(sdp, tail), sdp,); + } + } else { + tr = list_entry(sdp->sd_log_ail.prev, + struct gfs_trans, tr_list); + tail = tr->tr_first_head; + } + + return tail; +} + +/** + * gfs_ail_empty - move the tail of the log forward (if possible) + * @sdp: the filesystem + * + * Returns: TRUE if the AIL is empty + */ + +int +gfs_ail_empty(struct gfs_sbd *sdp) +{ + struct list_head *head, *tmp, *prev; + struct gfs_trans *tr; + uint64_t oldtail, newtail; + unsigned int dist; + unsigned int segments; + int ret; + + gfs_log_lock(sdp); + + oldtail = current_tail(sdp); + + for (head = &sdp->sd_log_ail, tmp = head->prev, prev = tmp->prev; + tmp != head; + tmp = prev, prev = tmp->prev) { + tr = list_entry(tmp, struct gfs_trans, tr_list); + + if (gfs_ail_empty_trans(sdp, tr)) { + list_del(&tr->tr_list); + kfree(tr); + } + } + + newtail = current_tail(sdp); + + if (oldtail != newtail) { + dist = log_distance(sdp, newtail, oldtail); + + segments = dist / sdp->sd_sb.sb_seg_size; + GFS_ASSERT_SBD(segments * sdp->sd_sb.sb_seg_size == dist, sdp,); + + spin_lock(&sdp->sd_log_seg_lock); + sdp->sd_log_seg_free += segments; + GFS_ASSERT_SBD(sdp->sd_log_seg_free < sdp->sd_jdesc.ji_nsegment, + sdp,); + spin_unlock(&sdp->sd_log_seg_lock); + } + + ret = list_empty(head); + + gfs_log_unlock(sdp); + + return ret; +} + +/** + * gfs_log_reserve - Make a log reservation + * @sdp: The GFS superblock + * @segments: The number of segments to reserve + * @jump_queue: if TRUE, don't care about fairness ordering + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_log_reserve(struct gfs_sbd *sdp, unsigned int segments, int jump_queue) +{ + unsigned long start; + struct list_head list; + unsigned int try = 0; + + GFS_ASSERT_SBD(segments, sdp,); + + if (segments >= sdp->sd_jdesc.ji_nsegment) { + printk("GFS: fsid=%s: error reserving log space (%u, %u)\n", + sdp->sd_fsname, segments, sdp->sd_jdesc.ji_nsegment); + return -EINVAL; + } + + INIT_LIST_HEAD(&list); + start = jiffies; + + for (;;) { + spin_lock(&sdp->sd_log_seg_lock); + + if (list_empty(&list)) { + if (jump_queue) + list_add(&list, &sdp->sd_log_seg_list); + else { + list_add_tail(&list, &sdp->sd_log_seg_list); + while (sdp->sd_log_seg_list.next != &list) { + DECLARE_WAITQUEUE(__wait_chan, current); + current->state = TASK_UNINTERRUPTIBLE; + add_wait_queue(&sdp->sd_log_seg_wait, + &__wait_chan); + spin_unlock(&sdp->sd_log_seg_lock); + schedule(); + spin_lock(&sdp->sd_log_seg_lock); + remove_wait_queue(&sdp->sd_log_seg_wait, + &__wait_chan); + current->state = TASK_RUNNING; + } + } + } + + if (sdp->sd_log_seg_free >= segments) { + sdp->sd_log_seg_free -= segments; + list_del(&list); + spin_unlock(&sdp->sd_log_seg_lock); + wake_up(&sdp->sd_log_seg_wait); + break; + } + + spin_unlock(&sdp->sd_log_seg_lock); + + if (try) { + gfs_log_flush(sdp); + gfs_ail_start(sdp, 0); + } + + gfs_ail_empty(sdp); + + try++; + if (time_after_eq(jiffies, start + 60 * HZ)) + printk("GFS: fsid=%s: pid %d can't make log reservation (asking for %u segments)\n", + sdp->sd_fsname, current->pid, segments); + yield(); + } + + return 0; +} + +/** + * gfs_log_release - Release a given number of log segments + * @sdp: The GFS superblock + * @segments: The number of segments + * + */ + +void +gfs_log_release(struct 
gfs_sbd *sdp, unsigned int segments) +{ + spin_lock(&sdp->sd_log_seg_lock); + sdp->sd_log_seg_free += segments; + GFS_ASSERT_SBD(sdp->sd_log_seg_free < sdp->sd_jdesc.ji_nsegment, sdp,); + spin_unlock(&sdp->sd_log_seg_lock); +} + +/** + * log_get_header - Get the journal header buffer + * @sdp: The GFS superblock + * @tr: The transaction + * @next: TRUE if this is not a continuation of an existing transaction + * + * Returns: the log buffer + */ + +static struct gfs_log_buf * +log_get_header(struct gfs_sbd *sdp, struct gfs_trans *tr, int next) +{ + struct gfs_log_buf *lb; + struct list_head *bmem; + struct gfs_log_header header; + + GFS_ASSERT_SBD(gfs_log_is_header(sdp, tr->tr_log_head), sdp,); + + GFS_ASSERT_SBD(tr->tr_num_free_bufs && + !list_empty(&tr->tr_free_bufs), sdp,); + lb = list_entry(tr->tr_free_bufs.next, struct gfs_log_buf, lb_list); + list_del(&lb->lb_list); + tr->tr_num_free_bufs--; + + GFS_ASSERT_SBD(tr->tr_num_free_bmem && + !list_empty(&tr->tr_free_bmem), sdp,); + bmem = tr->tr_free_bmem.next; + list_del(bmem); + tr->tr_num_free_bmem--; + + gfs_logbh_init(sdp, &lb->lb_bh, tr->tr_log_head, (char *)bmem); + memset(bmem, 0, sdp->sd_sb.sb_bsize); + + memset(&header, 0, sizeof (header)); + + if (next) { + header.lh_header.mh_magic = GFS_MAGIC; + header.lh_header.mh_type = GFS_METATYPE_LH; + header.lh_header.mh_format = GFS_FORMAT_LH; + header.lh_first = tr->tr_log_head; + header.lh_sequence = sdp->sd_sequence + 1; + header.lh_tail = current_tail(sdp); + header.lh_last_dump = sdp->sd_log_dump_last; + } else { + header.lh_header.mh_magic = GFS_MAGIC; + header.lh_header.mh_type = GFS_METATYPE_LH; + header.lh_header.mh_format = GFS_FORMAT_LH; + header.lh_first = tr->tr_first_head; + header.lh_sequence = sdp->sd_sequence; + header.lh_tail = current_tail(sdp); + header.lh_last_dump = sdp->sd_log_dump_last; + + list_add(&lb->lb_list, &tr->tr_bufs); + } + + gfs_log_header_out(&header, lb->lb_bh.b_data); + gfs_log_header_out(&header, + lb->lb_bh.b_data + GFS_BASIC_BLOCK - + sizeof(struct gfs_log_header)); + + log_incr_head(sdp, &tr->tr_log_head); + + return lb; +} + +/** + * gfs_log_get_buf - Get a buffer to use for control data + * @sdp: The GFS superblock + * @tr: The GFS transaction + * + * Generate a regular buffer for use in the journal as control data.
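+ * + * If the current log head falls on a segment boundary, a journal header is + * written there first (via log_get_header) before this control buffer is + * handed out.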
+ * + * Returns: the buffer + */ + +struct gfs_log_buf * +gfs_log_get_buf(struct gfs_sbd *sdp, struct gfs_trans *tr) +{ + struct gfs_log_buf *lb; + struct list_head *bmem; + + if (gfs_log_is_header(sdp, tr->tr_log_head)) + log_get_header(sdp, tr, FALSE); + + GFS_ASSERT_SBD(tr->tr_num_free_bufs && + !list_empty(&tr->tr_free_bufs), sdp,); + lb = list_entry(tr->tr_free_bufs.next, struct gfs_log_buf, lb_list); + list_del(&lb->lb_list); + tr->tr_num_free_bufs--; + + GFS_ASSERT_SBD(tr->tr_num_free_bmem + && !list_empty(&tr->tr_free_bmem), sdp,); + bmem = tr->tr_free_bmem.next; + list_del(bmem); + tr->tr_num_free_bmem--; + + gfs_logbh_init(sdp, &lb->lb_bh, tr->tr_log_head, (char *)bmem); + memset(bmem, 0, sdp->sd_sb.sb_bsize); + + list_add(&lb->lb_list, &tr->tr_bufs); + + log_incr_head(sdp, &tr->tr_log_head); + + return lb; +} + +/** + * gfs_log_fake_buf - Build a fake buffer head + * @sdp: the filesystem + * @tr: the transaction this is part of + * @data: the data the buffer should point to + * @unlock: a buffer that is unlocked as this struct gfs_log_buf is torn down + * + */ + +void +gfs_log_fake_buf(struct gfs_sbd *sdp, struct gfs_trans *tr, char *data, + struct buffer_head *unlock) +{ + struct gfs_log_buf *lb; + + if (gfs_log_is_header(sdp, tr->tr_log_head)) + log_get_header(sdp, tr, FALSE); + + GFS_ASSERT_SBD(tr->tr_num_free_bufs && + !list_empty(&tr->tr_free_bufs), sdp,); + lb = list_entry(tr->tr_free_bufs.next, struct gfs_log_buf, lb_list); + list_del(&lb->lb_list); + tr->tr_num_free_bufs--; + + gfs_logbh_init(sdp, &lb->lb_bh, tr->tr_log_head, data); + lb->lb_unlock = unlock; + + list_add(&lb->lb_list, &tr->tr_bufs); + + log_incr_head(sdp, &tr->tr_log_head); +} + +/** + * check_seg_usage - Check that we didn't use too many segments + * @sdp: The GFS superblock + * @tr: The transaction + * + * Also, make sure we don't ever get to a point where there are + * no dumps in the log (corrupting the log). Panic before we let + * that happen.
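+ * + * The check compares the post-transaction head position (offset plus wrap + * count) against the location of the last log dump; advancing past that + * dump would leave the log without one.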
+ * + */ + +static void +check_seg_usage(struct gfs_sbd *sdp, struct gfs_trans *tr) +{ + struct gfs_jindex *jdesc = &sdp->sd_jdesc; + unsigned int dist; + unsigned int segments; + uint64_t head_off, head_wrap; + uint64_t dump_off, dump_wrap; + + dist = log_distance(sdp, tr->tr_log_head, tr->tr_first_head); + + segments = dist / sdp->sd_sb.sb_seg_size; + GFS_ASSERT_SBD(segments * sdp->sd_sb.sb_seg_size == dist, sdp,); + GFS_ASSERT_SBD(segments == tr->tr_seg_reserved, sdp,); + + if (sdp->sd_log_dump_last) { + head_off = tr->tr_first_head + + tr->tr_seg_reserved * sdp->sd_sb.sb_seg_size; + head_wrap = sdp->sd_log_wrap; + if (head_off >= jdesc->ji_addr + + jdesc->ji_nsegment * sdp->sd_sb.sb_seg_size) { + head_off -= jdesc->ji_nsegment * sdp->sd_sb.sb_seg_size; + head_wrap++; + } + + dump_off = sdp->sd_log_dump_last; + dump_wrap = sdp->sd_log_dump_last_wrap; + + switch (head_wrap - dump_wrap) { + case 0: + break; + + case 1: + if (head_off < dump_off) + break; + else if (head_off == dump_off && + (tr->tr_flags & TRF_LOG_DUMP)) + break; + + default: + GFS_ASSERT_SBD(FALSE, sdp, + printk("head_off = %"PRIu64", head_wrap = %"PRIu64"\n", + head_off, head_wrap); + printk("dump_off = %"PRIu64", dump_wrap = %"PRIu64"\n", + dump_off, dump_wrap);); + break; + } + } +} + +/** + * log_free_buf - Free a struct gfs_log_buf (and possibly the data it points to) + * @sdp: the filesystem + * @lb: the log buffer + * + */ + +static void +log_free_buf(struct gfs_sbd *sdp, struct gfs_log_buf *lb) +{ + char *bmem; + + bmem = lb->lb_bh.b_data; + gfs_logbh_uninit(sdp, &lb->lb_bh); + + if (lb->lb_unlock) + gfs_unlock_buffer(lb->lb_unlock); + else + kfree(bmem); + + kfree(lb); +} + +/** + * sync_trans - Add "last" descriptor to transaction and sync to disk + * @sdp: The GFS superblock + * @tr: The transaction + * + * Add the "last" descriptor on to the end of the current transaction + * and sync it out to disk. Don't commit it yet, though. + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +sync_trans(struct gfs_sbd *sdp, struct gfs_trans *tr) +{ + struct list_head *tmp, *head, *prev; + struct gfs_log_descriptor desc; + struct gfs_log_buf *lb; + uint64_t blk; + int error = 0, e; + + /* Build LAST descriptor */ + + lb = gfs_log_get_buf(sdp, tr); + + memset(&desc, 0, sizeof(struct gfs_log_descriptor)); + desc.ld_header.mh_magic = GFS_MAGIC; + desc.ld_header.mh_type = GFS_METATYPE_LD; + desc.ld_header.mh_format = GFS_FORMAT_LD; + desc.ld_type = GFS_LOG_DESC_LAST; + desc.ld_length = 1; + for (blk = tr->tr_log_head; !gfs_log_is_header(sdp, blk); blk++) + desc.ld_length++; + gfs_desc_out(&desc, lb->lb_bh.b_data); + + while (!gfs_log_is_header(sdp, tr->tr_log_head)) + log_incr_head(sdp, &tr->tr_log_head); + + check_seg_usage(sdp, tr); + + /* Start I/O + Go in "prev" direction to start the I/O in order. */ + + for (head = &tr->tr_bufs, tmp = head->prev, prev = tmp->prev; + tmp != head; + tmp = prev, prev = tmp->prev) { + lb = list_entry(tmp, struct gfs_log_buf, lb_list); + + if (error) { + list_del(&lb->lb_list); + log_free_buf(sdp, lb); + } else { + e = gfs_logbh_start(sdp, &lb->lb_bh); + if (e) { + list_del(&lb->lb_list); + log_free_buf(sdp, lb); + error = e; + } + } + } + + /* Wait on I/O + Go in "next" direction to minimize sleeps/wakeups. 
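+ Waiting in the opposite order from submission means the first wait + is on the last buffer issued; once it completes, the earlier + submissions are normally done too.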
*/ + + while (!list_empty(&tr->tr_bufs)) { + lb = list_entry(tr->tr_bufs.next, struct gfs_log_buf, lb_list); + + e = gfs_logbh_wait(sdp, &lb->lb_bh); + if (e) + error = e; + + list_del(&lb->lb_list); + log_free_buf(sdp, lb); + } + + return error; +} + +/** + * commit_trans - Commit the current transaction + * @sdp: The GFS superblock + * @tr: The transaction + * + * Write next header to commit + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +commit_trans(struct gfs_sbd *sdp, struct gfs_trans *tr) +{ + struct gfs_log_buf *lb; + int error; + + lb = log_get_header(sdp, tr, TRUE); + + error = gfs_logbh_start(sdp, &lb->lb_bh); + if (!error) + error = gfs_logbh_wait(sdp, &lb->lb_bh); + + log_free_buf(sdp, lb); + + return error; +} + +/** + * disk_commit - Write a transaction to the on-disk journal + * @sdp: The GFS superblock + * @tr: The transaction + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +disk_commit(struct gfs_sbd *sdp, struct gfs_trans *tr) +{ + uint64_t last_dump, last_dump_wrap; + int error = 0; + + GFS_ASSERT_SBD(!test_bit(SDF_ROFS, &sdp->sd_flags), sdp,); + tr->tr_log_head = sdp->sd_log_head; + tr->tr_first_head = tr->tr_log_head - 1; + GFS_ASSERT_SBD(gfs_log_is_header(sdp, tr->tr_first_head), sdp,); + + LO_BUILD_BHLIST(sdp, tr); + + GFS_ASSERT_SBD(!list_empty(&tr->tr_bufs), sdp,); + + error = sync_trans(sdp, tr); + if (error) { + /* Eat unusable commit buffer */ + log_free_buf(sdp, log_get_header(sdp, tr, TRUE)); + goto out; + } + + if (tr->tr_flags & TRF_LOG_DUMP) { + /* This commit header should point to the log dump we're + committing as the current one. But save the copy of the + old one in case we have problems committing the dump. */ + + last_dump = sdp->sd_log_dump_last; + last_dump_wrap = sdp->sd_log_dump_last_wrap; + + sdp->sd_log_dump_last = tr->tr_first_head; + sdp->sd_log_dump_last_wrap = sdp->sd_log_wrap; + + error = commit_trans(sdp, tr); + if (error) { + sdp->sd_log_dump_last = last_dump; + sdp->sd_log_dump_last_wrap = last_dump_wrap; + goto out; + } + } else { + error = commit_trans(sdp, tr); + if (error) + goto out; + } + + if (sdp->sd_log_head > tr->tr_log_head) + sdp->sd_log_wrap++; + sdp->sd_log_head = tr->tr_log_head; + sdp->sd_sequence++; + + out: + GFS_ASSERT_SBD(!tr->tr_num_free_bufs && + list_empty(&tr->tr_free_bufs), sdp,); + GFS_ASSERT_SBD(!tr->tr_num_free_bmem && + list_empty(&tr->tr_free_bmem), sdp,); + + return error; +} + +/** + * add_trans_to_ail - Add an on-disk committed transaction to the AIL + * @sdp: the filesystem + * @tr: the transaction + * + */ + +static void +add_trans_to_ail(struct gfs_sbd *sdp, struct gfs_trans *tr) +{ + struct gfs_log_element *le; + + while (!list_empty(&tr->tr_elements)) { + le = list_entry(tr->tr_elements.next, + struct gfs_log_element, le_list); + LO_ADD_TO_AIL(sdp, le); + } + + list_add(&tr->tr_list, &sdp->sd_log_ail); +} + +/** + * log_refund - Refund log segments to the free pool + * @sdp: The GFS superblock + * @tr: The transaction to examine + * + * Look at the number of segments reserved for this transaction and the + * number of segments actually needed for it. If they aren't the + * same, refund the difference to the free segment pool.
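+ * + * The needed segment count is recomputed from LO_TRANS_SIZE() and + * gfs_blk2seg(), the same sizing formula gfs_log_commit() used to make + * the original reservation.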
+ * + * Called with the log lock held + */ + +static void +log_refund(struct gfs_sbd *sdp, struct gfs_trans *tr) +{ + struct gfs_log_buf *lb; + struct list_head *bmem; + unsigned int num_bufs = 0, num_bmem = 0; + unsigned int segments; + + LO_TRANS_SIZE(sdp, tr, NULL, NULL, &num_bufs, &num_bmem); + + segments = gfs_blk2seg(sdp, num_bufs + 1); + num_bufs += segments + 1; + num_bmem += segments + 1; + + if (tr->tr_seg_reserved > segments) { + spin_lock(&sdp->sd_log_seg_lock); + sdp->sd_log_seg_free += tr->tr_seg_reserved - segments; + GFS_ASSERT_SBD(sdp->sd_log_seg_free < sdp->sd_jdesc.ji_nsegment, + sdp,); + spin_unlock(&sdp->sd_log_seg_lock); + + tr->tr_seg_reserved = segments; + } else + GFS_ASSERT_SBD(tr->tr_seg_reserved == segments, sdp,); + + GFS_ASSERT_SBD(tr->tr_num_free_bufs >= num_bufs, sdp,); + while (tr->tr_num_free_bufs > num_bufs) { + lb = list_entry(tr->tr_free_bufs.next, + struct gfs_log_buf, lb_list); + list_del(&lb->lb_list); + kfree(lb); + tr->tr_num_free_bufs--; + } + + GFS_ASSERT_SBD(tr->tr_num_free_bmem >= num_bmem, sdp,); + while (tr->tr_num_free_bmem > num_bmem) { + bmem = tr->tr_free_bmem.next; + list_del(bmem); + kfree(bmem); + tr->tr_num_free_bmem--; + } +} + +/** + * trans_combine - combine two transactions + * @sdp: the filesystem + * @tr: the surviving transaction + * @new_tr: the transaction that gets freed + * + * Assumes that the two transactions are independent. + */ + +static void +trans_combine(struct gfs_sbd *sdp, struct gfs_trans *tr, + struct gfs_trans *new_tr) +{ + struct gfs_log_element *le; + struct gfs_log_buf *lb; + struct list_head *bmem; + + tr->tr_file = __FILE__; + tr->tr_line = __LINE__; + tr->tr_seg_reserved += new_tr->tr_seg_reserved; + tr->tr_flags |= new_tr->tr_flags; + tr->tr_num_free_bufs += new_tr->tr_num_free_bufs; + tr->tr_num_free_bmem += new_tr->tr_num_free_bmem; + + /* Combine the elements of the two transactions */ + + while (!list_empty(&new_tr->tr_elements)) { + le = list_entry(new_tr->tr_elements.next, + struct gfs_log_element, le_list); + GFS_ASSERT_SBD(le->le_trans == new_tr, sdp,); + le->le_trans = tr; + list_move(&le->le_list, &tr->tr_elements); + } + + LO_TRANS_COMBINE(sdp, tr, new_tr); + + while (!list_empty(&new_tr->tr_free_bufs)) { + lb = list_entry(new_tr->tr_free_bufs.next, + struct gfs_log_buf, lb_list); + list_move(&lb->lb_list, &tr->tr_free_bufs); + new_tr->tr_num_free_bufs--; + } + while (!list_empty(&new_tr->tr_free_bmem)) { + bmem = new_tr->tr_free_bmem.next; + list_move(bmem, &tr->tr_free_bmem); + new_tr->tr_num_free_bmem--; + } + + GFS_ASSERT_SBD(!new_tr->tr_num_free_bufs, sdp,); + GFS_ASSERT_SBD(!new_tr->tr_num_free_bmem, sdp,); + + kfree(new_tr); +} + +/** + * log_flush_internal - flush incore transactions + * @sdp: the filesystem + * @gl: The glock structure to flush. 
If NULL, flush the whole incore log + * + */ + +static void +log_flush_internal(struct gfs_sbd *sdp, struct gfs_glock *gl) +{ + struct gfs_trans *trans = NULL, *tr; + int error; + + gfs_log_lock(sdp); + + if (list_empty(&sdp->sd_log_incore)) + goto out; + + if (gl) { + if (!gl->gl_incore_le.le_trans) + goto out; + + trans = gl->gl_incore_le.le_trans; + + list_del(&trans->tr_list); + } else { + while (!list_empty(&sdp->sd_log_incore)) { + tr = list_entry(sdp->sd_log_incore.next, + struct gfs_trans, tr_list); + + list_del(&tr->tr_list); + + if (trans) + trans_combine(sdp, trans, tr); + else + trans = tr; + } + } + + GFS_ASSERT_SBD(trans, sdp,); + + log_refund(sdp, trans); + + /* Actually do the stuff to commit the transaction */ + + error = disk_commit(sdp, trans); + if (error) + gfs_io_error(sdp); + + add_trans_to_ail(sdp, trans); + + if (log_distance(sdp, sdp->sd_log_head, sdp->sd_log_dump_last) * GFS_DUMPS_PER_LOG >= + sdp->sd_jdesc.ji_nsegment * sdp->sd_sb.sb_seg_size) + set_bit(SDF_NEED_LOG_DUMP, &sdp->sd_flags); + + out: + if (list_empty(&sdp->sd_log_incore)) + sdp->sd_vfs->s_dirt = FALSE; + + gfs_log_unlock(sdp); + + /* Dump if we need to. */ + + if (test_bit(SDF_NEED_LOG_DUMP, &sdp->sd_flags)) + gfs_log_dump(sdp, FALSE); +} + +/** + * gfs_log_flush - flush the whole incore log + * @sdp: the filesystem + * + */ + +void +gfs_log_flush(struct gfs_sbd *sdp) +{ + log_flush_internal(sdp, NULL); +} + +/** + * gfs_log_flush_glock - flush the incore log for a glock + * @gl: the glock + * + */ + +void +gfs_log_flush_glock(struct gfs_glock *gl) +{ + log_flush_internal(gl->gl_sbd, gl); +} + +/** + * incore_commit - commit a transaction in-core + * @sdp: the filesystem + * @new_tr: the transaction to commit + * + * Add the transaction @new_tr to the end of the incore commit list. + * Pull up and merge any previously committed transactions that share + * locks. Also pull up any rename transactions that need it.
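+ * + * Any incore transaction that shares a log element with @new_tr is taken + * off the incore list and folded into a single transaction via + * trans_combine(), so overlapping changes always reach the disk together.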
+ */ + +static void +incore_commit(struct gfs_sbd *sdp, struct gfs_trans *new_tr) +{ + struct gfs_log_element *le; + struct gfs_trans *trans = NULL, *exist_tr; + struct gfs_log_buf *lb; + struct list_head *bmem; + struct list_head *tmp, *head, *next; + + for (head = &new_tr->tr_elements, tmp = head->next; + tmp != head; + tmp = tmp->next) { + le = list_entry(tmp, struct gfs_log_element, le_list); + + exist_tr = LO_OVERLAP_TRANS(sdp, le); + if (!exist_tr) + continue; + + if (exist_tr != trans) { + list_del(&exist_tr->tr_list); + if (trans) + trans_combine(sdp, trans, exist_tr); + else + trans = exist_tr; + } + } + + if (trans) { + trans->tr_file = __FILE__; + trans->tr_line = __LINE__; + trans->tr_seg_reserved += new_tr->tr_seg_reserved; + trans->tr_flags |= new_tr->tr_flags; + trans->tr_num_free_bufs += new_tr->tr_num_free_bufs; + trans->tr_num_free_bmem += new_tr->tr_num_free_bmem; + + while (!list_empty(&new_tr->tr_free_bufs)) { + lb = list_entry(new_tr->tr_free_bufs.next, + struct gfs_log_buf, lb_list); + list_move(&lb->lb_list, &trans->tr_free_bufs); + new_tr->tr_num_free_bufs--; + } + while (!list_empty(&new_tr->tr_free_bmem)) { + bmem = new_tr->tr_free_bmem.next; + list_move(bmem, &trans->tr_free_bmem); + new_tr->tr_num_free_bmem--; + } + } else + trans = new_tr; + + for (head = &new_tr->tr_elements, tmp = head->next, next = tmp->next; + tmp != head; + tmp = next, next = next->next) { + le = list_entry(tmp, struct gfs_log_element, le_list); + LO_INCORE_COMMIT(sdp, trans, le); + } + + if (trans != new_tr) { + GFS_ASSERT_SBD(!new_tr->tr_num_free_bufs, sdp,); + GFS_ASSERT_SBD(!new_tr->tr_num_free_bmem, sdp,); + GFS_ASSERT_SBD(list_empty(&new_tr->tr_elements), sdp,); + kfree(new_tr); + } + + log_refund(sdp, trans); + + list_add(&trans->tr_list, &sdp->sd_log_incore); +} + +/** + * gfs_log_commit - Commit a transaction to the log + * @sdp: the filesystem + * @tr: the transaction + * + */ + +void +gfs_log_commit(struct gfs_sbd *sdp, struct gfs_trans *tr) +{ + struct gfs_log_buf *lb; + struct list_head *bmem; + unsigned int num_mblks = 0, num_eblks = 0, num_bufs = 0, num_bmem = 0; + unsigned int segments; + + LO_TRANS_SIZE(sdp, tr, &num_mblks, &num_eblks, &num_bufs, &num_bmem); + + GFS_ASSERT_SBD(num_mblks <= tr->tr_mblks_asked && + num_eblks <= tr->tr_eblks_asked, sdp, + printk("type = (%s, %u)\n", + tr->tr_file, tr->tr_line); + printk("num_mblks = %u, tr->tr_mblks_asked = %u\n", + num_mblks, tr->tr_mblks_asked); + printk("num_eblks = %u, tr->tr_eblks_asked = %u\n", + num_eblks, tr->tr_eblks_asked);); + + segments = gfs_blk2seg(sdp, num_bufs + 1); + num_bufs += segments + 1; + num_bmem += segments + 1; + + while (num_bufs--) { + lb = gmalloc(sizeof(struct gfs_log_buf)); + memset(lb, 0, sizeof(struct gfs_log_buf)); + list_add(&lb->lb_list, &tr->tr_free_bufs); + tr->tr_num_free_bufs++; + } + while (num_bmem--) { + bmem = gmalloc(sdp->sd_sb.sb_bsize); + list_add(bmem, &tr->tr_free_bmem); + tr->tr_num_free_bmem++; + } + + gfs_log_lock(sdp); + + incore_commit(sdp, tr); + + if (sdp->sd_log_buffers > sdp->sd_tune.gt_incore_log_blocks) { + gfs_log_unlock(sdp); + gfs_log_flush(sdp); + } else { + sdp->sd_vfs->s_dirt = TRUE; + gfs_log_unlock(sdp); + } + +} + +/** + * gfs_log_dump - make a Log Dump entry in the log + * @sdp: the filesystem + * @force: if TRUE, always make the dump even if one has been made recently + * + */ + +void +gfs_log_dump(struct gfs_sbd *sdp, int force) +{ + struct gfs_log_element *le; + struct gfs_trans tr; + struct gfs_log_buf *lb; +
struct list_head *bmem; + unsigned int num_bufs, num_bmem; + unsigned int segments; + int error; + + if (test_and_set_bit(SDF_IN_LOG_DUMP, &sdp->sd_flags)) { + GFS_ASSERT_SBD(!force, sdp,); + return; + } + + memset(&tr, 0, sizeof(struct gfs_trans)); + INIT_LIST_HEAD(&tr.tr_elements); + INIT_LIST_HEAD(&tr.tr_free_bufs); + INIT_LIST_HEAD(&tr.tr_free_bmem); + INIT_LIST_HEAD(&tr.tr_bufs); + tr.tr_flags = TRF_LOG_DUMP; + tr.tr_file = __FILE__; + tr.tr_line = __LINE__; + + for (;;) { + gfs_log_lock(sdp); + + if (!force && !test_bit(SDF_NEED_LOG_DUMP, &sdp->sd_flags)) + goto out; + + num_bufs = num_bmem = 0; + LO_DUMP_SIZE(sdp, NULL, &num_bufs, &num_bmem); + GFS_ASSERT_SBD(num_bufs, sdp,); + segments = gfs_blk2seg(sdp, num_bufs + 1); + num_bufs += segments + 1; + num_bmem += segments + 1; + + if (tr.tr_seg_reserved >= segments && + tr.tr_num_free_bufs >= num_bufs && + tr.tr_num_free_bmem >= num_bmem) + break; + + gfs_log_unlock(sdp); + + if (tr.tr_seg_reserved < segments) { + error = gfs_log_reserve(sdp, + segments - tr.tr_seg_reserved, + TRUE); + GFS_ASSERT_SBD(!error, sdp,); + tr.tr_seg_reserved = segments; + } + while (tr.tr_num_free_bufs < num_bufs) { + lb = gmalloc(sizeof(struct gfs_log_buf)); + memset(lb, 0, sizeof(struct gfs_log_buf)); + list_add(&lb->lb_list, &tr.tr_free_bufs); + tr.tr_num_free_bufs++; + } + while (tr.tr_num_free_bmem < num_bmem) { + bmem = gmalloc(sdp->sd_sb.sb_bsize); + list_add(bmem, &tr.tr_free_bmem); + tr.tr_num_free_bmem++; + } + } + + if (tr.tr_seg_reserved > segments) { + spin_lock(&sdp->sd_log_seg_lock); + sdp->sd_log_seg_free += tr.tr_seg_reserved - segments; + GFS_ASSERT_SBD(sdp->sd_log_seg_free < sdp->sd_jdesc.ji_nsegment, + sdp,); + spin_unlock(&sdp->sd_log_seg_lock); + tr.tr_seg_reserved = segments; + } + while (tr.tr_num_free_bufs > num_bufs) { + lb = list_entry(tr.tr_free_bufs.next, + struct gfs_log_buf, lb_list); + list_del(&lb->lb_list); + kfree(lb); + tr.tr_num_free_bufs--; + } + while (tr.tr_num_free_bmem > num_bmem) { + bmem = tr.tr_free_bmem.next; + list_del(bmem); + kfree(bmem); + tr.tr_num_free_bmem--; + } + + LO_BUILD_DUMP(sdp, &tr); + + error = disk_commit(sdp, &tr); + if (error) + gfs_io_error(sdp); + + while (!list_empty(&tr.tr_elements)) { + le = list_entry(tr.tr_elements.next, + struct gfs_log_element, le_list); + LO_CLEAN_DUMP(sdp, le); + } + + /* If there isn't anything in the AIL, we won't get back the log + space we reserved unless we do it ourselves.
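+ (Normally segments come back through gfs_ail_empty() as transactions + leave the AIL; a dump-only commit with an empty AIL has none.)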
*/ + + if (list_empty(&sdp->sd_log_ail)) { + spin_lock(&sdp->sd_log_seg_lock); + sdp->sd_log_seg_free += tr.tr_seg_reserved; + GFS_ASSERT_SBD(sdp->sd_log_seg_free < sdp->sd_jdesc.ji_nsegment, + sdp,); + spin_unlock(&sdp->sd_log_seg_lock); + } + + clear_bit(SDF_NEED_LOG_DUMP, &sdp->sd_flags); + + out: + gfs_log_unlock(sdp); + clear_bit(SDF_IN_LOG_DUMP, &sdp->sd_flags); +} + +/** + * gfs_log_shutdown - write a shutdown header into a journal + * @sdp: the filesystem + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_log_shutdown(struct gfs_sbd *sdp) +{ + struct gfs_log_buf *lb; + char *bmem; + struct gfs_log_header head; + struct gfs_log_descriptor desc; + unsigned int elements = 0; + int error; + + lb = gmalloc(sizeof(struct gfs_log_buf)); + memset(lb, 0, sizeof(struct gfs_log_buf)); + bmem = gmalloc(sdp->sd_sb.sb_bsize); + + gfs_log_lock(sdp); + + GFS_ASSERT_SBD(list_empty(&sdp->sd_log_ail), sdp,); + GFS_ASSERT_SBD(sdp->sd_log_seg_free == sdp->sd_jdesc.ji_nsegment - 1, + sdp,); + GFS_ASSERT_SBD(!sdp->sd_log_buffers, sdp,); + GFS_ASSERT_SBD(gfs_log_is_header(sdp, sdp->sd_log_head - 1), sdp,); + + /* Build a "last" log descriptor */ + + memset(&desc, 0, sizeof(struct gfs_log_descriptor)); + desc.ld_header.mh_magic = GFS_MAGIC; + desc.ld_header.mh_type = GFS_METATYPE_LD; + desc.ld_header.mh_format = GFS_FORMAT_LD; + desc.ld_type = GFS_LOG_DESC_LAST; + desc.ld_length = sdp->sd_sb.sb_seg_size - 1; + + /* Write the descriptor */ + + gfs_logbh_init(sdp, &lb->lb_bh, sdp->sd_log_head, bmem); + memset(bmem, 0, sdp->sd_sb.sb_bsize); + gfs_desc_out(&desc, lb->lb_bh.b_data); + error = gfs_logbh_start(sdp, &lb->lb_bh); + if (!error) + error = gfs_logbh_wait(sdp, &lb->lb_bh); + gfs_logbh_uninit(sdp, &lb->lb_bh); + + if (error) + goto out; + + /* Move to the next header */ + + while (!gfs_log_is_header(sdp, sdp->sd_log_head)) + log_incr_head(sdp, &sdp->sd_log_head); + + LO_DUMP_SIZE(sdp, &elements, NULL, NULL); + + /* Build the shutdown header */ + + memset(&head, 0, sizeof (struct gfs_log_header)); + head.lh_header.mh_magic = GFS_MAGIC; + head.lh_header.mh_type = GFS_METATYPE_LH; + head.lh_header.mh_format = GFS_FORMAT_LH; + head.lh_flags = GFS_LOG_HEAD_UNMOUNT; + head.lh_first = sdp->sd_log_head; + head.lh_sequence = sdp->sd_sequence + 1; + /* Don't care about tail */ + head.lh_last_dump = (elements) ? sdp->sd_log_dump_last : 0; + + /* Write out the shutdown header */ + + gfs_logbh_init(sdp, &lb->lb_bh, sdp->sd_log_head, bmem); + memset(bmem, 0, sdp->sd_sb.sb_bsize); + gfs_log_header_out(&head, lb->lb_bh.b_data); + gfs_log_header_out(&head, + lb->lb_bh.b_data + GFS_BASIC_BLOCK - + sizeof(struct gfs_log_header)); + error = gfs_logbh_start(sdp, &lb->lb_bh); + if (!error) + error = gfs_logbh_wait(sdp, &lb->lb_bh); + gfs_logbh_uninit(sdp, &lb->lb_bh); + + out: + gfs_log_unlock(sdp); + + kfree(lb); + kfree(bmem); + + return error; +} diff -urN linux-orig/fs/gfs/log.h linux-patched/fs/gfs/log.h --- linux-orig/fs/gfs/log.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/log.h 2004-06-30 13:27:49.346710434 -0500 @@ -0,0 +1,79 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. 
+** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __LOG_DOT_H__ +#define __LOG_DOT_H__ + +/** + * gfs_log_lock - acquire the right to mess with the log manager + * @sdp: the filesystem + * + */ + +static __inline__ void +gfs_log_lock(struct gfs_sbd *sdp) +{ + down(&sdp->sd_log_lock); +} + +/** + * gfs_log_unlock - release the right to mess with the log manager + * @sdp: the filesystem + * + */ + +static __inline__ void +gfs_log_unlock(struct gfs_sbd *sdp) +{ + up(&sdp->sd_log_lock); +} + +unsigned int gfs_struct2blk(struct gfs_sbd *sdp, unsigned int nstruct, + unsigned int ssize); +unsigned int gfs_blk2seg(struct gfs_sbd *sdp, unsigned int blocks); + +int gfs_log_reserve(struct gfs_sbd *sdp, unsigned int segments, int jump_queue); +void gfs_log_release(struct gfs_sbd *sdp, unsigned int segments); + +void gfs_ail_start(struct gfs_sbd *sdp, int flags); +int gfs_ail_empty(struct gfs_sbd *sdp); + +void gfs_log_commit(struct gfs_sbd *sdp, struct gfs_trans *trans); +void gfs_log_flush(struct gfs_sbd *sdp); +void gfs_log_flush_glock(struct gfs_glock *gl); + +int gfs_log_shutdown(struct gfs_sbd *sdp); + +void gfs_log_dump(struct gfs_sbd *sdp, int force); + +/* Internal crap used by the log operations */ + +/** + * gfs_log_is_header - Discover if block is on journal header + * @sdp: The GFS superblock + * @block: The block number + * + * Returns: TRUE if the block is on a journal segment boundary, FALSE otherwise + */ + +static __inline__ int +gfs_log_is_header(struct gfs_sbd *sdp, uint64_t block) +{ + return !do_mod(block, sdp->sd_sb.sb_seg_size); +} + +struct gfs_log_buf *gfs_log_get_buf(struct gfs_sbd *sdp, struct gfs_trans *tr); +void gfs_log_fake_buf(struct gfs_sbd *sdp, struct gfs_trans *tr, char *data, + struct buffer_head *unlock); + +#endif /* __LOG_DOT_H__ */ diff -urN linux-orig/fs/gfs/lops.c linux-patched/fs/gfs/lops.c --- linux-orig/fs/gfs/lops.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/lops.c 2004-06-30 13:27:49.348709970 -0500 @@ -0,0 +1,1563 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2.
+** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include "gfs.h" +#include "dio.h" +#include "glock.h" +#include "log.h" +#include "lops.h" +#include "quota.h" +#include "recovery.h" +#include "trans.h" +#include "unlinked.h" + +/** + * generic_le_add - generic routine to add a log element to a transaction + * @sdp: the filesystem + * @le: the log entry + * + */ + +static void +generic_le_add(struct gfs_sbd *sdp, struct gfs_log_element *le) +{ + struct gfs_trans *tr; + + GFS_ASSERT_SBD(le->le_ops && + !le->le_trans && + list_empty(&le->le_list), sdp,); + + tr = current_transaction; + GFS_ASSERT_SBD(tr, sdp,); + + le->le_trans = tr; + list_add(&le->le_list, &tr->tr_elements); +} + +/** + * glock_trans_end - drop a glock reference + * @sdp: the filesystem + * @le: the log element + * + */ + +static void +glock_trans_end(struct gfs_sbd *sdp, struct gfs_log_element *le) +{ + struct gfs_glock *gl = container_of(le, struct gfs_glock, gl_new_le); + + GFS_ASSERT_GLOCK(gfs_glock_is_locked_by_me(gl) && + gfs_glock_is_held_excl(gl), gl,); + gfs_glock_put(gl); +} + +/** + * glock_print - print debug info about a log element + * @sdp: the filesystem + * @le: the log element + * @where: is this a new transaction or an incore transaction + * + */ + +static void +glock_print(struct gfs_sbd *sdp, struct gfs_log_element *le, unsigned int where) +{ + struct gfs_glock *gl; + + switch (where) { + case TRANS_IS_NEW: + gl = container_of(le, struct gfs_glock, gl_new_le); + break; + case TRANS_IS_INCORE: + gl = container_of(le, struct gfs_glock, gl_incore_le); + break; + default: + GFS_ASSERT_SBD(FALSE, sdp,); + } + + printk(" Glock: (%u, %"PRIu64")\n", + gl->gl_name.ln_type, + gl->gl_name.ln_number); +} + +/** + * glock_overlap_trans - Find any incore transactions that might overlap with this LE + * @sdp: the filesystem + * @le: the log element + * + */ + +static struct gfs_trans * +glock_overlap_trans(struct gfs_sbd *sdp, struct gfs_log_element *le) +{ + struct gfs_glock *gl = container_of(le, struct gfs_glock, gl_new_le); + + return gl->gl_incore_le.le_trans; +} + +/** + * glock_incore_commit - commit this LE to the incore log + * @sdp: the filesystem + * @tr: the incore transaction this LE is a part of + * @le: the log element + * + */ + +static void +glock_incore_commit(struct gfs_sbd *sdp, struct gfs_trans *tr, + struct gfs_log_element *le) +{ + struct gfs_glock *gl = container_of(le, struct gfs_glock, gl_new_le); + + if (gl->gl_incore_le.le_trans) + GFS_ASSERT_GLOCK(gl->gl_incore_le.le_trans == tr, gl,); + else { + gl->gl_incore_le.le_trans = tr; + list_add(&gl->gl_incore_le.le_list, &tr->tr_elements); + if (tr != le->le_trans) + tr->tr_num_gl++; + } + + le->le_trans = NULL; + list_del_init(&le->le_list); +} + +/** + * glock_add_to_ail - Add this LE to the AIL + * @sdp: the filesystem + * @le: the log element + * + */ + +static void +glock_add_to_ail(struct gfs_sbd *sdp, struct gfs_log_element *le) +{ + le->le_trans = NULL; + list_del_init(&le->le_list); +} + +/** + * glock_trans_combine - combine two incore transactions + * @sdp: the filesystem + * @tr: the surviving transaction + * @new_tr: the transaction that's going to disappear + * + */ + +static void +glock_trans_combine(struct gfs_sbd *sdp, struct gfs_trans *tr, + struct gfs_trans *new_tr) +{ + tr->tr_num_gl += new_tr->tr_num_gl; +} + +/** + * buf_print - print
debug info about a log element + * @sdp: the filesystem + * @le: the log element + * @where: is this a new transaction or an incore transaction + * + */ + +static void +buf_print(struct gfs_sbd *sdp, struct gfs_log_element *le, unsigned int where) +{ + struct gfs_bufdata *bd; + + switch (where) { + case TRANS_IS_NEW: + bd = container_of(le, struct gfs_bufdata, bd_new_le); + break; + case TRANS_IS_INCORE: + bd = container_of(le, struct gfs_bufdata, bd_incore_le); + break; + default: + GFS_ASSERT_SBD(FALSE, sdp,); + } + + printk(" Buffer: %"PRIu64"\n", (uint64_t)bd->bd_bh->b_blocknr); +} + +/** + * buf_incore_commit - commit this LE to the incore log + * @sdp: the filesystem + * @tr: the incore transaction this LE is a part of + * @le: the log element + * + */ + +static void +buf_incore_commit(struct gfs_sbd *sdp, struct gfs_trans *tr, + struct gfs_log_element *le) +{ + struct gfs_bufdata *bd = container_of(le, struct gfs_bufdata, bd_new_le); + + if (bd->bd_frozen) { + kfree(bd->bd_frozen); + bd->bd_frozen = NULL; + } + + if (bd->bd_incore_le.le_trans) { + GFS_ASSERT_SBD(bd->bd_incore_le.le_trans == tr, sdp,); + gfs_dunpin(sdp, bd->bd_bh, NULL); + } else { + bd->bd_incore_le.le_trans = tr; + list_add(&bd->bd_incore_le.le_list, &tr->tr_elements); + if (tr != le->le_trans) + tr->tr_num_buf++; + + sdp->sd_log_buffers++; + } + + le->le_trans = NULL; + list_del_init(&le->le_list); +} + +/** + * buf_add_to_ail - Add this LE to the AIL + * @sdp: the filesystem + * @le: the log element + * + */ + +static void +buf_add_to_ail(struct gfs_sbd *sdp, struct gfs_log_element *le) +{ + struct gfs_bufdata *bd = container_of(le, + struct gfs_bufdata, + bd_incore_le); + + gfs_dunpin(sdp, bd->bd_bh, le->le_trans); + + le->le_trans = NULL; + list_del_init(&le->le_list); + + GFS_ASSERT_SBD(sdp->sd_log_buffers, sdp,); + sdp->sd_log_buffers--; +} + +/** + * buf_trans_size - compute how much space the LE class takes up in a transaction + * @sdp: the filesystem + * @tr: the transaction + * @mblks: the number of regular metadata blocks + * @eblks: the number of extra blocks + * @blocks: the number of log blocks + * @bmem: the number of buffer-sized chunks of memory we need + * + */ + +static void +buf_trans_size(struct gfs_sbd *sdp, struct gfs_trans *tr, + unsigned int *mblks, unsigned int *eblks, + unsigned int *blocks, unsigned int *bmem) +{ + unsigned int cblks; + + if (tr->tr_num_buf) { + cblks = gfs_struct2blk(sdp, tr->tr_num_buf, + sizeof(struct gfs_block_tag)); + + if (mblks) + *mblks += tr->tr_num_buf; + if (blocks) + *blocks += tr->tr_num_buf + cblks; + if (bmem) + *bmem += cblks; + } +} + +/** + * buf_trans_combine - combine two incore transactions + * @sdp: the filesystem + * @tr: the surviving transaction + * @new_tr: the transaction that's going to disappear + * + */ + +static void +buf_trans_combine(struct gfs_sbd *sdp, struct gfs_trans *tr, + struct gfs_trans *new_tr) +{ + tr->tr_num_buf += new_tr->tr_num_buf; +} + +/** + * increment_generation - increment the generation number in metadata buffer + * @sdp: the filesystem + * @bd: the struct gfs_bufdata structure associated with the buffer + * + */ + +static void +increment_generation(struct gfs_sbd *sdp, struct gfs_bufdata *bd) +{ + struct gfs_meta_header *mh, *mh2; + uint64_t tmp64; + + mh = (struct gfs_meta_header *)bd->bd_bh->b_data; + + tmp64 = gfs64_to_cpu(mh->mh_generation) + 1; + tmp64 = cpu_to_gfs64(tmp64); + + if (bd->bd_frozen) { + mh2 = (struct gfs_meta_header *)bd->bd_frozen; + GFS_ASSERT_SBD(mh->mh_generation == mh2->mh_generation, sdp,); +
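/* Keep the frozen copy's generation in step with the live buffer, so generation comparisons at replay stay consistent. */ +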
mh2->mh_generation = tmp64; + } + mh->mh_generation = tmp64; +} + +/** + * buf_build_bhlist - create the buffers that will make up the ondisk part of a transaction + * @sdp: the filesystem + * @tr: the transaction + * + */ + +static void +buf_build_bhlist(struct gfs_sbd *sdp, struct gfs_trans *tr) +{ + struct list_head *tmp, *head; + struct gfs_log_element *le; + struct gfs_bufdata *bd; + struct gfs_log_descriptor desc; + struct gfs_block_tag tag; + struct gfs_log_buf *clb = NULL; + unsigned int num_ctl; + unsigned int offset = sizeof(struct gfs_log_descriptor); + unsigned int x, bufs; + + if (!tr->tr_num_buf) + return; + + /* set up control buffers for descriptor and tags */ + + num_ctl = gfs_struct2blk(sdp, tr->tr_num_buf, + sizeof(struct gfs_block_tag)); + + for (x = 0; x < num_ctl; x++) { + if (clb) + gfs_log_get_buf(sdp, tr); + else + clb = gfs_log_get_buf(sdp, tr); + } + + memset(&desc, 0, sizeof(struct gfs_log_descriptor)); + desc.ld_header.mh_magic = GFS_MAGIC; + desc.ld_header.mh_type = GFS_METATYPE_LD; + desc.ld_header.mh_format = GFS_FORMAT_LD; + desc.ld_type = GFS_LOG_DESC_METADATA; + desc.ld_length = num_ctl + tr->tr_num_buf; + desc.ld_data1 = tr->tr_num_buf; + gfs_desc_out(&desc, clb->lb_bh.b_data); + + x = 1; + bufs = 0; + + for (head = &tr->tr_elements, tmp = head->next; + tmp != head; + tmp = tmp->next) { + le = list_entry(tmp, struct gfs_log_element, le_list); + if (le->le_ops != &gfs_buf_lops) + continue; + bd = container_of(le, struct gfs_bufdata, bd_incore_le); + + gfs_meta_check(sdp, bd->bd_bh); + + gfs_lock_buffer(bd->bd_bh); + + increment_generation(sdp, bd); + + gfs_log_fake_buf(sdp, tr, + (bd->bd_frozen) ? bd->bd_frozen : bd->bd_bh->b_data, + bd->bd_bh); + + if (offset + sizeof(struct gfs_block_tag) > sdp->sd_sb.sb_bsize) { + clb = list_entry(clb->lb_list.prev, + struct gfs_log_buf, lb_list); + if (gfs_log_is_header(sdp, clb->lb_bh.b_blocknr)) + clb = list_entry(clb->lb_list.prev, + struct gfs_log_buf, lb_list); + x++; + offset = 0; + } + + memset(&tag, 0, sizeof(struct gfs_block_tag)); + tag.bt_blkno = bd->bd_bh->b_blocknr; + + gfs_block_tag_out(&tag, clb->lb_bh.b_data + offset); + + offset += sizeof(struct gfs_block_tag); + bufs++; + } + + GFS_ASSERT_SBD(x == num_ctl, sdp,); + GFS_ASSERT_SBD(bufs == tr->tr_num_buf, sdp,); +} + +/** + * buf_before_scan - called before journal replay + * @sdp: the filesystem + * @jid: the journal ID about to be replayed + * @head: the current head of the log + * @pass: the pass through the journal + * + */ + +static void +buf_before_scan(struct gfs_sbd *sdp, unsigned int jid, + struct gfs_log_header *head, unsigned int pass) +{ + if (pass == GFS_RECPASS_A1) + sdp->sd_recovery_replays = + sdp->sd_recovery_skips = + sdp->sd_recovery_sames = 0; +} + +/** + * replay_block - Replay a single metadata block + * @sdp: the filesystem + * @jdesc: the struct gfs_jindex structure for the journal being replayed + * @gl: the journal's glock + * @tag: the block tag describing the inplace location of the block + * @blkno: the location of the log's copy of the block + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +replay_block(struct gfs_sbd *sdp, struct gfs_jindex *jdesc, + struct gfs_glock *gl, struct gfs_block_tag *tag, uint64_t blkno) +{ + struct buffer_head *inplace_bh, *log_bh; + struct gfs_meta_header inplace_mh, log_mh; + int replay_block = TRUE; + int error = 0; + + gfs_replay_check(sdp); + + /* Warning: Using a real buffer here instead of a tempbh can be bad + on an OS that won't support multiple simultaneous buffers
for the + same block on different glocks. */ + + error = gfs_dread(sdp, tag->bt_blkno, gl, + DIO_START | DIO_WAIT, &inplace_bh); + if (error) + return error; + gfs_meta_check(sdp, inplace_bh); + gfs_meta_header_in(&inplace_mh, inplace_bh->b_data); + + error = gfs_dread(sdp, blkno, gl, DIO_START | DIO_WAIT, &log_bh); + if (error) { + brelse(inplace_bh); + return error; + } + gfs_meta_check(sdp, log_bh); + gfs_meta_header_in(&log_mh, log_bh->b_data); + + if (log_mh.mh_generation < inplace_mh.mh_generation) { + replay_block = FALSE; + sdp->sd_recovery_skips++; + } else if (log_mh.mh_generation == inplace_mh.mh_generation) { + if (memcmp(log_bh->b_data, + inplace_bh->b_data, + sdp->sd_sb.sb_bsize) == 0) { + replay_block = FALSE; + sdp->sd_recovery_sames++; + } + } + + if (replay_block) { + memcpy(inplace_bh->b_data, + log_bh->b_data, + sdp->sd_sb.sb_bsize); + + error = gfs_replay_buf(gl, inplace_bh); + if (!error) + sdp->sd_recovery_replays++; + } + + brelse(log_bh); + brelse(inplace_bh); + + return error; +} + +/** + * buf_scan_elements - Replay a metadata log descriptor + * @sdp: the filesystem + * @jdesc: the struct gfs_jindex structure for the journal being replayed + * @gl: the journal's glock + * @start: the starting block of the descriptor + * @desc: the descriptor structure + * @pass: the pass through the journal + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +buf_scan_elements(struct gfs_sbd *sdp, struct gfs_jindex *jdesc, + struct gfs_glock *gl, uint64_t start, + struct gfs_log_descriptor *desc, unsigned int pass) +{ + struct gfs_block_tag tag; + struct buffer_head *bh; + uint64_t cblk = start; + unsigned int num_tags = desc->ld_data1; + unsigned int offset = sizeof(struct gfs_log_descriptor); + unsigned int x; + int error; + + if (pass != GFS_RECPASS_A1) + return 0; + if (desc->ld_type != GFS_LOG_DESC_METADATA) + return 0; + + x = gfs_struct2blk(sdp, num_tags, sizeof(struct gfs_block_tag)); + while (x--) { + error = gfs_increment_blkno(sdp, jdesc, gl, &start, TRUE); + if (error) + return error; + } + + for (;;) { + GFS_ASSERT_SBD(num_tags, sdp,); + + error = gfs_dread(sdp, cblk, gl, DIO_START | DIO_WAIT, &bh); + if (error) + return error; + + /* Do readahead for the inplace blocks in this control block */ + { + unsigned int o2 = offset; + unsigned int nt2 = num_tags; + + while (o2 + sizeof(struct gfs_block_tag) <= + sdp->sd_sb.sb_bsize) { + gfs_block_tag_in(&tag, bh->b_data + o2); + gfs_start_ra(gl, tag.bt_blkno, 1); + if (!--nt2) + break; + o2 += sizeof(struct gfs_block_tag); + } + } + + while (offset + sizeof(struct gfs_block_tag) <= + sdp->sd_sb.sb_bsize) { + gfs_block_tag_in(&tag, bh->b_data + offset); + + error = replay_block(sdp, jdesc, gl, &tag, start); + if (error) + goto out_drelse; + + if (!--num_tags) + goto out_drelse; + + error = gfs_increment_blkno(sdp, jdesc, gl, &start, TRUE); + if (error) + goto out_drelse; + + offset += sizeof(struct gfs_block_tag); + } + + brelse(bh); + + error = gfs_increment_blkno(sdp, jdesc, gl, &cblk, TRUE); + if (error) + return error; + + offset = 0; + } + + return 0; + + out_drelse: + brelse(bh); + + return error; +} + +/** + * buf_after_scan - called after journal replay + * @sdp: the filesystem + * @jid: the journal ID about to be replayed + * @pass: the pass through the journal + * + */ + +static void +buf_after_scan(struct gfs_sbd *sdp, unsigned int jid, unsigned int pass) +{ + if (pass == GFS_RECPASS_A1) { + printk("GFS: fsid=%s: jid=%u: Replayed %u of %u blocks\n", + sdp->sd_fsname, jid, + sdp->sd_recovery_replays, 
+ sdp->sd_recovery_replays + sdp->sd_recovery_skips + + sdp->sd_recovery_sames); + printk("GFS: fsid=%s: jid=%u: replays = %u, skips = %u, sames = %u\n", + sdp->sd_fsname, jid, sdp->sd_recovery_replays, + sdp->sd_recovery_skips, sdp->sd_recovery_sames); + } +} + +/** + * unlinked_print - print debug info about a log element + * @sdp: the filesystem + * @le: the log element + * @where: is this a new transaction or an incore transaction + * + */ + +static void +unlinked_print(struct gfs_sbd *sdp, struct gfs_log_element *le, + unsigned int where) +{ + struct gfs_unlinked *ul; + char *type; + + switch (where) { + case TRANS_IS_NEW: + ul = container_of(le, struct gfs_unlinked, ul_new_le); + type = (test_bit(ULF_NEW_UL, &ul->ul_flags)) ? + "unlink" : "dealloc"; + break; + case TRANS_IS_INCORE: + ul = container_of(le, struct gfs_unlinked, ul_incore_le); + type = (test_bit(ULF_INCORE_UL, &ul->ul_flags)) ? + "unlink" : "dealloc"; + break; + default: + GFS_ASSERT_SBD(FALSE, sdp,); + } + + printk(" unlinked: %"PRIu64"/%"PRIu64", %s\n", + ul->ul_inum.no_formal_ino, ul->ul_inum.no_addr, + type); +} + +/** + * unlinked_incore_commit - commit this LE to the incore log + * @sdp: the filesystem + * @tr: the incore transaction this LE is a part of + * @le: the log element + * + */ + +static void +unlinked_incore_commit(struct gfs_sbd *sdp, struct gfs_trans *tr, + struct gfs_log_element *le) +{ + struct gfs_unlinked *ul = container_of(le, + struct gfs_unlinked, + ul_new_le); + int n = !!test_bit(ULF_NEW_UL, &ul->ul_flags); + int i = !!test_bit(ULF_INCORE_UL, &ul->ul_flags); + + if (ul->ul_incore_le.le_trans) { + GFS_ASSERT_SBD(ul->ul_incore_le.le_trans == tr, sdp,); + GFS_ASSERT_SBD(n != i, sdp,); + + ul->ul_incore_le.le_trans = NULL; + list_del_init(&ul->ul_incore_le.le_list); + gfs_unlinked_put(sdp, ul); + + if (i) { + GFS_ASSERT_SBD(tr->tr_num_iul, sdp,); + tr->tr_num_iul--; + } else { + GFS_ASSERT_SBD(tr->tr_num_ida, sdp,); + tr->tr_num_ida--; + } + } else { + gfs_unlinked_hold(sdp, ul); + ul->ul_incore_le.le_trans = tr; + list_add(&ul->ul_incore_le.le_list, &tr->tr_elements); + + if (n) { + set_bit(ULF_INCORE_UL, &ul->ul_flags); + if (tr != le->le_trans) + tr->tr_num_iul++; + } else { + clear_bit(ULF_INCORE_UL, &ul->ul_flags); + if (tr != le->le_trans) + tr->tr_num_ida++; + } + } + + if (n) { + gfs_unlinked_hold(sdp, ul); + GFS_ASSERT_SBD(!test_bit(ULF_IC_LIST, &ul->ul_flags), sdp,); + set_bit(ULF_IC_LIST, &ul->ul_flags); + atomic_inc(&sdp->sd_unlinked_ic_count); + } else { + GFS_ASSERT_SBD(test_bit(ULF_IC_LIST, &ul->ul_flags), sdp,); + clear_bit(ULF_IC_LIST, &ul->ul_flags); + gfs_unlinked_put(sdp, ul); + GFS_ASSERT_SBD(atomic_read(&sdp->sd_unlinked_ic_count), sdp,); + atomic_dec(&sdp->sd_unlinked_ic_count); + } + + le->le_trans = NULL; + list_del_init(&le->le_list); + gfs_unlinked_put(sdp, ul); +} + +/** + * unlinked_add_to_ail - Add this LE to the AIL + * @sdp: the filesystem + * @le: the log element + * + */ + +static void +unlinked_add_to_ail(struct gfs_sbd *sdp, struct gfs_log_element *le) +{ + struct gfs_unlinked *ul = container_of(le, + struct gfs_unlinked, + ul_incore_le); + int i = !!test_bit(ULF_INCORE_UL, &ul->ul_flags); + + if (i) { + gfs_unlinked_hold(sdp, ul); + GFS_ASSERT_SBD(!test_bit(ULF_OD_LIST, &ul->ul_flags), sdp,); + set_bit(ULF_OD_LIST, &ul->ul_flags); + atomic_inc(&sdp->sd_unlinked_od_count); + } else { + GFS_ASSERT_SBD(test_bit(ULF_OD_LIST, &ul->ul_flags), sdp,); + clear_bit(ULF_OD_LIST, &ul->ul_flags); + gfs_unlinked_put(sdp, ul); +
GFS_ASSERT_SBD(atomic_read(&sdp->sd_unlinked_od_count), sdp,); + atomic_dec(&sdp->sd_unlinked_od_count); + } + + le->le_trans = NULL; + list_del_init(&le->le_list); + gfs_unlinked_put(sdp, ul); +} + +/** + * unlinked_clean_dump - clean up a LE after a log dump + * @sdp: the filesystem + * @le: the log element + * + */ + +static void +unlinked_clean_dump(struct gfs_sbd *sdp, struct gfs_log_element *le) +{ + le->le_trans = NULL; + list_del_init(&le->le_list); +} + +/** + * unlinked_trans_size - compute how much space the LE class takes up in a transaction + * @sdp: the filesystem + * @tr: the transaction + * @mblks: the number of regular metadata blocks + * @eblks: the number of extra blocks + * @blocks: the number of log blocks + * @bmem: the number of buffer-sized chunks of memory we need + * + */ + +static void +unlinked_trans_size(struct gfs_sbd *sdp, struct gfs_trans *tr, + unsigned int *mblks, unsigned int *eblks, + unsigned int *blocks, unsigned int *bmem) +{ + unsigned int ublks = 0; + + if (tr->tr_num_iul) + ublks = gfs_struct2blk(sdp, tr->tr_num_iul, + sizeof(struct gfs_inum)); + if (tr->tr_num_ida) + ublks += gfs_struct2blk(sdp, tr->tr_num_ida, + sizeof(struct gfs_inum)); + + if (eblks) + *eblks += ublks; + if (blocks) + *blocks += ublks; + if (bmem) + *bmem += ublks; +} + +/** + * unlinked_trans_combine - combine two incore transactions + * @sdp: the filesystem + * @tr: the surviving transaction + * @new_tr: the transaction that's going to disappear + * + */ + +static void +unlinked_trans_combine(struct gfs_sbd *sdp, struct gfs_trans *tr, + struct gfs_trans *new_tr) +{ + tr->tr_num_iul += new_tr->tr_num_iul; + tr->tr_num_ida += new_tr->tr_num_ida; +} + +/** + * unlinked_build_bhlist - create the buffers that will make up the ondisk part of a transaction + * @sdp: the filesystem + * @tr: the transaction + * + */ + +static void +unlinked_build_bhlist(struct gfs_sbd *sdp, struct gfs_trans *tr) +{ + struct list_head *tmp, *head; + struct gfs_log_element *le; + struct gfs_unlinked *ul; + struct gfs_log_descriptor desc; + struct gfs_log_buf *lb; + unsigned int pass = 2; + unsigned int type, number; + unsigned int offset, entries; + + while (pass--) { + if (tr->tr_flags & TRF_LOG_DUMP) { + if (pass) { + type = GFS_LOG_DESC_IUL; + number = tr->tr_num_iul; + } else + break; + } else { + if (pass) { + type = GFS_LOG_DESC_IUL; + number = tr->tr_num_iul; + } else { + type = GFS_LOG_DESC_IDA; + number = tr->tr_num_ida; + } + + if (!number) + continue; + } + + lb = gfs_log_get_buf(sdp, tr); + + memset(&desc, 0, sizeof(struct gfs_log_descriptor)); + desc.ld_header.mh_magic = GFS_MAGIC; + desc.ld_header.mh_type = GFS_METATYPE_LD; + desc.ld_header.mh_format = GFS_FORMAT_LD; + desc.ld_type = type; + desc.ld_length = gfs_struct2blk(sdp, number, sizeof(struct gfs_inum)); + desc.ld_data1 = (tr->tr_flags & TRF_LOG_DUMP) ?
TRUE : FALSE; + gfs_desc_out(&desc, lb->lb_bh.b_data); + + offset = sizeof(struct gfs_log_descriptor); + entries = 0; + + for (head = &tr->tr_elements, tmp = head->next; + tmp != head; + tmp = tmp->next) { + le = list_entry(tmp, struct gfs_log_element, le_list); + if (le->le_ops != &gfs_unlinked_lops) + continue; + if (tr->tr_flags & TRF_LOG_DUMP) + ul = container_of(le, + struct gfs_unlinked, + ul_ondisk_le); + else { + ul = container_of(le, + struct gfs_unlinked, + ul_incore_le); + if (!!test_bit(ULF_INCORE_UL, &ul->ul_flags) != pass) + continue; + } + + if (offset + sizeof(struct gfs_inum) > sdp->sd_sb.sb_bsize) { + offset = 0; + lb = gfs_log_get_buf(sdp, tr); + } + + gfs_inum_out(&ul->ul_inum, + lb->lb_bh.b_data + offset); + + offset += sizeof(struct gfs_inum); + entries++; + } + + GFS_ASSERT_SBD(entries == number, sdp,); + } +} + +/** + * unlinked_dump_size - compute how much space the LE class takes up in a log dump + * @sdp: the filesystem + * @elements: the number of log elements in the dump + * @blocks: the number of blocks in the dump + * @bmem: the number of buffer-sized chunks of memory we need + * + */ + +static void +unlinked_dump_size(struct gfs_sbd *sdp, unsigned int *elements, + unsigned int *blocks, unsigned int *bmem) +{ + unsigned int c = atomic_read(&sdp->sd_unlinked_od_count); + unsigned int b = gfs_struct2blk(sdp, c, sizeof(struct gfs_inum)); + + if (elements) + *elements += c; + if (blocks) + *blocks += b; + if (bmem) + *bmem += b; +} + +/** + * unlinked_build_dump - create a transaction that represents a log dump for this LE class + * @sdp: the filesystem + * @tr: the transaction to fill + * + */ + +static void +unlinked_build_dump(struct gfs_sbd *sdp, struct gfs_trans *tr) +{ + struct list_head *tmp, *head; + struct gfs_unlinked *ul; + unsigned int x = 0; + + tr->tr_num_iul = atomic_read(&sdp->sd_unlinked_od_count); + + spin_lock(&sdp->sd_unlinked_lock); + + for (head = &sdp->sd_unlinked_list, tmp = head->next; + tmp != head; + tmp = tmp->next) { + ul = list_entry(tmp, struct gfs_unlinked, ul_list); + if (!test_bit(ULF_OD_LIST, &ul->ul_flags)) + continue; + + GFS_ASSERT_SBD(!ul->ul_ondisk_le.le_trans, sdp,); + ul->ul_ondisk_le.le_trans = tr; + list_add(&ul->ul_ondisk_le.le_list, &tr->tr_elements); + + x++; + } + + spin_unlock(&sdp->sd_unlinked_lock); + + GFS_ASSERT_SBD(x == atomic_read(&sdp->sd_unlinked_od_count), sdp,); +} + +/** + * unlinked_before_scan - called before a log dump is recovered + * @sdp: the filesystem + * @jid: the journal ID about to be scanned + * @head: the current head of the log + * @pass: the pass through the journal + * + */ + +static void +unlinked_before_scan(struct gfs_sbd *sdp, unsigned int jid, + struct gfs_log_header *head, unsigned int pass) +{ + if (pass == GFS_RECPASS_B1) + clear_bit(SDF_FOUND_UL_DUMP, &sdp->sd_flags); +} + +/** + * unlinked_scan_elements - scan unlinked inodes from the journal + * @sdp: the filesystem + * @jdesc: the struct gfs_jindex structure for the journal being scanned + * @gl: the journal's glock + * @start: the starting block of the descriptor + * @desc: the descriptor structure + * @pass: the pass through the journal + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +unlinked_scan_elements(struct gfs_sbd *sdp, struct gfs_jindex *jdesc, + struct gfs_glock *gl, uint64_t start, + struct gfs_log_descriptor *desc, unsigned int pass) +{ + struct gfs_inum inum; + struct buffer_head *bh; + unsigned int offset = sizeof(struct gfs_log_descriptor); + unsigned int x; + int error; + + if (pass !=
GFS_RECPASS_B1) + return 0; + + switch (desc->ld_type) { + case GFS_LOG_DESC_IUL: + if (test_bit(SDF_FOUND_UL_DUMP, &sdp->sd_flags)) + GFS_ASSERT_SBD(!desc->ld_data1, sdp,); + else { + GFS_ASSERT_SBD(desc->ld_data1, sdp,); + set_bit(SDF_FOUND_UL_DUMP, &sdp->sd_flags); + } + break; + + case GFS_LOG_DESC_IDA: + GFS_ASSERT_SBD(test_bit(SDF_FOUND_UL_DUMP, &sdp->sd_flags), + sdp,); + break; + + default: + return 0; + } + + for (x = 0; x < desc->ld_length; x++) { + error = gfs_dread(sdp, start, gl, DIO_START | DIO_WAIT, &bh); + if (error) + return error; + + for (; + offset + sizeof(struct gfs_inum) <= sdp->sd_sb.sb_bsize; + offset += sizeof(struct gfs_inum)) { + gfs_inum_in(&inum, bh->b_data + offset); + + if (inum.no_addr) + gfs_unlinked_merge(sdp, desc->ld_type, &inum); + } + + brelse(bh); + + error = gfs_increment_blkno(sdp, jdesc, gl, &start, TRUE); + if (error) + return error; + + offset = 0; + } + + return 0; +} + +/** + * unlinked_after_scan - called after a log dump is recovered + * @sdp: the filesystem + * @jid: the journal ID about to be scanned + * @pass: the pass through the journal + * + */ + +static void +unlinked_after_scan(struct gfs_sbd *sdp, unsigned int jid, unsigned int pass) +{ + if (pass == GFS_RECPASS_B1) { + GFS_ASSERT_SBD(test_bit(SDF_FOUND_UL_DUMP, &sdp->sd_flags), + sdp,); + printk("GFS: fsid=%s: Found %d unlinked inodes\n", + sdp->sd_fsname, atomic_read(&sdp->sd_unlinked_ic_count)); + } +} + +/** + * quota_print - print debug info about a log element + * @sdp: the filesystem + * @le: the log element + * @where: is this a new transaction or an incore transaction + * + */ + +static void +quota_print(struct gfs_sbd *sdp, struct gfs_log_element *le, unsigned int where) +{ + struct gfs_quota_le *ql; + + ql = container_of(le, struct gfs_quota_le, ql_le); + printk(" quota: %s %u: %"PRId64" blocks\n", + (test_bit(QDF_USER, &ql->ql_data->qd_flags)) ? "user" : "group", + ql->ql_data->qd_id, ql->ql_change); +} + +/** + * quota_incore_commit - commit this LE to the incore log + * @sdp: the filesystem + * @tr: the incore transaction this LE is a part of + * @le: the log element + * + */ + +static void +quota_incore_commit(struct gfs_sbd *sdp, struct gfs_trans *tr, + struct gfs_log_element *le) +{ + struct gfs_quota_le *ql = container_of(le, struct gfs_quota_le, ql_le); + struct gfs_quota_data *qd = ql->ql_data; + + GFS_ASSERT_SBD(ql->ql_change, sdp,); + + /* Make this change under the sd_quota_lock, so other processes + checking qd_change_ic don't have to acquire the log lock.
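+ (Both qd_change_new and qd_change_ic are adjusted inside the same + critical section below, so readers of either counter always see a + consistent split of the pending change.)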
*/ + + spin_lock(&sdp->sd_quota_lock); + qd->qd_change_new -= ql->ql_change; + qd->qd_change_ic += ql->ql_change; + spin_unlock(&sdp->sd_quota_lock); + + if (le->le_trans == tr) + list_add(&ql->ql_data_list, &qd->qd_le_list); + else { + struct list_head *tmp, *head; + struct gfs_quota_le *tmp_ql; + int found = FALSE; + + for (head = &qd->qd_le_list, tmp = head->next; + tmp != head; + tmp = tmp->next) { + tmp_ql = list_entry(tmp, struct gfs_quota_le, ql_data_list); + if (tmp_ql->ql_le.le_trans != tr) + continue; + + tmp_ql->ql_change += ql->ql_change; + + list_del(&le->le_list); + gfs_quota_put(sdp, qd); + kfree(ql); + + if (!tmp_ql->ql_change) { + list_del(&tmp_ql->ql_data_list); + list_del(&tmp_ql->ql_le.le_list); + gfs_quota_put(sdp, tmp_ql->ql_data); + kfree(tmp_ql); + tr->tr_num_q--; + } + + found = TRUE; + break; + } + + if (!found) { + le->le_trans = tr; + list_move(&le->le_list, &tr->tr_elements); + tr->tr_num_q++; + list_add(&ql->ql_data_list, &qd->qd_le_list); + } + } +} + +/** + * quota_add_to_ail - Add this LE to the AIL + * @sdp: the filesystem + * @le: the log element + * + */ + +static void +quota_add_to_ail(struct gfs_sbd *sdp, struct gfs_log_element *le) +{ + struct gfs_quota_le *ql = container_of(le, struct gfs_quota_le, ql_le); + struct gfs_quota_data *qd = ql->ql_data; + + qd->qd_change_od += ql->ql_change; + if (qd->qd_change_od) { + if (!test_bit(QDF_OD_LIST, &qd->qd_flags)) { + gfs_quota_hold(sdp, qd); + set_bit(QDF_OD_LIST, &qd->qd_flags); + atomic_inc(&sdp->sd_quota_od_count); + } + } else { + GFS_ASSERT_SBD(test_bit(QDF_OD_LIST, &qd->qd_flags), sdp,); + clear_bit(QDF_OD_LIST, &qd->qd_flags); + gfs_quota_put(sdp, qd); + GFS_ASSERT_SBD(atomic_read(&sdp->sd_quota_od_count), sdp,); + atomic_dec(&sdp->sd_quota_od_count); + } + + list_del(&ql->ql_data_list); + list_del(&le->le_list); + gfs_quota_put(sdp, qd); + kfree(ql); +} + +/** + * quota_clean_dump - clean up an LE after a log dump + * @sdp: the filesystem + * @le: the log element + * + */ + +static void +quota_clean_dump(struct gfs_sbd *sdp, struct gfs_log_element *le) +{ + le->le_trans = NULL; + list_del_init(&le->le_list); +} + +/** + * quota_trans_size - compute how much space the LE class takes up in a transaction + * @sdp: the filesystem + * @tr: the transaction + * @mblks: the number of regular metadata blocks + * @eblks: the number of extra blocks + * @blocks: the number of log blocks + * @bmem: the number of buffer-sized chunks of memory we need + * + */ + +static void +quota_trans_size(struct gfs_sbd *sdp, struct gfs_trans *tr, + unsigned int *mblks, unsigned int *eblks, + unsigned int *blocks, unsigned int *bmem) +{ + unsigned int qblks; + + if (tr->tr_num_q) { + qblks = gfs_struct2blk(sdp, tr->tr_num_q, + sizeof(struct gfs_quota_tag)); + + if (eblks) + *eblks += qblks; + if (blocks) + *blocks += qblks; + if (bmem) + *bmem += qblks; + } +} + +/** + * quota_trans_combine - combine two incore transactions + * @sdp: the filesystem + * @tr: the surviving transaction + * @new_tr: the transaction that's going to disappear + * + */ + +static void +quota_trans_combine(struct gfs_sbd *sdp, struct gfs_trans *tr, + struct gfs_trans *new_tr) +{ + tr->tr_num_q += new_tr->tr_num_q; +} + +/** + * quota_build_bhlist - create the buffers that will make up the ondisk part of a transaction + * @sdp: the filesystem + * @tr: the transaction + * + */ + +static void +quota_build_bhlist(struct gfs_sbd *sdp, struct gfs_trans *tr) +{ + struct list_head *tmp, *head; + struct gfs_log_element *le; + struct gfs_quota_le *ql; + struct 
gfs_log_descriptor desc; + struct gfs_quota_tag tag; + struct gfs_log_buf *lb; + unsigned int offset = sizeof(struct gfs_log_descriptor), entries = 0; + + if (!tr->tr_num_q && !(tr->tr_flags & TRF_LOG_DUMP)) + return; + + lb = gfs_log_get_buf(sdp, tr); + + memset(&desc, 0, sizeof(struct gfs_log_descriptor)); + desc.ld_header.mh_magic = GFS_MAGIC; + desc.ld_header.mh_type = GFS_METATYPE_LD; + desc.ld_header.mh_format = GFS_FORMAT_LD; + desc.ld_type = GFS_LOG_DESC_Q; + desc.ld_length = gfs_struct2blk(sdp, tr->tr_num_q, + sizeof(struct gfs_quota_tag)); + desc.ld_data1 = tr->tr_num_q; + desc.ld_data2 = (tr->tr_flags & TRF_LOG_DUMP) ? TRUE : FALSE; + gfs_desc_out(&desc, lb->lb_bh.b_data); + + for (head = &tr->tr_elements, tmp = head->next; + tmp != head; + tmp = tmp->next) { + le = list_entry(tmp, struct gfs_log_element, le_list); + if (le->le_ops != &gfs_quota_lops) + continue; + + ql = container_of(le, struct gfs_quota_le, ql_le); + + if (offset + sizeof(struct gfs_quota_tag) > + sdp->sd_sb.sb_bsize) { + offset = 0; + lb = gfs_log_get_buf(sdp, tr); + } + + memset(&tag, 0, sizeof(struct gfs_quota_tag)); + tag.qt_change = ql->ql_change; + tag.qt_flags = (test_bit(QDF_USER, &ql->ql_data->qd_flags)) ? + GFS_QTF_USER : 0; + tag.qt_id = ql->ql_data->qd_id; + + gfs_quota_tag_out(&tag, lb->lb_bh.b_data + offset); + + offset += sizeof(struct gfs_quota_tag); + entries++; + } + + GFS_ASSERT_SBD(entries == tr->tr_num_q, sdp,); +} + +/** + * quota_dump_size - compute how much space the LE class takes up in a log dump + * @sdp: the filesystem + * @elements: the number of log elements in the dump + * @blocks: the number of blocks in the dump + * @bmem: the number of buffer-sized chunks of memory we need + * + */ + +static void +quota_dump_size(struct gfs_sbd *sdp, unsigned int *elements, + unsigned int *blocks, unsigned int *bmem) +{ + unsigned int c = atomic_read(&sdp->sd_quota_od_count); + unsigned int b = gfs_struct2blk(sdp, c, sizeof(struct gfs_quota_tag)); + + if (elements) + *elements += c; + if (blocks) + *blocks += b; + if (bmem) + *bmem += b; +} + +/** + * quota_build_dump - create a transaction that represents a log dump for this LE class + * @sdp: the filesystem + * @tr: the transaction to fill + * + */ + +static void +quota_build_dump(struct gfs_sbd *sdp, struct gfs_trans *tr) +{ + struct list_head *tmp, *head; + struct gfs_quota_data *qd; + struct gfs_quota_le *ql; + unsigned int x = 0; + + tr->tr_num_q = atomic_read(&sdp->sd_quota_od_count); + + spin_lock(&sdp->sd_quota_lock); + + for (head = &sdp->sd_quota_list, tmp = head->next; + tmp != head; + tmp = tmp->next) { + qd = list_entry(tmp, struct gfs_quota_data, qd_list); + if (!test_bit(QDF_OD_LIST, &qd->qd_flags)) + continue; + + ql = &qd->qd_ondisk_ql; + + ql->ql_le.le_ops = &gfs_quota_lops; + GFS_ASSERT_SBD(!ql->ql_le.le_trans, sdp,); + ql->ql_le.le_trans = tr; + list_add(&ql->ql_le.le_list, &tr->tr_elements); + + ql->ql_data = qd; + ql->ql_change = qd->qd_change_od; + + x++; + } + + spin_unlock(&sdp->sd_quota_lock); + + GFS_ASSERT_SBD(x == atomic_read(&sdp->sd_quota_od_count), sdp,); +} + +/** + * quota_before_scan - called before a log dump is recovered + * @sdp: the filesystem + * @jid: the journal ID about to be scanned + * @head: the current head of the log + * @pass: the pass through the journal + * + */ + +static void +quota_before_scan(struct gfs_sbd *sdp, unsigned int jid, + struct gfs_log_header *head, unsigned int pass) +{ + if (pass == GFS_RECPASS_B1) + clear_bit(SDF_FOUND_Q_DUMP, &sdp->sd_flags); +} + +/** + * 
quota_scan_elements - scan quota change tags from the journal + * @sdp: the filesystem + * @jdesc: the struct gfs_jindex structure for the journal being scanned + * @gl: the journal's glock + * @start: the starting block of the descriptor + * @desc: the descriptor structure + * @pass: the pass through the journal + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +quota_scan_elements(struct gfs_sbd *sdp, struct gfs_jindex *jdesc, + struct gfs_glock *gl, uint64_t start, + struct gfs_log_descriptor *desc, unsigned int pass) +{ + struct gfs_quota_tag tag; + struct buffer_head *bh; + unsigned int num_tags = desc->ld_data1; + unsigned int offset = sizeof(struct gfs_log_descriptor); + unsigned int x; + int error; + + if (pass != GFS_RECPASS_B1) + return 0; + if (desc->ld_type != GFS_LOG_DESC_Q) + return 0; + + if (test_bit(SDF_FOUND_Q_DUMP, &sdp->sd_flags)) + GFS_ASSERT_SBD(!desc->ld_data2, sdp,); + else { + GFS_ASSERT_SBD(desc->ld_data2, sdp,); + set_bit(SDF_FOUND_Q_DUMP, &sdp->sd_flags); + } + + if (!num_tags) + return 0; + + for (x = 0; x < desc->ld_length; x++) { + error = gfs_dread(sdp, start, gl, DIO_START | DIO_WAIT, &bh); + if (error) + return error; + + while (offset + sizeof(struct gfs_quota_tag) <= + sdp->sd_sb.sb_bsize) { + gfs_quota_tag_in(&tag, bh->b_data + offset); + + error = gfs_quota_merge(sdp, &tag); + if (error) + goto out_drelse; + + if (!--num_tags) + goto out_drelse; + + offset += sizeof(struct gfs_quota_tag); + } + + brelse(bh); + + error = gfs_increment_blkno(sdp, jdesc, gl, &start, TRUE); + if (error) + return error; + + offset = 0; + } + + return 0; + + out_drelse: + brelse(bh); + + return error; +} + +/** + * quota_after_scan - called after a log dump is recovered + * @sdp: the filesystem + * @jid: the journal ID that was scanned + * @pass: the pass through the journal + * + */ + +static void +quota_after_scan(struct gfs_sbd *sdp, unsigned int jid, unsigned int pass) +{ + if (pass == GFS_RECPASS_B1) { + GFS_ASSERT_SBD(!sdp->sd_sb.sb_quota_di.no_formal_ino || + test_bit(SDF_FOUND_Q_DUMP, &sdp->sd_flags), + sdp,); + printk("GFS: fsid=%s: Found quota changes for %d IDs\n", + sdp->sd_fsname, atomic_read(&sdp->sd_quota_od_count)); + } +} + +struct gfs_log_operations gfs_glock_lops = { + .lo_add = generic_le_add, + .lo_trans_end = glock_trans_end, + .lo_print = glock_print, + .lo_overlap_trans = glock_overlap_trans, + .lo_incore_commit = glock_incore_commit, + .lo_add_to_ail = glock_add_to_ail, + .lo_trans_combine = glock_trans_combine, + .lo_name = "glock" +}; + +struct gfs_log_operations gfs_buf_lops = { + .lo_add = generic_le_add, + .lo_print = buf_print, + .lo_incore_commit = buf_incore_commit, + .lo_add_to_ail = buf_add_to_ail, + .lo_trans_size = buf_trans_size, + .lo_trans_combine = buf_trans_combine, + .lo_build_bhlist = buf_build_bhlist, + .lo_before_scan = buf_before_scan, + .lo_scan_elements = buf_scan_elements, + .lo_after_scan = buf_after_scan, + .lo_name = "buf" +}; + +struct gfs_log_operations gfs_unlinked_lops = { + .lo_add = generic_le_add, + .lo_print = unlinked_print, + .lo_incore_commit = unlinked_incore_commit, + .lo_add_to_ail = unlinked_add_to_ail, + .lo_clean_dump = unlinked_clean_dump, + .lo_trans_size = unlinked_trans_size, + .lo_trans_combine = unlinked_trans_combine, + .lo_build_bhlist = unlinked_build_bhlist, + .lo_dump_size = unlinked_dump_size, + .lo_build_dump = unlinked_build_dump, + .lo_before_scan = unlinked_before_scan, + .lo_scan_elements = unlinked_scan_elements, + .lo_after_scan = unlinked_after_scan, + .lo_name = 
"unlinked" +}; + +struct gfs_log_operations gfs_quota_lops = { + .lo_add = generic_le_add, + .lo_print = quota_print, + .lo_incore_commit = quota_incore_commit, + .lo_add_to_ail = quota_add_to_ail, + .lo_clean_dump = quota_clean_dump, + .lo_trans_size = quota_trans_size, + .lo_trans_combine = quota_trans_combine, + .lo_build_bhlist = quota_build_bhlist, + .lo_dump_size = quota_dump_size, + .lo_build_dump = quota_build_dump, + .lo_before_scan = quota_before_scan, + .lo_scan_elements = quota_scan_elements, + .lo_after_scan = quota_after_scan, + .lo_name = "quota" +}; + +struct gfs_log_operations *gfs_log_ops[] = { + &gfs_glock_lops, + &gfs_buf_lops, + &gfs_unlinked_lops, + &gfs_quota_lops, + NULL +}; diff -urN linux-orig/fs/gfs/lops.h linux-patched/fs/gfs/lops.h --- linux-orig/fs/gfs/lops.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/lops.h 2004-06-30 13:27:49.348709970 -0500 @@ -0,0 +1,179 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __LOPS_DOT_H__ +#define __LOPS_DOT_H__ + +extern struct gfs_log_operations gfs_glock_lops; +extern struct gfs_log_operations gfs_buf_lops; +extern struct gfs_log_operations gfs_unlinked_lops; +extern struct gfs_log_operations gfs_quota_lops; + +extern struct gfs_log_operations *gfs_log_ops[]; + +#define INIT_LE(le, lops) \ +do \ +{ \ + (le)->le_ops = (lops); \ + (le)->le_trans = NULL; \ + INIT_LIST_HEAD(&(le)->le_list); \ +} \ +while (0) + +#define LO_ADD(sdp, le) \ +do \ +{ \ + if ((le)->le_ops->lo_add) \ + (le)->le_ops->lo_add((sdp), (le)); \ +} \ +while (0) + +#define LO_TRANS_END(sdp, le) \ +do \ +{ \ + if ((le)->le_ops->lo_trans_end) \ + (le)->le_ops->lo_trans_end((sdp), (le)); \ +} \ +while (0) + +#define LO_PRINT(sdp, le, where) \ +do \ +{ \ + if ((le)->le_ops->lo_print) \ + (le)->le_ops->lo_print((sdp), (le), (where)); \ +} \ +while (0) + +static __inline__ struct gfs_trans * +LO_OVERLAP_TRANS(struct gfs_sbd *sdp, struct gfs_log_element *le) +{ + if (le->le_ops->lo_overlap_trans) + return le->le_ops->lo_overlap_trans(sdp, le); + else + return NULL; +} + +#define LO_INCORE_COMMIT(sdp, tr, le) \ +do \ +{ \ + if ((le)->le_ops->lo_incore_commit) \ + (le)->le_ops->lo_incore_commit((sdp), (tr), (le)); \ +} \ +while (0) + +#define LO_ADD_TO_AIL(sdp, le) \ +do \ +{ \ + if ((le)->le_ops->lo_add_to_ail) \ + (le)->le_ops->lo_add_to_ail((sdp), (le)); \ +} \ +while (0) + +#define LO_CLEAN_DUMP(sdp, le) \ +do \ +{ \ + if ((le)->le_ops->lo_clean_dump) \ + (le)->le_ops->lo_clean_dump((sdp), (le)); \ +} \ +while (0) + +#define LO_TRANS_SIZE(sdp, tr, mblks, eblks, blocks, bmem) \ +do \ +{ \ + int __lops_x; \ + for (__lops_x = 0; gfs_log_ops[__lops_x]; __lops_x++) \ + if (gfs_log_ops[__lops_x]->lo_trans_size) \ + gfs_log_ops[__lops_x]->lo_trans_size((sdp), (tr), (mblks), (eblks), (blocks), (bmem)); \ +} \ +while (0) + +#define LO_TRANS_COMBINE(sdp, tr, new_tr) \ +do \ +{ \ + int __lops_x; \ + for (__lops_x = 0; gfs_log_ops[__lops_x]; 
__lops_x++) \ + if (gfs_log_ops[__lops_x]->lo_trans_combine) \ + gfs_log_ops[__lops_x]->lo_trans_combine((sdp), (tr), (new_tr)); \ +} \ +while (0) + +#define LO_BUILD_BHLIST(sdp, tr) \ +do \ +{ \ + int __lops_x; \ + for (__lops_x = 0; gfs_log_ops[__lops_x]; __lops_x++) \ + if (gfs_log_ops[__lops_x]->lo_build_bhlist) \ + gfs_log_ops[__lops_x]->lo_build_bhlist((sdp), (tr)); \ +} \ +while (0) + +#define LO_DUMP_SIZE(sdp, elements, blocks, bmem) \ +do \ +{ \ + int __lops_x; \ + for (__lops_x = 0; gfs_log_ops[__lops_x]; __lops_x++) \ + if (gfs_log_ops[__lops_x]->lo_dump_size) \ + gfs_log_ops[__lops_x]->lo_dump_size((sdp), (elements), (blocks), (bmem)); \ +} \ +while (0) + +#define LO_BUILD_DUMP(sdp, tr) \ +do \ +{ \ + int __lops_x; \ + for (__lops_x = 0; gfs_log_ops[__lops_x]; __lops_x++) \ + if (gfs_log_ops[__lops_x]->lo_build_dump) \ + gfs_log_ops[__lops_x]->lo_build_dump((sdp), (tr)); \ +} \ +while (0) + +#define LO_BEFORE_SCAN(sdp, jid, head, pass) \ +do \ +{ \ + int __lops_x; \ + for (__lops_x = 0; gfs_log_ops[__lops_x]; __lops_x++) \ + if (gfs_log_ops[__lops_x]->lo_before_scan) \ + gfs_log_ops[__lops_x]->lo_before_scan((sdp), (jid), (head), (pass)); \ +} \ +while (0) + +static __inline__ int +LO_SCAN_ELEMENTS(struct gfs_sbd *sdp, struct gfs_jindex *jdesc, + struct gfs_glock *gl, uint64_t start, + struct gfs_log_descriptor *desc, unsigned int pass) +{ + int x; + int error; + + for (x = 0; gfs_log_ops[x]; x++) + if (gfs_log_ops[x]->lo_scan_elements) { + error = gfs_log_ops[x]->lo_scan_elements(sdp, jdesc, gl, + start, desc, pass); + if (error) + return error; + } + + return 0; +} + +#define LO_AFTER_SCAN(sdp, jid, pass) \ +do \ +{ \ + int __lops_x; \ + for (__lops_x = 0; gfs_log_ops[__lops_x]; __lops_x++) \ + if (gfs_log_ops[__lops_x]->lo_after_scan) \ + gfs_log_ops[__lops_x]->lo_after_scan((sdp), (jid), (pass)); \ +} \ +while (0) + +#endif /* __LOPS_DOT_H__ */ diff -urN linux-orig/fs/gfs/lvb.c linux-patched/fs/gfs/lvb.c --- linux-orig/fs/gfs/lvb.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/lvb.c 2004-06-30 13:27:49.349709738 -0500 @@ -0,0 +1,148 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
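
The LO_* wrappers above all expand to the same dispatch shape: walk the NULL-terminated gfs_log_ops[] table and call a hook only when the log-element class actually provides one, so a class can opt out of any stage of the log life cycle. A standalone sketch of that shape (the names here are illustrative stand-ins, not taken from the patch):

#include <stdio.h>

/* Each log-element class fills in only the hooks it needs. */
struct demo_log_ops {
	const char *name;
	void (*build_dump)(void);	/* optional hook; may be NULL */
};

static void demo_quota_dump(void)
{
	printf("quota: dumping\n");
}

static struct demo_log_ops demo_glock_ops = { "glock", NULL };
static struct demo_log_ops demo_quota_ops = { "quota", demo_quota_dump };

/* NULL-terminated, like gfs_log_ops[]. */
static struct demo_log_ops *demo_ops[] = {
	&demo_glock_ops,
	&demo_quota_ops,
	NULL
};

int main(void)
{
	int x;

	/* The loop LO_BUILD_DUMP() expands to: absent hooks are skipped. */
	for (x = 0; demo_ops[x]; x++)
		if (demo_ops[x]->build_dump)
			demo_ops[x]->build_dump();
	return 0;
}
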
+** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include "gfs.h" + +#define pv(struct, member, fmt) printk(" "#member" = "fmt"\n", struct->member); + +#define CPIN_08(s1, s2, member, count) {memcpy((s1->member), (s2->member), (count));} +#define CPOUT_08(s1, s2, member, count) {memcpy((s2->member), (s1->member), (count));} +#define CPIN_16(s1, s2, member) {(s1->member) = gfs16_to_cpu((s2->member));} +#define CPOUT_16(s1, s2, member) {(s2->member) = cpu_to_gfs16((s1->member));} +#define CPIN_32(s1, s2, member) {(s1->member) = gfs32_to_cpu((s2->member));} +#define CPOUT_32(s1, s2, member) {(s2->member) = cpu_to_gfs32((s1->member));} +#define CPIN_64(s1, s2, member) {(s1->member) = gfs64_to_cpu((s2->member));} +#define CPOUT_64(s1, s2, member) {(s2->member) = cpu_to_gfs64((s1->member));} + +/** + * gfs_rgrp_lvb_in - Read in rgrp data + * @rb: the cpu-order structure + * @lvb: the lvb + * + */ + +void +gfs_rgrp_lvb_in(struct gfs_rgrp_lvb *rb, char *lvb) +{ + struct gfs_rgrp_lvb *str = (struct gfs_rgrp_lvb *)lvb; + + CPIN_32(rb, str, rb_magic); + CPIN_32(rb, str, rb_free); + CPIN_32(rb, str, rb_useddi); + CPIN_32(rb, str, rb_freedi); + CPIN_32(rb, str, rb_usedmeta); + CPIN_32(rb, str, rb_freemeta); +} + +/** + * gfs_rgrp_lvb_out - Write out rgrp data + * @rb: the cpu-order structure + * @lvb: the lvb + * + */ + +void +gfs_rgrp_lvb_out(struct gfs_rgrp_lvb *rb, char *lvb) +{ + struct gfs_rgrp_lvb *str = (struct gfs_rgrp_lvb *)lvb; + + CPOUT_32(rb, str, rb_magic); + CPOUT_32(rb, str, rb_free); + CPOUT_32(rb, str, rb_useddi); + CPOUT_32(rb, str, rb_freedi); + CPOUT_32(rb, str, rb_usedmeta); + CPOUT_32(rb, str, rb_freemeta); +} + +/** + * gfs_rgrp_lvb_print - Print out rgrp data + * @rb: the cpu-order structure + * + */ + +void +gfs_rgrp_lvb_print(struct gfs_rgrp_lvb *rb) +{ + pv(rb, rb_magic, "%u"); + pv(rb, rb_free, "%u"); + pv(rb, rb_useddi, "%u"); + pv(rb, rb_freedi, "%u"); + pv(rb, rb_usedmeta, "%u"); + pv(rb, rb_freemeta, "%u"); +} + +/** + * gfs_quota_lvb_in - Read in quota data + * @qb: the cpu-order structure + * @lvb: the lvb + * + */ + +void +gfs_quota_lvb_in(struct gfs_quota_lvb *qb, char *lvb) +{ + struct gfs_quota_lvb *str = (struct gfs_quota_lvb *)lvb; + + CPIN_32(qb, str, qb_magic); + CPIN_32(qb, str, qb_pad); + CPIN_64(qb, str, qb_limit); + CPIN_64(qb, str, qb_warn); + CPIN_64(qb, str, qb_value); +} + +/** + * gfs_quota_lvb_out - Write out quota data + * @qb: the cpu-order structure + * @lvb: the lvb + * + */ + +void +gfs_quota_lvb_out(struct gfs_quota_lvb *qb, char *lvb) +{ + struct gfs_quota_lvb *str = (struct gfs_quota_lvb *)lvb; + + CPOUT_32(qb, str, qb_magic); + CPOUT_32(qb, str, qb_pad); + CPOUT_64(qb, str, qb_limit); + CPOUT_64(qb, str, qb_warn); + CPOUT_64(qb, str, qb_value); +} + +/** + * gfs_quota_lvb_print - Print out quota data + * @qb: the cpu-order structure + * + */ + +void +gfs_quota_lvb_print(struct gfs_quota_lvb *qb) +{ + pv(qb, qb_magic, "%u"); + pv(qb, qb_pad, "%u"); + pv(qb, qb_limit, "%"PRIu64); + pv(qb, qb_warn, "%"PRIu64); + pv(qb, qb_value, "%"PRId64); +} diff -urN linux-orig/fs/gfs/lvb.h linux-patched/fs/gfs/lvb.h 
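
The CPIN_*/CPOUT_* macros in lvb.c above overlay a struct on the raw lock value block and convert one field at a time between CPU byte order and the on-disk byte order hidden behind the gfs32_to_cpu()/cpu_to_gfs32() wrappers. A user-space round trip of the same pattern (htobe32()/be32toh() stand in for those wrappers, and the struct is cut down to two fields):

#include <stdio.h>
#include <stdint.h>
#include <endian.h>

struct demo_rgrp_lvb {
	uint32_t rb_magic;
	uint32_t rb_free;
};

/* The CPOUT_32 pattern: overlay the LVB buffer, store each field. */
static void demo_lvb_out(struct demo_rgrp_lvb *rb, char *lvb)
{
	struct demo_rgrp_lvb *str = (struct demo_rgrp_lvb *)lvb;

	str->rb_magic = htobe32(rb->rb_magic);
	str->rb_free = htobe32(rb->rb_free);
}

/* The CPIN_32 pattern: load each field back into CPU order. */
static void demo_lvb_in(struct demo_rgrp_lvb *rb, char *lvb)
{
	struct demo_rgrp_lvb *str = (struct demo_rgrp_lvb *)lvb;

	rb->rb_magic = be32toh(str->rb_magic);
	rb->rb_free = be32toh(str->rb_free);
}

int main(void)
{
	char lvb[32];	/* GFS_MIN_LVB_SIZE */
	struct demo_rgrp_lvb a = { 0x01171000, 128 }, b;

	demo_lvb_out(&a, lvb);
	demo_lvb_in(&b, lvb);
	printf("magic 0x%x, free %u\n", b.rb_magic, b.rb_free);
	return 0;
}
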
--- linux-orig/fs/gfs/lvb.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/lvb.h 2004-06-30 13:27:49.349709738 -0500 @@ -0,0 +1,48 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __LVB_DOT_H__ +#define __LVB_DOT_H__ + +#define GFS_MIN_LVB_SIZE (32) + +struct gfs_rgrp_lvb { + uint32_t rb_magic; + uint32_t rb_free; + uint32_t rb_useddi; + uint32_t rb_freedi; + uint32_t rb_usedmeta; + uint32_t rb_freemeta; +}; + +struct gfs_quota_lvb { + uint32_t qb_magic; + uint32_t qb_pad; + uint64_t qb_limit; + uint64_t qb_warn; + int64_t qb_value; +}; + +/* Translation functions */ + +void gfs_rgrp_lvb_in(struct gfs_rgrp_lvb *rb, char *lvb); +void gfs_rgrp_lvb_out(struct gfs_rgrp_lvb *rb, char *lvb); +void gfs_quota_lvb_in(struct gfs_quota_lvb *qb, char *lvb); +void gfs_quota_lvb_out(struct gfs_quota_lvb *qb, char *lvb); + +/* Printing functions */ + +void gfs_rgrp_lvb_print(struct gfs_rgrp_lvb *rb); +void gfs_quota_lvb_print(struct gfs_quota_lvb *qb); + +#endif /* __LVB_DOT_H__ */ diff -urN linux-orig/fs/gfs/main.c linux-patched/fs/gfs/main.c --- linux-orig/fs/gfs/main.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/main.c 2004-06-30 13:27:49.349709738 -0500 @@ -0,0 +1,142 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
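
Both structures declared in lvb.h above have to fit in the GFS_MIN_LVB_SIZE (32-byte) lock value block: gfs_rgrp_lvb packs six 32-bit counters into 24 bytes, and gfs_quota_lvb is exactly 32. A compile-time size check in the pre-C11 style that one could add alongside (not part of the patch):

#include <stdio.h>
#include <stdint.h>

#define DEMO_MIN_LVB_SIZE 32

struct demo_rgrp_lvb {
	uint32_t rb_magic, rb_free, rb_useddi, rb_freedi;
	uint32_t rb_usedmeta, rb_freemeta;
};

struct demo_quota_lvb {
	uint32_t qb_magic, qb_pad;
	uint64_t qb_limit, qb_warn;
	int64_t qb_value;
};

/* Compilation fails (negative array size) if a struct outgrows the LVB. */
typedef char demo_rgrp_fits[(sizeof(struct demo_rgrp_lvb) <= DEMO_MIN_LVB_SIZE) ? 1 : -1];
typedef char demo_quota_fits[(sizeof(struct demo_quota_lvb) <= DEMO_MIN_LVB_SIZE) ? 1 : -1];

int main(void)
{
	printf("rgrp lvb: %u bytes, quota lvb: %u bytes, limit: %u\n",
	       (unsigned int)sizeof(struct demo_rgrp_lvb),
	       (unsigned int)sizeof(struct demo_quota_lvb),
	       DEMO_MIN_LVB_SIZE);
	return 0;
}
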
+** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs.h" +#include "mount.h" +#include "ops_fstype.h" + +struct proc_dir_entry *gfs_proc_entry = NULL; + +/** + * init_gfs_fs - Register GFS as a filesystem + * + * Returns: 0 on success, error code on failure + */ + +int __init +init_gfs_fs(void) +{ + int error = 0; + + init_MUTEX(&gfs_mount_args_lock); + + gfs_proc_entry = create_proc_read_entry("fs/gfs", S_IFREG | 0200, NULL, NULL, NULL); + if (!gfs_proc_entry) { + printk("GFS: can't register /proc/fs/gfs\n"); + error = -EINVAL; + goto fail; + } + gfs_proc_entry->write_proc = gfs_proc_write; + + gfs_random_number = xtime.tv_nsec; + + error = -ENOMEM; + + gfs_glock_cachep = kmem_cache_create("gfs_glock", sizeof(struct gfs_glock), + 0, 0, + NULL, NULL); + if (!gfs_glock_cachep) + goto fail2; + + gfs_inode_cachep = kmem_cache_create("gfs_inode", sizeof(struct gfs_inode), + 0, 0, + NULL, NULL); + if (!gfs_inode_cachep) + goto fail2; + + gfs_bufdata_cachep = kmem_cache_create("gfs_bufdata", sizeof(struct gfs_bufdata), + 0, 0, + NULL, NULL); + if (!gfs_bufdata_cachep) + goto fail2; + + gfs_mhc_cachep = kmem_cache_create("gfs_meta_header_cache", sizeof(struct gfs_meta_header_cache), + 0, 0, + NULL, NULL); + if (!gfs_mhc_cachep) + goto fail2; + + error = register_filesystem(&gfs_fs_type); + if (error) + goto fail2; + + printk("GFS %s (built %s %s) installed\n", + GFS_RELEASE_NAME, __DATE__, __TIME__); + + return 0; + + fail2: + if (gfs_mhc_cachep) + kmem_cache_destroy(gfs_mhc_cachep); + + if (gfs_bufdata_cachep) + kmem_cache_destroy(gfs_bufdata_cachep); + + if (gfs_inode_cachep) + kmem_cache_destroy(gfs_inode_cachep); + + if (gfs_glock_cachep) + kmem_cache_destroy(gfs_glock_cachep); + + down(&gfs_mount_args_lock); + if (gfs_mount_args) { + kfree(gfs_mount_args); + gfs_mount_args = NULL; + } + up(&gfs_mount_args_lock); + remove_proc_entry("fs/gfs", NULL); + + fail: + return error; +} + +/** + * exit_gfs_fs - Unregister the file system + * + */ + +void __exit +exit_gfs_fs(void) +{ + unregister_filesystem(&gfs_fs_type); + + kmem_cache_destroy(gfs_mhc_cachep); + kmem_cache_destroy(gfs_bufdata_cachep); + kmem_cache_destroy(gfs_inode_cachep); + kmem_cache_destroy(gfs_glock_cachep); + + down(&gfs_mount_args_lock); + if (gfs_mount_args) { + kfree(gfs_mount_args); + gfs_mount_args = NULL; + } + up(&gfs_mount_args_lock); + remove_proc_entry("fs/gfs", NULL); +} + +MODULE_DESCRIPTION("Global File System " GFS_RELEASE_NAME); +MODULE_AUTHOR("Red Hat, Inc."); +MODULE_LICENSE("GPL"); + +module_init(init_gfs_fs); +module_exit(exit_gfs_fs); + diff -urN linux-orig/fs/gfs/mount.c linux-patched/fs/gfs/mount.c --- linux-orig/fs/gfs/mount.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/mount.c 2004-06-30 13:27:49.349709738 -0500 @@ -0,0 +1,212 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
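
init_gfs_fs() above funnels every allocation failure through a single fail2 label and NULL-checks each cache before destroying it, so one cleanup block serves no matter how far initialization got (kmem_cache_destroy() must not be handed a NULL pointer). The same unwind shape in a user-space sketch, with malloc()/free() standing in for the slab caches:

#include <stdio.h>
#include <stdlib.h>

static char *cache_a, *cache_b, *cache_c;

static int demo_init(void)
{
	int error = -1;	/* -ENOMEM in the kernel */

	cache_a = malloc(32);
	if (!cache_a)
		goto fail;
	cache_b = malloc(32);
	if (!cache_b)
		goto fail;
	cache_c = malloc(32);
	if (!cache_c)
		goto fail;
	return 0;

 fail:
	/* One label serves every failure point: each resource is
	   checked before it is torn down, so it does not matter how
	   far setup got before the jump. */
	if (cache_c)
		free(cache_c);
	if (cache_b)
		free(cache_b);
	if (cache_a)
		free(cache_a);
	return error;
}

int main(void)
{
	return demo_init() ? 1 : 0;
}
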
+** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs.h" +#include "mount.h" + +char *gfs_mount_args = NULL; +struct semaphore gfs_mount_args_lock; + +/** + * gfs_make_args - Parse mount arguments + * @data: the mount option string + * @args: the args structure to fill in + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_make_args(char *data, struct gfs_args *args) +{ + char *options, *x, *y; + int do_free = FALSE; + int error = 0; + + /* If someone preloaded options, use those instead */ + + down(&gfs_mount_args_lock); + if (gfs_mount_args) { + data = gfs_mount_args; + gfs_mount_args = NULL; + do_free = TRUE; + } + up(&gfs_mount_args_lock); + + /* Set some defaults */ + + memset(args, 0, sizeof(struct gfs_args)); + args->ar_num_glockd = GFS_GLOCKD_DEFAULT; + + /* Split the options into tokens with the "," character and + process them */ + + for (options = data; (x = strsep(&options, ",")); ) { + if (!*x) + continue; + + y = strchr(x, '='); + if (y) + *y++ = 0; + + if (!strcmp(x, "lockproto")) { + if (!y) { + printk("GFS: need argument to lockproto\n"); + error = -EINVAL; + break; + } + strncpy(args->ar_lockproto, y, 256); + args->ar_lockproto[255] = 0; + } + + else if (!strcmp(x, "locktable")) { + if (!y) { + printk("GFS: need argument to locktable\n"); + error = -EINVAL; + break; + } + strncpy(args->ar_locktable, y, 256); + args->ar_locktable[255] = 0; + } + + else if (!strcmp(x, "hostdata")) { + if (!y) { + printk("GFS: need argument to hostdata\n"); + error = -EINVAL; + break; + } + strncpy(args->ar_hostdata, y, 256); + args->ar_hostdata[255] = 0; + } + + else if (!strcmp(x, "ignore_local_fs")) + args->ar_ignore_local_fs = TRUE; + + else if (!strcmp(x, "localflocks")) + args->ar_localflocks = TRUE; + + else if (!strcmp(x, "localcaching")) + args->ar_localcaching = TRUE; + + else if (!strcmp(x, "upgrade")) + args->ar_upgrade = TRUE; + + else if (!strcmp(x, "num_glockd")) { + if (!y) { + printk("GFS: need argument to num_glockd\n"); + error = -EINVAL; + break; + } + sscanf(y, "%u", &args->ar_num_glockd); + if (!args->ar_num_glockd || args->ar_num_glockd > GFS_GLOCKD_MAX) { + printk("GFS: 0 < num_glockd <= %u (not %u)\n", + GFS_GLOCKD_MAX, args->ar_num_glockd); + error = -EINVAL; + break; + } + } + + else if (!strcmp(x, "acl")) + args->ar_posixacls = TRUE; + + /* Unknown */ + + else { + printk("GFS: unknown option: %s\n", x); + error = -EINVAL; + break; + } + } + + if (error) + printk("GFS: invalid mount option(s)\n"); + + if (do_free) + kfree(data); + + return error; +} + +/** + * gfs_proc_write - Read in some mount options + * @file: unused + * @buffer: a buffer of mount options + * @count: the length of the mount options + * @data: unused + * + * Called when someone writes to /proc/fs/gfs. + * It allows you to specify mount options when you can't do it + * from mount(8), e.g. from an initial ramdisk. + * + * Returns: the number of bytes written on success, -EXXX on failure + */ + +int +gfs_proc_write(struct file *file, + const char *buffer, unsigned long count, + void *data) +{ + int error; + char *p; + + if (!try_module_get(THIS_MODULE)) + return -EAGAIN; /* Huh!?! 
*/ + down(&gfs_mount_args_lock); + + if (gfs_mount_args) { + kfree(gfs_mount_args); + gfs_mount_args = NULL; + } + + if (!count) { + error = 0; + goto fail; + } + + gfs_mount_args = gmalloc(count + 1); + + error = -EFAULT; + if (copy_from_user(gfs_mount_args, buffer, count)) + goto fail_free; + + gfs_mount_args[count] = 0; + + /* Get rid of extra newlines */ + + for (p = gfs_mount_args; *p; p++) + if (*p == '\n') + *p = 0; + + up(&gfs_mount_args_lock); + module_put(THIS_MODULE); + + return count; + + fail_free: + kfree(gfs_mount_args); + gfs_mount_args = NULL; + + fail: + up(&gfs_mount_args_lock); + module_put(THIS_MODULE); + return error; +} diff -urN linux-orig/fs/gfs/mount.h linux-patched/fs/gfs/mount.h --- linux-orig/fs/gfs/mount.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/mount.h 2004-06-30 13:27:49.349709738 -0500 @@ -0,0 +1,27 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __MOUNT_DOT_H__ +#define __MOUNT_DOT_H__ + +int gfs_make_args(char *data, struct gfs_args *args); + +/* Allow args to be passed to GFS when using an initial ram disk */ + +extern char *gfs_mount_args; +extern struct semaphore gfs_mount_args_lock; + +int gfs_proc_write(struct file *file, const char *buffer, + unsigned long count, void *data); + +#endif /* __MOUNT_DOT_H__ */ diff -urN linux-orig/fs/gfs/ondisk.c linux-patched/fs/gfs/ondisk.c --- linux-orig/fs/gfs/ondisk.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/ondisk.c 2004-06-30 13:27:49.350709506 -0500 @@ -0,0 +1,28 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include "gfs.h" + +#define pv(struct, member, fmt) printk(" "#member" = "fmt"\n", struct->member); + +#define WANT_GFS_CONVERSION_FUNCTIONS +#include + diff -urN linux-orig/fs/gfs/ops_address.c linux-patched/fs/gfs/ops_address.c --- linux-orig/fs/gfs/ops_address.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/ops_address.c 2004-06-30 13:27:49.350709506 -0500 @@ -0,0 +1,476 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. 
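
gfs_make_args() above tokenizes the option string with strsep() on commas and then splits each token at its first '='; empty tokens (from runs of commas) are skipped, and a missing '=' simply leaves the value pointer NULL. The same loop reduced to a standalone sketch:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char data[] = "lockproto=lock_dlm,,acl,num_glockd=4";
	char *options, *x, *y;

	for (options = data; (x = strsep(&options, ",")); ) {
		if (!*x)
			continue;	/* empty token, e.g. from ",," */

		y = strchr(x, '=');
		if (y)
			*y++ = 0;	/* x is now the key, y the value */

		printf("option \"%s\" value \"%s\"\n", x, y ? y : "(none)");
	}
	return 0;
}
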
+** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs.h" +#include "bmap.h" +#include "dio.h" +#include "file.h" +#include "glock.h" +#include "inode.h" +#include "ops_address.h" +#include "page.h" +#include "quota.h" +#include "trans.h" + +/** + * get_block - Fills in a buffer head with details about a block + * @inode: The inode + * @lblock: The block number to look up + * @bh_result: The buffer head to return the result in + * @create: Non-zero if we may add block to the file + * + * Returns: errno + */ + +static int +get_block(struct inode *inode, sector_t lblock, + struct buffer_head *bh_result, int create) +{ + struct gfs_inode *ip = vn2ip(inode); + int new = create; + uint64_t dblock; + int error; + + error = gfs_block_map(ip, lblock, &new, &dblock, NULL); + if (error) + return error; + + GFS_ASSERT_INODE(dblock || !create, ip,); + + if (!dblock) + return 0; + + map_bh(bh_result, inode->i_sb, dblock); + if (new) + set_buffer_new(bh_result); + + return 0; +} + +/** + * get_block_noalloc - Fills in a buffer head with details about a block + * @inode: The inode + * @lblock: The block number to look up + * @bh_result: The buffer head to return the result in + * @create: Non-zero if we may add block to the file + * + * Returns: errno + */ + +static int +get_block_noalloc(struct inode *inode, sector_t lblock, + struct buffer_head *bh_result, int create) +{ + int error; + + error = get_block(inode, lblock, bh_result, FALSE); + + GFS_ASSERT_INODE(!create || buffer_mapped(bh_result), + vn2ip(inode),); + + return error; +} + +/** + * get_blocks - + * @inode: + * @lblock: + * @max_blocks: + * @bh_result: + * @create: + * + * Returns: errno + */ + +static int +get_blocks(struct inode *inode, sector_t lblock, + unsigned long max_blocks, + struct buffer_head *bh_result, int create) +{ + struct gfs_inode *ip = vn2ip(inode); + int new = create; + uint64_t dblock; + uint32_t extlen; + int error; + + error = gfs_block_map(ip, lblock, &new, &dblock, &extlen); + if (error) + return error; + + GFS_ASSERT_INODE(dblock || !create, ip,); + + if (!dblock) + return 0; + + map_bh(bh_result, inode->i_sb, dblock); + if (new) + set_buffer_new(bh_result); + + if (extlen > max_blocks) + extlen = max_blocks; + bh_result->b_size = extlen << inode->i_blkbits; + + return 0; +} + +/** + * get_blocks_noalloc - + * @inode: + * @lblock: + * @max_blocks: + * @bh_result: + * @create: + * + * Returns: errno + */ + +static int +get_blocks_noalloc(struct inode *inode, sector_t lblock, + unsigned long max_blocks, + struct buffer_head *bh_result, int create) +{ + int error; + + error = get_blocks(inode, lblock, max_blocks, bh_result, FALSE); + + GFS_ASSERT_INODE(!create || buffer_mapped(bh_result), + vn2ip(inode),); + + return error; +} + +/** + * gfs_writepage - Write complete page + * @page: Page to write + * + * Returns: errno + */ + +static int +gfs_writepage(struct page *page, struct writeback_control *wbc) +{ + struct gfs_inode *ip = vn2ip(page->mapping->host); + int error; + + atomic_inc(&ip->i_sbd->sd_ops_address); + + GFS_ASSERT_INODE(gfs_glock_is_held_excl(ip->i_gl) && + !gfs_is_stuffed(ip), ip,); + + error = 
block_write_full_page(page, get_block_noalloc, wbc); + + gfs_flush_meta_cache(ip); + + if (error == -EIO) + gfs_io_error_inode(ip); + + return error; +} + +/** + * stuffed_readpage - Fill in a Linux page with stuffed file data + * @ip: the inode + * @page: the page + * + * Returns: errno + */ + +static int +stuffed_readpage(struct gfs_inode *ip, struct page *page) +{ + struct buffer_head *dibh; + void *kaddr; + int error; + + GFS_ASSERT_INODE(PageLocked(page), ip,); + + error = gfs_get_inode_buffer(ip, &dibh); + if (!error) { + kaddr = kmap(page); + memcpy((char *)kaddr, + dibh->b_data + sizeof(struct gfs_dinode), + ip->i_di.di_size); + memset((char *)kaddr + ip->i_di.di_size, + 0, + PAGE_CACHE_SIZE - ip->i_di.di_size); + kunmap(page); + + brelse(dibh); + + SetPageUptodate(page); + } + + return error; +} + +/** + * readi_readpage - readpage that goes through gfs_internal_read() + * @page: The page to read + * + * Returns: errno + */ + +static int +readi_readpage(struct page *page) +{ + struct gfs_inode *ip = vn2ip(page->mapping->host); + void *kaddr; + int ret; + + kaddr = kmap(page); + + ret = gfs_internal_read(ip, kaddr, + (uint64_t)page->index << PAGE_CACHE_SHIFT, + PAGE_CACHE_SIZE); + if (ret >= 0) { + if (ret < PAGE_CACHE_SIZE) + memset(kaddr + ret, 0, PAGE_CACHE_SIZE - ret); + SetPageUptodate(page); + ret = 0; + } + + kunmap(page); + + unlock_page(page); + + return ret; +} + +/** + * gfs_readpage - readpage with locking + * @file: The file to read a page for + * @page: The page to read + * + * Returns: errno + */ + +static int +gfs_readpage(struct file *file, struct page *page) +{ + struct gfs_inode *ip = vn2ip(page->mapping->host); + int error; + + atomic_inc(&ip->i_sbd->sd_ops_address); + + if (!gfs_glock_is_locked_by_me(ip->i_gl)) { + unlock_page(page); + bitch_about(ip->i_sbd, &ip->i_sbd->sd_last_unlocked_aop, + "unlocked readpage request"); + return -ENOSYS; + } + + if (!gfs_is_jdata(ip)) { + if (gfs_is_stuffed(ip) && !page->index) { + error = stuffed_readpage(ip, page); + unlock_page(page); + } else + error = block_read_full_page(page, get_block); + } else + error = readi_readpage(page); + + if (error == -EIO) + gfs_io_error_inode(ip); + + return error; +} + +/** + * gfs_prepare_write - Prepare to write to a file + * @file: The file to write to + * @page: The page which is to be prepared for writing + * @from: From (byte range within page) + * @to: To (byte range within page) + * + * Returns: errno + */ + +static int +gfs_prepare_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + struct gfs_inode *ip = vn2ip(page->mapping->host); + struct gfs_sbd *sdp = ip->i_sbd; + int error = 0; + + atomic_inc(&sdp->sd_ops_address); + + if (!gfs_glock_is_locked_by_me(ip->i_gl)) { + bitch_about(sdp, &sdp->sd_last_unlocked_aop, + "unlocked prepare_write request"); + return -ENOSYS; + } + + if (gfs_is_stuffed(ip)) { + uint64_t file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to; + + if (file_size > sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode)) { + error = gfs_unstuff_dinode(ip, gfs_unstuffer_page, page); + if (!error) + error = block_prepare_write(page, from, to, get_block); + } else if (!PageUptodate(page)) + error = stuffed_readpage(ip, page); + } else + error = block_prepare_write(page, from, to, get_block); + + if (error == -EIO) + gfs_io_error_inode(ip); + + return error; +} + +/** + * gfs_commit_write - Commit write to a file + * @file: The file to write to + * @page: The page containing the data + * @from: From (byte range within page) + * @to: To 
(byte range within page) + * + * Returns: errno + */ + +static int +gfs_commit_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + struct inode *inode = page->mapping->host; + struct gfs_inode *ip = vn2ip(inode); + struct gfs_sbd *sdp = ip->i_sbd; + int error; + + atomic_inc(&sdp->sd_ops_address); + + if (gfs_is_stuffed(ip)) { + struct buffer_head *dibh; + uint64_t file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to; + void *kaddr; + + GFS_ASSERT_INODE(PageLocked(page), ip,); + + error = gfs_get_inode_buffer(ip, &dibh); + if (error) + goto fail; + + gfs_trans_add_bh(ip->i_gl, dibh); + + kaddr = kmap(page); + memcpy(dibh->b_data + sizeof(struct gfs_dinode) + from, + (char *)kaddr + from, + to - from); + kunmap(page); + + brelse(dibh); + + SetPageUptodate(page); + + if (inode->i_size < file_size) + i_size_write(inode, file_size); + } else { + error = generic_commit_write(file, page, from, to); + if (error) + goto fail; + } + + return 0; + + fail: + ClearPageUptodate(page); + + return error; +} + +/** + * gfs_bmap - Block map function + * @mapping: Address space info + * @lblock: The block to map + * + * Returns: The disk address for the block or 0 on hole or error + */ + +static sector_t +gfs_bmap(struct address_space *mapping, sector_t lblock) +{ + struct gfs_inode *ip = vn2ip(mapping->host); + struct gfs_holder i_gh; + int dblock = 0; + int error; + + atomic_inc(&ip->i_sbd->sd_ops_address); + + error = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); + if (error) + return 0; + + if (!gfs_is_stuffed(ip)) + dblock = generic_block_bmap(mapping, lblock, get_block); + + gfs_glock_dq_uninit(&i_gh); + + return dblock; +} + +/** + * gfs_direct_IO - + * @rw: + * @iocb: + * @iov: + * @offset: + * @nr_segs: + * + * Returns: errno + */ + +static int +gfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, + loff_t offset, unsigned long nr_segs) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + struct gfs_inode *ip = vn2ip(inode); + get_blocks_t *gb = get_blocks; + int error; + + atomic_inc(&ip->i_sbd->sd_ops_address); + + GFS_ASSERT_INODE(gfs_glock_is_locked_by_me(ip->i_gl), ip,); + GFS_ASSERT_INODE(!gfs_is_stuffed(ip), ip,); + + if (rw == WRITE && !current_transaction) + gb = get_blocks_noalloc; + + error = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, + offset, nr_segs, gb, NULL); + + if (error == -EIO) + gfs_io_error_inode(ip); + + return error; +} + +struct address_space_operations gfs_file_aops = { + .writepage = gfs_writepage, + .readpage = gfs_readpage, + .sync_page = block_sync_page, + .prepare_write = gfs_prepare_write, + .commit_write = gfs_commit_write, + .bmap = gfs_bmap, + .direct_IO = gfs_direct_IO, +}; diff -urN linux-orig/fs/gfs/ops_address.h linux-patched/fs/gfs/ops_address.h --- linux-orig/fs/gfs/ops_address.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/ops_address.h 2004-06-30 13:27:49.350709506 -0500 @@ -0,0 +1,19 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
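
gfs_prepare_write() and gfs_commit_write() above special-case "stuffed" inodes: a small file's data lives in the dinode block itself, immediately after the struct gfs_dinode header, and the file is unstuffed only once it would outgrow sb_bsize - sizeof(struct gfs_dinode). The arithmetic, with illustrative sizes (the 232-byte header is an assumption for the sketch, not a value from the patch):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define DEMO_BSIZE	4096u	/* sb_bsize */
#define DEMO_DINODE	232u	/* sizeof(struct gfs_dinode), assumed */

int main(void)
{
	unsigned char block[DEMO_BSIZE];	/* the dinode's disk block */
	const char data[] = "small enough to stay stuffed";
	uint64_t file_size = sizeof(data) - 1;

	if (file_size > DEMO_BSIZE - DEMO_DINODE) {
		printf("unstuff: data moves out to its own blocks\n");
		return 0;
	}

	/* The stuffed commit_write() path: copy straight into the dinode
	   block, just past the header, as the patch does with
	   dibh->b_data + sizeof(struct gfs_dinode) + from. */
	memcpy(block + DEMO_DINODE, data, file_size);
	printf("stuffed %llu bytes behind a %u-byte dinode header\n",
	       (unsigned long long)file_size, DEMO_DINODE);
	return 0;
}
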
+** +******************************************************************************* +******************************************************************************/ + +#ifndef __OPS_ADDRESS_DOT_H__ +#define __OPS_ADDRESS_DOT_H__ + +extern struct address_space_operations gfs_file_aops; + +#endif /* __OPS_ADDRESS_DOT_H__ */ diff -urN linux-orig/fs/gfs/ops_dentry.c linux-patched/fs/gfs/ops_dentry.c --- linux-orig/fs/gfs/ops_dentry.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/ops_dentry.c 2004-06-30 13:27:49.350709506 -0500 @@ -0,0 +1,124 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include "gfs.h" +#include "dir.h" +#include "glock.h" +#include "ops_dentry.h" + +/** + * gfs_drevalidate - Check directory lookup consistency + * @dentry: the mapping to check + * @nd: + * + * Check to make sure the lookup necessary to arrive at this inode from its + * parent is still good. + * + * Returns: 1 if the dentry is ok, 0 if it isn't + */ + +static int +gfs_drevalidate(struct dentry *dentry, struct nameidata *nd) +{ + struct dentry *parent = dget_parent(dentry); + struct gfs_inode *dip; + struct inode *inode; + struct gfs_holder d_gh; + struct gfs_inode *ip; + struct gfs_inum inum; + unsigned int type; + int error; + + lock_kernel(); + + dip = vn2ip(parent->d_inode); + GFS_ASSERT(dip,); + + atomic_inc(&dip->i_sbd->sd_ops_dentry); + + if (dip->i_sbd->sd_args.ar_localcaching) + goto valid; + + inode = dentry->d_inode; + if (inode && is_bad_inode(inode)) + goto invalid; + + error = gfs_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); + if (error) + goto fail; + + error = gfs_dir_search(dip, &dentry->d_name, &inum, &type); + switch (error) { + case 0: + if (!inode) + goto invalid_gunlock; + break; + case -ENOENT: + if (!inode) + goto valid_gunlock; + goto invalid_gunlock; + default: + goto fail_gunlock; + } + + ip = vn2ip(inode); + GFS_ASSERT_SBD(ip, dip->i_sbd,); + + if (ip->i_num.no_formal_ino != inum.no_formal_ino) + goto invalid_gunlock; + + GFS_ASSERT_INODE(ip->i_di.di_type == type, ip,); + + valid_gunlock: + gfs_glock_dq_uninit(&d_gh); + + valid: + unlock_kernel(); + dput(parent); + return 1; + + invalid_gunlock: + gfs_glock_dq_uninit(&d_gh); + + invalid: + if (inode && S_ISDIR(inode->i_mode)) { + if (have_submounts(dentry)) + goto valid; + shrink_dcache_parent(dentry); + } + d_drop(dentry); + + unlock_kernel(); + dput(parent); + return 0; + + fail_gunlock: + gfs_glock_dq_uninit(&d_gh); + + fail: + unlock_kernel(); + dput(parent); + return 0; +} + +struct dentry_operations gfs_dops = { + .d_revalidate = gfs_drevalidate, +}; diff -urN linux-orig/fs/gfs/ops_dentry.h linux-patched/fs/gfs/ops_dentry.h --- linux-orig/fs/gfs/ops_dentry.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/ops_dentry.h 2004-06-30 13:27:49.351709274 -0500 @@ -0,0 +1,19 @@ 
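
gfs_drevalidate() above decides whether a cached name-to-inode mapping is still good by re-running the directory search under a shared glock: a hit must resolve to the same no_formal_ino the dcache already holds, and -ENOENT is acceptable only for a negative dentry. The decision table as a standalone sketch (the directory search is faked, and a cached inode number of 0 models a negative dentry):

#include <stdio.h>

#define DEMO_ENOENT 2

/* Fake directory search: "foo" resolves to inode 42, nothing else. */
static int demo_dir_search(const char *name, unsigned long long *ino)
{
	if (name[0] == 'f') {
		*ino = 42;
		return 0;
	}
	return -DEMO_ENOENT;
}

static int demo_revalidate(const char *name, unsigned long long cached_ino)
{
	unsigned long long ino;
	int error = demo_dir_search(name, &ino);

	if (error == -DEMO_ENOENT)
		return cached_ino == 0;	/* still absent: negative dentry ok */
	if (error)
		return 0;		/* lookup failed: drop the dentry */
	return cached_ino && ino == cached_ino;
}

int main(void)
{
	printf("foo -> 42: %d\n", demo_revalidate("foo", 42));	/* kept */
	printf("foo -> 43: %d\n", demo_revalidate("foo", 43));	/* dropped */
	printf("bar (neg): %d\n", demo_revalidate("bar", 0));	/* kept */
	return 0;
}
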
+/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __OPS_DENTRY_DOT_H__ +#define __OPS_DENTRY_DOT_H__ + +extern struct dentry_operations gfs_dops; + +#endif /* __OPS_DENTRY_DOT_H__ */ diff -urN linux-orig/fs/gfs/ops_export.c linux-patched/fs/gfs/ops_export.c --- linux-orig/fs/gfs/ops_export.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/ops_export.c 2004-06-30 13:27:49.351709274 -0500 @@ -0,0 +1,415 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include "gfs.h" +#include "dio.h" +#include "dir.h" +#include "glock.h" +#include "glops.h" +#include "inode.h" +#include "ops_export.h" +#include "rgrp.h" + +struct inode_cookie +{ + uint64_t formal_ino; + uint32_t gen; + int gen_valid; +}; + +struct get_name_filldir +{ + uint64_t formal_ino; + char *name; +}; + +/** + * gfs_decode_fh - turn an NFS filehandle into a dentry + * @sb: the filesystem's super block + * @fh: the filehandle data + * @fh_len: the length of the filehandle, in 32-bit words + * @fh_type: the type of the filehandle (3, 5, or 6; must equal @fh_len) + * @acceptable: the callback that decides whether a dentry is usable + * @context: opaque data passed to @acceptable + * + * Returns: the dentry on success, NULL or ERR_PTR() on failure + */ + +struct dentry * +gfs_decode_fh(struct super_block *sb, __u32 *fh, int fh_len, int fh_type, + int (*acceptable)(void *context, struct dentry *dentry), + void *context) +{ + struct inode_cookie this, parent; + + atomic_inc(&vfs2sdp(sb)->sd_ops_export); + + if (fh_type != fh_len) + return NULL; + + memset(&parent, 0, sizeof(struct inode_cookie)); + + switch (fh_type) { + case 6: + parent.gen_valid = TRUE; + parent.gen = gfs32_to_cpu(fh[5]); + case 5: + parent.formal_ino = ((uint64_t)gfs32_to_cpu(fh[3])) << 32; + parent.formal_ino |= (uint64_t)gfs32_to_cpu(fh[4]); + case 3: + this.gen_valid = TRUE; + this.gen = gfs32_to_cpu(fh[2]); + this.formal_ino = ((uint64_t)gfs32_to_cpu(fh[0])) << 32; + this.formal_ino |= (uint64_t)gfs32_to_cpu(fh[1]); + break; + default: + return NULL; + } + + return gfs_export_ops.find_exported_dentry(sb, &this, &parent, + acceptable, context); +} + +/** + * gfs_encode_fh - build an NFS filehandle for a dentry + * @dentry: the dentry to encode + * @fh: the buffer to fill with filehandle data + * @len: in: the size of @fh in 32-bit words, out: the number of words used + * @connectable: TRUE if the parent directory should be encoded too + * + * Returns: the filehandle type (3, 5, or 6), or 255 if @fh is too small + */ + +int +gfs_encode_fh(struct dentry *dentry, __u32 *fh, int *len, + int connectable) +{ + struct inode *inode = dentry->d_inode; + struct gfs_inode *ip = vn2ip(inode); + int maxlen = *len; + + atomic_inc(&ip->i_sbd->sd_ops_export); + + if (maxlen < 3) + return 
255; + + fh[0] = cpu_to_gfs32((uint32_t)(ip->i_num.no_formal_ino >> 32)); + fh[1] = cpu_to_gfs32((uint32_t)(ip->i_num.no_formal_ino & 0xFFFFFFFF)); + fh[2] = cpu_to_gfs32(inode->i_generation); + *len = 3; + + if (maxlen < 5 || !connectable) + return 3; + + spin_lock(&dentry->d_lock); + + inode = dentry->d_parent->d_inode; + ip = vn2ip(inode); + + fh[3] = cpu_to_gfs32((uint32_t)(ip->i_num.no_formal_ino >> 32)); + fh[4] = cpu_to_gfs32((uint32_t)(ip->i_num.no_formal_ino & 0xFFFFFFFF)); + *len = 5; + + if (maxlen < 6) { + spin_unlock(&dentry->d_lock); + return 5; + } + + fh[5] = cpu_to_gfs32(inode->i_generation); + + spin_unlock(&dentry->d_lock); + + *len = 6; + + return 6; +} + +/** + * get_name_filldir - the filldir callback used by gfs_get_name() + * @opaque: the struct get_name_filldir being filled in + * @name: the name of the current directory entry + * @length: the length of the name + * @offset: the offset of the entry in the directory + * @inum: the inode number of the entry + * @type: the type of the entry + * + * Returns: 0 to keep scanning, 1 once the wanted inode has been found + */ + +static int +get_name_filldir(void *opaque, + const char *name, unsigned int length, + uint64_t offset, + struct gfs_inum *inum, unsigned int type) +{ + struct get_name_filldir *gnfd = (struct get_name_filldir *)opaque; + + if (inum->no_formal_ino != gnfd->formal_ino) + return 0; + + memcpy(gnfd->name, name, length); + gnfd->name[length] = 0; + + return 1; +} + +/** + * gfs_get_name - find the name of a child dentry in its parent directory + * @parent: the dentry of the parent directory + * @name: the buffer to fill with the name + * @child: the dentry whose name is wanted + * + * Returns: 0 on success, -EXXX on failure + */ + +int gfs_get_name(struct dentry *parent, char *name, + struct dentry *child) +{ + struct inode *dir = parent->d_inode; + struct inode *inode = child->d_inode; + struct gfs_inode *dip, *ip; + struct get_name_filldir gnfd; + struct gfs_holder gh; + uint64_t offset = 0; + int error; + + if (!dir) + return -EINVAL; + + atomic_inc(&vfs2sdp(dir->i_sb)->sd_ops_export); + + if (!S_ISDIR(dir->i_mode) || !inode) + return -EINVAL; + + dip = vn2ip(dir); + ip = vn2ip(inode); + + *name = 0; + gnfd.formal_ino = ip->i_num.no_formal_ino; + gnfd.name = name; + + error = gfs_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh); + if (error) + return error; + + error = gfs_dir_read(dip, &offset, &gnfd, get_name_filldir); + + gfs_glock_dq_uninit(&gh); + + if (!error && !*name) + error = -ENOENT; + + return error; +} + +/** + * gfs_get_parent - look up the parent directory of an exported dentry + * @child: the exported dentry + * + * Returns: the parent dentry, or ERR_PTR() on failure + */ + +struct dentry * +gfs_get_parent(struct dentry *child) +{ + struct gfs_inode *dip = vn2ip(child->d_inode); + struct gfs_holder d_gh, i_gh; + struct qstr dotdot = { .name = "..", .len = 2 }; + struct gfs_inode *ip; + struct inode *inode; + struct dentry *dentry; + int error; + + atomic_inc(&dip->i_sbd->sd_ops_export); + + gfs_holder_init(dip->i_gl, 0, 0, &d_gh); + error = gfs_lookupi(&d_gh, &dotdot, TRUE, &i_gh); + if (error) + goto fail; + + error = -ENOENT; + if (!i_gh.gh_gl) + goto fail; + + ip = gl2ip(i_gh.gh_gl); + + gfs_glock_dq_uninit(&d_gh); + gfs_glock_dq_uninit(&i_gh); + + inode = gfs_iget(ip, CREATE); + gfs_inode_put(ip); + + if (!inode) + return ERR_PTR(-ENOMEM); + + dentry = d_alloc_anon(inode); + if (!dentry) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + + return dentry; + + fail: + gfs_holder_uninit(&d_gh); + return ERR_PTR(error); +} + +/** + * gfs_get_dentry - get a dentry from a decoded filehandle cookie + * @sb: the filesystem's super block + * @inump: the struct inode_cookie built by gfs_decode_fh() + * + * Returns: the dentry on success, ERR_PTR() on failure + */ + +struct dentry * +gfs_get_dentry(struct super_block *sb, void *inump) +{ + struct gfs_sbd *sdp = vfs2sdp(sb); + struct inode_cookie *cookie = (struct 
inode_cookie *)inump; + struct gfs_inum inum; + struct gfs_holder i_gh, ri_gh, rgd_gh; + struct gfs_rgrpd *rgd; + struct buffer_head *bh; + struct gfs_dinode *di; + struct gfs_inode *ip; + struct inode *inode; + struct dentry *dentry; + int error; + + atomic_inc(&sdp->sd_ops_export); + + if (!cookie->formal_ino || + cookie->formal_ino == sdp->sd_jiinode->i_num.no_formal_ino || + cookie->formal_ino == sdp->sd_riinode->i_num.no_formal_ino || + cookie->formal_ino == sdp->sd_qinode->i_num.no_formal_ino || + cookie->formal_ino == sdp->sd_linode->i_num.no_formal_ino) + return ERR_PTR(-EINVAL); + + inum.no_formal_ino = cookie->formal_ino; + inum.no_addr = cookie->formal_ino; + + error = gfs_glock_nq_num(sdp, + inum.no_formal_ino, &gfs_inode_glops, + LM_ST_SHARED, LM_FLAG_ANY | GL_LOCAL_EXCL, + &i_gh); + if (error) + return ERR_PTR(error); + + error = gfs_inode_get(i_gh.gh_gl, &inum, NO_CREATE, &ip); + if (error) + goto fail; + if (ip) + goto out; + + error = gfs_rindex_hold(sdp, &ri_gh); + if (error) + goto fail; + + error = -EINVAL; + rgd = gfs_blk2rgrpd(sdp, inum.no_addr); + if (!rgd) + goto fail_rindex; + + error = gfs_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh); + if (error) + goto fail_rindex; + + error = -ESTALE; + if (gfs_get_block_type(rgd, inum.no_addr) != GFS_BLKST_USEDMETA) + goto fail_rgd; + + error = gfs_dread(sdp, inum.no_addr, i_gh.gh_gl, + DIO_START | DIO_WAIT, &bh); + if (error) + goto fail_rgd; + + di = (struct gfs_dinode *)bh->b_data; + + error = -ESTALE; + if (gfs32_to_cpu(di->di_header.mh_magic) != GFS_MAGIC || + gfs32_to_cpu(di->di_header.mh_type) != GFS_METATYPE_DI || + (gfs32_to_cpu(di->di_flags) & GFS_DIF_UNUSED)) + goto fail_relse; + + brelse(bh); + gfs_glock_dq_uninit(&rgd_gh); + gfs_glock_dq_uninit(&ri_gh); + + error = gfs_inode_get(i_gh.gh_gl, &inum, CREATE, &ip); + if (error) + goto fail; + + atomic_inc(&sdp->sd_fh2dentry_misses); + + out: + gfs_glock_dq_uninit(&i_gh); + + inode = gfs_iget(ip, CREATE); + gfs_inode_put(ip); + + if (!inode) + return ERR_PTR(-ENOMEM); + + if (cookie->gen_valid && cookie->gen != inode->i_generation) { + iput(inode); + return ERR_PTR(-ESTALE); + } + + dentry = d_alloc_anon(inode); + if (!dentry) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + + return dentry; + + fail_relse: + brelse(bh); + + fail_rgd: + gfs_glock_dq_uninit(&rgd_gh); + + fail_rindex: + gfs_glock_dq_uninit(&ri_gh); + + fail: + gfs_glock_dq_uninit(&i_gh); + return ERR_PTR(error); +} + +struct export_operations gfs_export_ops = { + .decode_fh = gfs_decode_fh, + .encode_fh = gfs_encode_fh, + .get_name = gfs_get_name, + .get_parent = gfs_get_parent, + .get_dentry = gfs_get_dentry, +}; + diff -urN linux-orig/fs/gfs/ops_export.h linux-patched/fs/gfs/ops_export.h --- linux-orig/fs/gfs/ops_export.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/ops_export.h 2004-06-30 13:27:49.351709274 -0500 @@ -0,0 +1,19 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
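
The filehandles built by gfs_encode_fh() above come in three sizes, and the filehandle type is simply the word count: 3 words (inode number plus generation), 5 (adding the parent's inode number, for connectable handles), or 6 (adding the parent's generation); gfs_decode_fh() rejects any handle where fh_type and fh_len disagree, and its switch deliberately falls through from case 6 to 5 to 3. The layout, sketched without the byte-order wrappers:

#include <stdio.h>
#include <stdint.h>

static int demo_encode_fh(uint32_t *fh, int maxlen,
			  uint64_t ino, uint32_t gen,
			  uint64_t pino, uint32_t pgen, int connectable)
{
	if (maxlen < 3)
		return -1;	/* the patch returns 255 here */
	fh[0] = (uint32_t)(ino >> 32);
	fh[1] = (uint32_t)(ino & 0xFFFFFFFF);
	fh[2] = gen;
	if (maxlen < 5 || !connectable)
		return 3;	/* enough to find the inode itself */
	fh[3] = (uint32_t)(pino >> 32);
	fh[4] = (uint32_t)(pino & 0xFFFFFFFF);
	if (maxlen < 6)
		return 5;
	fh[5] = pgen;
	return 6;
}

int main(void)
{
	uint32_t fh[6];
	int type = demo_encode_fh(fh, 6, 0x100000002ULL, 7, 1, 3, 1);

	printf("fh_type = fh_len = %d\n", type);
	return 0;
}
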
+** +******************************************************************************* +******************************************************************************/ + +#ifndef __OPS_EXPORT_DOT_H__ +#define __OPS_EXPORT_DOT_H__ + +extern struct export_operations gfs_export_ops; + +#endif /* __OPS_EXPORT_DOT_H__ */ diff -urN linux-orig/fs/gfs/ops_file.c linux-patched/fs/gfs/ops_file.c --- linux-orig/fs/gfs/ops_file.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/ops_file.c 2004-06-30 13:27:49.352709042 -0500 @@ -0,0 +1,1552 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs.h" +#include "bmap.h" +#include "dio.h" +#include "dir.h" +#include "file.h" +#include "flock.h" +#include "glock.h" +#include "glops.h" +#include "inode.h" +#include "ioctl.h" +#include "log.h" +#include "ops_file.h" +#include "ops_vm.h" +#include "quota.h" +#include "rgrp.h" +#include "trans.h" + +struct filldir_bad_entry { + char *fbe_name; + unsigned int fbe_length; + uint64_t fbe_offset; + struct gfs_inum fbe_inum; + unsigned int fbe_type; +}; + +struct filldir_bad { + struct gfs_sbd *fdb_sbd; + int fdb_prefetch; + + struct filldir_bad_entry *fdb_entry; + unsigned int fdb_entry_num; + unsigned int fdb_entry_off; + + char *fdb_name; + unsigned int fdb_name_size; + unsigned int fdb_name_off; +}; + +struct filldir_reg { + struct gfs_sbd *fdr_sbd; + int fdr_prefetch; + + filldir_t fdr_filldir; + void *fdr_opaque; +}; + +typedef ssize_t(*do_rw_t) (struct file * file, + char *buf, + size_t size, loff_t * offset, + unsigned int num_gh, struct gfs_holder * ghs); + +/** + * gfs_llseek - seek to a location in a file + * @file: the file + * @offset: the offset + * @origin: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END) + * + * SEEK_END requires the glock for the file because it references the + * file's size. + * + * Returns: The new offset, or -EXXX on error + */ + +static loff_t +gfs_llseek(struct file *file, loff_t offset, int origin) +{ + struct gfs_inode *ip = vn2ip(file->f_mapping->host); + struct gfs_holder i_gh; + loff_t error; + + atomic_inc(&ip->i_sbd->sd_ops_file); + + if (origin == 2) { + error = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); + if (!error) { + error = remote_llseek(file, offset, origin); + gfs_glock_dq_uninit(&i_gh); + } + } else + error = remote_llseek(file, offset, origin); + + return error; +} + +#define vma2state(vma) \ +((((vma)->vm_flags & (VM_MAYWRITE | VM_MAYSHARE)) == \ + (VM_MAYWRITE | VM_MAYSHARE)) ? 
\
+ LM_ST_EXCLUSIVE : LM_ST_SHARED) \
+
+/**
+ * walk_vm_hard - the slow path of walk_vm()
+ * @file: The file to read/write from/to
+ * @buf: The buffer to copy to/from
+ * @size: The amount of data requested
+ * @offset: The current file offset
+ * @operation: The read or write worker function
+ *
+ * At least one of the vmas backing @buf is on this filesystem, so
+ * collect a holder for each such mapping and pass them all to the
+ * worker, which acquires them together with the file's own lock in
+ * the correct order.  Called with mm->mmap_sem held unless the
+ * process is dumping core.
+ *
+ * Returns: The number of bytes read/written, -errno on failure
+ */
+
+static ssize_t
+walk_vm_hard(struct file *file, char *buf, size_t size, loff_t *offset,
+	     do_rw_t operation)
+{
+	struct gfs_holder *ghs;
+	unsigned int num_gh = 0;
+	ssize_t count;
+
+	{
+		struct super_block *sb = file->f_dentry->d_inode->i_sb;
+		struct mm_struct *mm = current->mm;
+		struct vm_area_struct *vma;
+		unsigned long start = (unsigned long)buf;
+		unsigned long end = start + size;
+		int dumping = (current->flags & PF_DUMPCORE);
+		unsigned int x = 0;
+
+		for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
+			if (end <= vma->vm_start)
+				break;
+			if (vma->vm_file &&
+			    vma->vm_file->f_dentry->d_inode->i_sb == sb) {
+				num_gh++;
+			}
+		}
+
+		ghs = kmalloc((num_gh + 1) * sizeof(struct gfs_holder), GFP_KERNEL);
+		if (!ghs) {
+			if (!dumping)
+				up_read(&mm->mmap_sem);
+			return -ENOMEM;
+		}
+
+		for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
+			if (end <= vma->vm_start)
+				break;
+			if (vma->vm_file) {
+				struct inode *inode = vma->vm_file->f_dentry->d_inode;
+				if (inode->i_sb == sb)
+					gfs_holder_init(vn2ip(inode)->i_gl,
+							vma2state(vma),
+							0, &ghs[x++]);
+			}
+		}
+
+		if (!dumping)
+			up_read(&mm->mmap_sem);
+
+		GFS_ASSERT_SBD(x == num_gh, vfs2sdp(sb),);
+	}
+
+	count = operation(file, buf, size, offset, num_gh, ghs);
+
+	while (num_gh--)
+		gfs_holder_uninit(&ghs[num_gh]);
+	kfree(ghs);
+
+	return count;
+}
+
+/**
+ * walk_vm - Walk the vmas associated with a buffer for read or write.
+ *    If any of them are gfs, pass the gfs inode down to the read/write
+ *    worker function so that locks can be acquired in the correct order.
+ * @file: The file to read/write from/to
+ * @buf: The buffer to copy to/from
+ * @size: The amount of data requested
+ * @offset: The current file offset
+ * @operation: The read or write worker function
+ *
+ * Outputs: Offset - updated according to number of bytes written
+ *
+ * Returns: The number of bytes written, -errno on failure
+ */
+
+static ssize_t
+walk_vm(struct file *file, char *buf, size_t size, loff_t *offset,
+	do_rw_t operation)
+{
+	if (current->mm) {
+		struct super_block *sb = file->f_dentry->d_inode->i_sb;
+		struct mm_struct *mm = current->mm;
+		struct vm_area_struct *vma;
+		unsigned long start = (unsigned long)buf;
+		unsigned long end = start + size;
+		int dumping = (current->flags & PF_DUMPCORE);
+
+		if (!dumping)
+			down_read(&mm->mmap_sem);
+
+		for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
+			if (end <= vma->vm_start)
+				break;
+			if (vma->vm_file &&
+			    vma->vm_file->f_dentry->d_inode->i_sb == sb)
+				goto do_locks;
+		}
+
+		if (!dumping)
+			up_read(&mm->mmap_sem);
+	}
+
+	{
+		struct gfs_holder gh;
+		return operation(file, buf, size, offset, 0, &gh);
+	}
+
+ do_locks:
+	return walk_vm_hard(file, buf, size, offset, operation);
+}
+
+/**
+ * do_read_readi - Read bytes from a file via gfs_readi()
+ * @file: The file to read from
+ * @buf: The buffer to copy into
+ * @size: The amount of data requested
+ * @offset: The current file offset
+ *
+ * Copies the data with gfs_readi() instead of going through the page
+ * cache.  Used for journaled data, and for stuffed dinodes whose
+ * contents are not paged.
+ *
+ * Outputs: Offset - updated according to number of bytes read
+ *
+ * Returns: The number of bytes read, -EXXX on failure
+ */
+
+static ssize_t
+do_read_readi(struct file *file, char *buf, size_t size, loff_t *offset)
+{
+	struct gfs_inode *ip = vn2ip(file->f_mapping->host);
+	ssize_t count = 0;
+
+	if (*offset < 0)
+		return -EINVAL;
+	if (!access_ok(VERIFY_WRITE, buf, size))
+		return -EFAULT;
+
+	if (!(file->f_flags & O_LARGEFILE)) {
+		if (*offset >= 0x7FFFFFFFull)
+			return -EFBIG;
+		if (*offset + size > 0x7FFFFFFFull)
+			size =
0x7FFFFFFFull - *offset; + } + + count = gfs_readi(ip, buf, *offset, size, gfs_copy2user); + + if (count > 0) + *offset += count; + + return count; +} + +/** + * do_read_direct - Read bytes from a file + * @file: The file to read from + * @buf: The buffer to copy into + * @size: The amount of data requested + * @offset: The current file offset + * @num_gh: The number of other locks we need to do the read + * @ghs: the locks we need plus one for our lock + * + * Outputs: Offset - updated according to number of bytes read + * + * Returns: The number of bytes read, -EXXX on failure + */ + +static ssize_t +do_read_direct(struct file *file, char *buf, size_t size, loff_t *offset, + unsigned int num_gh, struct gfs_holder *ghs) +{ + struct inode *inode = file->f_mapping->host; + struct gfs_inode *ip = vn2ip(inode); + unsigned int state = LM_ST_DEFERRED; + int flags = 0; + unsigned int x; + ssize_t count = 0; + int error; + + for (x = 0; x < num_gh; x++) + if (ghs[x].gh_gl == ip->i_gl) { + state = LM_ST_SHARED; + flags |= GL_LOCAL_EXCL; + break; + } + + gfs_holder_init(ip->i_gl, state, flags, &ghs[num_gh]); + + error = gfs_glock_nq_m(num_gh + 1, ghs); + if (error) + goto out; + + error = -EINVAL; + if (gfs_is_jdata(ip)) + goto out_gunlock; + + if (gfs_is_stuffed(ip)) { + size_t mask = bdev_hardsect_size(inode->i_sb->s_bdev) - 1; + + if (((*offset) & mask) || (((unsigned long)buf) & mask)) + goto out_gunlock; + + count = do_read_readi(file, buf, size & ~mask, offset); + } + else + count = generic_file_read(file, buf, size, offset); + + error = 0; + + out_gunlock: + gfs_glock_dq_m(num_gh + 1, ghs); + + out: + gfs_holder_uninit(&ghs[num_gh]); + + return (count) ? count : error; +} + +/** + * do_read_buf - Read bytes from a file + * @file: The file to read from + * @buf: The buffer to copy into + * @size: The amount of data requested + * @offset: The current file offset + * @num_gh: The number of other locks we need to do the read + * @ghs: the locks we need plus one for our lock + * + * Outputs: Offset - updated according to number of bytes read + * + * Returns: The number of bytes read, -EXXX on failure + */ + +static ssize_t +do_read_buf(struct file *file, char *buf, size_t size, loff_t *offset, + unsigned int num_gh, struct gfs_holder *ghs) +{ + struct gfs_inode *ip = vn2ip(file->f_mapping->host); + ssize_t count = 0; + int error; + + gfs_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &ghs[num_gh]); + + error = gfs_glock_nq_m_atime(num_gh + 1, ghs); + if (error) + goto out; + + if (gfs_is_jdata(ip) || + (gfs_is_stuffed(ip) && !test_bit(GIF_PAGED, &ip->i_flags))) + count = do_read_readi(file, buf, size, offset); + else + count = generic_file_read(file, buf, size, offset); + + gfs_glock_dq_m(num_gh + 1, ghs); + + out: + gfs_holder_uninit(&ghs[num_gh]); + + return (count) ? 
count : error; +} + +/** + * gfs_read - Read bytes from a file + * @file: The file to read from + * @buf: The buffer to copy into + * @size: The amount of data requested + * @offset: The current file offset + * + * Outputs: Offset - updated according to number of bytes read + * + * Returns: The number of bytes read, -EXXX on failure + */ + +static ssize_t +gfs_read(struct file *file, char *buf, size_t size, loff_t *offset) +{ + atomic_inc(&vfs2sdp(file->f_mapping->host->i_sb)->sd_ops_file); + + if (file->f_flags & O_DIRECT) + return walk_vm(file, buf, size, offset, do_read_direct); + else + return walk_vm(file, buf, size, offset, do_read_buf); +} + +/** + * grope_mapping - feel up a mapping that needs to be written + * @buf: the start of the memory to be written + * @size: the size of the memory to be written + * + * We do this after acquiring the locks on the mapping, + * but before starting the write transaction. We need to make + * sure that we don't cause recursive transactions if blocks + * need to be allocated to the file backing the mapping. + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +grope_mapping(char *buf, size_t size) +{ + unsigned long start = (unsigned long)buf; + unsigned long stop = start + size; + char c; + + while (start < stop) { + if (copy_from_user(&c, (char *)start, 1)) + return -EFAULT; + + start += PAGE_CACHE_SIZE; + start &= PAGE_CACHE_MASK; + } + + return 0; +} + +/** + * do_write_direct_alloc - Write bytes to a file + * @file: The file to write to + * @buf: The buffer to copy from + * @size: The amount of data requested + * @offset: The current file offset + * + * Outputs: Offset - updated according to number of bytes written + * + * Returns: The number of bytes written, -EXXX on failure + */ + +static ssize_t +do_write_direct_alloc(struct file *file, char *buf, size_t size, loff_t *offset) +{ + struct inode *inode = file->f_mapping->host; + struct gfs_inode *ip = vn2ip(inode); + struct gfs_sbd *sdp = ip->i_sbd; + struct gfs_alloc *al = NULL; + struct iovec local_iov = { .iov_base = buf, .iov_len = size }; + struct buffer_head *dibh; + unsigned int data_blocks, ind_blocks; + ssize_t count; + int error; + + gfs_write_calc_reserv(ip, size, &data_blocks, &ind_blocks); + + al = gfs_alloc_get(ip); + + error = gfs_quota_lock_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto fail; + + error = gfs_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid); + if (error) + goto fail_gunlock_q; + + al->al_requested_meta = ind_blocks; + al->al_requested_data = data_blocks; + + error = gfs_inplace_reserve(ip); + if (error) + goto fail_gunlock_q; + + /* Trans may require: + All blocks for a RG bitmap, whatever indirect blocks we + need, a modified dinode, and a quota change. */ + + error = gfs_trans_begin(sdp, + 1 + al->al_rgd->rd_ri.ri_length + ind_blocks, + 1); + if (error) + goto fail_ipres; + + if ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)) { + error = gfs_get_inode_buffer(ip, &dibh); + if (error) + goto fail_end_trans; + + ip->i_di.di_mode &= (ip->i_di.di_mode & S_IXGRP) ? 
(~(S_ISUID | S_ISGID)) : (~S_ISUID); + + gfs_trans_add_bh(ip->i_gl, dibh); + gfs_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + } + + if (gfs_is_stuffed(ip)) { + error = gfs_unstuff_dinode(ip, gfs_unstuffer_sync, NULL); + if (error) + goto fail_end_trans; + } + + count = generic_file_write_nolock(file, &local_iov, 1, offset); + if (count < 0) { + error = count; + goto fail_end_trans; + } + + error = gfs_get_inode_buffer(ip, &dibh); + if (error) + goto fail_end_trans; + + if (ip->i_di.di_size < inode->i_size) + ip->i_di.di_size = inode->i_size; + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + + gfs_trans_add_bh(ip->i_gl, dibh); + gfs_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + + gfs_trans_end(sdp); + + if (file->f_flags & O_SYNC) + gfs_log_flush_glock(ip->i_gl); + + gfs_inplace_release(ip); + gfs_quota_unlock_m(ip); + gfs_alloc_put(ip); + + return count; + + fail_end_trans: + gfs_trans_end(sdp); + + fail_ipres: + gfs_inplace_release(ip); + + fail_gunlock_q: + gfs_quota_unlock_m(ip); + + fail: + gfs_alloc_put(ip); + + return error; +} + +/** + * do_write_direct - Write bytes to a file + * @file: The file to write to + * @buf: The buffer to copy from + * @size: The amount of data requested + * @offset: The current file offset + * @num_gh: The number of other locks we need to do the read + * @gh: the locks we need plus one for our lock + * + * Outputs: Offset - updated according to number of bytes written + * + * Returns: The number of bytes written, -EXXX on failure + */ + +static ssize_t +do_write_direct(struct file *file, char *buf, size_t size, loff_t *offset, + unsigned int num_gh, struct gfs_holder *ghs) +{ + struct gfs_inode *ip = vn2ip(file->f_mapping->host); + struct gfs_sbd *sdp = ip->i_sbd; + struct gfs_file *fp = vf2fp(file); + unsigned int state = LM_ST_DEFERRED; + int alloc_required; + unsigned int x; + size_t s; + ssize_t count = 0; + int error; + + if (test_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags)) + state = LM_ST_EXCLUSIVE; + else + for (x = 0; x < num_gh; x++) + if (ghs[x].gh_gl == ip->i_gl) { + state = LM_ST_EXCLUSIVE; + break; + } + + restart: + gfs_holder_init(ip->i_gl, state, 0, &ghs[num_gh]); + + error = gfs_glock_nq_m(num_gh + 1, ghs); + if (error) + goto out; + + error = -EINVAL; + if (gfs_is_jdata(ip)) + goto out_gunlock; + + if (num_gh) { + error = grope_mapping(buf, size); + if (error) + goto out_gunlock; + } + + if (file->f_flags & O_APPEND) + *offset = ip->i_di.di_size; + + if (!(file->f_flags & O_LARGEFILE)) { + error = -EFBIG; + if (*offset >= 0x7FFFFFFFull) + goto out_gunlock; + if (*offset + size > 0x7FFFFFFFull) + size = 0x7FFFFFFFull - *offset; + } + + if (gfs_is_stuffed(ip) || + *offset + size > ip->i_di.di_size || + ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID))) + alloc_required = TRUE; + else { + error = gfs_write_alloc_required(ip, *offset, size, + &alloc_required); + if (error) + goto out_gunlock; + } + + if (alloc_required && state != LM_ST_EXCLUSIVE) { + gfs_glock_dq_m(num_gh + 1, ghs); + gfs_holder_uninit(&ghs[num_gh]); + state = LM_ST_EXCLUSIVE; + goto restart; + } + + if (alloc_required) { + set_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags); + + while (size) { + s = sdp->sd_tune.gt_max_atomic_write; + if (s > size) + s = size; + + error = do_write_direct_alloc(file, buf, s, offset); + if (error < 0) + goto out_gunlock; + + buf += error; + size -= error; + count += error; + } + } else { + struct iovec local_iov = { .iov_base = buf, .iov_len = size }; + struct gfs_holder t_gh; + + 
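+		/* This branch allocates nothing, so the data goes down in
+		   place.  Holding the transaction glock in a shared state
+		   across the write keeps it from overlapping any exclusive
+		   holder of that glock. */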
clear_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags); + + error = gfs_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &t_gh); + if (error) + goto out_gunlock; + + count = generic_file_write_nolock(file, &local_iov, 1, offset); + + gfs_glock_dq_uninit(&t_gh); + } + + error = 0; + + out_gunlock: + gfs_glock_dq_m(num_gh + 1, ghs); + + out: + gfs_holder_uninit(&ghs[num_gh]); + + return (count) ? count : error; +} + +/** + * do_do_write_buf - Write bytes to a file + * @file: The file to write to + * @buf: The buffer to copy from + * @size: The amount of data requested + * @offset: The current file offset + * + * Outputs: Offset - updated according to number of bytes written + * + * Returns: The number of bytes written, -EXXX on failure + */ + +static ssize_t +do_do_write_buf(struct file *file, char *buf, size_t size, loff_t *offset) +{ + struct inode *inode = file->f_mapping->host; + struct gfs_inode *ip = vn2ip(inode); + struct gfs_sbd *sdp = ip->i_sbd; + struct gfs_alloc *al = NULL; + struct buffer_head *dibh; + unsigned int data_blocks, ind_blocks; + int alloc_required, journaled; + ssize_t count; + int error; + + journaled = gfs_is_jdata(ip); + + gfs_write_calc_reserv(ip, size, &data_blocks, &ind_blocks); + + error = gfs_write_alloc_required(ip, *offset, size, &alloc_required); + if (error) + return error; + + if (alloc_required) { + al = gfs_alloc_get(ip); + + error = gfs_quota_lock_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto fail; + + error = gfs_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid); + if (error) + goto fail_gunlock_q; + + if (journaled) + al->al_requested_meta = ind_blocks + data_blocks; + else { + al->al_requested_meta = ind_blocks; + al->al_requested_data = data_blocks; + } + + error = gfs_inplace_reserve(ip); + if (error) + goto fail_gunlock_q; + + /* Trans may require: + All blocks for a RG bitmap, whatever indirect blocks we + need, a modified dinode, and a quota change. */ + + error = gfs_trans_begin(sdp, + 1 + al->al_rgd->rd_ri.ri_length + + ind_blocks + + ((journaled) ? data_blocks : 0), 1); + if (error) + goto fail_ipres; + } else { + /* Trans may require: + A modified dinode. */ + + error = gfs_trans_begin(sdp, + 1 + ((journaled) ? data_blocks : 0), 0); + if (error) + goto fail_ipres; + } + + if ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)) { + error = gfs_get_inode_buffer(ip, &dibh); + if (error) + goto fail_end_trans; + + ip->i_di.di_mode &= (ip->i_di.di_mode & S_IXGRP) ? 
(~(S_ISUID | S_ISGID)) : (~S_ISUID); + + gfs_trans_add_bh(ip->i_gl, dibh); + gfs_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + } + + if (journaled || + (gfs_is_stuffed(ip) && !test_bit(GIF_PAGED, &ip->i_flags) && + *offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode))) { + + count = gfs_writei(ip, buf, *offset, size, gfs_copy_from_user); + if (count < 0) { + error = count; + goto fail_end_trans; + } + + *offset += count; + } else { + struct iovec local_iov = { .iov_base = buf, .iov_len = size }; + + count = generic_file_write_nolock(file, &local_iov, 1, offset); + if (count < 0) { + error = count; + goto fail_end_trans; + } + + error = gfs_get_inode_buffer(ip, &dibh); + if (error) + goto fail_end_trans; + + if (ip->i_di.di_size < inode->i_size) + ip->i_di.di_size = inode->i_size; + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + + gfs_trans_add_bh(ip->i_gl, dibh); + gfs_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + } + + gfs_trans_end(sdp); + + if (file->f_flags & O_SYNC) + gfs_log_flush_glock(ip->i_gl); + + if (alloc_required) { + GFS_ASSERT_INODE(count != size || + al->al_alloced_meta || + al->al_alloced_data, ip,); + gfs_inplace_release(ip); + gfs_quota_unlock_m(ip); + gfs_alloc_put(ip); + } + + return count; + + fail_end_trans: + gfs_trans_end(sdp); + + fail_ipres: + if (alloc_required) + gfs_inplace_release(ip); + + fail_gunlock_q: + if (alloc_required) + gfs_quota_unlock_m(ip); + + fail: + if (alloc_required) + gfs_alloc_put(ip); + + return error; +} + +/** + * do_write_buf - Write bytes to a file + * @file: The file to write to + * @buf: The buffer to copy from + * @size: The amount of data requested + * @offset: The current file offset + * @num_gh: The number of other locks we need to do the read + * @gh: the locks we need plus one for our lock + * + * Outputs: Offset - updated according to number of bytes written + * + * Returns: The number of bytes written, -EXXX on failure + */ + +static ssize_t +do_write_buf(struct file *file, + char *buf, size_t size, loff_t *offset, + unsigned int num_gh, struct gfs_holder *ghs) +{ + struct gfs_inode *ip = vn2ip(file->f_mapping->host); + struct gfs_sbd *sdp = ip->i_sbd; + size_t s; + ssize_t count = 0; + int error; + + gfs_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[num_gh]); + + error = gfs_glock_nq_m(num_gh + 1, ghs); + if (error) + goto out; + + if (num_gh) { + error = grope_mapping(buf, size); + if (error) + goto out_gunlock; + } + + if (file->f_flags & O_APPEND) + *offset = ip->i_di.di_size; + + if (!(file->f_flags & O_LARGEFILE)) { + error = -EFBIG; + if (*offset >= 0x7FFFFFFFull) + goto out_gunlock; + if (*offset + size > 0x7FFFFFFFull) + size = 0x7FFFFFFFull - *offset; + } + + while (size) { + s = sdp->sd_tune.gt_max_atomic_write; + if (s > size) + s = size; + + error = do_do_write_buf(file, buf, s, offset); + if (error < 0) + goto out_gunlock; + + buf += error; + size -= error; + count += error; + } + + error = 0; + + out_gunlock: + gfs_glock_dq_m(num_gh + 1, ghs); + + out: + gfs_holder_uninit(&ghs[num_gh]); + + return (count) ? 
count : error; +} + +/** + * gfs_write - Write bytes to a file + * @file: The file to write to + * @buf: The buffer to copy from + * @size: The amount of data requested + * @offset: The current file offset + * + * Outputs: Offset - updated according to number of bytes written + * + * Returns: The number of bytes written, -EXXX on failure + */ + +static ssize_t +gfs_write(struct file *file, const char *buf, size_t size, loff_t *offset) +{ + struct inode *inode = file->f_mapping->host; + ssize_t count; + + atomic_inc(&vfs2sdp(inode->i_sb)->sd_ops_file); + + if (*offset < 0) + return -EINVAL; + if (!access_ok(VERIFY_READ, buf, size)) + return -EFAULT; + + down(&inode->i_sem); + if (file->f_flags & O_DIRECT) + count = walk_vm(file, (char *)buf, size, offset, do_write_direct); + else + count = walk_vm(file, (char *)buf, size, offset, do_write_buf); + up(&inode->i_sem); + + return count; +} + +/** + * filldir_reg_func - Report a directory entry to the caller of gfs_dir_read() + * @opaque: opaque data used by the function + * @name: the name of the directory entry + * @length: the length of the name + * @offset: the entry's offset in the directory + * @inum: the inode number the entry points to + * @type: the type of inode the entry points to + * + * Returns: 0 on success, 1 if buffer full + */ + +static int +filldir_reg_func(void *opaque, + const char *name, unsigned int length, + uint64_t offset, + struct gfs_inum *inum, unsigned int type) +{ + struct filldir_reg *fdr = (struct filldir_reg *)opaque; + struct gfs_sbd *sdp = fdr->fdr_sbd; + unsigned int vfs_type; + int error; + + switch (type) { + case GFS_FILE_NON: + vfs_type = DT_UNKNOWN; + break; + case GFS_FILE_REG: + vfs_type = DT_REG; + break; + case GFS_FILE_DIR: + vfs_type = DT_DIR; + break; + case GFS_FILE_LNK: + vfs_type = DT_LNK; + break; + case GFS_FILE_BLK: + vfs_type = DT_BLK; + break; + case GFS_FILE_CHR: + vfs_type = DT_CHR; + break; + case GFS_FILE_FIFO: + vfs_type = DT_FIFO; + break; + case GFS_FILE_SOCK: + vfs_type = DT_SOCK; + break; + default: + GFS_ASSERT_SBD(FALSE, sdp, + printk("type = %u\n", type);); + } + + error = fdr->fdr_filldir(fdr->fdr_opaque, name, length, offset, + inum->no_formal_ino, vfs_type); + if (error) + return 1; + + if (fdr->fdr_prefetch && !(length == 1 && *name == '.')) { + gfs_glock_prefetch_num(sdp, + inum->no_formal_ino, &gfs_inode_glops, + LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY); + gfs_glock_prefetch_num(sdp, + inum->no_addr, &gfs_iopen_glops, + LM_ST_SHARED, LM_FLAG_TRY); + } + + return 0; +} + +/** + * readdir_reg - Read directory entries from a directory + * @file: The directory to read from + * @dirent: Buffer for dirents + * @filldir: Function used to do the copying + * + * Returns: 0 on success, -EXXXX on failure + */ + +static int +readdir_reg(struct file *file, void *dirent, filldir_t filldir) +{ + struct gfs_inode *dip = vn2ip(file->f_mapping->host); + struct filldir_reg fdr; + struct gfs_holder d_gh; + uint64_t offset = file->f_pos; + int error; + + fdr.fdr_sbd = dip->i_sbd; + fdr.fdr_prefetch = GFS_ASYNC_LM(dip->i_sbd); + fdr.fdr_filldir = filldir; + fdr.fdr_opaque = dirent; + + gfs_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh); + error = gfs_glock_nq_atime(&d_gh); + if (error) { + gfs_holder_uninit(&d_gh); + return error; + } + + error = gfs_dir_read(dip, &offset, &fdr, filldir_reg_func); + + gfs_glock_dq_uninit(&d_gh); + + file->f_pos = offset; + + return error; +} + +/** + * filldir_bad_func - Report a directory entry to the caller of gfs_dir_read() + * @opaque: opaque data 
used by the function + * @name: the name of the directory entry + * @length: the length of the name + * @offset: the entry's offset in the directory + * @inum: the inode number the entry points to + * @type: the type of inode the entry points to + * + * Returns: 0 on success, 1 if buffer full + */ + +static int +filldir_bad_func(void *opaque, + const char *name, unsigned int length, + uint64_t offset, + struct gfs_inum *inum, unsigned int type) +{ + struct filldir_bad *fdb = (struct filldir_bad *)opaque; + struct gfs_sbd *sdp = fdb->fdb_sbd; + struct filldir_bad_entry *fbe; + + if (fdb->fdb_entry_off == fdb->fdb_entry_num || + fdb->fdb_name_off + length > fdb->fdb_name_size) + return 1; + + fbe = &fdb->fdb_entry[fdb->fdb_entry_off]; + fbe->fbe_name = fdb->fdb_name + fdb->fdb_name_off; + memcpy(fbe->fbe_name, name, length); + fbe->fbe_length = length; + fbe->fbe_offset = offset; + fbe->fbe_inum = *inum; + fbe->fbe_type = type; + + fdb->fdb_entry_off++; + fdb->fdb_name_off += length; + + if (fdb->fdb_prefetch && !(length == 1 && *name == '.')) { + gfs_glock_prefetch_num(sdp, + inum->no_formal_ino, &gfs_inode_glops, + LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY); + gfs_glock_prefetch_num(sdp, + inum->no_addr, &gfs_iopen_glops, + LM_ST_SHARED, LM_FLAG_TRY); + } + + return 0; +} + +/** + * readdir_bad - Read directory entries from a directory + * @file: The directory to read from + * @dirent: Buffer for dirents + * @filldir: Function used to do the copying + * + * Returns: 0 on success, -EXXXX on failure + */ + +static int +readdir_bad(struct file *file, void *dirent, filldir_t filldir) +{ + struct gfs_inode *dip = vn2ip(file->f_mapping->host); + struct gfs_sbd *sdp = dip->i_sbd; + struct filldir_reg fdr; + unsigned int entries, size; + struct filldir_bad *fdb; + struct gfs_holder d_gh; + uint64_t offset = file->f_pos; + unsigned int x; + struct filldir_bad_entry *fbe; + int error; + + entries = sdp->sd_tune.gt_entries_per_readdir; + size = sizeof(struct filldir_bad) + + entries * (sizeof(struct filldir_bad_entry) + GFS_FAST_NAME_SIZE); + + fdb = gmalloc(size); + memset(fdb, 0, size); + + fdb->fdb_sbd = sdp; + fdb->fdb_prefetch = GFS_ASYNC_LM(sdp); + fdb->fdb_entry = (struct filldir_bad_entry *)(fdb + 1); + fdb->fdb_entry_num = entries; + fdb->fdb_name = ((char *)fdb) + sizeof(struct filldir_bad) + + entries * sizeof(struct filldir_bad_entry); + fdb->fdb_name_size = entries * GFS_FAST_NAME_SIZE; + + gfs_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh); + error = gfs_glock_nq_atime(&d_gh); + if (error) { + gfs_holder_uninit(&d_gh); + goto out; + } + + error = gfs_dir_read(dip, &offset, fdb, filldir_bad_func); + + gfs_glock_dq_uninit(&d_gh); + + fdr.fdr_sbd = sdp; + fdr.fdr_prefetch = FALSE; + fdr.fdr_filldir = filldir; + fdr.fdr_opaque = dirent; + + for (x = 0; x < fdb->fdb_entry_off; x++) { + fbe = &fdb->fdb_entry[x]; + + error = filldir_reg_func(&fdr, + fbe->fbe_name, fbe->fbe_length, + fbe->fbe_offset, + &fbe->fbe_inum, fbe->fbe_type); + if (error) { + file->f_pos = fbe->fbe_offset; + error = 0; + goto out; + } + } + + file->f_pos = offset; + + out: + kfree(fdb); + + return error; +} + +/** + * gfs_readdir - Read directory entries from a directory + * @file: The directory to read from + * @dirent: Buffer for dirents + * @filldir: Function used to do the copying + * + * Returns: 0 on success, -EXXXX on failure + */ + +static int +gfs_readdir(struct file *file, void *dirent, filldir_t filldir) +{ + int error; + + atomic_inc(&vfs2sdp(file->f_mapping->host->i_sb)->sd_ops_file); + + if 
(strcmp(current->comm, "nfsd") != 0) + error = readdir_reg(file, dirent, filldir); + else + error = readdir_bad(file, dirent, filldir); + + return error; +} + +/** + * gfs_ioctl - do an ioctl on a file + * @inode: the inode + * @file: the file pointer + * @cmd: the ioctl command + * @arg: the argument + * + * Returns: 0 on success, -EXXXX on failure + */ + +static int +gfs_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct gfs_inode *ip = vn2ip(inode); + atomic_inc(&ip->i_sbd->sd_ops_file); + return gfs_ioctli(ip, cmd, (void *)arg); +} + +/** + * gfs_open - open a file + * @inode: the inode to open + * @file: the struct file for this opening + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +gfs_open(struct inode *inode, struct file *file) +{ + struct gfs_inode *ip = vn2ip(inode); + struct gfs_holder i_gh; + struct gfs_file *fp; + int error; + + atomic_inc(&ip->i_sbd->sd_ops_file); + + fp = gmalloc(sizeof(struct gfs_file)); + memset(fp, 0, sizeof(struct gfs_file)); + + init_MUTEX(&fp->f_fl_lock); + + fp->f_inode = ip; + fp->f_vfile = file; + + GFS_ASSERT_INODE(!vf2fp(file), ip,); + vf2fp(file) = fp; + + if (ip->i_di.di_type == GFS_FILE_REG) { + error = gfs_glock_nq_init(ip->i_gl, + LM_ST_SHARED, LM_FLAG_ANY, + &i_gh); + if (error) + goto fail; + + if (!(file->f_flags & O_LARGEFILE) && + ip->i_di.di_size > 0x7FFFFFFFull) { + error = -EFBIG; + goto fail_gunlock; + } + + /* If this is an exclusive create, make sure our gfs_create() + says we created the file. The O_EXCL flag isn't passed + to gfs_create(), so we have to check it here. */ + + if (file->f_flags & O_CREAT) { + if (ip->i_creat_task == current && + ip->i_creat_pid == current->pid) { + ip->i_creat_task = NULL; + ip->i_creat_pid = 0; + } else if (file->f_flags & O_EXCL) { + error = -EEXIST; + goto fail_gunlock; + } + } + + /* Listen to the Direct I/O flag */ + + if (ip->i_di.di_flags & GFS_DIF_DIRECTIO) + file->f_flags |= O_DIRECT; + + /* Don't let the user open O_DIRECT on a jdata file */ + + if ((file->f_flags & O_DIRECT) && gfs_is_jdata(ip)) { + error = -EINVAL; + goto fail_gunlock; + } + + gfs_glock_dq_uninit(&i_gh); + } + + return 0; + + fail_gunlock: + gfs_glock_dq_uninit(&i_gh); + + fail: + vf2fp(file) = NULL; + kfree(fp); + + return error; +} + +/** + * gfs_close - called to close a struct file + * @inode: the inode the struct file belongs to + * @file: the struct file being closed + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +gfs_close(struct inode *inode, struct file *file) +{ + struct gfs_file *fp; + + atomic_inc(&vfs2sdp(inode->i_sb)->sd_ops_file); + + fp = vf2fp(file); + vf2fp(file) = NULL; + + GFS_ASSERT(fp,); + + kfree(fp); + + return 0; +} + +/** + * gfs_fsync - sync the dirty data for a file (across the cluster) + * @file: the file that points to the dentry (Huh?) 
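+ *	(the file itself is unused below; only the inode behind @dentry
+ *	is needed)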
+ * @dentry: the dentry that points to the inode to sync + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +gfs_fsync(struct file *file, struct dentry *dentry, int datasync) +{ + struct gfs_inode *ip = vn2ip(dentry->d_inode); + struct gfs_holder i_gh; + int error; + + atomic_inc(&ip->i_sbd->sd_ops_file); + + error = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); + if (error) + return error; + + if (gfs_is_jdata(ip)) + gfs_log_flush_glock(ip->i_gl); + else + i_gh.gh_flags |= GL_SYNC; + + gfs_glock_dq_uninit(&i_gh); + + return error; +} + +/** + * gfs_lock - acquire/release a flock or posix lock on a file + * @file: the file pointer + * @cmd: either modify or retrieve lock state, possibly wait + * @fl: type and range of lock + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +gfs_lock(struct file *file, int cmd, struct file_lock *fl) +{ + struct gfs_inode *ip = vn2ip(file->f_mapping->host); + struct gfs_sbd *sdp = ip->i_sbd; + struct lm_lockname name; + uint64_t start = fl->fl_start, end = fl->fl_end; + pid_t pid = fl->fl_pid; + int plock = (fl->fl_flags & FL_POSIX); + int flock = (fl->fl_flags & FL_FLOCK); + int get, set, wait, ex, sh, un; + int error; + + atomic_inc(&sdp->sd_ops_file); + + if (sdp->sd_args.ar_localflocks) + return LOCK_USE_CLNT; + + if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID) + return -ENOLCK; + + if (!flock && !plock) + return -ENOLCK; + + get = (IS_GETLK(cmd)) ? TRUE : FALSE; + set = (IS_SETLK(cmd)) ? TRUE : FALSE; + wait = (IS_SETLKW(cmd)) ? TRUE : FALSE; + + if ((flock && (get || (!set && !wait))) || + (plock && (!get && !set && !wait))) + return -EINVAL; + + ex = (fl->fl_type == F_WRLCK) ? TRUE : FALSE; + sh = (fl->fl_type == F_RDLCK) ? TRUE : FALSE; + un = (fl->fl_type == F_UNLCK) ? TRUE : FALSE; + + if (!ex && !sh && !un) + return -EINVAL; + + if (flock) { + struct gfs_file *fp = vf2fp(file); + GFS_ASSERT(fp,); + + if (un) + error = gfs_funlock(fp); + else + error = gfs_flock(fp, ex, wait); + } else { + name.ln_number = ip->i_num.no_formal_ino; + name.ln_type = LM_TYPE_PLOCK; + if (get) { + error = sdp->sd_lockstruct.ls_ops->lm_plock_get( + sdp->sd_lockstruct.ls_lockspace, + &name, (unsigned long)fl->fl_owner, + &start, &end, &ex, (unsigned long*)&pid); + if (error < 0) + return error; + + fl->fl_type = F_UNLCK; + if (!error) + return error; + + fl->fl_start = start; + fl->fl_end = end; + fl->fl_pid = pid; + fl->fl_type = (ex) ? 
F_WRLCK : F_RDLCK; + + error = 0; + } else if (un) + error = sdp->sd_lockstruct.ls_ops->lm_punlock( + sdp->sd_lockstruct.ls_lockspace, + &name, (unsigned long)fl->fl_owner, + start, end); + else + error = sdp->sd_lockstruct.ls_ops->lm_plock( + sdp->sd_lockstruct.ls_lockspace, + &name, (unsigned long)fl->fl_owner, + wait, ex, start, end); + } + + return error; +} + +/** + * gfs_sendfile - Send bytes to a file or socket + * @in_file: The file to read from + * @out_file: The file to write to + * @count: The amount of data + * @offset: The beginning file offset + * + * Outputs: offset - updated according to number of bytes read + * + * Returns: The number of bytes sent, -EXXX on failure + */ + +static ssize_t +gfs_sendfile(struct file *in_file, loff_t *offset, size_t count, read_actor_t actor, void __user *target) +{ + struct gfs_inode *ip = vn2ip(in_file->f_mapping->host); + struct gfs_holder gh; + ssize_t retval; + + atomic_inc(&ip->i_sbd->sd_ops_file); + + gfs_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); + + retval = gfs_glock_nq_atime(&gh); + if (retval) + goto out; + + if (gfs_is_jdata(ip)) + retval = -ENOSYS; + else + retval = generic_file_sendfile(in_file, offset, count, actor, target); + + gfs_glock_dq(&gh); + + out: + gfs_holder_uninit(&gh); + + return retval; +} + +/** + * gfs_mmap - We don't support shared writable mappings right now + * @file: The file to map + * @vma: The VMA which described the mapping + * + * Returns: 0 or error code + */ + +static int +gfs_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct gfs_inode *ip = vn2ip(file->f_mapping->host); + struct gfs_holder i_gh; + int error; + + atomic_inc(&ip->i_sbd->sd_ops_file); + + gfs_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh); + error = gfs_glock_nq_atime(&i_gh); + if (error) { + gfs_holder_uninit(&i_gh); + return error; + } + + if (gfs_is_jdata(ip)) { + if (vma->vm_flags & VM_MAYSHARE) + error = -ENOSYS; + else + vma->vm_ops = &gfs_vm_ops_private; + } else { + /* This is VM_MAYWRITE instead of VM_WRITE because a call + to mprotect() can turn on VM_WRITE later. */ + + if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) == (VM_MAYSHARE | VM_MAYWRITE)) + vma->vm_ops = &gfs_vm_ops_sharewrite; + else + vma->vm_ops = &gfs_vm_ops_private; + } + + gfs_glock_dq_uninit(&i_gh); + + return error; +} + +struct file_operations gfs_file_fops = { + .llseek = gfs_llseek, + .read = gfs_read, + .write = gfs_write, + .ioctl = gfs_ioctl, + .mmap = gfs_mmap, + .open = gfs_open, + .release = gfs_close, + .fsync = gfs_fsync, + .lock = gfs_lock, + .sendfile = gfs_sendfile, +}; + +struct file_operations gfs_dir_fops = { + .readdir = gfs_readdir, + .ioctl = gfs_ioctl, + .open = gfs_open, + .release = gfs_close, + .fsync = gfs_fsync, + .lock = gfs_lock, +}; diff -urN linux-orig/fs/gfs/ops_file.h linux-patched/fs/gfs/ops_file.h --- linux-orig/fs/gfs/ops_file.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/ops_file.h 2004-06-30 13:27:49.352709042 -0500 @@ -0,0 +1,20 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
+**
+*******************************************************************************
+******************************************************************************/
+
+#ifndef __OPS_FILE_DOT_H__
+#define __OPS_FILE_DOT_H__
+
+extern struct file_operations gfs_file_fops;
+extern struct file_operations gfs_dir_fops;
+
+#endif /* __OPS_FILE_DOT_H__ */
diff -urN linux-orig/fs/gfs/ops_fstype.c linux-patched/fs/gfs/ops_fstype.c
--- linux-orig/fs/gfs/ops_fstype.c	1969-12-31 18:00:00.000000000 -0600
+++ linux-patched/fs/gfs/ops_fstype.c	2004-06-30 13:27:49.353708810 -0500
@@ -0,0 +1,607 @@
+/******************************************************************************
+*******************************************************************************
+**
+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+**
+** This copyrighted material is made available to anyone wishing to use,
+** modify, copy, or redistribute it subject to the terms and conditions
+** of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "gfs.h"
+#include "daemon.h"
+#include "glock.h"
+#include "glops.h"
+#include "inode.h"
+#include "locking.h"
+#include "mount.h"
+#include "ops_export.h"
+#include "ops_fstype.h"
+#include "ops_super.h"
+#include "quota.h"
+#include "recovery.h"
+#include "rgrp.h"
+#include "super.h"
+#include "unlinked.h"
+
+/**
+ * fill_super - Read in superblock
+ * @sb: The VFS superblock
+ * @data: Mount options
+ * @silent: Don't complain if it's not a GFS filesystem
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+static int
+fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct gfs_sbd *sdp;
+	struct gfs_holder mount_gh, sb_gh, ji_gh;
+	struct inode *inode;
+	int super = TRUE, jindex = TRUE;
+	unsigned int x;
+	int error;
+
+	error = -ENOMEM;
+	sdp = vmalloc(sizeof(struct gfs_sbd));
+	if (!sdp)
+		goto fail;
+
+	memset(sdp, 0, sizeof(struct gfs_sbd));
+
+	vfs2sdp(sb) = sdp;
+	sdp->sd_vfs = sb;
+
+	/* Init rgrp variables */
+
+	INIT_LIST_HEAD(&sdp->sd_rglist);
+	init_MUTEX(&sdp->sd_rindex_lock);
+	INIT_LIST_HEAD(&sdp->sd_rg_mru_list);
+	spin_lock_init(&sdp->sd_rg_mru_lock);
+	INIT_LIST_HEAD(&sdp->sd_rg_recent);
+	spin_lock_init(&sdp->sd_rg_recent_lock);
+	spin_lock_init(&sdp->sd_rg_forward_lock);
+
+	for (x = 0; x < GFS_GL_HASH_SIZE; x++) {
+		sdp->sd_gl_hash[x].hb_lock = RW_LOCK_UNLOCKED;
+		INIT_LIST_HEAD(&sdp->sd_gl_hash[x].hb_list);
+	}
+
+	INIT_LIST_HEAD(&sdp->sd_reclaim_list);
+	spin_lock_init(&sdp->sd_reclaim_lock);
+	init_waitqueue_head(&sdp->sd_reclaim_wchan);
+
+	for (x = 0; x < GFS_MHC_HASH_SIZE; x++)
+		INIT_LIST_HEAD(&sdp->sd_mhc[x]);
+	INIT_LIST_HEAD(&sdp->sd_mhc_single);
+	spin_lock_init(&sdp->sd_mhc_lock);
+
+	for (x = 0; x < GFS_DEPEND_HASH_SIZE; x++)
+		INIT_LIST_HEAD(&sdp->sd_depend[x]);
+	spin_lock_init(&sdp->sd_depend_lock);
+
+	init_MUTEX(&sdp->sd_freeze_lock);
+
+	init_MUTEX(&sdp->sd_thread_lock);
+	init_completion(&sdp->sd_thread_completion);
+
+	spin_lock_init(&sdp->sd_log_seg_lock);
+	INIT_LIST_HEAD(&sdp->sd_log_seg_list);
+	init_waitqueue_head(&sdp->sd_log_seg_wait);
+	INIT_LIST_HEAD(&sdp->sd_log_ail);
+	INIT_LIST_HEAD(&sdp->sd_log_incore);
+	init_MUTEX(&sdp->sd_log_lock);
+	INIT_LIST_HEAD(&sdp->sd_unlinked_list);
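+	/* The unlinked-inode and quota lists are each paired with the
+	   spinlock that guards them */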
+	spin_lock_init(&sdp->sd_unlinked_lock);
+	INIT_LIST_HEAD(&sdp->sd_quota_list);
+	spin_lock_init(&sdp->sd_quota_lock);
+
+	INIT_LIST_HEAD(&sdp->sd_dirty_j);
+	spin_lock_init(&sdp->sd_dirty_j_lock);
+
+	spin_lock_init(&sdp->sd_ail_lock);
+	INIT_LIST_HEAD(&sdp->sd_recovery_bufs);
+
+	gfs_init_tune_data(sdp);
+
+	error = gfs_make_args((char *)data, &sdp->sd_args);
+	if (error) {
+		printk("GFS: can't parse mount arguments\n");
+		goto fail_vfree;
+	}
+
+	/* Copy out mount flags */
+
+	if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME))
+		set_bit(SDF_NOATIME, &sdp->sd_flags);
+	if (sb->s_flags & MS_RDONLY)
+		set_bit(SDF_ROFS, &sdp->sd_flags);
+
+	/* Set up Virtual Super Block */
+
+	sb->s_magic = GFS_MAGIC;
+	sb->s_op = &gfs_super_ops;
+	sb->s_export_op = &gfs_export_ops;
+	sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
+	sb->s_maxbytes = ~0ULL;
+
+	if (sdp->sd_args.ar_posixacls)
+		sb->s_flags |= MS_POSIXACL;
+
+	/* Set up the buffer cache and fill in some fake values
+	   to allow us to read in the superblock. */
+
+	sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, GFS_BASIC_BLOCK);
+	sdp->sd_sb.sb_bsize_shift = sb->s_blocksize_bits;
+	sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - GFS_BASIC_BLOCK_SHIFT;
+	sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
+
+	GFS_ASSERT_SBD(sizeof(struct gfs_sb) <= sdp->sd_sb.sb_bsize, sdp,);
+
+	error = gfs_mount_lockproto(sdp, silent);
+	if (error)
+		goto fail_vfree;
+
+	printk("GFS: fsid=%s: Joined cluster. Now mounting FS...\n",
+	       sdp->sd_fsname);
+
+	if ((sdp->sd_lockstruct.ls_flags & LM_LSFLAG_LOCAL) &&
+	    !sdp->sd_args.ar_ignore_local_fs) {
+		/* Force local [p|f]locks */
+		sdp->sd_args.ar_localflocks = TRUE;
+
+		/* Force local read ahead and caching */
+		sdp->sd_args.ar_localcaching = TRUE;
+	}
+
+	/* Start up the scand thread */
+
+	error = kernel_thread(gfs_scand, sdp, 0);
+	if (error < 0) {
+		printk("GFS: fsid=%s: can't start scand thread: %d\n",
+		       sdp->sd_fsname, error);
+		goto fail_lockproto;
+	}
+	wait_for_completion(&sdp->sd_thread_completion);
+
+	/* Start up the glockd threads */
+
+	for (sdp->sd_glockd_num = 0;
+	     sdp->sd_glockd_num < sdp->sd_args.ar_num_glockd;
+	     sdp->sd_glockd_num++) {
+		error = kernel_thread(gfs_glockd, sdp, 0);
+		if (error < 0) {
+			printk("GFS: fsid=%s: can't start glockd thread: %d\n",
+			       sdp->sd_fsname, error);
+			goto fail_glockd;
+		}
+		wait_for_completion(&sdp->sd_thread_completion);
+	}
+
+	error = gfs_glock_nq_num(sdp,
+				 GFS_MOUNT_LOCK, &gfs_nondisk_glops,
+				 LM_ST_EXCLUSIVE, LM_FLAG_NOEXP | GL_NOCACHE,
+				 &mount_gh);
+	if (error) {
+		printk("GFS: fsid=%s: can't acquire mount glock: %d\n",
+		       sdp->sd_fsname, error);
+		goto fail_glockd;
+	}
+
+	error = gfs_glock_nq_num(sdp,
+				 GFS_LIVE_LOCK, &gfs_nondisk_glops,
+				 LM_ST_SHARED, LM_FLAG_NOEXP | GL_EXACT,
+				 &sdp->sd_live_gh);
+	if (error) {
+		printk("GFS: fsid=%s: can't acquire live glock: %d\n",
+		       sdp->sd_fsname, error);
+		goto fail_gunlock_mount;
+	}
+
+	sdp->sd_live_gh.gh_owner = NULL;
+
+	error = gfs_glock_nq_num(sdp,
+				 GFS_SB_LOCK, &gfs_meta_glops,
+				 (sdp->sd_args.ar_upgrade) ?
LM_ST_EXCLUSIVE : LM_ST_SHARED, + 0, &sb_gh); + if (error) { + printk("GFS: fsid=%s: can't acquire superblock glock: %d\n", + sdp->sd_fsname, error); + goto fail_gunlock_live; + } + + error = gfs_read_sb(sdp, sb_gh.gh_gl, silent); + if (error) { + printk("GFS: fsid=%s: can't read superblock: %d\n", + sdp->sd_fsname, error); + goto fail_gunlock_sb; + } + + /* Set up the buffer cache and SB for real */ + + error = -EINVAL; + if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) { + printk("GFS: fsid=%s: FS block size (%u) is too small for device block size (%u)\n", + sdp->sd_fsname, sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev)); + goto fail_gunlock_sb; + } + if (sdp->sd_sb.sb_bsize > PAGE_SIZE) { + printk("GFS: fsid=%s: FS block size (%u) is too big for machine page size (%u)\n", + sdp->sd_fsname, sdp->sd_sb.sb_bsize, + (unsigned int)PAGE_SIZE); + goto fail_gunlock_sb; + } + + /* Get rid of buffers from the original block size */ + sb_gh.gh_gl->gl_ops->go_inval(sb_gh.gh_gl, DIO_METADATA | DIO_DATA); + sb_gh.gh_gl->gl_aspace->i_blkbits = sdp->sd_sb.sb_bsize_shift; + + sb_set_blocksize(sb, sdp->sd_sb.sb_bsize); + + /* Read in journal index inode */ + + error = gfs_get_jiinode(sdp); + if (error) { + printk("GFS: fsid=%s: can't get journal index inode: %d\n", + sdp->sd_fsname, error); + goto fail_gunlock_sb; + } + + init_MUTEX(&sdp->sd_jindex_lock); + + /* Get a handle on the transaction glock */ + + error = gfs_glock_get(sdp, GFS_TRANS_LOCK, &gfs_trans_glops, + CREATE, &sdp->sd_trans_gl); + if (error) + goto fail_ji_free; + set_bit(GLF_STICKY, &sdp->sd_trans_gl->gl_flags); + + /* Upgrade version numbers if we need to */ + + if (sdp->sd_args.ar_upgrade) { + error = gfs_do_upgrade(sdp, sb_gh.gh_gl); + if (error) + goto fail_trans_gl; + } + + /* Load in the journal index */ + + error = gfs_jindex_hold(sdp, &ji_gh); + if (error) { + printk("GFS: fsid=%s: can't read journal index: %d\n", + sdp->sd_fsname, error); + goto fail_trans_gl; + } + + error = -EINVAL; + if (sdp->sd_lockstruct.ls_jid >= sdp->sd_journals) { + printk("GFS: fsid=%s: can't mount journal #%u\n", + sdp->sd_fsname, sdp->sd_lockstruct.ls_jid); + printk("GFS: fsid=%s: there are only %u journals (0 - %u)\n", + sdp->sd_fsname, sdp->sd_journals, sdp->sd_journals - 1); + goto fail_gunlock_ji; + } + sdp->sd_jdesc = sdp->sd_jindex[sdp->sd_lockstruct.ls_jid]; + sdp->sd_log_seg_free = sdp->sd_jdesc.ji_nsegment - 1; + + error = gfs_glock_nq_num(sdp, + sdp->sd_jdesc.ji_addr, &gfs_meta_glops, + LM_ST_EXCLUSIVE, LM_FLAG_NOEXP, + &sdp->sd_journal_gh); + if (error) { + printk("GFS: fsid=%s: can't acquire the journal glock: %d\n", + sdp->sd_fsname, error); + goto fail_gunlock_ji; + } + + if (sdp->sd_lockstruct.ls_first) { + for (x = 0; x < sdp->sd_journals; x++) { + error = gfs_recover_journal(sdp, + x, sdp->sd_jindex + x, + TRUE); + if (error) { + printk("GFS: fsid=%s: error recovering journal %u: %d\n", + sdp->sd_fsname, x, error); + goto fail_gunlock_journal; + } + } + + sdp->sd_lockstruct.ls_ops->lm_others_may_mount(sdp->sd_lockstruct.ls_lockspace); + sdp->sd_lockstruct.ls_first = FALSE; + } else { + error = gfs_recover_journal(sdp, + sdp->sd_lockstruct.ls_jid, &sdp->sd_jdesc, + TRUE); + if (error) { + printk("GFS: fsid=%s: error recovering my journal: %d\n", + sdp->sd_fsname, error); + goto fail_gunlock_journal; + } + } + + gfs_glock_dq_uninit(&ji_gh); + jindex = FALSE; + + /* Disown my Journal glock */ + + sdp->sd_journal_gh.gh_owner = NULL; + + /* Drop our cache and reread all the things we read before the replay. 
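+	   The replay may have rewritten the superblock and the journal
+	   index on disk, so the copies read earlier could now be stale.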
*/ + + error = gfs_read_sb(sdp, sb_gh.gh_gl, FALSE); + if (error) { + printk("GFS: fsid=%s: can't read superblock: %d\n", + sdp->sd_fsname, error); + goto fail_gunlock_journal; + } + + gfs_glock_force_drop(sdp->sd_jiinode->i_gl); + + error = gfs_jindex_hold(sdp, &ji_gh); + if (error) { + printk("GFS: fsid=%s: can't read journal index: %d\n", + sdp->sd_fsname, error); + goto fail_gunlock_journal; + } + gfs_glock_dq_uninit(&ji_gh); + + /* Make the FS read/write */ + + if (!test_bit(SDF_ROFS, &sdp->sd_flags)) { + error = gfs_make_fs_rw(sdp); + if (error) { + printk("GFS: fsid=%s: can't make FS RW: %d\n", + sdp->sd_fsname, error); + goto fail_gunlock_journal; + } + } + + /* Start up the recover thread */ + + error = kernel_thread(gfs_recoverd, sdp, 0); + if (error < 0) { + printk("GFS: fsid=%s: can't start recoverd thread: %d\n", + sdp->sd_fsname, error); + goto fail_recover_dump; + } + wait_for_completion(&sdp->sd_thread_completion); + + /* Read in the resource index inode */ + + error = gfs_get_riinode(sdp); + if (error) { + printk("GFS: fsid=%s: can't get resource index inode: %d\n", + sdp->sd_fsname, error); + goto fail_recoverd; + } + + /* Get the root inode */ + + error = gfs_get_rootinode(sdp); + if (error) { + printk("GFS: fsid=%s: can't read in root inode: %d\n", + sdp->sd_fsname, error); + goto fail_ri_free; + } + + /* Read in the quota inode */ + + error = gfs_get_qinode(sdp); + if (error) { + printk("GFS: fsid=%s: can't get quota file inode: %d\n", + sdp->sd_fsname, error); + goto fail_root_free; + } + + /* Read in the license inode */ + + error = gfs_get_linode(sdp); + if (error) { + printk("GFS: fsid=%s: can't get license file inode: %d\n", + sdp->sd_fsname, error); + goto fail_qi_free; + } + + /* We're through with the superblock lock */ + + gfs_glock_dq_uninit(&sb_gh); + super = FALSE; + + /* Get the inode/dentry */ + + inode = gfs_iget(sdp->sd_rooti, CREATE); + if (!inode) { + printk("GFS: fsid=%s: can't get root inode\n", sdp->sd_fsname); + error = -ENOMEM; + goto fail_li_free; + } + + sb->s_root = d_alloc_root(inode); + if (!sb->s_root) { + iput(inode); + printk("GFS: fsid=%s: can't get root dentry\n", sdp->sd_fsname); + error = -ENOMEM; + goto fail_li_free; + } + + /* Start up the logd thread */ + + sdp->sd_jindex_refresh_time = jiffies; + + error = kernel_thread(gfs_logd, sdp, 0); + if (error < 0) { + printk("GFS: fsid=%s: can't start logd thread: %d\n", + sdp->sd_fsname, error); + goto fail_dput; + } + wait_for_completion(&sdp->sd_thread_completion); + + /* Start up the quotad thread */ + + error = kernel_thread(gfs_quotad, sdp, 0); + if (error < 0) { + printk("GFS: fsid=%s: can't start quotad thread: %d\n", + sdp->sd_fsname, error); + goto fail_logd; + } + wait_for_completion(&sdp->sd_thread_completion); + + /* Start up the inoded thread */ + + error = kernel_thread(gfs_inoded, sdp, 0); + if (error < 0) { + printk("GFS: fsid=%s: can't start inoded thread: %d\n", + sdp->sd_fsname, error); + goto fail_quotad; + } + wait_for_completion(&sdp->sd_thread_completion); + + /* Get a handle on the rename lock */ + + error = gfs_glock_get(sdp, GFS_RENAME_LOCK, &gfs_nondisk_glops, + CREATE, &sdp->sd_rename_gl); + if (error) + goto fail_inoded; + + gfs_glock_dq_uninit(&mount_gh); + + return 0; + + fail_inoded: + down(&sdp->sd_thread_lock); + clear_bit(SDF_INODED_RUN, &sdp->sd_flags); + wake_up_process(sdp->sd_inoded_process); + up(&sdp->sd_thread_lock); + wait_for_completion(&sdp->sd_thread_completion); + + fail_quotad: + down(&sdp->sd_thread_lock); + clear_bit(SDF_QUOTAD_RUN, 
&sdp->sd_flags); + wake_up_process(sdp->sd_quotad_process); + up(&sdp->sd_thread_lock); + wait_for_completion(&sdp->sd_thread_completion); + + fail_logd: + down(&sdp->sd_thread_lock); + clear_bit(SDF_LOGD_RUN, &sdp->sd_flags); + wake_up_process(sdp->sd_logd_process); + up(&sdp->sd_thread_lock); + wait_for_completion(&sdp->sd_thread_completion); + + fail_dput: + dput(sb->s_root); + + fail_li_free: + gfs_inode_put(sdp->sd_linode); + + fail_qi_free: + gfs_inode_put(sdp->sd_qinode); + + fail_root_free: + gfs_inode_put(sdp->sd_rooti); + + fail_ri_free: + gfs_inode_put(sdp->sd_riinode); + gfs_clear_rgrpd(sdp); + + fail_recoverd: + down(&sdp->sd_thread_lock); + clear_bit(SDF_RECOVERD_RUN, &sdp->sd_flags); + wake_up_process(sdp->sd_recoverd_process); + up(&sdp->sd_thread_lock); + wait_for_completion(&sdp->sd_thread_completion); + + fail_recover_dump: + clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); + gfs_unlinked_cleanup(sdp); + gfs_quota_cleanup(sdp); + + fail_gunlock_journal: + gfs_glock_dq_uninit(&sdp->sd_journal_gh); + + fail_gunlock_ji: + if (jindex) + gfs_glock_dq_uninit(&ji_gh); + + fail_trans_gl: + gfs_glock_put(sdp->sd_trans_gl); + + fail_ji_free: + gfs_inode_put(sdp->sd_jiinode); + gfs_clear_journals(sdp); + + fail_gunlock_sb: + if (super) + gfs_glock_dq_uninit(&sb_gh); + + fail_gunlock_live: + gfs_glock_dq_uninit(&sdp->sd_live_gh); + + fail_gunlock_mount: + gfs_glock_dq_uninit(&mount_gh); + + fail_glockd: + clear_bit(SDF_GLOCKD_RUN, &sdp->sd_flags); + wake_up(&sdp->sd_reclaim_wchan); + while (sdp->sd_glockd_num--) + wait_for_completion(&sdp->sd_thread_completion); + + down(&sdp->sd_thread_lock); + clear_bit(SDF_SCAND_RUN, &sdp->sd_flags); + wake_up_process(sdp->sd_scand_process); + up(&sdp->sd_thread_lock); + wait_for_completion(&sdp->sd_thread_completion); + + fail_lockproto: + gfs_gl_hash_clear(sdp, TRUE); + gfs_unmount_lockproto(sdp); + gfs_clear_dirty_j(sdp); + while (invalidate_inodes(sb)) + yield(); + + fail_vfree: + vfree(sdp); + + fail: + vfs2sdp(sb) = NULL; + return error; +} + +/** + * gfs_get_sb - + * @fs_type: + * @flags: + * @dev_name: + * @data: + * + * Returns: the new superblock + */ + +struct super_block *gfs_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) +{ + return get_sb_bdev(fs_type, flags, dev_name, data, fill_super); +} + +struct file_system_type gfs_fs_type = { + .name = "gfs", + .fs_flags = FS_REQUIRES_DEV, + .get_sb = gfs_get_sb, + .kill_sb = kill_block_super, + .owner = THIS_MODULE, +}; diff -urN linux-orig/fs/gfs/ops_fstype.h linux-patched/fs/gfs/ops_fstype.h --- linux-orig/fs/gfs/ops_fstype.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/ops_fstype.h 2004-06-30 13:27:49.353708810 -0500 @@ -0,0 +1,19 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
+** +******************************************************************************* +******************************************************************************/ + +#ifndef __OPS_FSTYPE_DOT_H__ +#define __OPS_FSTYPE_DOT_H__ + +extern struct file_system_type gfs_fs_type; + +#endif /* __OPS_FSTYPE_DOT_H__ */ diff -urN linux-orig/fs/gfs/ops_inode.c linux-patched/fs/gfs/ops_inode.c --- linux-orig/fs/gfs/ops_inode.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/ops_inode.c 2004-06-30 13:27:49.354708578 -0500 @@ -0,0 +1,1723 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs.h" +#include "acl.h" +#include "bmap.h" +#include "dio.h" +#include "dir.h" +#include "eattr.h" +#include "glock.h" +#include "inode.h" +#include "ops_dentry.h" +#include "ops_inode.h" +#include "page.h" +#include "quota.h" +#include "rgrp.h" +#include "trans.h" +#include "unlinked.h" + +/** + * gfs_create - Create a file + * @dir: The directory in which to create the file + * @dentry: The dentry of the new file + * @mode: The mode of the new file + * + * Returns: 0 on success, -EXXXX on failure + */ + +static int +gfs_create(struct inode *dir, struct dentry *dentry, + int mode, struct nameidata *nd) +{ + struct gfs_inode *dip = vn2ip(dir), *ip; + struct gfs_sbd *sdp = dip->i_sbd; + struct gfs_holder d_gh, i_gh; + struct inode *inode; + int new = TRUE; + int error; + + atomic_inc(&sdp->sd_ops_inode); + + gfs_unlinked_limit(sdp); + + gfs_holder_init(dip->i_gl, 0, 0, &d_gh); + + for (;;) { + error = gfs_createi(&d_gh, &dentry->d_name, + GFS_FILE_REG, mode, + &i_gh); + if (!error) + break; + else if (error != -EEXIST) { + gfs_holder_uninit(&d_gh); + return error; + } + + error = gfs_lookupi(&d_gh, &dentry->d_name, + FALSE, &i_gh); + if (!error) { + if (i_gh.gh_gl) { + new = FALSE; + break; + } + } else { + gfs_holder_uninit(&d_gh); + return error; + } + } + + GFS_ASSERT_SBD(i_gh.gh_gl, sdp,); + ip = gl2ip(i_gh.gh_gl); + + if (new) { + gfs_trans_end(sdp); + if (dip->i_alloc->al_rgd) + gfs_inplace_release(dip); + gfs_quota_unlock_m(dip); + gfs_unlinked_unlock(sdp, dip->i_alloc->al_ul); + gfs_alloc_put(dip); + + ip->i_creat_task = current; + ip->i_creat_pid = current->pid; + } + + gfs_glock_dq_uninit(&d_gh); + gfs_glock_dq_uninit(&i_gh); + + inode = gfs_iget(ip, CREATE); + gfs_inode_put(ip); + + if (!inode) + return -ENOMEM; + + d_instantiate(dentry, inode); + if (new) + mark_inode_dirty(inode); + + return 0; +} + +/** + * lookup_cdpn_sub_at - Maybe lookup a Context Dependent Pathname + * @sdp: the filesystem + * @dentry: the original dentry to lookup + * @new_dentry: the new dentry, if this was a substitutable path. 
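+ * Recognized names: @hostname, @mach, @os, @uid, @gid, @sys, @jid.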
+ * + */ + +static void +lookup_cdpn_sub_at(struct gfs_sbd *sdp, struct dentry *dentry, + struct dentry **new_dentry) +{ + struct dentry *parent = dget_parent(dentry); + char *buf = gmalloc(2 * __NEW_UTS_LEN + 2); + + if (gfs_filecmp(&dentry->d_name, "@hostname", 9)) + *new_dentry = lookup_one_len(system_utsname.nodename, + parent, + strlen(system_utsname.nodename)); + else if (gfs_filecmp(&dentry->d_name, "@mach", 5)) + *new_dentry = lookup_one_len(system_utsname.machine, + parent, + strlen(system_utsname.machine)); + else if (gfs_filecmp(&dentry->d_name, "@os", 3)) + *new_dentry = lookup_one_len(system_utsname.sysname, + parent, + strlen(system_utsname.sysname)); + else if (gfs_filecmp(&dentry->d_name, "@uid", 4)) + *new_dentry = lookup_one_len(buf, + parent, + sprintf(buf, "%u", current->fsuid)); + else if (gfs_filecmp(&dentry->d_name, "@gid", 4)) + *new_dentry = lookup_one_len(buf, + parent, + sprintf(buf, "%u", current->fsgid)); + else if (gfs_filecmp(&dentry->d_name, "@sys", 4)) + *new_dentry = lookup_one_len(buf, + parent, + sprintf(buf, "%s_%s", + system_utsname.machine, + system_utsname.sysname)); + else if (gfs_filecmp(&dentry->d_name, "@jid", 4)) + *new_dentry = lookup_one_len(buf, + parent, + sprintf(buf, "%u", + sdp->sd_lockstruct.ls_jid)); + + kfree(buf); + dput(parent); +} + +/** + * lookup_cdpn_sub_brace - Maybe lookup a Context Dependent Pathname + * @sdp: the filesystem + * @dentry: the original dentry to lookup + * @new_dentry: the new dentry, if this was a substitutable path. + * + */ + +static void +lookup_cdpn_sub_brace(struct gfs_sbd *sdp, struct dentry *dentry, + struct dentry **new_dentry) +{ + struct dentry *parent = dget_parent(dentry); + char *buf = gmalloc(2 * __NEW_UTS_LEN + 2); + + if (gfs_filecmp(&dentry->d_name, "{hostname}", 10)) + *new_dentry = lookup_one_len(system_utsname.nodename, + parent, + strlen(system_utsname.nodename)); + else if (gfs_filecmp(&dentry->d_name, "{mach}", 6)) + *new_dentry = lookup_one_len(system_utsname.machine, + parent, + strlen(system_utsname.machine)); + else if (gfs_filecmp(&dentry->d_name, "{os}", 4)) + *new_dentry = lookup_one_len(system_utsname.sysname, + parent, + strlen(system_utsname.sysname)); + else if (gfs_filecmp(&dentry->d_name, "{uid}", 5)) + *new_dentry = lookup_one_len(buf, + parent, + sprintf(buf, "%u", current->fsuid)); + else if (gfs_filecmp(&dentry->d_name, "{gid}", 5)) + *new_dentry = lookup_one_len(buf, + parent, + sprintf(buf, "%u", current->fsgid)); + else if (gfs_filecmp(&dentry->d_name, "{sys}", 5)) + *new_dentry = lookup_one_len(buf, + parent, + sprintf(buf, "%s_%s", + system_utsname.machine, + system_utsname.sysname)); + else if (gfs_filecmp(&dentry->d_name, "{jid}", 5)) + *new_dentry = lookup_one_len(buf, + parent, + sprintf(buf, "%u", + sdp->sd_lockstruct.ls_jid)); + + kfree(buf); + dput(parent); +} + +/** + * gfs_lookup - Look up a filename in a directory and return its inode + * @dir: The directory inode + * @dentry: The dentry of the new inode + * + * Called by the VFS layer. 
Lock dir and call gfs_lookupi() + * + * Returns: 0 on success, -EXXXX on failure + */ + +static struct dentry * +gfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +{ + struct gfs_inode *dip = vn2ip(dir), *ip; + struct gfs_holder d_gh, i_gh; + struct inode *inode = NULL; + int error; + + atomic_inc(&dip->i_sbd->sd_ops_inode); + + /* Do Context Dependent Path Name expansion */ + + if (*dentry->d_name.name == '@' && dentry->d_name.len > 1) { + struct dentry *new_dentry = NULL; + lookup_cdpn_sub_at(dip->i_sbd, dentry, &new_dentry); + if (new_dentry) + return new_dentry; + } else if (*dentry->d_name.name == '{' && dentry->d_name.len > 2) { + struct dentry *new_dentry = NULL; + lookup_cdpn_sub_brace(dip->i_sbd, dentry, &new_dentry); + if (new_dentry) + return new_dentry; + } + + dentry->d_op = &gfs_dops; + + gfs_holder_init(dip->i_gl, 0, 0, &d_gh); + + error = gfs_lookupi(&d_gh, &dentry->d_name, FALSE, &i_gh); + if (error) { + gfs_holder_uninit(&d_gh); + return ERR_PTR(error); + } + + if (i_gh.gh_gl) { + ip = gl2ip(i_gh.gh_gl); + + gfs_glock_dq_uninit(&d_gh); + gfs_glock_dq_uninit(&i_gh); + + inode = gfs_iget(ip, CREATE); + gfs_inode_put(ip); + + if (!inode) + return ERR_PTR(-ENOMEM); + } else + gfs_holder_uninit(&d_gh); + + if (inode) + return d_splice_alias(inode, dentry); + d_add(dentry, inode); + return NULL; +} + +/** + * gfs_link - Link to a file + * @old_dentry: The inode to link + * @dir: Add link to this directory + * @dentry: The name of the link + * + * Link the inode in "old_dentry" into the directory "dir" with the + * name in "dentry". + * + * Returns: 0 on success, -EXXXX on failure + */ + +static int +gfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) +{ + struct gfs_inode *dip = vn2ip(dir); + struct gfs_sbd *sdp = dip->i_sbd; + struct inode *inode = old_dentry->d_inode; + struct gfs_inode *ip = vn2ip(inode); + struct gfs_alloc *al = NULL; + struct gfs_holder ghs[2]; + int alloc_required; + int error; + + atomic_inc(&sdp->sd_ops_inode); + + if (ip->i_di.di_type == GFS_FILE_DIR) + return -EPERM; + + gfs_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[0]); + gfs_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[1]); + + error = gfs_glock_nq_m(2, ghs); + if (error) + goto fail; + + error = permission(dir, MAY_WRITE | MAY_EXEC, NULL); + if (error) + goto fail_gunlock; + + error = gfs_dir_search(dip, &dentry->d_name, NULL, NULL); + switch (error) { + case -ENOENT: + break; + case 0: + error = -EEXIST; + default: + goto fail_gunlock; + } + + if (!dip->i_di.di_nlink) { + error = -EINVAL; + goto fail_gunlock; + } + if (dip->i_di.di_entries == (uint32_t)-1) { + error = -EFBIG; + goto fail_gunlock; + } + if (!ip->i_di.di_nlink) { + error = -EINVAL; + goto fail_gunlock; + } + if (ip->i_di.di_nlink == (uint32_t)-1) { + error = -EMLINK; + goto fail_gunlock; + } + + error = gfs_diradd_alloc_required(dip, &dentry->d_name, &alloc_required); + if (error) + goto fail_gunlock; + + if (alloc_required) { + al = gfs_alloc_get(dip); + + error = gfs_quota_lock_m(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto fail_alloc; + + error = gfs_quota_check(dip, dip->i_di.di_uid, dip->i_di.di_gid); + if (error) + goto fail_gunlock_q; + + al->al_requested_meta = sdp->sd_max_dirres; + + error = gfs_inplace_reserve(dip); + if (error) + goto fail_gunlock_q; + + /* Trans may require: + two dinode blocks, directory modifications to add an entry, + RG bitmap blocks to allocate from, and quota change */ + + error = gfs_trans_begin(sdp, + 2 + sdp->sd_max_dirres 
+ + al->al_rgd->rd_ri.ri_length, + 1); + if (error) + goto fail_ipres; + } else { + /* Trans may require: + Two dinode blocks and a leaf block. */ + + error = gfs_trans_begin(sdp, 3, 0); + if (error) + goto fail_ipres; + } + + error = gfs_dir_add(dip, &dentry->d_name, &ip->i_num, ip->i_di.di_type); + if (error) + goto fail_end_trans; + + error = gfs_change_nlink(ip, +1); + if (error) + goto fail_end_trans; + + gfs_trans_end(sdp); + + if (alloc_required) { + GFS_ASSERT_INODE(al->al_alloced_meta, dip,); + gfs_inplace_release(dip); + gfs_quota_unlock_m(dip); + gfs_alloc_put(dip); + } + + gfs_glock_dq_m(2, ghs); + + gfs_holder_uninit(&ghs[0]); + gfs_holder_uninit(&ghs[1]); + + atomic_inc(&inode->i_count); + + d_instantiate(dentry, inode); + mark_inode_dirty(inode); + + return 0; + + fail_end_trans: + gfs_trans_end(sdp); + + fail_ipres: + if (alloc_required) + gfs_inplace_release(dip); + + fail_gunlock_q: + if (alloc_required) + gfs_quota_unlock_m(dip); + + fail_alloc: + if (alloc_required) + gfs_alloc_put(dip); + + fail_gunlock: + gfs_glock_dq_m(2, ghs); + + fail: + gfs_holder_uninit(&ghs[0]); + gfs_holder_uninit(&ghs[1]); + + return error; +} + +/** + * gfs_unlink - Unlink a file + * @dir: The inode of the directory containing the file to unlink + * @dentry: The file itself + * + * Unlink a file. Call gfs_unlinki() + * + * Returns: 0 on success, -EXXXX on failure + */ + +static int +gfs_unlink(struct inode *dir, struct dentry *dentry) +{ + struct gfs_inode *dip = vn2ip(dir); + struct gfs_sbd *sdp = dip->i_sbd; + struct gfs_inode *ip = vn2ip(dentry->d_inode); + struct gfs_holder ghs[2]; + int error; + + atomic_inc(&sdp->sd_ops_inode); + + gfs_unlinked_limit(sdp); + + gfs_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[0]); + gfs_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[1]); + + error = gfs_glock_nq_m(2, ghs); + if (error) + goto fail; + + error = permission(dir, MAY_WRITE | MAY_EXEC, NULL); + if (error) + goto fail_gunlock; + + if ((dip->i_di.di_mode & S_ISVTX) && + dip->i_di.di_uid != current->fsuid && + ip->i_di.di_uid != current->fsuid && + !capable(CAP_FOWNER)) { + error = -EPERM; + goto fail_gunlock; + } + + error = gfs_revalidate(dip, &dentry->d_name, ip); + if (error) + goto fail_gunlock; + + /* Trans may require: + Two dinode blocks and one modified directory leaf block + and one unlinked tag. 
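+
+	   Worked out against the call below: 2 dinode blocks + 1 leaf
+	   block = 3 metadata blocks, plus 1 unlinked tag, giving
+	   gfs_trans_begin(sdp, 3, 1).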
*/ + + error = gfs_trans_begin(sdp, 3, 1); + if (error) + goto fail_gunlock; + + error = gfs_unlinki(dip, &dentry->d_name, ip); + if (error) + goto fail_end_trans; + + gfs_trans_end(sdp); + + gfs_glock_dq_m(2, ghs); + + gfs_holder_uninit(&ghs[0]); + gfs_holder_uninit(&ghs[1]); + + return 0; + + fail_end_trans: + gfs_trans_end(sdp); + + fail_gunlock: + gfs_glock_dq_m(2, ghs); + + fail: + gfs_holder_uninit(&ghs[0]); + gfs_holder_uninit(&ghs[1]); + + return error; +} + +/** + * gfs_symlink - Create a symlink + * @dir: The directory to create the symlink in + * @dentry: The dentry to put the symlink in + * @symname: The thing which the link points to + * + * Returns: 0 on success, -EXXXX on failure + */ + +static int +gfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) +{ + struct gfs_inode *dip = vn2ip(dir), *ip; + struct gfs_sbd *sdp = dip->i_sbd; + struct gfs_holder d_gh, i_gh; + struct inode *inode; + struct buffer_head *dibh; + int size; + int error; + + atomic_inc(&sdp->sd_ops_inode); + + gfs_unlinked_limit(sdp); + + /* Must be stuffed with a null terminator for gfs_follow_link() */ + size = strlen(symname); + if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode) - 1) + return -ENAMETOOLONG; + + gfs_holder_init(dip->i_gl, 0, 0, &d_gh); + + error = gfs_createi(&d_gh, &dentry->d_name, + GFS_FILE_LNK, 0777, + &i_gh); + if (error) { + gfs_holder_uninit(&d_gh); + return error; + } + + GFS_ASSERT_SBD(i_gh.gh_gl, sdp,); + ip = gl2ip(i_gh.gh_gl); + + ip->i_di.di_size = size; + + error = gfs_get_inode_buffer(ip, &dibh); + GFS_ASSERT_INODE(!error, ip,); + + gfs_dinode_out(&ip->i_di, dibh->b_data); + memcpy(dibh->b_data + sizeof(struct gfs_dinode), symname, size); + + brelse(dibh); + + gfs_trans_end(sdp); + if (dip->i_alloc->al_rgd) + gfs_inplace_release(dip); + gfs_quota_unlock_m(dip); + gfs_unlinked_unlock(sdp, dip->i_alloc->al_ul); + gfs_alloc_put(dip); + + gfs_glock_dq_uninit(&d_gh); + gfs_glock_dq_uninit(&i_gh); + + inode = gfs_iget(ip, CREATE); + gfs_inode_put(ip); + + if (!inode) + return -ENOMEM; + + d_instantiate(dentry, inode); + mark_inode_dirty(inode); + + return 0; +} + +/** + * gfs_mkdir - Make a directory + * @dir: The parent directory of the new one + * @dentry: The dentry of the new directory + * @mode: The mode of the new directory + * + * Returns: 0 on success, -EXXXX on failure + */ + +static int +gfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + struct gfs_inode *dip = vn2ip(dir), *ip; + struct gfs_sbd *sdp = dip->i_sbd; + struct gfs_holder d_gh, i_gh; + struct inode *inode; + struct buffer_head *dibh; + struct gfs_dinode *di; + struct gfs_dirent *dent; + int error; + + atomic_inc(&sdp->sd_ops_inode); + + gfs_unlinked_limit(sdp); + + gfs_holder_init(dip->i_gl, 0, 0, &d_gh); + + error = gfs_createi(&d_gh, &dentry->d_name, + GFS_FILE_DIR, mode, + &i_gh); + if (error) { + gfs_holder_uninit(&d_gh); + return error; + } + + GFS_ASSERT_SBD(i_gh.gh_gl, sdp,); + ip = gl2ip(i_gh.gh_gl); + + ip->i_di.di_nlink = 2; + ip->i_di.di_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode); + ip->i_di.di_flags |= GFS_DIF_JDATA; + ip->i_di.di_payload_format = GFS_FORMAT_DE; + ip->i_di.di_entries = 2; + + error = gfs_get_inode_buffer(ip, &dibh); + GFS_ASSERT_INODE(!error, ip,); + + di = (struct gfs_dinode *)dibh->b_data; + + error = gfs_dirent_alloc(ip, dibh, 1, &dent); + GFS_ASSERT_INODE(!error, ip,); /* This should never fail */ + + dent->de_inum = di->di_num; /* already GFS endian */ + dent->de_hash = gfs_dir_hash(".", 1); + dent->de_hash = 
cpu_to_gfs32(dent->de_hash); + dent->de_type = cpu_to_gfs16(GFS_FILE_DIR); + memcpy((char *) (dent + 1), ".", 1); + di->di_entries = cpu_to_gfs32(1); + + error = gfs_dirent_alloc(ip, dibh, 2, &dent); + GFS_ASSERT_INODE(!error, ip,); /* This should never fail */ + + gfs_inum_out(&dip->i_num, (char *) &dent->de_inum); + dent->de_hash = gfs_dir_hash("..", 2); + dent->de_hash = cpu_to_gfs32(dent->de_hash); + dent->de_type = cpu_to_gfs16(GFS_FILE_DIR); + memcpy((char *) (dent + 1), "..", 2); + + gfs_dinode_out(&ip->i_di, (char *)di); + + brelse(dibh); + + error = gfs_change_nlink(dip, +1); + GFS_ASSERT_INODE(!error, dip,); /* dip already pinned */ + + gfs_trans_end(sdp); + if (dip->i_alloc->al_rgd) + gfs_inplace_release(dip); + gfs_quota_unlock_m(dip); + gfs_unlinked_unlock(sdp, dip->i_alloc->al_ul); + gfs_alloc_put(dip); + + gfs_glock_dq_uninit(&d_gh); + gfs_glock_dq_uninit(&i_gh); + + inode = gfs_iget(ip, CREATE); + gfs_inode_put(ip); + + if (!inode) + return -ENOMEM; + + d_instantiate(dentry, inode); + mark_inode_dirty(inode); + + return 0; +} + +/** + * gfs_rmdir - Remove a directory + * @dir: The parent directory of the directory to be removed + * @dentry: The dentry of the directory to remove + * + * Remove a directory. Call gfs_rmdiri() + * + * Returns: 0 on success, -EXXXX on failure + */ + +static int +gfs_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct gfs_inode *dip = vn2ip(dir); + struct gfs_sbd *sdp = dip->i_sbd; + struct gfs_inode *ip = vn2ip(dentry->d_inode); + struct gfs_holder ghs[2]; + int error; + + atomic_inc(&sdp->sd_ops_inode); + + gfs_unlinked_limit(sdp); + + gfs_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[0]); + gfs_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[1]); + + error = gfs_glock_nq_m(2, ghs); + if (error) + goto fail; + + error = permission(dir, MAY_WRITE | MAY_EXEC, NULL); + if (error) + goto fail_gunlock; + + if ((dip->i_di.di_mode & S_ISVTX) && + dip->i_di.di_uid != current->fsuid && + ip->i_di.di_uid != current->fsuid && + !capable(CAP_FOWNER)) { + error = -EPERM; + goto fail_gunlock; + } + + error = gfs_revalidate(dip, &dentry->d_name, ip); + if (error) + goto fail_gunlock; + + GFS_ASSERT_INODE(ip->i_di.di_entries >= 2, ip, + gfs_dinode_print(&ip->i_di);); + + if (ip->i_di.di_entries > 2) { + error = -ENOTEMPTY; + goto fail_gunlock; + } + + /* Trans may require: + Two dinode blocks, one directory leaf block containing the + entry to be rmdired, two leaf blocks containing . and .. 
of
+	   the directory being rmdired, and one unlinked tag */
+
+	error = gfs_trans_begin(sdp, 5, 1);
+	if (error)
+		goto fail_gunlock;
+
+	error = gfs_rmdiri(dip, &dentry->d_name, ip);
+	if (error)
+		goto fail_end_trans;
+
+	gfs_trans_end(sdp);
+
+	gfs_glock_dq_m(2, ghs);
+
+	gfs_holder_uninit(&ghs[0]);
+	gfs_holder_uninit(&ghs[1]);
+
+	return 0;
+
+ fail_end_trans:
+	gfs_trans_end(sdp);
+
+ fail_gunlock:
+	gfs_glock_dq_m(2, ghs);
+
+ fail:
+	gfs_holder_uninit(&ghs[0]);
+	gfs_holder_uninit(&ghs[1]);
+
+	return error;
+}
+
+/**
+ * gfs_mknod - Make a special file
+ * @dir: The directory in which the special file will reside
+ * @dentry: The dentry of the special file
+ * @mode: The mode of the special file
+ * @dev: The device specification of the special file
+ *
+ * Returns: 0 on success, -EXXXX on failure
+ */
+
+static int
+gfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
+{
+	struct gfs_inode *dip = vn2ip(dir), *ip;
+	struct gfs_sbd *sdp = dip->i_sbd;
+	struct gfs_holder d_gh, i_gh;
+	struct inode *inode;
+	struct buffer_head *dibh;
+	uint16_t type = 0;
+	uint32_t major = 0, minor = 0;
+	int error;
+
+	atomic_inc(&sdp->sd_ops_inode);
+
+	gfs_unlinked_limit(sdp);
+
+	switch (mode & S_IFMT) {
+	case S_IFBLK:
+		type = GFS_FILE_BLK;
+		major = MAJOR(dev);
+		minor = MINOR(dev);
+		break;
+	case S_IFCHR:
+		type = GFS_FILE_CHR;
+		major = MAJOR(dev);
+		minor = MINOR(dev);
+		break;
+	case S_IFIFO:
+		type = GFS_FILE_FIFO;
+		break;
+	case S_IFSOCK:
+		type = GFS_FILE_SOCK;
+		break;
+	default:
+		GFS_ASSERT_SBD(FALSE, sdp,
+			       printk("mode = %d\n", mode););
+		break;
+	}
+
+	gfs_holder_init(dip->i_gl, 0, 0, &d_gh);
+
+	error = gfs_createi(&d_gh, &dentry->d_name,
+			    type, mode,
+			    &i_gh);
+	if (error) {
+		gfs_holder_uninit(&d_gh);
+		return error;
+	}
+
+	GFS_ASSERT_SBD(i_gh.gh_gl, sdp,);
+	ip = gl2ip(i_gh.gh_gl);
+
+	ip->i_di.di_major = major;
+	ip->i_di.di_minor = minor;
+
+	error = gfs_get_inode_buffer(ip, &dibh);
+	GFS_ASSERT_INODE(!error, ip,);
+
+	gfs_dinode_out(&ip->i_di, dibh->b_data);
+
+	brelse(dibh);
+
+	gfs_trans_end(sdp);
+	if (dip->i_alloc->al_rgd)
+		gfs_inplace_release(dip);
+	gfs_quota_unlock_m(dip);
+	gfs_unlinked_unlock(sdp, dip->i_alloc->al_ul);
+	gfs_alloc_put(dip);
+
+	gfs_glock_dq_uninit(&d_gh);
+	gfs_glock_dq_uninit(&i_gh);
+
+	inode = gfs_iget(ip, CREATE);
+	gfs_inode_put(ip);
+
+	if (!inode)
+		return -ENOMEM;
+
+	d_instantiate(dentry, inode);
+	mark_inode_dirty(inode);
+
+	return 0;
+}
+
+/**
+ * gfs_rename - Rename a file
+ * @odir: Parent directory of old file name
+ * @odentry: The old dentry of the file
+ * @ndir: Parent directory of new file name
+ * @ndentry: The new dentry of the file
+ *
+ * Returns: 0 on success, -EXXXX on failure
+ */
+
+static int
+gfs_rename(struct inode *odir, struct dentry *odentry,
+	   struct inode *ndir, struct dentry *ndentry)
+{
+	struct gfs_inode *odip = vn2ip(odir);
+	struct gfs_inode *ndip = vn2ip(ndir);
+	struct gfs_inode *ip = vn2ip(odentry->d_inode);
+	struct gfs_inode *nip = NULL;
+	struct gfs_sbd *sdp = odip->i_sbd;
+	struct qstr name;
+	struct gfs_alloc *al;
+	struct gfs_holder ghs[4], r_gh;
+	unsigned int num_gh;
+	int dir_rename = FALSE;
+	int alloc_required;
+	unsigned int x;
+	int error;
+
+	atomic_inc(&sdp->sd_ops_inode);
+
+	gfs_unlinked_limit(sdp);
+
+	if (ndentry->d_inode) {
+		nip = vn2ip(ndentry->d_inode);
+		if (ip == nip)
+			return 0;
+	}
+
+	/* Make sure we aren't trying to move a directory into its subdir */
+
+	if (ip->i_di.di_type == GFS_FILE_DIR && odip != ndip) {
+		dir_rename = TRUE;
+
+		error = gfs_glock_nq_init(sdp->sd_rename_gl,
LM_ST_EXCLUSIVE, 0, + &r_gh); + if (error) + return error; + + error = gfs_ok_to_move(ip, ndip); + if (error) + goto fail; + } + + gfs_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[0]); + gfs_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[1]); + num_gh = 2; + + if (nip) + gfs_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[num_gh++]); + + if (dir_rename) + gfs_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[num_gh++]); + + error = gfs_glock_nq_m(num_gh, ghs); + if (error) + goto fail_uninit; + + /* Check out the old directory */ + + error = permission(odir, MAY_WRITE | MAY_EXEC, NULL); + if (error) + goto fail_gunlock; + + if ((odip->i_di.di_mode & S_ISVTX) && + odip->i_di.di_uid != current->fsuid && + ip->i_di.di_uid != current->fsuid && + !capable(CAP_FOWNER)) { + error = -EPERM; + goto fail_gunlock; + } + + error = gfs_revalidate(odip, &odentry->d_name, ip); + if (error) + goto fail_gunlock; + + /* Check out the new directory */ + + error = permission(ndir, MAY_WRITE | MAY_EXEC, NULL); + if (error) + goto fail_gunlock; + + if (nip) { + if ((ndip->i_di.di_mode & S_ISVTX) && + ndip->i_di.di_uid != current->fsuid && + nip->i_di.di_uid != current->fsuid && + !capable(CAP_FOWNER)) { + error = -EPERM; + goto fail_gunlock; + } + + error = gfs_revalidate(ndip, &ndentry->d_name, nip); + if (error) + goto fail_gunlock; + + if (nip->i_di.di_type == GFS_FILE_DIR) { + GFS_ASSERT_INODE(nip->i_di.di_entries >= 2, ip, + gfs_dinode_print(&nip->i_di);); + if (nip->i_di.di_entries > 2) { + error = -ENOTEMPTY; + goto fail_gunlock; + } + } + } else { + error = gfs_dir_search(ndip, &ndentry->d_name, NULL, NULL); + switch (error) { + case -ENOENT: + error = 0; + break; + case 0: + error = -EEXIST; + default: + goto fail_gunlock; + }; + + if (odip != ndip) { + if (!ndip->i_di.di_nlink) { + error = -EINVAL; + goto fail_gunlock; + } + if (ndip->i_di.di_entries == (uint32_t)-1) { + error = -EFBIG; + goto fail_gunlock; + } + if (ip->i_di.di_type == GFS_FILE_DIR && + ndip->i_di.di_nlink == (uint32_t)-1) { + error = -EMLINK; + goto fail_gunlock; + } + } + } + + error = gfs_diradd_alloc_required(ndip, &ndentry->d_name, &alloc_required); + if (error) + goto fail_gunlock; + + if (alloc_required) { + al = gfs_alloc_get(ndip); + + error = gfs_quota_lock_m(ndip, + NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto fail_alloc; + + error = gfs_quota_check(ndip, ndip->i_di.di_uid, ndip->i_di.di_gid); + if (error) + goto fail_gunlock_q; + + al->al_requested_meta = sdp->sd_max_dirres; + + error = gfs_inplace_reserve(ndip); + if (error) + goto fail_gunlock_q; + + /* Trans may require: + Dinodes for the srcdir, srcino, dstdir, dstino. Blocks for + adding the entry to dstdir. RG bitmaps for that allocation. + One leaf block in the srcdir for removal of the entry. + One leaf block for changing .. in srcino (if it's a directory). + Two leaf blocks for removing . and .. from dstino (if it exists + and it's a directory), one unlinked tag, and one quota block. */ + + error = gfs_trans_begin(sdp, + 8 + sdp->sd_max_dirres + + al->al_rgd->rd_ri.ri_length, + 2); + if (error) + goto fail_ipres; + } else { + /* Trans may require: + Dinodes for the srcdir, srcino, dstdir, dstino. One block for + adding the entry to dstdir. + One leaf block in the srcdir for removal of the entry. + One leaf block for changing .. in srcino (if it's a directory). + Two leaf blocks for removing . and .. from dstino (if it exists + and it's a directory), and one unlinked tag. 
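+
+	   Worked out against the call below: 4 dinodes + 1 block for the
+	   dstdir entry + 1 leaf for the srcdir removal + 1 leaf for ".."
+	   + 2 leaves for dstino = 9 metadata blocks, plus 1 unlinked tag,
+	   giving gfs_trans_begin(sdp, 9, 1).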
*/ + + error = gfs_trans_begin(sdp, 9, 1); + if (error) + goto fail_ipres; + } + + /* Remove the target file, if it exists */ + + if (nip) { + if (nip->i_di.di_type == GFS_FILE_DIR) + error = gfs_rmdiri(ndip, &ndentry->d_name, nip); + else + error = gfs_unlinki(ndip, &ndentry->d_name, nip); + + if (error) + goto fail_end_trans; + } + + if (dir_rename) { + error = gfs_change_nlink(ndip, +1); + if (error) + goto fail_end_trans; + error = gfs_change_nlink(odip, -1); + if (error) + goto fail_end_trans; + + name.len = 2; + name.name = ".."; + + error = gfs_dir_mvino(ip, &name, &ndip->i_num, GFS_FILE_DIR); + if (error) + goto fail_end_trans; + } + + error = gfs_dir_del(odip, &odentry->d_name); + if (error) + goto fail_end_trans; + + error = gfs_dir_add(ndip, &ndentry->d_name, &ip->i_num, ip->i_di.di_type); + if (error) + goto fail_end_trans; + + if (dir_rename) + gfs_trans_add_gl(sdp->sd_rename_gl); + + gfs_trans_end(sdp); + + if (alloc_required) { + /* Don't check al->al_alloced_meta and friends. */ + gfs_inplace_release(ndip); + gfs_quota_unlock_m(ndip); + gfs_alloc_put(ndip); + } + + gfs_glock_dq_m(num_gh, ghs); + + for (x = 0; x < num_gh; x++) + gfs_holder_uninit(&ghs[x]); + + if (dir_rename) + gfs_glock_dq_uninit(&r_gh); + + return 0; + + fail_end_trans: + gfs_trans_end(sdp); + + fail_ipres: + if (alloc_required) + gfs_inplace_release(ndip); + + fail_gunlock_q: + if (alloc_required) + gfs_quota_unlock_m(ndip); + + fail_alloc: + if (alloc_required) + gfs_alloc_put(ndip); + + fail_gunlock: + gfs_glock_dq_m(num_gh, ghs); + + fail_uninit: + for (x = 0; x < num_gh; x++) + gfs_holder_uninit(&ghs[x]); + + fail: + if (dir_rename) + gfs_glock_dq_uninit(&r_gh); + + return error; +} + +/** + * gfs_readlink - Read the value of a symlink + * @dentry: the symlink + * @buf: the buffer to read the symlink data into + * @size: the size of the buffer + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +gfs_readlink(struct dentry *dentry, char *user_buf, int user_size) +{ + struct gfs_inode *ip = vn2ip(dentry->d_inode); + char array[GFS_FAST_NAME_SIZE], *buf = array; + unsigned int len = GFS_FAST_NAME_SIZE; + int error; + + atomic_inc(&ip->i_sbd->sd_ops_inode); + + error = gfs_readlinki(ip, &buf, &len); + if (error) + return error; + + GFS_ASSERT_INODE(len, ip,); + + if (user_size > len - 1) + user_size = len - 1; + + if (copy_to_user(user_buf, buf, user_size)) + error = -EFAULT; + else + error = user_size; + + if (buf != array) + kfree(buf); + + return error; +} + +/** + * gfs_follow_link - Follow a symbolic link + * @dentry: The dentry of the link + * @nd: Data that we pass to vfs_follow_link() + * + * This can handle symlinks of any size. It is optimised for symlinks + * under GFS_FAST_NAME_SIZE. 
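+ *
+ * The optimisation: a GFS_FAST_NAME_SIZE buffer lives on the stack, so
+ * reading a short symlink costs no allocation; gfs_readlinki() only
+ * substitutes a heap buffer for longer targets, which is why the
+ * "buf != array" test guards the kfree() below.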
+ *
+ * Returns: 0 on success or error code
+ */
+
+static int
+gfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	struct gfs_inode *ip = vn2ip(dentry->d_inode);
+	char array[GFS_FAST_NAME_SIZE], *buf = array;
+	unsigned int len = GFS_FAST_NAME_SIZE;
+	int error;
+
+	atomic_inc(&ip->i_sbd->sd_ops_inode);
+
+	error = gfs_readlinki(ip, &buf, &len);
+	if (!error) {
+		error = vfs_follow_link(nd, buf);
+		if (buf != array)
+			kfree(buf);
+	}
+
+	return error;
+}
+
+/**
+ * gfs_permission -
+ * @inode:
+ * @mask:
+ * @nd:
+ *
+ * Returns: errno
+ */
+
+static int
+gfs_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+	struct gfs_inode *ip = vn2ip(inode);
+	struct gfs_holder i_gh;
+	struct posix_acl *acl;
+	umode_t mode = inode->i_mode;
+	int error;
+
+	atomic_inc(&ip->i_sbd->sd_ops_inode);
+
+	error = gfs_glock_nq_init(ip->i_gl,
+				  LM_ST_SHARED, LM_FLAG_ANY,
+				  &i_gh);
+	if (error)
+		return error;
+
+	if (mask & MAY_WRITE) {
+		if (IS_RDONLY(inode) &&
+		    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
+			error = -EROFS;
+			goto out;
+		}
+		if (IS_IMMUTABLE(inode)) {
+			error = -EACCES;
+			goto out;
+		}
+	}
+
+	if (capable(CAP_DAC_OVERRIDE))
+		if (!(mask & MAY_EXEC) || (mode & S_IXUGO))
+			goto out;
+
+	if (capable(CAP_DAC_READ_SEARCH) &&
+	    (mask == MAY_READ ||
+	     (!(mask & MAY_WRITE) && S_ISDIR(mode))))
+		goto out;
+
+	if (inode->i_uid == current->fsuid) {
+		if ((mask & (mode >> 6)) != mask)
+			error = -EACCES;
+		goto out;
+	}
+
+	if ((mask & (mode >> 3)) == mask) {
+		error = gfs_getacl(inode, TRUE, &acl);
+		if (acl) {
+			error = posix_acl_permission(inode, acl, mask);
+			goto out;
+		} else if (error && error != -ENODATA)
+			goto out;
+		error = 0;
+		if (in_group_p(inode->i_gid)) {
+			error = 0;
+			goto out;
+		}
+	} else if (in_group_p(inode->i_gid)) {
+		error = -EACCES;
+		goto out;
+	}
+
+	if ((mask & mode) == mask)
+		goto out;
+
+	error = -EACCES;
+
+ out:
+	gfs_glock_dq_uninit(&i_gh);
+
+	return error;
+}
+
+/**
+ * gfs_setattr - Change attributes on an inode
+ * @dentry: The dentry which is changing
+ * @attr: The structure describing the change
+ *
+ * The VFS layer wants to change one or more of an inode's attributes.  Write
+ * that change out to disk.
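+ *
+ * Three cases are handled below: a size change (a truncate), an
+ * ownership change (which must also transfer the inode's block count
+ * between the old and new quota IDs), and everything else (where a
+ * mode change may additionally require rewriting the ACL).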
+ * + * Returns: 0 on success, -EXXXX on failure + */ + +static int +gfs_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + struct gfs_inode *ip = vn2ip(inode); + struct gfs_sbd *sdp = ip->i_sbd; + struct gfs_holder i_gh; + struct gfs_alloc *al; + struct buffer_head *dibh; + uint32_t ouid, ogid, nuid, ngid; + int error = 0; + + atomic_inc(&sdp->sd_ops_inode); + + error = gfs_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); + if (error) + return error; + + error = inode_change_ok(inode, attr); + if (error) + goto fail; + + if (attr->ia_valid & ATTR_SIZE) { + error = permission(inode, MAY_WRITE, NULL); + if (error) + goto fail; + + if (attr->ia_size != ip->i_di.di_size) { + error = vmtruncate(inode, attr->ia_size); + if (error) + goto fail; + } + + error = gfs_truncatei(ip, attr->ia_size, gfs_truncator_page); + if (error) + goto fail; + + if ((sdp->sd_vfs->s_flags & MS_SYNCHRONOUS) && + !gfs_is_jdata(ip)) + i_gh.gh_flags |= GL_SYNC; + } + + else if (attr->ia_valid & (ATTR_UID | ATTR_GID)) { + ouid = ip->i_di.di_uid; + ogid = ip->i_di.di_gid; + nuid = attr->ia_uid; + ngid = attr->ia_gid; + + if (!(attr->ia_valid & ATTR_UID) || ouid == nuid) + ouid = nuid = NO_QUOTA_CHANGE; + if (!(attr->ia_valid & ATTR_GID) || ogid == ngid) + ogid = ngid = NO_QUOTA_CHANGE; + + al = gfs_alloc_get(ip); + + error = gfs_quota_lock_m(ip, nuid, ngid); + if (error) + goto fail_alloc; + + if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { + error = gfs_quota_check(ip, nuid, ngid); + if (error) + goto fail_gunlock_q; + } + + /* Trans may require: + one dinode block and one quota change block */ + + error = gfs_trans_begin(sdp, 1, 1); + if (error) + goto fail_gunlock_q; + + error = gfs_get_inode_buffer(ip, &dibh); + if (error) + goto fail_end_trans; + + if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { + gfs_trans_add_quota(sdp, -ip->i_di.di_blocks, + ouid, ogid); + gfs_trans_add_quota(sdp, ip->i_di.di_blocks, + nuid, ngid); + } + + inode_setattr(inode, attr); + gfs_inode_attr_out(ip); + + gfs_trans_add_bh(ip->i_gl, dibh); + gfs_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + + gfs_trans_end(sdp); + + gfs_quota_unlock_m(ip); + gfs_alloc_put(ip); + } + + else { + /* Trans may require: + one dinode block plus changes for acl. */ + + error = gfs_trans_begin(sdp, + 1 + GFS_MAX_EA_ACL_BLKS, 0); + if (error) + goto fail; + + error = gfs_get_inode_buffer(ip, &dibh); + if (!error) { + inode_setattr(inode, attr); + gfs_inode_attr_out(ip); + + if (attr->ia_valid & ATTR_MODE) + error = gfs_acl_setattr(inode); + + gfs_trans_add_bh(ip->i_gl, dibh); + gfs_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + } + + gfs_trans_end(sdp); + } + + gfs_glock_dq_uninit(&i_gh); + + mark_inode_dirty(inode); + + return error; + + fail_end_trans: + gfs_trans_end(sdp); + + fail_gunlock_q: + gfs_quota_unlock_m(ip); + + fail_alloc: + gfs_alloc_put(ip); + + fail: + gfs_glock_dq_uninit(&i_gh); + + return error; +} + +/** + * gfs_getattr - Read out an inode's attributes + * @mnt: ? 
+ * @dentry: The dentry to stat
+ * @stat: The inode's stats
+ *
+ * Returns: 0 on success, -EXXXX on failure
+ */
+
+static int
+gfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+{
+	struct inode *inode = dentry->d_inode;
+	struct gfs_inode *ip = vn2ip(inode);
+	struct gfs_holder gh;
+	int error;
+
+	atomic_inc(&ip->i_sbd->sd_ops_inode);
+
+	error = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
+	if (!error) {
+		generic_fillattr(inode, stat);
+		gfs_glock_dq_uninit(&gh);
+	}
+
+	return error;
+}
+
+/**
+ * get_eatype - get the type of the ea, and truncate the type prefix from the name
+ * @name: extended attribute name, including the namespace prefix
+ * @truncated_name: set to the name with the namespace prefix stripped
+ *
+ * Returns: GFS_EATYPE_XXX
+ */
+
+int
+get_eatype(const char *name, char **truncated_name)
+{
+	int type;
+
+	if (strncmp(name, "system.", 7) == 0) {
+		type = GFS_EATYPE_SYS;
+		*truncated_name = strchr(name, '.') + 1;
+	} else if (strncmp(name, "user.", 5) == 0) {
+		type = GFS_EATYPE_USR;
+		*truncated_name = strchr(name, '.') + 1;
+	} else {
+		type = GFS_EATYPE_UNUSED;
+		*truncated_name = NULL;
+	}
+
+	return type;
+}
+
+/**
+ * gfs_setxattr - Set (or create or replace) an inode's extended attribute
+ * @dentry: inode's dentry
+ * @name: name of the extended attribute
+ * @data: the value of the extended attribute
+ * @size: the size of data
+ * @flags: used to specify create or replace actions
+ *
+ * Returns: 0 on success, -EXXX on error
+ */
+
+int
+gfs_setxattr(struct dentry *dentry, const char *name,
+	     const void *data, size_t size,
+	     int flags)
+{
+	struct inode *inode = dentry->d_inode;
+	struct gfs_inode *ip = vn2ip(inode);
+	struct gfs_sbd *sdp = ip->i_sbd;
+	struct gfs_easet_io req;
+	char *truncated_name;
+	int error = 0;
+
+	atomic_inc(&sdp->sd_ops_inode);
+
+	req.es_type = get_eatype(name, &truncated_name);
+
+	if (req.es_type == GFS_EATYPE_UNUSED)
+		error = -EOPNOTSUPP;
+	else {
+		req.es_data = data;
+		req.es_name = truncated_name;
+		req.es_data_len = size;
+		req.es_name_len = strlen(truncated_name);
+		if (flags & XATTR_CREATE)
+			req.es_cmd = GFS_EACMD_CREATE;
+		else if (flags & XATTR_REPLACE)
+			req.es_cmd = GFS_EACMD_REPLACE;
+		else
+			req.es_cmd = GFS_EACMD_SET;
+		error = gfs_set_eattr(sdp, ip, &req);
+	}
+
+	return error;
+}
+
+/**
+ * gfs_getxattr -
+ * @dentry:
+ * @name:
+ * @data:
+ * @size:
+ *
+ * Returns: 0 on success, -EXXX on error
+ */
+
+ssize_t
+gfs_getxattr(struct dentry *dentry, const char *name,
+	     void *data, size_t size)
+{
+	struct inode *inode = dentry->d_inode;
+	struct gfs_inode *ip = vn2ip(inode);
+	struct gfs_sbd *sdp = ip->i_sbd;
+	struct gfs_eaget_io req;
+	char *truncated_name;
+	int error = 0;
+
+	atomic_inc(&sdp->sd_ops_inode);
+
+	req.eg_type = get_eatype(name, &truncated_name);
+
+	if (req.eg_type == GFS_EATYPE_UNUSED)
+		error = -EOPNOTSUPP;
+	else {
+		req.eg_name = truncated_name;
+		req.eg_name_len = strlen(truncated_name);
+		req.eg_data = data;
+		req.eg_data_len = size;
+		req.eg_len = NULL;
+		error = gfs_get_eattr(sdp, ip, &req, gfs_ea_memcpy);
+	}
+
+	return error;
+}
+
+/**
+ * gfs_listxattr -
+ * @dentry:
+ * @buffer:
+ * @size:
+ *
+ * Returns: 0 on success, -EXXX on error
+ */
+
+ssize_t
+gfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
+{
+	struct inode *inode = dentry->d_inode;
+	struct gfs_inode *ip = vn2ip(inode);
+	struct gfs_sbd *sdp = ip->i_sbd;
+	struct gfs_eaget_io req;
+
+	atomic_inc(&sdp->sd_ops_inode);
+
+	req.eg_type = 0;
+	req.eg_name = NULL;
+	req.eg_name_len = 0;
+	req.eg_data = buffer;
+	req.eg_data_len = size;
+	req.eg_len = NULL;
+
+	
return gfs_get_eattr(sdp, ip, &req, gfs_ea_memcpy); +} + +/** + * gfs_removexattr - + * @dentry: + * @name: + * + * Returns: 0 on success, -EXXX on error + */ + +int +gfs_removexattr(struct dentry *dentry, const char *name) +{ + struct inode *inode = dentry->d_inode; + struct gfs_inode *ip = vn2ip(inode); + struct gfs_sbd *sdp = ip->i_sbd; + struct gfs_easet_io req; + char *truncated_name; + int error = 0; + + atomic_inc(&sdp->sd_ops_inode); + + req.es_type = get_eatype(name, &truncated_name); + + if (req.es_type == GFS_EATYPE_UNUSED) + error = -EOPNOTSUPP; + else { + req.es_name = truncated_name; + req.es_data = NULL; + req.es_data_len = 0; + req.es_name_len = strlen(truncated_name); + req.es_cmd = GFS_EACMD_REMOVE; + error = gfs_set_eattr(sdp, ip, &req); + } + + return error; +} + +struct inode_operations gfs_file_iops = { + .permission = gfs_permission, + .setattr = gfs_setattr, + .getattr = gfs_getattr, + .setxattr = gfs_setxattr, + .getxattr = gfs_getxattr, + .listxattr = gfs_listxattr, + .removexattr = gfs_removexattr, +}; + +struct inode_operations gfs_dev_iops = { + .permission = gfs_permission, + .setattr = gfs_setattr, + .getattr = gfs_getattr, + .setxattr = gfs_setxattr, + .getxattr = gfs_getxattr, + .listxattr = gfs_listxattr, + .removexattr = gfs_removexattr, +}; + +struct inode_operations gfs_dir_iops = { + .create = gfs_create, + .lookup = gfs_lookup, + .link = gfs_link, + .unlink = gfs_unlink, + .symlink = gfs_symlink, + .mkdir = gfs_mkdir, + .rmdir = gfs_rmdir, + .mknod = gfs_mknod, + .rename = gfs_rename, + .permission = gfs_permission, + .setattr = gfs_setattr, + .getattr = gfs_getattr, + .setxattr = gfs_setxattr, + .getxattr = gfs_getxattr, + .listxattr = gfs_listxattr, + .removexattr = gfs_removexattr, +}; + +struct inode_operations gfs_symlink_iops = { + .readlink = gfs_readlink, + .follow_link = gfs_follow_link, + .permission = gfs_permission, + .setattr = gfs_setattr, + .getattr = gfs_getattr, + .setxattr = gfs_setxattr, + .getxattr = gfs_getxattr, + .listxattr = gfs_listxattr, + .removexattr = gfs_removexattr, +}; + diff -urN linux-orig/fs/gfs/ops_inode.h linux-patched/fs/gfs/ops_inode.h --- linux-orig/fs/gfs/ops_inode.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/ops_inode.h 2004-06-30 13:27:49.354708578 -0500 @@ -0,0 +1,22 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
+** +******************************************************************************* +******************************************************************************/ + +#ifndef __OPS_INODE_DOT_H__ +#define __OPS_INODE_DOT_H__ + +extern struct inode_operations gfs_file_iops; +extern struct inode_operations gfs_dir_iops; +extern struct inode_operations gfs_symlink_iops; +extern struct inode_operations gfs_dev_iops; + +#endif /* __OPS_INODE_DOT_H__ */ diff -urN linux-orig/fs/gfs/ops_super.c linux-patched/fs/gfs/ops_super.c --- linux-orig/fs/gfs/ops_super.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/ops_super.c 2004-06-30 13:27:49.354708578 -0500 @@ -0,0 +1,416 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs.h" +#include "dio.h" +#include "glock.h" +#include "inode.h" +#include "locking.h" +#include "log.h" +#include "ops_super.h" +#include "page.h" +#include "quota.h" +#include "recovery.h" +#include "rgrp.h" +#include "super.h" + +/** + * gfs_write_inode - Make sure the inode is stable on the disk + * @inode: The inode + * @sync: synchronous write flag + * + */ + +static void +gfs_write_inode(struct inode *inode, int sync) +{ + struct gfs_inode *ip = vn2ip(inode); + + atomic_inc(&ip->i_sbd->sd_ops_super); + + if (ip && sync && !gfs_in_panic) + gfs_log_flush_glock(ip->i_gl); +} + +/** + * gfs_put_inode - put an inode + * @inode: The inode + * + * If i_nlink is zero, any dirty data for the inode is thrown away. + * If a process on another machine has the file open, it may need that + * data. So, sync it out. 
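+ *
+ * A hypothetical two-node example: node A unlinks and closes a file
+ * that node B still holds open.  Without the sync below, A could
+ * discard dirty pages that B is still entitled to read.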
+ */
+
+static void
+gfs_put_inode(struct inode *inode)
+{
+	struct gfs_sbd *sdp = vfs2sdp(inode->i_sb);
+	struct gfs_inode *ip = vn2ip(inode);
+
+	atomic_inc(&sdp->sd_ops_super);
+
+	if (ip &&
+	    !inode->i_nlink &&
+	    S_ISREG(inode->i_mode) &&
+	    !sdp->sd_args.ar_localcaching)
+		gfs_sync_page_i(inode, DIO_START | DIO_WAIT);
+}
+
+/**
+ * gfs_put_super - Unmount the filesystem
+ * @sb: The VFS superblock
+ *
+ */
+
+static void
+gfs_put_super(struct super_block *sb)
+{
+	struct gfs_sbd *sdp = vfs2sdp(sb);
+	int error;
+
+	atomic_inc(&sdp->sd_ops_super);
+
+	/* Unfreeze the filesystem, if we need to */
+
+	down(&sdp->sd_freeze_lock);
+	if (sdp->sd_freeze_count)
+		gfs_glock_dq_uninit(&sdp->sd_freeze_gh);
+	up(&sdp->sd_freeze_lock);
+
+	/* Kill off the inode thread */
+	down(&sdp->sd_thread_lock);
+	clear_bit(SDF_INODED_RUN, &sdp->sd_flags);
+	wake_up_process(sdp->sd_inoded_process);
+	up(&sdp->sd_thread_lock);
+	wait_for_completion(&sdp->sd_thread_completion);
+
+	/* Kill off the quota thread */
+	down(&sdp->sd_thread_lock);
+	clear_bit(SDF_QUOTAD_RUN, &sdp->sd_flags);
+	wake_up_process(sdp->sd_quotad_process);
+	up(&sdp->sd_thread_lock);
+	wait_for_completion(&sdp->sd_thread_completion);
+
+	/* Kill off the log thread */
+	down(&sdp->sd_thread_lock);
+	clear_bit(SDF_LOGD_RUN, &sdp->sd_flags);
+	wake_up_process(sdp->sd_logd_process);
+	up(&sdp->sd_thread_lock);
+	wait_for_completion(&sdp->sd_thread_completion);
+
+	/* Kill off the recoverd thread */
+	down(&sdp->sd_thread_lock);
+	clear_bit(SDF_RECOVERD_RUN, &sdp->sd_flags);
+	wake_up_process(sdp->sd_recoverd_process);
+	up(&sdp->sd_thread_lock);
+	wait_for_completion(&sdp->sd_thread_completion);
+
+	/* Kill off the glockd threads */
+	clear_bit(SDF_GLOCKD_RUN, &sdp->sd_flags);
+	wake_up(&sdp->sd_reclaim_wchan);
+	while (sdp->sd_glockd_num--)
+		wait_for_completion(&sdp->sd_thread_completion);
+
+	/* Kill off the scand thread */
+	down(&sdp->sd_thread_lock);
+	clear_bit(SDF_SCAND_RUN, &sdp->sd_flags);
+	wake_up_process(sdp->sd_scand_process);
+	up(&sdp->sd_thread_lock);
+	wait_for_completion(&sdp->sd_thread_completion);
+
+	if (!test_bit(SDF_ROFS, &sdp->sd_flags)) {
+		gfs_log_flush(sdp);
+		gfs_quota_sync(sdp);
+		gfs_quota_sync(sdp);
+
+		error = gfs_make_fs_ro(sdp);
+		if (error)
+			gfs_io_error(sdp);
+	}
+
+	/* At this point, we're through modifying the disk */
+
+	/* Release stuff */
+
+	gfs_inode_put(sdp->sd_riinode);
+	gfs_inode_put(sdp->sd_jiinode);
+	gfs_inode_put(sdp->sd_rooti);
+	gfs_inode_put(sdp->sd_qinode);
+	gfs_inode_put(sdp->sd_linode);
+
+	gfs_glock_put(sdp->sd_trans_gl);
+	gfs_glock_put(sdp->sd_rename_gl);
+
+	gfs_glock_dq_uninit(&sdp->sd_journal_gh);
+
+	gfs_glock_dq_uninit(&sdp->sd_live_gh);
+
+	/* Get rid of rgrp bitmap structures */
+	gfs_clear_rgrpd(sdp);
+	gfs_clear_journals(sdp);
+
+	/* Take apart glock structures and buffer lists */
+	gfs_gl_hash_clear(sdp, TRUE);
+
+	/* Unmount the locking protocol */
+	gfs_unmount_lockproto(sdp);
+
+	/* At this point, we're through participating in the lockspace */
+
+	gfs_clear_dirty_j(sdp);
+
+	/* Get rid of any extra inodes */
+	while (invalidate_inodes(sb))
+		yield();
+
+	vfree(sdp);
+
+	vfs2sdp(sb) = NULL;
+}
+
+/**
+ * gfs_write_super - commit all incore transactions to disk
+ * @sb: the filesystem
+ *
+ * This function is called every time sync(2) is called.
+ * After this exits, all dirty buffers are synced.
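+ *
+ * Note that "synced" here means the incore log has been committed via
+ * gfs_log_flush() below, rather than each dirty buffer being written
+ * back individually.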
+ */ + +static void +gfs_write_super(struct super_block *sb) +{ + struct gfs_sbd *sdp = vfs2sdp(sb); + + atomic_inc(&sdp->sd_ops_super); + + if (!gfs_in_panic) + gfs_log_flush(sdp); +} + +/** + * gfs_write_super_lockfs - prevent further writes to the filesystem + * @sb: the VFS structure for the filesystem + * + */ + +static void +gfs_write_super_lockfs(struct super_block *sb) +{ + struct gfs_sbd *sdp = vfs2sdp(sb); + int error; + + atomic_inc(&sdp->sd_ops_super); + + for (;;) { + error = gfs_freeze_fs(sdp); + if (!error) + break; + + switch (error) { + case -EBUSY: + printk("GFS: fsid=%s: waiting for recovery before freeze\n", + sdp->sd_fsname); + break; + + default: + printk("GFS: fsid=%s: error freezing FS: %d\n", + sdp->sd_fsname, error); + break; + } + + printk("GFS: fsid=%s: retrying...\n", sdp->sd_fsname); + + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(HZ); + } +} + +/** + * gfs_unlockfs - reallow writes to the filesystem + * @sb: the VFS structure for the filesystem + * + */ + +static void +gfs_unlockfs(struct super_block *sb) +{ + struct gfs_sbd *sdp = vfs2sdp(sb); + + atomic_inc(&sdp->sd_ops_super); + + gfs_unfreeze_fs(sdp); +} + +/** + * gfs_statfs - Gather and return stats about the filesystem + * @sb: The superblock + * @statfsbuf: The buffer + * + * Returns: 0 on success or error code + */ + +static int +gfs_statfs(struct super_block *sb, struct kstatfs *buf) +{ + struct gfs_sbd *sdp = vfs2sdp(sb); + struct gfs_usage usage; + int error; + + atomic_inc(&sdp->sd_ops_super); + + error = gfs_stat_gfs(sdp, &usage, TRUE); + if (error) + return error; + + memset(buf, 0, sizeof(struct kstatfs)); + + buf->f_type = GFS_MAGIC; + buf->f_bsize = usage.gu_block_size; + buf->f_blocks = usage.gu_total_blocks; + buf->f_bfree = usage.gu_free + usage.gu_free_dinode + usage.gu_free_meta; + buf->f_bavail = usage.gu_free + usage.gu_free_dinode + usage.gu_free_meta; + buf->f_files = usage.gu_used_dinode + usage.gu_free_dinode + usage.gu_free_meta + usage.gu_free; + buf->f_ffree = usage.gu_free_dinode + usage.gu_free_meta + usage.gu_free; + buf->f_namelen = GFS_FNAMESIZE; + + return 0; +} + +/** + * gfs_remount_fs - called when the FS is remounted + * @sb: the filesystem + * @flags: the remount flags + * @data: extra data passed in (not used right now) + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +gfs_remount_fs(struct super_block *sb, int *flags, char *data) +{ + struct gfs_sbd *sdp = vfs2sdp(sb); + int error = 0; + + atomic_inc(&sdp->sd_ops_super); + + if (*flags & (MS_NOATIME | MS_NODIRATIME)) + set_bit(SDF_NOATIME, &sdp->sd_flags); + else + clear_bit(SDF_NOATIME, &sdp->sd_flags); + + if (*flags & MS_RDONLY) { + if (!test_bit(SDF_ROFS, &sdp->sd_flags)) + error = gfs_make_fs_ro(sdp); + } else if (!(*flags & MS_RDONLY) && + test_bit(SDF_ROFS, &sdp->sd_flags)) { + error = gfs_make_fs_rw(sdp); + } + + /* Don't let the VFS update atimes. 
*/ + *flags |= MS_NOATIME | MS_NODIRATIME; + + return error; +} + +/** + * gfs_clear_inode - Deallocate an inode when VFS is done with it + * @inode: The VFS inode + * + */ + +static void +gfs_clear_inode(struct inode *inode) +{ + struct gfs_inode *ip = vn2ip(inode); + + atomic_inc(&vfs2sdp(inode->i_sb)->sd_ops_super); + + if (ip) { + spin_lock(&ip->i_lock); + ip->i_vnode = NULL; + vn2ip(inode) = NULL; + spin_unlock(&ip->i_lock); + + gfs_glock_schedule_for_reclaim(ip->i_gl); + gfs_inode_put(ip); + } +} + +/** + * gfs_show_options - Show mount options for /proc/mounts + * @s: seq_file structure + * @mnt: vfsmount + * + * Returns: 0 on success or error code + */ + +static int +gfs_show_options(struct seq_file *s, struct vfsmount *mnt) +{ + struct gfs_sbd *sdp = vfs2sdp(mnt->mnt_sb); + struct gfs_args *args = &sdp->sd_args; + + atomic_inc(&sdp->sd_ops_super); + + if (args->ar_lockproto[0]) { + seq_printf(s, ",lockproto="); + seq_puts(s, args->ar_lockproto); + } + if (args->ar_locktable[0]) { + seq_printf(s, ",locktable="); + seq_puts(s, args->ar_locktable); + } + if (args->ar_hostdata[0]) { + seq_printf(s, ",hostdata="); + seq_puts(s, args->ar_hostdata); + } + if (args->ar_ignore_local_fs) + seq_printf(s, ",ignore_local_fs"); + if (args->ar_localflocks) + seq_printf(s, ",localflocks"); + if (args->ar_localcaching) + seq_printf(s, ",localcaching"); + if (args->ar_upgrade) + seq_printf(s, ",upgrade"); + if (args->ar_num_glockd != GFS_GLOCKD_DEFAULT) + seq_printf(s, ",num_glockd=%u", args->ar_num_glockd); + if (args->ar_posixacls) + seq_printf(s, ",acl"); + + return 0; +} + +struct super_operations gfs_super_ops = { + .write_inode = gfs_write_inode, + .put_inode = gfs_put_inode, + .put_super = gfs_put_super, + .write_super = gfs_write_super, + .write_super_lockfs = gfs_write_super_lockfs, + .unlockfs = gfs_unlockfs, + .statfs = gfs_statfs, + .remount_fs = gfs_remount_fs, + .clear_inode = gfs_clear_inode, + .show_options = gfs_show_options, +}; diff -urN linux-orig/fs/gfs/ops_super.h linux-patched/fs/gfs/ops_super.h --- linux-orig/fs/gfs/ops_super.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/ops_super.h 2004-06-30 13:27:49.354708578 -0500 @@ -0,0 +1,19 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __OPS_SUPER_DOT_H__ +#define __OPS_SUPER_DOT_H__ + +extern struct super_operations gfs_super_ops; + +#endif /* __OPS_SUPER_DOT_H__ */ diff -urN linux-orig/fs/gfs/ops_vm.c linux-patched/fs/gfs/ops_vm.c --- linux-orig/fs/gfs/ops_vm.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/ops_vm.c 2004-06-30 13:27:49.355708346 -0500 @@ -0,0 +1,212 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. 
+** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs.h" +#include "bmap.h" +#include "glock.h" +#include "inode.h" +#include "ops_vm.h" +#include "page.h" +#include "quota.h" +#include "rgrp.h" +#include "trans.h" + +/** + * gfs_private_nopage - + * @area: + * @address: + * @type: + * + * Returns: the page + */ + +static struct page * +gfs_private_nopage(struct vm_area_struct *area, + unsigned long address, int *type) +{ + struct gfs_inode *ip = vn2ip(area->vm_file->f_mapping->host); + struct gfs_holder i_gh; + struct page *result; + int error; + + atomic_inc(&ip->i_sbd->sd_ops_vm); + + error = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); + if (error) + return NULL; + + set_bit(GIF_PAGED, &ip->i_flags); + + result = filemap_nopage(area, address, type); + + gfs_glock_dq_uninit(&i_gh); + + return result; +} + +/** + * alloc_page_backing - + * @ip: + * @index: + * + * Returns: errno + */ + +static int +alloc_page_backing(struct gfs_inode *ip, unsigned long index) +{ + struct gfs_sbd *sdp = ip->i_sbd; + uint64_t lblock = index << (PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift); + unsigned int blocks = PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift; + struct gfs_alloc *al; + unsigned int x; + int error; + + al = gfs_alloc_get(ip); + + error = gfs_quota_lock_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto out; + + error = gfs_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid); + if (error) + goto out_gunlock_q; + + gfs_write_calc_reserv(ip, PAGE_CACHE_SIZE, + &al->al_requested_data, &al->al_requested_meta); + + error = gfs_inplace_reserve(ip); + if (error) + goto out_gunlock_q; + + /* Trans may require: + a dinode block, RG bitmaps to allocate from, + indirect blocks, and a quota block */ + + error = gfs_trans_begin(sdp, + 1 + al->al_rgd->rd_ri.ri_length + + al->al_requested_meta, 1); + if (error) + goto out_ipres; + + if (gfs_is_stuffed(ip)) { + error = gfs_unstuff_dinode(ip, gfs_unstuffer_page, NULL); + if (error) + goto out_trans; + } + + for (x = 0; x < blocks; ) { + uint64_t dblock; + unsigned int extlen; + int new = TRUE; + + error = gfs_block_map(ip, lblock, &new, &dblock, &extlen); + if (error) + goto out_trans; + GFS_ASSERT_INODE(dblock, ip,); + + lblock += extlen; + x += extlen; + } + + GFS_ASSERT_INODE(al->al_alloced_meta || al->al_alloced_data, ip,); + + out_trans: + gfs_trans_end(sdp); + + out_ipres: + gfs_inplace_release(ip); + + out_gunlock_q: + gfs_quota_unlock_m(ip); + + out: + gfs_alloc_put(ip); + + return error; +} + +/** + * gfs_sharewrite_nopage - + * @area: + * @address: + * @type: + * + * Returns: the page + */ + +static struct page * +gfs_sharewrite_nopage(struct vm_area_struct *area, + unsigned long address, int *type) +{ + struct gfs_inode *ip = vn2ip(area->vm_file->f_mapping->host); + struct gfs_holder i_gh; + struct page *result = NULL; + unsigned long index = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff; + int alloc_required; + int error; + + atomic_inc(&ip->i_sbd->sd_ops_vm); + + error = gfs_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); + if (error) + return NULL; + + if (gfs_is_jdata(ip)) + goto out; + + 
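+	/*
+	 * Note: GIF_SW_PAGED records that a shared-writable mapping
+	 * faulted here; gfs_inval_pte() later tests this bit to decide
+	 * that the glock must be marked dirty when PTEs are invalidated.
+	 */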
+	set_bit(GIF_PAGED, &ip->i_flags);
+	set_bit(GIF_SW_PAGED, &ip->i_flags);
+
+	error = gfs_write_alloc_required(ip, (uint64_t)index << PAGE_CACHE_SHIFT,
+					 PAGE_CACHE_SIZE, &alloc_required);
+	if (error)
+		goto out;
+
+	result = filemap_nopage(area, address, type);
+	if (!result || result == NOPAGE_OOM)
+		goto out;
+
+	if (alloc_required) {
+		error = alloc_page_backing(ip, index);
+		if (error) {
+			page_cache_release(result);
+			result = NULL;
+		} else
+			/* only dirty a page we still hold */
+			set_page_dirty(result);
+	}
+
+ out:
+	gfs_glock_dq_uninit(&i_gh);
+
+	return result;
+}
+
+struct vm_operations_struct gfs_vm_ops_private = {
+	.nopage = gfs_private_nopage,
+};
+
+struct vm_operations_struct gfs_vm_ops_sharewrite = {
+	.nopage = gfs_sharewrite_nopage,
+};
+
diff -urN linux-orig/fs/gfs/ops_vm.h linux-patched/fs/gfs/ops_vm.h
--- linux-orig/fs/gfs/ops_vm.h	1969-12-31 18:00:00.000000000 -0600
+++ linux-patched/fs/gfs/ops_vm.h	2004-06-30 13:27:49.355708346 -0500
@@ -0,0 +1,20 @@
+/******************************************************************************
+*******************************************************************************
+**
+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+**
+** This copyrighted material is made available to anyone wishing to use,
+** modify, copy, or redistribute it subject to the terms and conditions
+** of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#ifndef __OPS_VM_DOT_H__
+#define __OPS_VM_DOT_H__
+
+extern struct vm_operations_struct gfs_vm_ops_private;
+extern struct vm_operations_struct gfs_vm_ops_sharewrite;
+
+#endif /* __OPS_VM_DOT_H__ */
diff -urN linux-orig/fs/gfs/page.c linux-patched/fs/gfs/page.c
--- linux-orig/fs/gfs/page.c	1969-12-31 18:00:00.000000000 -0600
+++ linux-patched/fs/gfs/page.c	2004-06-30 13:27:49.355708346 -0500
@@ -0,0 +1,276 @@
+/******************************************************************************
+*******************************************************************************
+**
+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+**
+** This copyrighted material is made available to anyone wishing to use,
+** modify, copy, or redistribute it subject to the terms and conditions
+** of the GNU General Public License v.2.
+** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs.h" +#include "bmap.h" +#include "inode.h" +#include "page.h" + +/** + * gfs_inval_pte - Sync and invalidate all PTEs associated with a glock + * @gl: the glock + * + */ + +void +gfs_inval_pte(struct gfs_glock *gl) +{ + struct gfs_inode *ip; + struct inode *inode; + + ip = gl2ip(gl); + if (!ip || + ip->i_di.di_type != GFS_FILE_REG) + return; + + if (!test_bit(GIF_PAGED, &ip->i_flags)) + return; + + inode = gfs_iget(ip, NO_CREATE); + if (inode) { + unmap_shared_mapping_range(inode->i_mapping, 0, 0); + iput(inode); + + if (test_bit(GIF_SW_PAGED, &ip->i_flags)) + set_bit(GLF_DIRTY, &gl->gl_flags); + } + + clear_bit(GIF_SW_PAGED, &ip->i_flags); +} + +/** + * gfs_inval_page - Invalidate all pages associated with a glock + * @gl: the glock + * + */ + +void +gfs_inval_page(struct gfs_glock *gl) +{ + struct gfs_inode *ip; + struct inode *inode; + + ip = gl2ip(gl); + if (!ip || + ip->i_di.di_type != GFS_FILE_REG) + return; + + inode = gfs_iget(ip, NO_CREATE); + if (inode) { + struct address_space *mapping = inode->i_mapping; + + truncate_inode_pages(mapping, 0); + GFS_ASSERT_INODE(!mapping->nrpages, ip,); + + iput(inode); + } + + clear_bit(GIF_PAGED, &ip->i_flags); +} + +/** + * gfs_sync_page_i - Sync the pages for a struct inode + * @inode: the inode + * @flags: DIO_START | DIO_WAIT + * + */ + +void +gfs_sync_page_i(struct inode *inode, int flags) +{ + struct address_space *mapping = inode->i_mapping; + int error = 0; + + if (flags & DIO_START) + error = filemap_fdatawrite(mapping); + if (!error && (flags & DIO_WAIT)) + filemap_fdatawait(mapping); + + if (error) + gfs_io_error_inode(vn2ip(inode)); +} + +/** + * gfs_sync_page - sync the pages associated with a glock + * @gl: the glock + * @flags: DIO_START | DIO_WAIT + * + */ + +void +gfs_sync_page(struct gfs_glock *gl, int flags) +{ + struct gfs_inode *ip; + struct inode *inode; + + ip = gl2ip(gl); + if (!ip || + ip->i_di.di_type != GFS_FILE_REG) + return; + + inode = gfs_iget(ip, NO_CREATE); + if (inode) { + gfs_sync_page_i(inode, flags); + iput(inode); + } +} + +/** + * gfs_unstuffer_page - unstuff a stuffed inode into a block cached by a page + * @ip: the inode + * @dibh: the dinode buffer + * @block: the block number that was allocated + * @private: any locked page held by the caller process + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_unstuffer_page(struct gfs_inode *ip, struct buffer_head *dibh, + uint64_t block, void *private) +{ + struct inode *inode = ip->i_vnode; + struct page *page = (struct page *)private; + struct buffer_head *bh; + int release = FALSE; + + if (!page || page->index) { + RETRY_MALLOC(page = grab_cache_page(inode->i_mapping, 0), page); + release = TRUE; + } + + GFS_ASSERT_INODE(PageLocked(page), ip,); + + if (!PageUptodate(page)) { + void *kaddr = kmap(page); + + memcpy(kaddr, + dibh->b_data + sizeof(struct gfs_dinode), + ip->i_di.di_size); + memset(kaddr + ip->i_di.di_size, + 0, + PAGE_CACHE_SIZE - ip->i_di.di_size); + kunmap(page); + + SetPageUptodate(page); + } + + if (!page_has_buffers(page)) + create_empty_buffers(page, 1 << inode->i_blkbits, + (1 << BH_Uptodate)); + + bh = page_buffers(page); + + if (!buffer_mapped(bh)) + map_bh(bh, inode->i_sb, block); + else + GFS_ASSERT_INODE(bh->b_bdev == inode->i_sb->s_bdev && + 
bh->b_blocknr == block,
+				 ip,);
+
+	set_buffer_uptodate(bh);
+	mark_buffer_dirty(bh);
+
+	if (release) {
+		unlock_page(page);
+		page_cache_release(page);
+	}
+
+	return 0;
+}
+
+/**
+ * gfs_truncator_page - truncate a partial data block in the page cache
+ * @ip: the inode
+ * @size: the size the file should be
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+int
+gfs_truncator_page(struct gfs_inode *ip, uint64_t size)
+{
+	struct inode *inode = ip->i_vnode;
+	struct page *page;
+	struct buffer_head *bh;
+	void *kaddr;
+	uint64_t lbn, dbn;
+	unsigned long index;
+	unsigned int offset;
+	unsigned int bufnum;
+	int not_new = 0;
+	int error;
+
+	lbn = size >> inode->i_blkbits;
+	error = gfs_block_map(ip,
+			      lbn, &not_new,
+			      &dbn, NULL);
+	if (error || !dbn)
+		return error;
+
+	index = size >> PAGE_CACHE_SHIFT;
+	offset = size & (PAGE_CACHE_SIZE - 1);
+	bufnum = lbn - (index << (PAGE_CACHE_SHIFT - inode->i_blkbits));
+
+	/* Not in a transaction here -- a non-disk-I/O error is ok. */
+
+	page = read_cache_page(inode->i_mapping, index,
+			       (filler_t *)inode->i_mapping->a_ops->readpage,
+			       NULL);
+	if (IS_ERR(page))
+		return PTR_ERR(page);
+
+	lock_page(page);
+
+	if (!PageUptodate(page) || PageError(page)) {
+		error = -EIO;
+		goto out;
+	}
+
+	kaddr = kmap(page);
+	memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
+	kunmap(page);
+
+	if (!page_has_buffers(page))
+		create_empty_buffers(page, 1 << inode->i_blkbits,
+				     (1 << BH_Uptodate));
+
+	for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
+		/* Do nothing */;
+
+	if (!buffer_mapped(bh))
+		map_bh(bh, inode->i_sb, dbn);
+	else
+		GFS_ASSERT_INODE(bh->b_bdev == inode->i_sb->s_bdev &&
+				 bh->b_blocknr == dbn,
+				 ip,);
+
+	set_buffer_uptodate(bh);
+	mark_buffer_dirty(bh);
+
+ out:
+	unlock_page(page);
+	page_cache_release(page);
+
+	return error;
+}
diff -urN linux-orig/fs/gfs/page.h linux-patched/fs/gfs/page.h
--- linux-orig/fs/gfs/page.h	1969-12-31 18:00:00.000000000 -0600
+++ linux-patched/fs/gfs/page.h	2004-06-30 13:27:49.355708346 -0500
@@ -0,0 +1,26 @@
+/******************************************************************************
+*******************************************************************************
+**
+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+**
+** This copyrighted material is made available to anyone wishing to use,
+** modify, copy, or redistribute it subject to the terms and conditions
+** of the GNU General Public License v.2.
+** +******************************************************************************* +******************************************************************************/ + +#ifndef __PAGE_DOT_H__ +#define __PAGE_DOT_H__ + +void gfs_inval_pte(struct gfs_glock *gl); +void gfs_inval_page(struct gfs_glock *gl); +void gfs_sync_page_i(struct inode *inode, int flags); +void gfs_sync_page(struct gfs_glock *gl, int flags); + +int gfs_unstuffer_page(struct gfs_inode *ip, struct buffer_head *dibh, + uint64_t block, void *private); +int gfs_truncator_page(struct gfs_inode *ip, uint64_t size); + +#endif /* __PAGE_DOT_H__ */ diff -urN linux-orig/fs/gfs/quota.c linux-patched/fs/gfs/quota.c --- linux-orig/fs/gfs/quota.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/quota.c 2004-06-30 13:27:49.356708115 -0500 @@ -0,0 +1,1146 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs.h" +#include "bmap.h" +#include "file.h" +#include "glock.h" +#include "glops.h" +#include "log.h" +#include "quota.h" +#include "rgrp.h" +#include "super.h" +#include "trans.h" + +/** + * gfs_quota_get - Get a structure to represent a quota change + * @sdp: the filesystem + * @user: TRUE if this is a user quota + * @id: the uid or gid + * @create: if TRUE, create the structure, otherwise return NULL + * @qdp: the returned quota structure + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_quota_get(struct gfs_sbd *sdp, int user, uint32_t id, int create, + struct gfs_quota_data **qdp) +{ + struct gfs_quota_data *qd = NULL, *new_qd = NULL; + struct list_head *tmp, *head; + int error = 0; + + for (;;) { + spin_lock(&sdp->sd_quota_lock); + + for (head = &sdp->sd_quota_list, tmp = head->next; + tmp != head; + tmp = tmp->next) { + qd = list_entry(tmp, struct gfs_quota_data, qd_list); + if (qd->qd_id == id && + !test_bit(QDF_USER, &qd->qd_flags) == !user) { + qd->qd_count++; + break; + } + } + + if (tmp == head) + qd = NULL; + + if (!qd && new_qd) { + qd = new_qd; + list_add(&qd->qd_list, &sdp->sd_quota_list); + new_qd = NULL; + } + + spin_unlock(&sdp->sd_quota_lock); + + if (qd || !create) { + if (new_qd) { + gfs_lvb_unhold(new_qd->qd_gl); + kfree(new_qd); + atomic_dec(&sdp->sd_quota_count); + } + goto out; + } + + new_qd = gmalloc(sizeof(struct gfs_quota_data)); + memset(new_qd, 0, sizeof(struct gfs_quota_data)); + + new_qd->qd_count = 1; + + new_qd->qd_id = id; + if (user) + set_bit(QDF_USER, &new_qd->qd_flags); + + INIT_LIST_HEAD(&new_qd->qd_le_list); + + error = gfs_glock_get(sdp, 2 * (uint64_t)id + ((user) ? 
0 : 1), + &gfs_quota_glops, CREATE, + &new_qd->qd_gl); + if (error) { + kfree(new_qd); + goto out; + } + + error = gfs_lvb_hold(new_qd->qd_gl); + + gfs_glock_put(new_qd->qd_gl); + + if (error) { + kfree(new_qd); + goto out; + } + + atomic_inc(&sdp->sd_quota_count); + } + + out: + *qdp = qd; + + return error; +} + +/** + * gfs_quota_hold - increment the usage count on a struct gfs_quota_data + * @sdp: the filesystem + * @qd: the structure + * + */ + +void +gfs_quota_hold(struct gfs_sbd *sdp, struct gfs_quota_data *qd) +{ + spin_lock(&sdp->sd_quota_lock); + qd->qd_count++; + spin_unlock(&sdp->sd_quota_lock); +} + +/** + * gfs_quota_put - decrement the usage count on a struct gfs_quota_data + * @sdp: the filesystem + * @qd: the structure + * + * Free the structure if its reference count hits zero. + * + */ + +void +gfs_quota_put(struct gfs_sbd *sdp, struct gfs_quota_data *qd) +{ + spin_lock(&sdp->sd_quota_lock); + GFS_ASSERT_SBD(qd->qd_count, sdp,); + qd->qd_count--; + spin_unlock(&sdp->sd_quota_lock); +} + +/** + * quota_find - Find a quota change to sync to the quota file + * @sdp: the filesystem + * + * The returned structure is locked and needs to be unlocked + * with quota_unlock(). + * + * Returns: A quota structure, or NULL + */ + +static struct gfs_quota_data * +quota_find(struct gfs_sbd *sdp) +{ + struct list_head *tmp, *head; + struct gfs_quota_data *qd = NULL; + + if (test_bit(SDF_ROFS, &sdp->sd_flags)) + return NULL; + + gfs_log_lock(sdp); + spin_lock(&sdp->sd_quota_lock); + + if (!atomic_read(&sdp->sd_quota_od_count)) + goto out; + + for (head = &sdp->sd_quota_list, tmp = head->next; + tmp != head; + tmp = tmp->next) { + qd = list_entry(tmp, struct gfs_quota_data, qd_list); + + if (test_bit(QDF_LOCK, &qd->qd_flags)) + continue; + if (!test_bit(QDF_OD_LIST, &qd->qd_flags)) + continue; + if (qd->qd_sync_gen >= sdp->sd_quota_sync_gen) + continue; + + list_move_tail(&qd->qd_list, &sdp->sd_quota_list); + + set_bit(QDF_LOCK, &qd->qd_flags); + qd->qd_count++; + qd->qd_change_sync = qd->qd_change_od; + + goto out; + } + + qd = NULL; + + out: + spin_unlock(&sdp->sd_quota_lock); + gfs_log_unlock(sdp); + + return qd; +} + +/** + * quota_trylock - Try to lock a given quota entry + * @sdp: the filesystem + * @qd: the quota data structure + * + * Returns: TRUE if the lock was successful, FALSE, otherwise + */ + +static int +quota_trylock(struct gfs_sbd *sdp, struct gfs_quota_data *qd) +{ + int ret = FALSE; + + if (test_bit(SDF_ROFS, &sdp->sd_flags)) + return FALSE; + + gfs_log_lock(sdp); + spin_lock(&sdp->sd_quota_lock); + + if (test_bit(QDF_LOCK, &qd->qd_flags)) + goto out; + if (!test_bit(QDF_OD_LIST, &qd->qd_flags)) + goto out; + + list_move_tail(&qd->qd_list, &sdp->sd_quota_list); + + set_bit(QDF_LOCK, &qd->qd_flags); + qd->qd_count++; + qd->qd_change_sync = qd->qd_change_od; + + ret = TRUE; + + out: + spin_unlock(&sdp->sd_quota_lock); + gfs_log_unlock(sdp); + + return ret; +} + +/** + * quota_unlock - drop and a reference on a quota structure + * @sdp: the filesystem + * @qd: the quota inode structure + * + */ + +static void +quota_unlock(struct gfs_sbd *sdp, struct gfs_quota_data *qd) +{ + spin_lock(&sdp->sd_quota_lock); + + GFS_ASSERT_SBD(test_bit(QDF_LOCK, &qd->qd_flags), sdp,); + clear_bit(QDF_LOCK, &qd->qd_flags); + + GFS_ASSERT_SBD(qd->qd_count, sdp,); + qd->qd_count--; + + spin_unlock(&sdp->sd_quota_lock); +} + +/** + * gfs_quota_merge - add/remove a quota change from the in-memory list + * @sdp: the filesystem + * @tag: the quota change tag + * + * Returns: 0 on success, -EXXX on 
failure + */ + +int +gfs_quota_merge(struct gfs_sbd *sdp, struct gfs_quota_tag *tag) +{ + struct gfs_quota_data *qd; + int error; + + error = gfs_quota_get(sdp, + tag->qt_flags & GFS_QTF_USER, tag->qt_id, + CREATE, &qd); + if (error) + return error; + + GFS_ASSERT_SBD(qd->qd_change_ic == qd->qd_change_od, sdp,); + + gfs_log_lock(sdp); + + qd->qd_change_ic += tag->qt_change; + qd->qd_change_od += tag->qt_change; + + if (qd->qd_change_od) { + if (!test_bit(QDF_OD_LIST, &qd->qd_flags)) { + gfs_quota_hold(sdp, qd); + set_bit(QDF_OD_LIST, &qd->qd_flags); + atomic_inc(&sdp->sd_quota_od_count); + } + } else { + GFS_ASSERT_SBD(test_bit(QDF_OD_LIST, &qd->qd_flags), sdp,); + clear_bit(QDF_OD_LIST, &qd->qd_flags); + gfs_quota_put(sdp, qd); + GFS_ASSERT_SBD(atomic_read(&sdp->sd_quota_od_count), sdp,); + atomic_dec(&sdp->sd_quota_od_count); + } + + gfs_log_unlock(sdp); + + gfs_quota_put(sdp, qd); + + return 0; +} + +/** + * gfs_quota_scan - Look for unused struct gfs_quota_data structures to throw away + * @sdp: the filesystem + * + */ + +void +gfs_quota_scan(struct gfs_sbd *sdp) +{ + struct list_head *head, *tmp, *next; + struct gfs_quota_data *qd; + LIST_HEAD(dead); + + spin_lock(&sdp->sd_quota_lock); + + for (head = &sdp->sd_quota_list, tmp = head->next, next = tmp->next; + tmp != head; + tmp = next, next = next->next) { + qd = list_entry(tmp, struct gfs_quota_data, qd_list); + if (!qd->qd_count) + list_move(&qd->qd_list, &dead); + } + + spin_unlock(&sdp->sd_quota_lock); + + while (!list_empty(&dead)) { + qd = list_entry(dead.next, struct gfs_quota_data, qd_list); + + GFS_ASSERT_SBD(!qd->qd_count, sdp,); + GFS_ASSERT_SBD(!test_bit(QDF_OD_LIST, &qd->qd_flags) && + !test_bit(QDF_LOCK, &qd->qd_flags), sdp,); + GFS_ASSERT_SBD(!qd->qd_change_new && !qd->qd_change_ic && + !qd->qd_change_od, sdp,); + + list_del(&qd->qd_list); + gfs_lvb_unhold(qd->qd_gl); + kfree(qd); + atomic_dec(&sdp->sd_quota_count); + } +} + +/** + * gfs_quota_cleanup - get rid of any extra struct gfs_quota_data structures + * @sdp: the filesystem + * + */ + +void +gfs_quota_cleanup(struct gfs_sbd *sdp) +{ + struct gfs_quota_data *qd; + + restart: + gfs_log_lock(sdp); + + spin_lock(&sdp->sd_quota_lock); + + while (!list_empty(&sdp->sd_quota_list)) { + qd = list_entry(sdp->sd_quota_list.next, + struct gfs_quota_data, + qd_list); + + if (qd->qd_count > 1) { + spin_unlock(&sdp->sd_quota_lock); + gfs_log_unlock(sdp); + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(HZ); + goto restart; + + } else if (qd->qd_count) { + GFS_ASSERT_SBD(test_bit(QDF_OD_LIST, &qd->qd_flags) && + !test_bit(QDF_LOCK, &qd->qd_flags), + sdp,); + GFS_ASSERT_SBD(qd->qd_change_od && + qd->qd_change_od == qd->qd_change_ic, + sdp,); + GFS_ASSERT_SBD(!qd->qd_change_new, sdp,); + + list_del(&qd->qd_list); + atomic_dec(&sdp->sd_quota_od_count); + + spin_unlock(&sdp->sd_quota_lock); + gfs_lvb_unhold(qd->qd_gl); + kfree(qd); + atomic_dec(&sdp->sd_quota_count); + spin_lock(&sdp->sd_quota_lock); + + } else { + GFS_ASSERT_SBD(!test_bit(QDF_OD_LIST, &qd->qd_flags) && + !test_bit(QDF_LOCK, &qd->qd_flags), sdp,); + GFS_ASSERT_SBD(!qd->qd_change_new && + !qd->qd_change_ic && + !qd->qd_change_od, sdp,); + + list_del(&qd->qd_list); + + spin_unlock(&sdp->sd_quota_lock); + gfs_lvb_unhold(qd->qd_gl); + kfree(qd); + atomic_dec(&sdp->sd_quota_count); + spin_lock(&sdp->sd_quota_lock); + } + } + + spin_unlock(&sdp->sd_quota_lock); + + GFS_ASSERT_SBD(!atomic_read(&sdp->sd_quota_od_count), sdp,); + + gfs_log_unlock(sdp); +} + +/** + * sort_qd - figure out the order between two quota 
data structures + * @a: first quota data structure + * @b: second quota data structure + * + * Returns: -1 if @a comes before @b, 0 if @a equals @b, 1 if @b comes before @a + */ + +static int +sort_qd(const void *a, const void *b) +{ + struct gfs_quota_data *qd_a = *(struct gfs_quota_data **)a; + struct gfs_quota_data *qd_b = *(struct gfs_quota_data **)b; + int ret = 0; + + if (!test_bit(QDF_USER, &qd_a->qd_flags) != + !test_bit(QDF_USER, &qd_b->qd_flags)) { + if (test_bit(QDF_USER, &qd_a->qd_flags)) + ret = -1; + else + ret = 1; + } else { + if (qd_a->qd_id < qd_b->qd_id) + ret = -1; + else if (qd_a->qd_id > qd_b->qd_id) + ret = 1; + } + + return ret; +} + +/** + * do_quota_sync - Sync a bunch quota changes to the quota file + * @sdp: the filesystem + * @qda: an array of struct gfs_quota_data structures to be synced + * @num_qd: the number of elements in @qda + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +do_quota_sync(struct gfs_sbd *sdp, struct gfs_quota_data **qda, + unsigned int num_qd) +{ + struct gfs_inode *ip = sdp->sd_qinode; + struct gfs_alloc *al = NULL; + struct gfs_holder i_gh, *ghs; + struct gfs_quota q; + char buf[sizeof(struct gfs_quota)]; + uint64_t offset; + unsigned int qx, x; + int ar; + unsigned int nalloc = 0; + unsigned int data_blocks, ind_blocks; + int error; + + gfs_write_calc_reserv(ip, sizeof(struct gfs_quota), &data_blocks, + &ind_blocks); + + ghs = gmalloc(num_qd * sizeof(struct gfs_holder)); + + gfs_sort(qda, num_qd, sizeof (struct gfs_quota_data *), sort_qd); + for (qx = 0; qx < num_qd; qx++) { + error = gfs_glock_nq_init(qda[qx]->qd_gl, + LM_ST_EXCLUSIVE, + GL_NOCACHE, &ghs[qx]); + if (error) + goto fail; + } + + error = gfs_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); + if (error) + goto fail; + + for (x = 0; x < num_qd; x++) { + offset = (2 * (uint64_t)qda[x]->qd_id + + ((test_bit(QDF_USER, &qda[x]->qd_flags)) ? 0 : 1)) * + sizeof(struct gfs_quota); + + error = gfs_write_alloc_required(ip, offset, + sizeof(struct gfs_quota), + &ar); + if (error) + goto fail_gunlock; + + if (ar) + nalloc++; + } + + if (nalloc) { + al = gfs_alloc_get(ip); + + error = + gfs_quota_hold_m(ip, NO_QUOTA_CHANGE, + NO_QUOTA_CHANGE); + if (error) + goto fail_alloc; + + al->al_requested_meta = nalloc * (data_blocks + ind_blocks); + + error = gfs_inplace_reserve(ip); + if (error) + goto fail_qs; + + /* Trans may require: + two (journaled) data blocks, a dinode block, RG bitmaps to allocate from, + indirect blocks, and a quota block */ + + error = gfs_trans_begin(sdp, + 1 + al->al_rgd->rd_ri.ri_length + + num_qd * data_blocks + + nalloc * ind_blocks, + gfs_struct2blk(sdp, num_qd + 2, + sizeof(struct gfs_quota_tag))); + if (error) + goto fail_ipres; + } else { + /* Trans may require: + Data blocks, a dinode block, and quota blocks */ + + error = gfs_trans_begin(sdp, + 1 + data_blocks * num_qd, + gfs_struct2blk(sdp, num_qd, + sizeof(struct gfs_quota_tag))); + if (error) + goto fail_gunlock; + } + + for (x = 0; x < num_qd; x++) { + offset = (2 * (uint64_t)qda[x]->qd_id + + ((test_bit(QDF_USER, &qda[x]->qd_flags)) ? 0 : 1)) * + sizeof(struct gfs_quota); + + /* The quota file may not be a multiple of sizeof(struct gfs_quota) bytes. 
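+	   Each ID owns two consecutive slots: the user quota for id N sits
+	   at offset 2 * N * sizeof(struct gfs_quota) and the group quota
+	   at (2 * N + 1) * sizeof(struct gfs_quota), matching the offset
+	   calculation above.  Zeroing the buffer first makes a short read
+	   off the end of the file behave like reading zeros.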
*/ + memset(buf, 0, sizeof(struct gfs_quota)); + + error = gfs_internal_read(ip, buf, offset, + sizeof(struct gfs_quota)); + if (error < 0) + goto fail_end_trans; + + gfs_quota_in(&q, buf); + q.qu_value += qda[x]->qd_change_sync; + gfs_quota_out(&q, buf); + + error = gfs_internal_write(ip, buf, offset, + sizeof(struct gfs_quota)); + if (error < 0) + goto fail_end_trans; + else if (error != sizeof(struct gfs_quota)) { + error = -EIO; + goto fail_end_trans; + } + + if (test_bit(QDF_USER, &qda[x]->qd_flags)) + gfs_trans_add_quota(sdp, -qda[x]->qd_change_sync, + qda[x]->qd_id, NO_QUOTA_CHANGE); + else + gfs_trans_add_quota(sdp, -qda[x]->qd_change_sync, + NO_QUOTA_CHANGE, qda[x]->qd_id); + + memset(&qda[x]->qd_qb, 0, sizeof(struct gfs_quota_lvb)); + qda[x]->qd_qb.qb_magic = GFS_MAGIC; + qda[x]->qd_qb.qb_limit = q.qu_limit; + qda[x]->qd_qb.qb_warn = q.qu_warn; + qda[x]->qd_qb.qb_value = q.qu_value; + + gfs_quota_lvb_out(&qda[x]->qd_qb, qda[x]->qd_gl->gl_lvb); + clear_bit(GLF_LVB_INVALID, &qda[x]->qd_gl->gl_flags); + } + + gfs_trans_end(sdp); + + if (nalloc) { + GFS_ASSERT_SBD(al->al_alloced_meta, sdp,); + gfs_inplace_release(ip); + gfs_quota_unhold_m(ip); + gfs_alloc_put(ip); + } + + gfs_glock_dq_uninit(&i_gh); + + for (x = 0; x < num_qd; x++) + gfs_glock_dq_uninit(&ghs[x]); + + kfree(ghs); + + gfs_log_flush_glock(ip->i_gl); + + return 0; + + fail_end_trans: + gfs_trans_end(sdp); + + fail_ipres: + if (nalloc) + gfs_inplace_release(ip); + + fail_qs: + if (nalloc) + gfs_quota_unhold_m(ip); + + fail_alloc: + if (nalloc) + gfs_alloc_put(ip); + + fail_gunlock: + gfs_glock_dq_uninit(&i_gh); + + fail: + while (qx--) + gfs_glock_dq_uninit(&ghs[qx]); + + kfree(ghs); + + return error; +} + +/** + * glock_q - Acquire a lock for a quota entry + * @sdp: the filesystem + * @qd: the quota data structure to glock + * @force_refresh: If TRUE, always read from the quota file + * @q_gh: the glock holder for the quota lock + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +glock_q(struct gfs_sbd *sdp, struct gfs_quota_data *qd, int force_refresh, + struct gfs_holder *q_gh) +{ + struct gfs_holder i_gh; + struct gfs_quota q; + char buf[sizeof(struct gfs_quota)]; + int error; + + restart: + error = gfs_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh); + if (error) + return error; + + gfs_quota_lvb_in(&qd->qd_qb, qd->qd_gl->gl_lvb); + + if (force_refresh || + qd->qd_qb.qb_magic != GFS_MAGIC || + test_bit(GLF_LVB_INVALID, &qd->qd_gl->gl_flags)) { + gfs_glock_dq_uninit(q_gh); + error = gfs_glock_nq_init(qd->qd_gl, + LM_ST_EXCLUSIVE, GL_NOCACHE, + q_gh); + if (error) + return error; + + error = gfs_glock_nq_init(sdp->sd_qinode->i_gl, + LM_ST_SHARED, 0, + &i_gh); + if (error) + goto fail; + + memset(buf, 0, sizeof(struct gfs_quota)); + + error = gfs_internal_read(sdp->sd_qinode, buf, + (2 * (uint64_t)qd->qd_id + + ((test_bit(QDF_USER, &qd->qd_flags)) ? 
0 : 1)) * + sizeof(struct gfs_quota), + sizeof(struct gfs_quota)); + if (error < 0) + goto fail_gunlock; + + gfs_glock_dq_uninit(&i_gh); + + gfs_quota_in(&q, buf); + + memset(&qd->qd_qb, 0, sizeof(struct gfs_quota_lvb)); + qd->qd_qb.qb_magic = GFS_MAGIC; + qd->qd_qb.qb_limit = q.qu_limit; + qd->qd_qb.qb_warn = q.qu_warn; + qd->qd_qb.qb_value = q.qu_value; + + gfs_quota_lvb_out(&qd->qd_qb, qd->qd_gl->gl_lvb); + clear_bit(GLF_LVB_INVALID, &qd->qd_gl->gl_flags); + + gfs_glock_dq_uninit(q_gh); + force_refresh = FALSE; + goto restart; + } + + return 0; + + fail_gunlock: + gfs_glock_dq_uninit(&i_gh); + + fail: + gfs_glock_dq_uninit(q_gh); + + return error; +} + +/** + * gfs_quota_hold_m - Hold the quota structures for up to 4 IDs + * @ip: Two of the IDs are the UID and GID from this file + * @uid: a UID or the constant NO_QUOTA_CHANGE + * @gid: a GID or the constant NO_QUOTA_CHANGE + * + * The struct gfs_quota_data structures representing the locks are + * stored in the ip->i_alloc->al_qd array. + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_quota_hold_m(struct gfs_inode *ip, uint32_t uid, uint32_t gid) +{ + struct gfs_sbd *sdp = ip->i_sbd; + struct gfs_alloc *al = ip->i_alloc; + unsigned int x = 0; + int error; + + GFS_ASSERT_INODE(al && !al->al_qd_num && + !test_bit(GIF_QD_LOCKED, &ip->i_flags), ip,); + + if (!sdp->sd_tune.gt_quota_account) + return 0; + + error = gfs_quota_get(sdp, TRUE, ip->i_di.di_uid, + CREATE, &al->al_qd[x]); + if (error) + goto fail; + x++; + + error = gfs_quota_get(sdp, FALSE, ip->i_di.di_gid, + CREATE, &al->al_qd[x]); + if (error) + goto fail; + x++; + + if (uid != NO_QUOTA_CHANGE) { + error = gfs_quota_get(sdp, TRUE, uid, + CREATE, &al->al_qd[x]); + if (error) + goto fail; + x++; + } + + if (gid != NO_QUOTA_CHANGE) { + error = gfs_quota_get(sdp, FALSE, gid, + CREATE, &al->al_qd[x]); + if (error) + goto fail; + x++; + } + + al->al_qd_num = x; + + return 0; + + fail: + if (x) { + al->al_qd_num = x; + gfs_quota_unhold_m(ip); + } + + return error; +} + +/** + * gfs_quota_unhold_m - throw away some quota locks + * @ip: the inode who's ip->i_alloc->al_qd array holds the structures + * + */ + +void +gfs_quota_unhold_m(struct gfs_inode *ip) +{ + struct gfs_sbd *sdp = ip->i_sbd; + struct gfs_alloc *al = ip->i_alloc; + unsigned int x; + + GFS_ASSERT_INODE(al && + !test_bit(GIF_QD_LOCKED, &ip->i_flags), ip,); + + for (x = 0; x < al->al_qd_num; x++) { + gfs_quota_put(sdp, al->al_qd[x]); + al->al_qd[x] = NULL; + } + al->al_qd_num = 0; +} + +/** + * gfs_quota_lock_m - Acquire the quota locks for up to 4 IDs + * @ip: Two of the IDs are the UID and GID from this file + * @uid: a UID or the constant NO_QUOTA_CHANGE + * @gid: a GID or the constant NO_QUOTA_CHANGE + * + * The struct gfs_quota_data structures representing the locks are + * stored in the ip->i_alloc->al_qd array. 
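Before any of these glocks are acquired, gfs_quota_lock_m() below runs the array through gfs_sort() with the sort_qd() comparator defined earlier, so every process takes quota glocks in one global order (user IDs ascending, then group IDs ascending). That is the usual way to keep two holders of overlapping sets from locking in opposite orders. A minimal stand-alone sketch of the ordering rule follows; struct qd_key and its fields are simplified stand-ins for struct gfs_quota_data, not the kernel layout:

    #include <stdio.h>
    #include <stdlib.h>

    /* Simplified stand-in for struct gfs_quota_data: only the two keys
       that sort_qd() actually compares. */
    struct qd_key {
            int user;               /* 1 = user quota, 0 = group quota */
            unsigned int id;        /* uid or gid */
    };

    /* Same ordering as sort_qd(): user-quota entries first, then group
       entries, each group sorted by ascending ID. */
    static int qd_cmp(const void *a, const void *b)
    {
            const struct qd_key *qa = *(const struct qd_key *const *)a;
            const struct qd_key *qb = *(const struct qd_key *const *)b;

            if (qa->user != qb->user)
                    return qa->user ? -1 : 1;
            if (qa->id != qb->id)
                    return (qa->id < qb->id) ? -1 : 1;
            return 0;
    }

    int main(void)
    {
            struct qd_key g7 = {0, 7}, u9 = {1, 9}, g2 = {0, 2}, u3 = {1, 3};
            struct qd_key *qd[] = {&g7, &u9, &g2, &u3};
            int i;

            qsort(qd, 4, sizeof(qd[0]), qd_cmp);

            for (i = 0; i < 4; i++)     /* user 3, user 9, group 2, group 7 */
                    printf("%s %u\n", qd[i]->user ? "user" : "group", qd[i]->id);
            return 0;
    }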
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+int
+gfs_quota_lock_m(struct gfs_inode *ip, uint32_t uid, uint32_t gid)
+{
+	struct gfs_sbd *sdp = ip->i_sbd;
+	struct gfs_alloc *al = ip->i_alloc;
+	unsigned int x;
+	int error;
+
+	error = gfs_quota_hold_m(ip, uid, gid);
+	if (error)
+		return error;
+
+	if (!sdp->sd_tune.gt_quota_enforce)
+		return 0;
+	if (capable(CAP_SYS_RESOURCE))
+		return 0;
+
+	gfs_sort(al->al_qd, al->al_qd_num,
+		 sizeof(struct gfs_quota_data *), sort_qd);
+
+	for (x = 0; x < al->al_qd_num; x++) {
+		error = glock_q(sdp, al->al_qd[x], FALSE, &al->al_qd_ghs[x]);
+		if (error)
+			goto fail;
+	}
+
+	set_bit(GIF_QD_LOCKED, &ip->i_flags);
+
+	return 0;
+
+ fail:
+	while (x--)
+		gfs_glock_dq_uninit(&al->al_qd_ghs[x]);
+
+	return error;
+}
+
+/**
+ * gfs_quota_unlock_m - drop some quota locks
+ * @ip: the inode whose ip->i_alloc->al_qd array holds the locks
+ *
+ */
+
+void
+gfs_quota_unlock_m(struct gfs_inode *ip)
+{
+	struct gfs_sbd *sdp = ip->i_sbd;
+	struct gfs_alloc *al = ip->i_alloc;
+	struct gfs_quota_data *qd, *qda[4];
+	int64_t value;
+	unsigned int count = 0;
+	unsigned int x;
+	int do_sync;
+
+	if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags))
+		goto out;
+
+	for (x = 0; x < al->al_qd_num; x++) {
+		qd = al->al_qd[x];
+
+		spin_lock(&sdp->sd_quota_lock);
+		value = qd->qd_change_new + qd->qd_change_ic;
+		spin_unlock(&sdp->sd_quota_lock);
+
+		do_sync = TRUE;
+		if (!qd->qd_qb.qb_limit)
+			do_sync = FALSE;
+		else if (qd->qd_qb.qb_value >= (int64_t)qd->qd_qb.qb_limit)
+			do_sync = FALSE;
+		else {
+			int64_t v;
+			v = value * gfs_num_journals(sdp) * sdp->sd_tune.gt_quota_scale_num;
+			do_div(v, sdp->sd_tune.gt_quota_scale_den);
+			v += qd->qd_qb.qb_value;
+			if (v < (int64_t)qd->qd_qb.qb_limit)
+				do_sync = FALSE;
+		}
+
+		gfs_glock_dq_uninit(&al->al_qd_ghs[x]);
+
+		if (do_sync) {
+			gfs_log_flush(sdp);
+			if (quota_trylock(sdp, qd))
+				qda[count++] = qd;
+		}
+	}
+
+	if (count) {
+		do_quota_sync(sdp, qda, count);
+
+		for (x = 0; x < count; x++)
+			quota_unlock(sdp, qda[x]);
+	}
+
+ out:
+	gfs_quota_unhold_m(ip);
+}
+
+/**
+ * print_quota_message - print a message to the user's tty about quotas
+ * @sdp: the filesystem
+ * @qd: the quota ID that the message is about
+ * @type: the type of message ("exceeded" or "warning")
+ *
+ */
+
+static void
+print_quota_message(struct gfs_sbd *sdp, struct gfs_quota_data *qd, char *type)
+{
+	char *line = gmalloc(256);
+	int len;
+	struct tty_struct *tty;
+
+	len = snprintf(line, 256, "GFS: fsid=%s: quota %s for %s %u\r\n",
+		       sdp->sd_fsname, type,
+		       (test_bit(QDF_USER, &qd->qd_flags)) ?
"user" : "group", + qd->qd_id); + + if (current->signal) { + tty = current->signal->tty; + if (tty && tty->driver->write) + tty->driver->write(tty, 0, line, len); + } + + kfree(line); +} + +/** + * gfs_quota_check - Check to see if a block allocation is possible + * @ip: the inode who's ip->i_res.ir_qd array holds the quota locks + * @uid: the UID the block is allocated for + * @gid: the GID the block is allocated for + * + */ + +int +gfs_quota_check(struct gfs_inode *ip, uint32_t uid, uint32_t gid) +{ + struct gfs_sbd *sdp = ip->i_sbd; + struct gfs_alloc *al = ip->i_alloc; + struct gfs_quota_data *qd; + int64_t value; + unsigned int x; + int error = 0; + + if (!al) + return 0; + + for (x = 0; x < al->al_qd_num; x++) { + qd = al->al_qd[x]; + + if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || + (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags)))) + continue; + + spin_lock(&sdp->sd_quota_lock); + value = qd->qd_change_new + qd->qd_change_ic; + spin_unlock(&sdp->sd_quota_lock); + value += qd->qd_qb.qb_value; + + if (qd->qd_qb.qb_limit && (int64_t)qd->qd_qb.qb_limit < value) { + print_quota_message(sdp, qd, "exceeded"); + error = -EDQUOT; + break; + } else if (qd->qd_qb.qb_warn && + (int64_t)qd->qd_qb.qb_warn < value && + time_after_eq(jiffies, + qd->qd_last_warn + + sdp->sd_tune.gt_quota_warn_period * HZ)) { + print_quota_message(sdp, qd, "warning"); + qd->qd_last_warn = jiffies; + } + } + + return error; +} + +/** + * gfs_quota_sync - Sync quota changes to the quota file + * @sdp: the filesystem + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_quota_sync(struct gfs_sbd *sdp) +{ + struct gfs_quota_data **qda; + unsigned int max_qd = sdp->sd_tune.gt_quota_simul_sync; + unsigned int num_qd; + unsigned int x; + int error = 0; + + sdp->sd_quota_sync_gen++; + + qda = gmalloc(max_qd * sizeof(struct gfs_quota_data *)); + + memset(qda, 0, max_qd * sizeof(struct gfs_quota_data *)); + + do { + num_qd = 0; + + for (;;) { + qda[num_qd] = quota_find(sdp); + if (!qda[num_qd]) + break; + + if (++num_qd == max_qd) + break; + } + + if (num_qd) { + error = do_quota_sync(sdp, qda, num_qd); + if (!error) + for (x = 0; x < num_qd; x++) + qda[x]->qd_sync_gen = + sdp->sd_quota_sync_gen; + + for (x = 0; x < num_qd; x++) + quota_unlock(sdp, qda[x]); + } + } + while (!error && num_qd == max_qd); + + kfree(qda); + + return error; +} + +/** + * gfs_quota_refresh - Refresh the LVB for a given quota ID + * @sdp: the filesystem + * @arg: a pointer to a struct gfs_quota_name in user space + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_quota_refresh(struct gfs_sbd *sdp, void *arg) +{ + struct gfs_quota_name qn; + struct gfs_quota_data *qd; + struct gfs_holder q_gh; + int error; + + if (copy_from_user(&qn, arg, sizeof(struct gfs_quota_name))) + return -EFAULT; + + error = gfs_quota_get(sdp, qn.qn_user, qn.qn_id, CREATE, &qd); + if (error) + return error; + + error = glock_q(sdp, qd, TRUE, &q_gh); + if (!error) + gfs_glock_dq_uninit(&q_gh); + + gfs_quota_put(sdp, qd); + + return error; +} + +/** + * gfs_quota_read - Read the info a given quota ID + * @sdp: the filesystem + * @arg: a pointer to a gfs_quota_refresh_t in user space + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_quota_read(struct gfs_sbd *sdp, void *arg) +{ + struct gfs_quota_name qn; + struct gfs_quota_data *qd; + struct gfs_holder q_gh; + struct gfs_quota q; + int error; + + if (copy_from_user(&qn, arg, sizeof(struct gfs_quota_name))) + return -EFAULT; + + if (((qn.qn_user) ? 
+ (qn.qn_id != current->fsuid) : + (!in_group_p(qn.qn_id))) && + !capable(CAP_SYS_ADMIN)) + return -EACCES; + + error = gfs_quota_get(sdp, qn.qn_user, qn.qn_id, CREATE, &qd); + if (error) + return error; + + error = glock_q(sdp, qd, FALSE, &q_gh); + if (error) + goto out; + + memset(&q, 0, sizeof(struct gfs_quota)); + q.qu_limit = qd->qd_qb.qb_limit; + q.qu_warn = qd->qd_qb.qb_warn; + q.qu_value = qd->qd_qb.qb_value; + + spin_lock(&sdp->sd_quota_lock); + q.qu_value += qd->qd_change_new + qd->qd_change_ic; + spin_unlock(&sdp->sd_quota_lock); + + gfs_glock_dq_uninit(&q_gh); + + out: + gfs_quota_put(sdp, qd); + + if (!error && + copy_to_user((char *)arg + sizeof(struct gfs_quota_name), + &q, sizeof(struct gfs_quota))) + error = -EFAULT; + + return error; +} diff -urN linux-orig/fs/gfs/quota.h linux-patched/fs/gfs/quota.h --- linux-orig/fs/gfs/quota.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/quota.h 2004-06-30 13:27:49.356708115 -0500 @@ -0,0 +1,40 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __QUOTA_DOT_H__ +#define __QUOTA_DOT_H__ + +#define NO_QUOTA_CHANGE ((uint32_t)-1) + +int gfs_quota_get(struct gfs_sbd *sdp, int user, uint32_t id, int create, + struct gfs_quota_data **qdp); +void gfs_quota_hold(struct gfs_sbd *sdp, struct gfs_quota_data *qd); +void gfs_quota_put(struct gfs_sbd *sdp, struct gfs_quota_data *qd); + +int gfs_quota_merge(struct gfs_sbd *sdp, struct gfs_quota_tag *tag); +void gfs_quota_scan(struct gfs_sbd *sdp); +void gfs_quota_cleanup(struct gfs_sbd *sdp); + +int gfs_quota_hold_m(struct gfs_inode *ip, uint32_t uid, uint32_t gid); +void gfs_quota_unhold_m(struct gfs_inode *ip); + +int gfs_quota_lock_m(struct gfs_inode *ip, uint32_t uid, uint32_t gid); +void gfs_quota_unlock_m(struct gfs_inode *ip); + +int gfs_quota_check(struct gfs_inode *ip, uint32_t uid, uint32_t gid); + +int gfs_quota_sync(struct gfs_sbd *sdp); +int gfs_quota_refresh(struct gfs_sbd *sdp, void *arg); +int gfs_quota_read(struct gfs_sbd *sdp, void *arg); + +#endif /* __QUOTA_DOT_H__ */ diff -urN linux-orig/fs/gfs/recovery.c linux-patched/fs/gfs/recovery.c --- linux-orig/fs/gfs/recovery.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/recovery.c 2004-06-30 13:27:49.357707883 -0500 @@ -0,0 +1,749 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
+**
+*******************************************************************************
+******************************************************************************/
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "gfs.h"
+#include "dio.h"
+#include "glock.h"
+#include "glops.h"
+#include "lops.h"
+#include "recovery.h"
+
+#define bn2seg(bn) (((uint32_t)((bn) - jdesc->ji_addr)) / sdp->sd_sb.sb_seg_size)
+#define seg2bn(seg) ((seg) * sdp->sd_sb.sb_seg_size + jdesc->ji_addr)
+
+struct dirty_j {
+	struct list_head dj_list;
+	unsigned int dj_jid;
+	struct gfs_jindex dj_desc;
+};
+
+/**
+ * gfs_add_dirty_j - add a jid to the list of dirty journals
+ * @sdp: the filesystem
+ * @jid: the journal ID number
+ *
+ */
+
+void
+gfs_add_dirty_j(struct gfs_sbd *sdp, unsigned int jid)
+{
+	struct dirty_j *dj;
+
+	dj = gmalloc(sizeof(struct dirty_j));
+	memset(dj, 0, sizeof(struct dirty_j));
+
+	dj->dj_jid = jid;
+
+	spin_lock(&sdp->sd_dirty_j_lock);
+	list_add(&dj->dj_list, &sdp->sd_dirty_j);
+	spin_unlock(&sdp->sd_dirty_j_lock);
+}
+
+/**
+ * get_dirty_j - return a dirty journal from the list
+ * @sdp: the filesystem
+ *
+ * Returns: a struct dirty_j or NULL
+ */
+
+static struct dirty_j *
+get_dirty_j(struct gfs_sbd *sdp)
+{
+	struct dirty_j *dj = NULL;
+
+	spin_lock(&sdp->sd_dirty_j_lock);
+	if (!list_empty(&sdp->sd_dirty_j)) {
+		dj = list_entry(sdp->sd_dirty_j.prev, struct dirty_j, dj_list);
+		list_del(&dj->dj_list);
+	}
+	spin_unlock(&sdp->sd_dirty_j_lock);
+
+	return dj;
+}
+
+/**
+ * gfs_clear_dirty_j - destroy the list of dirty journals
+ * @sdp: the filesystem
+ *
+ */
+
+void
+gfs_clear_dirty_j(struct gfs_sbd *sdp)
+{
+	struct dirty_j *dj;
+	for (;;) {
+		dj = get_dirty_j(sdp);
+		if (!dj)
+			break;
+		kfree(dj);
+	}
+}
+
+/**
+ * get_log_header - read the log header for a given segment
+ * @sdp: the filesystem
+ * @jdesc: the journal
+ * @gl: the journal's glock
+ * @seg: the segment to look at
+ * @lh: the log header to return
+ *
+ * Read the log header for a given segment in a given journal.  Do a few
+ * sanity checks on it.
+ *
+ * Returns: 0 on success, 1 if the header was invalid or incomplete, and -EXXX on error
+ */
+
+static int
+get_log_header(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
+	       struct gfs_glock *gl, uint32_t seg, struct gfs_log_header *lh)
+{
+	struct buffer_head *bh;
+	struct gfs_log_header lh2;
+	int error;
+
+	error = gfs_dread(sdp, seg2bn(seg), gl, DIO_START | DIO_WAIT, &bh);
+	if (error)
+		return error;
+
+	gfs_log_header_in(lh, bh->b_data);
+	gfs_log_header_in(&lh2,
+			  bh->b_data + GFS_BASIC_BLOCK -
+			  sizeof(struct gfs_log_header));
+
+	brelse(bh);
+
+	if (memcmp(lh, &lh2, sizeof(struct gfs_log_header)) != 0 ||
+	    lh->lh_header.mh_magic != GFS_MAGIC ||
+	    lh->lh_header.mh_type != GFS_METATYPE_LH)
+		error = 1;
+
+	return error;
+}
+
+/**
+ * find_good_lh - find a good log header
+ * @sdp: the filesystem
+ * @jdesc: the journal
+ * @gl: the journal's glock
+ * @seg: the segment to start searching from (it's also filled in with a new value.)
+ * @lh: the log header to fill in
+ * @forward: if true search forward in the log, else search backward
+ *
+ * Call get_log_header() to get a log header for a segment, but if the
+ * segment is bad, either scan forward or backward until we find a good one.
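The check get_log_header() applies is compact enough to model on its own: a header counts as valid only when the two copies written into one basic block are bit-for-bit identical and the magic and type fields are right, so a torn write shows up as a mismatch. A stand-alone sketch follows; the struct, BLOCK_SIZE, and MAGIC values here are illustrative stand-ins, not the on-disk GFS layout:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    #define BLOCK_SIZE 512          /* stand-in for GFS_BASIC_BLOCK */
    #define MAGIC 0x01161970u       /* stand-in for GFS_MAGIC */

    /* Illustrative header; the real struct gfs_log_header has more fields. */
    struct log_header {
            uint32_t magic;
            uint32_t type;
            uint64_t sequence;
    };

    /* 0 = block holds a complete header, 1 = torn or garbage, mirroring
       get_log_header()'s return convention. */
    static int check_header(const unsigned char *block, struct log_header *out)
    {
            struct log_header first, last;

            memcpy(&first, block, sizeof(first));
            memcpy(&last, block + BLOCK_SIZE - sizeof(last), sizeof(last));

            if (memcmp(&first, &last, sizeof(first)) != 0 || first.magic != MAGIC)
                    return 1;

            *out = first;
            return 0;
    }

    int main(void)
    {
            unsigned char block[BLOCK_SIZE] = {0};
            struct log_header h = {MAGIC, 2, 42}, got;

            memcpy(block, &h, sizeof(h));   /* only the first copy: "torn" */
            assert(check_header(block, &got) == 1);

            memcpy(block + BLOCK_SIZE - sizeof(h), &h, sizeof(h));
            assert(check_header(block, &got) == 0 && got.sequence == 42);
            return 0;
    }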
+ * + * Returns: 0 on success, -EXXX on failure + */ + +static int +find_good_lh(struct gfs_sbd *sdp, struct gfs_jindex *jdesc, + struct gfs_glock *gl, uint32_t *seg, struct gfs_log_header *lh, + int forward) +{ + int error; + uint32_t orig_seg = *seg; + + for (;;) { + error = get_log_header(sdp, jdesc, gl, *seg, lh); + if (error <= 0) + return error; + + if (forward) { + if (++*seg == jdesc->ji_nsegment) + *seg = 0; + } else { + if (*seg-- == 0) + *seg = jdesc->ji_nsegment - 1; + } + + GFS_ASSERT_SBD(*seg != orig_seg, sdp,); + } +} + +/** + * verify_jhead - make sure we've found the head of the log + * @sdp: the filesystem + * @jdesc: the journal + * @gl: the journal's glock + * @head: this is filled in with the log descriptor of the head + * + * At this point, seg and lh should be either the head of the log or just + * before. Scan forward until we find the head. + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +verify_jhead(struct gfs_sbd *sdp, struct gfs_jindex *jdesc, + struct gfs_glock *gl, struct gfs_log_header *head) +{ + struct gfs_log_header lh; + uint32_t seg; + int error; + + seg = bn2seg(head->lh_first); + + for (;;) { + if (++seg == jdesc->ji_nsegment) + seg = 0; + + error = get_log_header(sdp, jdesc, gl, seg, &lh); + if (error < 0) + return error; + + if (error == 1) + continue; + if (lh.lh_sequence == head->lh_sequence) + continue; + + if (lh.lh_sequence < head->lh_sequence) + break; + + memcpy(head, &lh, sizeof(struct gfs_log_header)); + } + + return 0; +} + +/** + * gfs_find_jhead - find the head of a log + * @sdp: the filesystem + * @jdesc: the journal + * @gl: the journal's glock + * @head: the log descriptor for the head of the log is returned here + * + * Do a binary search of a journal and find the valid log entry with the + * highest sequence number. (i.e. the log head) + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_find_jhead(struct gfs_sbd *sdp, struct gfs_jindex *jdesc, + struct gfs_glock *gl, struct gfs_log_header *head) +{ + struct gfs_log_header lh1, lh_m; + uint32_t seg1, seg2, seg_m; + int error; + + seg1 = 0; + seg2 = jdesc->ji_nsegment - 1; + + for (;;) { + seg_m = (seg1 + seg2) / 2; + + error = find_good_lh(sdp, jdesc, gl, &seg1, &lh1, TRUE); + if (error) + break; + + if (seg1 == seg_m) { + error = verify_jhead(sdp, jdesc, gl, &lh1); + memcpy(head, &lh1, sizeof(struct gfs_log_header)); + break; + } + + error = find_good_lh(sdp, jdesc, gl, &seg_m, &lh_m, FALSE); + if (error) + break; + + if (lh1.lh_sequence <= lh_m.lh_sequence) + seg1 = seg_m; + else + seg2 = seg_m; + } + + return error; +} + +/** + * gfs_increment_blkno - move to the next block in a journal + * @sdp: the filesystem + * @jdesc: the journal + * @gl: the journal's glock + * @addr: the block number to increment + * @skip_header: if this is TRUE, skip log headers + * + * Replace @addr with the location of the next block in the log. + * Take care of journal wrap and skip of log header if necessary. 
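In stand-alone form, the wrap-and-skip walk that gfs_increment_blkno() performs looks like the sketch below. The geometry constants are invented for the example, and the kernel version additionally reads and sanity-checks each header block it skips:

    #include <stdint.h>
    #include <stdio.h>

    #define SEG_SIZE  16u   /* blocks per segment (illustrative) */
    #define NSEGMENTS  4u   /* segments in the journal (illustrative) */
    #define JI_ADDR  100u   /* first block of the journal (illustrative) */

    /* Advance to the next data block, wrapping at the journal end and
       skipping the log-header block that starts every segment. */
    static uint64_t next_blkno(uint64_t addr)
    {
            addr++;

            if (addr == JI_ADDR + (uint64_t)NSEGMENTS * SEG_SIZE)
                    addr -= (uint64_t)NSEGMENTS * SEG_SIZE;  /* wrap */

            if ((addr - JI_ADDR) % SEG_SIZE == 0)
                    addr++;  /* a segment starts with a header block */

            return addr;
    }

    int main(void)
    {
            /* Step across a segment boundary, then across the wrap point. */
            printf("%llu\n", (unsigned long long)next_blkno(JI_ADDR + 15));
            /* prints 117: block 116 is a segment header, so it is skipped */
            printf("%llu\n", (unsigned long long)next_blkno(JI_ADDR + 63));
            /* prints 101: wraps to 100, which is a header, so it is skipped */
            return 0;
    }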
+ * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_increment_blkno(struct gfs_sbd *sdp, struct gfs_jindex *jdesc, + struct gfs_glock *gl, uint64_t *addr, int skip_headers) +{ + struct gfs_log_header header; + int error; + + (*addr)++; + + /* Handle journal wrap */ + + if (*addr == seg2bn(jdesc->ji_nsegment)) + *addr -= jdesc->ji_nsegment * sdp->sd_sb.sb_seg_size; + + gfs_start_ra(gl, *addr, + jdesc->ji_addr + + jdesc->ji_nsegment * sdp->sd_sb.sb_seg_size - *addr); + + /* Handle landing on a header block */ + + if (skip_headers && !do_mod(*addr, sdp->sd_sb.sb_seg_size)) { + error = get_log_header(sdp, jdesc, gl, bn2seg(*addr), &header); + if (error < 0) + return error; + + GFS_ASSERT_SBD(!error, sdp,); /* Corrupt headers here are bad */ + GFS_ASSERT_SBD(header.lh_first != *addr, sdp, + gfs_log_header_print(&header); + printk("*addr = %"PRIu64"\n", *addr);); + + (*addr)++; + /* Can't wrap here */ + } + + return 0; +} + +/** + * foreach_descriptor - go through the active part of the log + * @sdp: the filesystem + * @jdesc: the journal + * @gl: the journal's glock + * @start: the first log header in the active region + * @end: the last log header (don't process the contents of this entry)) + * @pass: the recovery pass + * + * Call a given function once for every log descriptor in the active + * portion of the log. + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +foreach_descriptor(struct gfs_sbd *sdp, struct gfs_jindex *jdesc, + struct gfs_glock *gl, uint64_t start, uint64_t end, + unsigned int pass) +{ + struct gfs_log_header header; + struct gfs_log_descriptor desc; + struct buffer_head *bh; + int error = 0; + + while (start != end) { + GFS_ASSERT_SBD(!do_mod(start, sdp->sd_sb.sb_seg_size), sdp,); + + error = get_log_header(sdp, jdesc, gl, bn2seg(start), &header); + if (error < 0) + return error; + + GFS_ASSERT_SBD(!error, sdp,); /* Corrupt headers are bad */ + GFS_ASSERT_SBD(header.lh_first == start, sdp, + gfs_log_header_print(&header); + printk("start = %"PRIu64"\n", start);); + + start++; + + for (;;) { + error = gfs_dread(sdp, start, gl, DIO_START | DIO_WAIT, &bh); + if (error) + return error; + + gfs_metatype_check(sdp, bh, GFS_METATYPE_LD); + gfs_desc_in(&desc, bh->b_data); + + brelse(bh); + + if (desc.ld_type != GFS_LOG_DESC_LAST) { + error = LO_SCAN_ELEMENTS(sdp, jdesc, gl, start, + &desc, pass); + if (error) + return error; + + while (desc.ld_length--) { + error = gfs_increment_blkno(sdp, jdesc, gl, + &start, TRUE); + if (error) + return error; + } + } else { + while (desc.ld_length--) { + error = gfs_increment_blkno(sdp, jdesc, gl, + &start, + !!desc.ld_length); + if (error) + return error; + } + + break; + } + } + } + + return error; +} + +/** + * clean_journal - mark a dirty journal as being clean + * @sdp: the filesystem + * @jdesc: the journal + * @gl: the journal's glock + * @head: the head journal to start from + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +clean_journal(struct gfs_sbd *sdp, struct gfs_jindex *jdesc, + struct gfs_glock *gl, struct gfs_log_header *head) +{ + struct gfs_log_header lh; + struct gfs_log_descriptor desc; + struct buffer_head *bh; + uint32_t seg; + uint64_t blkno; + int error; + + seg = bn2seg(head->lh_first); + + for (;;) { + if (++seg == jdesc->ji_nsegment) + seg = 0; + + error = get_log_header(sdp, jdesc, gl, seg, &lh); + if (error < 0) + return error; + + /* Rewrite corrupt header blocks */ + + if (error == 1) { + bh = gfs_dgetblk(sdp, seg2bn(seg), gl); + + gfs_prep_new_buffer(bh); + 
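/* The header is corrupt or torn; rewrite it from the head
+			   header, filling in both of the copies that
+			   get_log_header() compares, so later scans of this
+			   journal see only valid headers. */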
+			gfs_buffer_clear(bh);
+			gfs_log_header_out(head, bh->b_data);
+			gfs_log_header_out(head,
+					   bh->b_data + GFS_BASIC_BLOCK -
+					   sizeof(struct gfs_log_header));
+
+			error = gfs_dwrite(sdp, bh, DIO_DIRTY | DIO_START | DIO_WAIT);
+			brelse(bh);
+			if (error)
+				return error;
+		}
+
+		/* Stop when we get to the end of the log. */
+
+		if (lh.lh_sequence < head->lh_sequence)
+			break;
+	}
+
+	/* Build a "last" descriptor for the transaction we are
+	   about to commit by writing the shutdown header. */
+
+	memset(&desc, 0, sizeof(struct gfs_log_descriptor));
+	desc.ld_header.mh_magic = GFS_MAGIC;
+	desc.ld_header.mh_type = GFS_METATYPE_LD;
+	desc.ld_header.mh_format = GFS_FORMAT_LD;
+	desc.ld_type = GFS_LOG_DESC_LAST;
+	desc.ld_length = 0;
+
+	for (blkno = head->lh_first + 1; blkno != seg2bn(seg);) {
+		if (do_mod(blkno, sdp->sd_sb.sb_seg_size))
+			desc.ld_length++;
+		if (++blkno == seg2bn(jdesc->ji_nsegment))
+			blkno -= jdesc->ji_nsegment * sdp->sd_sb.sb_seg_size;
+	}
+
+	/* Write the descriptor */
+
+	bh = gfs_dgetblk(sdp, head->lh_first + 1, gl);
+
+	gfs_prep_new_buffer(bh);
+	gfs_buffer_clear(bh);
+	gfs_desc_out(&desc, bh->b_data);
+
+	error = gfs_dwrite(sdp, bh, DIO_DIRTY | DIO_START | DIO_WAIT);
+	brelse(bh);
+	if (error)
+		return error;
+
+	/* Build a log header that says the journal is clean */
+
+	memset(&lh, 0, sizeof(struct gfs_log_header));
+	lh.lh_header.mh_magic = GFS_MAGIC;
+	lh.lh_header.mh_type = GFS_METATYPE_LH;
+	lh.lh_header.mh_format = GFS_FORMAT_LH;
+	lh.lh_flags = GFS_LOG_HEAD_UNMOUNT;
+	lh.lh_first = seg2bn(seg);
+	lh.lh_sequence = head->lh_sequence + 1;
+	/* Don't care about tail */
+	lh.lh_last_dump = head->lh_last_dump;
+
+	/* Write the header */
+
+	bh = gfs_dgetblk(sdp, lh.lh_first, gl);
+
+	gfs_prep_new_buffer(bh);
+	gfs_buffer_clear(bh);
+	gfs_log_header_out(&lh, bh->b_data);
+	gfs_log_header_out(&lh,
+			   bh->b_data + GFS_BASIC_BLOCK -
+			   sizeof(struct gfs_log_header));
+
+	error = gfs_dwrite(sdp, bh, DIO_DIRTY | DIO_START | DIO_WAIT);
+	brelse(bh);
+
+	return error;
+}
+
+/**
+ * gfs_recover_journal - recover a given journal
+ * @sdp: the filesystem
+ * @jid: the number of the journal to recover
+ * @jdesc: the struct gfs_jindex describing the journal
+ * @wait: Don't return until the journal is clean (or an error is encountered)
+ *
+ * Acquire the journal's lock, check to see if the journal is clean, and
+ * do recovery if necessary.
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+int
+gfs_recover_journal(struct gfs_sbd *sdp,
+		    unsigned int jid, struct gfs_jindex *jdesc,
+		    int wait)
+{
+	struct gfs_log_header head;
+	struct gfs_holder j_gh, t_gh;
+	unsigned long t;
+	int error;
+
+	printk("GFS: fsid=%s: jid=%u: Trying to acquire journal lock...\n",
+	       sdp->sd_fsname, jid);
+
+	/* Acquire the journal lock so we can do recovery */
+
+	error = gfs_glock_nq_num(sdp,
+				 jdesc->ji_addr, &gfs_meta_glops,
+				 LM_ST_EXCLUSIVE,
+				 LM_FLAG_NOEXP |
+				 ((wait) ?
0 : LM_FLAG_TRY) | + GL_NOCACHE, &j_gh); + switch (error) { + case 0: + break; + + case GLR_TRYFAILED: + GFS_ASSERT_SBD(!wait, sdp,); + printk("GFS: fsid=%s: jid=%u: Busy\n", sdp->sd_fsname, jid); + error = 0; + + default: + goto fail; + }; + + printk("GFS: fsid=%s: jid=%u: Looking at journal...\n", + sdp->sd_fsname, jid); + + error = gfs_find_jhead(sdp, jdesc, j_gh.gh_gl, &head); + if (error) + goto fail_gunlock; + + if (!(head.lh_flags & GFS_LOG_HEAD_UNMOUNT)) { + if (test_bit(SDF_ROFS, &sdp->sd_flags)) { + printk("GFS: fsid=%s: jid=%u: Can't replay: read-only FS\n", + sdp->sd_fsname, jid); + error = -EROFS; + goto fail_gunlock; + } + + printk("GFS: fsid=%s: jid=%u: Acquiring the transaction lock...\n", + sdp->sd_fsname, jid); + + t = jiffies; + + /* Acquire an exclusive hold on the transaction lock */ + + error = gfs_glock_nq_init(sdp->sd_trans_gl, + LM_ST_EXCLUSIVE, + LM_FLAG_NOEXP | + LM_FLAG_PRIORITY | + GL_NOCACHE, + &t_gh); + if (error) + goto fail_gunlock; + + if (test_bit(SDF_ROFS, &sdp->sd_flags)) { + printk("GFS: fsid=%s: jid=%u: Can't replay: read-only FS\n", + sdp->sd_fsname, jid); + error = -EROFS; + goto fail_gunlock_tr; + } + + printk("GFS: fsid=%s: jid=%u: Replaying journal...\n", + sdp->sd_fsname, jid); + + set_bit(GLF_DIRTY, &j_gh.gh_gl->gl_flags); + + LO_BEFORE_SCAN(sdp, jid, &head, GFS_RECPASS_A1); + + error = foreach_descriptor(sdp, jdesc, j_gh.gh_gl, + head.lh_tail, head.lh_first, + GFS_RECPASS_A1); + if (error) + goto fail_gunlock_tr; + + LO_AFTER_SCAN(sdp, jid, GFS_RECPASS_A1); + + gfs_replay_wait(sdp); + + error = clean_journal(sdp, jdesc, j_gh.gh_gl, &head); + if (error) + goto fail_gunlock_tr; + + gfs_glock_dq_uninit(&t_gh); + + t = DIV_RU(jiffies - t, HZ); + + printk("GFS: fsid=%s: jid=%u: Journal replayed in %lus\n", + sdp->sd_fsname, jid, t); + } + + sdp->sd_lockstruct.ls_ops->lm_recovery_done(sdp->sd_lockstruct.ls_lockspace, + jid, + LM_RD_SUCCESS); + + gfs_glock_dq_uninit(&j_gh); + + printk("GFS: fsid=%s: jid=%u: Done\n", sdp->sd_fsname, jid); + + return 0; + + fail_gunlock_tr: + gfs_replay_wait(sdp); + gfs_glock_dq_uninit(&t_gh); + + fail_gunlock: + gfs_glock_dq_uninit(&j_gh); + + printk("GFS: fsid=%s: jid=%u: %s\n", + sdp->sd_fsname, jid, (error) ? 
"Failed" : "Done"); + + fail: + sdp->sd_lockstruct.ls_ops->lm_recovery_done(sdp->sd_lockstruct.ls_lockspace, + jid, + LM_RD_GAVEUP); + + return error; +} + +/** + * gfs_check_journals - Recovery any dirty journals + * @sdp: the filesystem + * + */ + +void +gfs_check_journals(struct gfs_sbd *sdp) +{ + struct dirty_j *dj; + + for (;;) { + dj = get_dirty_j(sdp); + if (!dj) + break; + + down(&sdp->sd_jindex_lock); + + if (dj->dj_jid != sdp->sd_lockstruct.ls_jid && + dj->dj_jid < sdp->sd_journals) { + memcpy(&dj->dj_desc, + sdp->sd_jindex + dj->dj_jid, + sizeof(struct gfs_jindex)); + up(&sdp->sd_jindex_lock); + + gfs_recover_journal(sdp, + dj->dj_jid, &dj->dj_desc, + FALSE); + + } else { + up(&sdp->sd_jindex_lock); + sdp->sd_lockstruct.ls_ops->lm_recovery_done(sdp->sd_lockstruct.ls_lockspace, + dj->dj_jid, LM_RD_GAVEUP); + } + + kfree(dj); + } +} + +/** + * gfs_recover_dump - recover the log elements in this machine's journal + * @sdp: the filesystem + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_recover_dump(struct gfs_sbd *sdp) +{ + struct gfs_log_header head; + int error; + + error = gfs_find_jhead(sdp, &sdp->sd_jdesc, sdp->sd_journal_gh.gh_gl, + &head); + if (error) + goto fail; + + GFS_ASSERT_SBD(head.lh_flags & GFS_LOG_HEAD_UNMOUNT, sdp,); + if (!head.lh_last_dump) + return error; + + printk("GFS: fsid=%s: Scanning for log elements...\n", + sdp->sd_fsname); + + LO_BEFORE_SCAN(sdp, sdp->sd_lockstruct.ls_jid, &head, GFS_RECPASS_B1); + + error = foreach_descriptor(sdp, &sdp->sd_jdesc, sdp->sd_journal_gh.gh_gl, + head.lh_last_dump, head.lh_first, + GFS_RECPASS_B1); + if (error) + goto fail; + + LO_AFTER_SCAN(sdp, sdp->sd_lockstruct.ls_jid, GFS_RECPASS_B1); + + /* We need to make sure if we crash during the next log dump that + all intermediate headers in the transaction point to the last + log dump before the one we're making so we don't lose it. */ + + sdp->sd_log_dump_last = head.lh_last_dump; + + printk("GFS: fsid=%s: Done\n", sdp->sd_fsname); + + return 0; + + fail: + printk("GFS: fsid=%s: Failed\n", sdp->sd_fsname); + + return error; +} diff -urN linux-orig/fs/gfs/recovery.h linux-patched/fs/gfs/recovery.h --- linux-orig/fs/gfs/recovery.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/recovery.h 2004-06-30 13:27:49.357707883 -0500 @@ -0,0 +1,36 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
+** +******************************************************************************* +******************************************************************************/ + +#ifndef __RECOVERY_DOT_H__ +#define __RECOVERY_DOT_H__ + +#define GFS_RECPASS_A1 (12) +#define GFS_RECPASS_B1 (14) + +void gfs_add_dirty_j(struct gfs_sbd *sdp, unsigned int jid); +void gfs_clear_dirty_j(struct gfs_sbd *sdp); + +int gfs_find_jhead(struct gfs_sbd *sdp, struct gfs_jindex *jdesc, + struct gfs_glock *gl, struct gfs_log_header *head); +int gfs_increment_blkno(struct gfs_sbd *sdp, struct gfs_jindex *jdesc, + struct gfs_glock *gl, uint64_t *addr, + int skip_headers); + +int gfs_recover_journal(struct gfs_sbd *sdp, + unsigned int jid, struct gfs_jindex *jdesc, + int wait); +void gfs_check_journals(struct gfs_sbd *sdp); + +int gfs_recover_dump(struct gfs_sbd *sdp); + +#endif /* __RECOVERY_DOT_H__ */ diff -urN linux-orig/fs/gfs/rgrp.c linux-patched/fs/gfs/rgrp.c --- linux-orig/fs/gfs/rgrp.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/rgrp.c 2004-06-30 13:27:49.358707651 -0500 @@ -0,0 +1,1932 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include "gfs.h" +#include "bits.h" +#include "dio.h" +#include "file.h" +#include "glock.h" +#include "glops.h" +#include "rgrp.h" +#include "super.h" +#include "trans.h" + +/** + * mhc_hash: find the mhc hash bucket for a buffer + * @bh: the buffer + * + * Returns: The bucket number + */ + +static unsigned int +mhc_hash(struct buffer_head *bh) +{ + uint64_t blkno; + unsigned int h; + + blkno = bh->b_blocknr; + h = gfs_hash(&blkno, sizeof(uint64_t)) & GFS_MHC_HASH_MASK; + + return h; +} + +/** + * mhc_trim - + * @sdp: + * @max: + * + */ + +static void +mhc_trim(struct gfs_sbd *sdp, unsigned int max) +{ + struct gfs_meta_header_cache *mc; + + for (;;) { + spin_lock(&sdp->sd_mhc_lock); + if (list_empty(&sdp->sd_mhc_single)) { + spin_unlock(&sdp->sd_mhc_lock); + return; + } else { + mc = list_entry(sdp->sd_mhc_single.prev, + struct gfs_meta_header_cache, + mc_list_single); + list_del(&mc->mc_list_hash); + list_del(&mc->mc_list_single); + list_del(&mc->mc_list_rgd); + spin_unlock(&sdp->sd_mhc_lock); + + kmem_cache_free(gfs_mhc_cachep, mc); + atomic_dec(&sdp->sd_mhc_count); + + if (atomic_read(&sdp->sd_mhc_count) <= max) + return; + } + } +} + +/** + * gfs_mhc_add - add buffers to the cache of metadata + * @rgd: a RG + * @bh: an array of buffers + * @num: the number of buffers in the array + * + */ + +void +gfs_mhc_add(struct gfs_rgrpd *rgd, + struct buffer_head **bh, unsigned int num) +{ + struct gfs_sbd *sdp = rgd->rd_sbd; + struct gfs_meta_header_cache *mc; + unsigned int x; + uint64_t gen; + struct list_head *head; + + for (x = 0; x < num; x++) { + gfs_meta_check(sdp, bh[x]); + + RETRY_MALLOC(mc = kmem_cache_alloc(gfs_mhc_cachep, GFP_KERNEL), mc); + memset(mc, 0, sizeof(struct 
gfs_meta_header_cache)); + + mc->mc_block = bh[x]->b_blocknr; + memcpy(&mc->mc_mh, bh[x]->b_data, + sizeof(struct gfs_meta_header)); + + gen = gfs64_to_cpu(mc->mc_mh.mh_generation) + 2; + mc->mc_mh.mh_generation = cpu_to_gfs64(gen); + + head = &sdp->sd_mhc[mhc_hash(bh[x])]; + + spin_lock(&sdp->sd_mhc_lock); + list_add(&mc->mc_list_hash, head); + list_add(&mc->mc_list_single, &sdp->sd_mhc_single); + list_add(&mc->mc_list_rgd, &rgd->rd_mhc); + spin_unlock(&sdp->sd_mhc_lock); + + atomic_inc(&sdp->sd_mhc_count); + } + + if (atomic_read(&sdp->sd_mhc_count) > sdp->sd_tune.gt_max_mhc) + mhc_trim(sdp, sdp->sd_tune.gt_max_mhc); +} + +/** + * gfs_mhc_fish - Try to fill in a buffer with data from the cache + * @sdp: the filesystem + * @bh: the buffer to fill in + * + * Returns: TRUE if the buffer was cached, FALSE otherwise + */ + +int +gfs_mhc_fish(struct gfs_sbd *sdp, struct buffer_head *bh) +{ + struct list_head *tmp, *head; + struct gfs_meta_header_cache *mc; + + head = &sdp->sd_mhc[mhc_hash(bh)]; + + spin_lock(&sdp->sd_mhc_lock); + + for (tmp = head->next; + tmp != head; + tmp = tmp->next) { + mc = list_entry(tmp, struct gfs_meta_header_cache, mc_list_hash); + if (mc->mc_block != bh->b_blocknr) + continue; + + list_del(&mc->mc_list_hash); + list_del(&mc->mc_list_single); + list_del(&mc->mc_list_rgd); + spin_unlock(&sdp->sd_mhc_lock); + + gfs_prep_new_buffer(bh); + memcpy(bh->b_data, &mc->mc_mh, + sizeof(struct gfs_meta_header)); + + kmem_cache_free(gfs_mhc_cachep, mc); + atomic_dec(&sdp->sd_mhc_count); + + return TRUE; + } + + spin_unlock(&sdp->sd_mhc_lock); + + return FALSE; +} + +/** + * gfs_mhc_zap - Get rid of the data in the cache of metadata headers + * @rgd: a RG + * + */ + +void +gfs_mhc_zap(struct gfs_rgrpd *rgd) +{ + struct gfs_sbd *sdp = rgd->rd_sbd; + struct gfs_meta_header_cache *mc; + + spin_lock(&sdp->sd_mhc_lock); + + while (!list_empty(&rgd->rd_mhc)) { + mc = list_entry(rgd->rd_mhc.next, + struct gfs_meta_header_cache, + mc_list_rgd); + + list_del(&mc->mc_list_hash); + list_del(&mc->mc_list_single); + list_del(&mc->mc_list_rgd); + spin_unlock(&sdp->sd_mhc_lock); + + kmem_cache_free(gfs_mhc_cachep, mc); + atomic_dec(&sdp->sd_mhc_count); + + spin_lock(&sdp->sd_mhc_lock); + } + + spin_unlock(&sdp->sd_mhc_lock); +} + +/** + * depend_hash() - Turn glock number into hash bucket number + * @formal_ino: + * + * Returns: The number of the corresponding hash bucket + */ + +static unsigned int +depend_hash(uint64_t formal_ino) +{ + unsigned int h; + + h = gfs_hash(&formal_ino, sizeof(uint64_t)); + h &= GFS_DEPEND_HASH_MASK; + + return h; +} + +/** + * depend_sync_one - + * @sdp: + * @gd: + * + */ + +static void +depend_sync_one(struct gfs_sbd *sdp, struct gfs_depend *gd) +{ + struct gfs_glock *gl; + + spin_lock(&sdp->sd_depend_lock); + list_del(&gd->gd_list_hash); + spin_unlock(&sdp->sd_depend_lock); + list_del(&gd->gd_list_rgd); + + gl = gfs_glock_find(sdp, + &(struct lm_lockname){gd->gd_formal_ino, + LM_TYPE_INODE}); + if (gl) { + if (gl->gl_ops->go_sync) + gl->gl_ops->go_sync(gl, + DIO_METADATA | + DIO_INVISIBLE); + gfs_glock_put(gl); + } + + kfree(gd); + atomic_dec(&sdp->sd_depend_count); +} + +/** + * depend_sync_old - + * @rgd: + * + */ + +static void +depend_sync_old(struct gfs_rgrpd *rgd) +{ + struct gfs_sbd *sdp = rgd->rd_sbd; + struct gfs_depend *gd; + + for (;;) { + gd = list_entry(rgd->rd_depend.prev, + struct gfs_depend, + gd_list_rgd); + + if (time_before(jiffies, + gd->gd_time + + sdp->sd_tune.gt_depend_secs * HZ)) + return; + + depend_sync_one(sdp, gd); + } +} + +/** + * 
gfs_depend_add -
+ * @rgd:
+ * @formal_ino:
+ *
+ */
+
+void
+gfs_depend_add(struct gfs_rgrpd *rgd, uint64_t formal_ino)
+{
+	struct gfs_sbd *sdp = rgd->rd_sbd;
+	struct list_head *head, *tmp;
+	struct gfs_depend *gd;
+
+	head = &sdp->sd_depend[depend_hash(formal_ino)];
+
+	spin_lock(&sdp->sd_depend_lock);
+
+	for (tmp = head->next;
+	     tmp != head;
+	     tmp = tmp->next) {
+		gd = list_entry(tmp, struct gfs_depend, gd_list_hash);
+		if (gd->gd_rgd == rgd &&
+		    gd->gd_formal_ino == formal_ino) {
+			list_move(&gd->gd_list_hash, head);
+			spin_unlock(&sdp->sd_depend_lock);
+			list_move(&gd->gd_list_rgd, &rgd->rd_depend);
+			gd->gd_time = jiffies;
+			return;
+		}
+	}
+
+	spin_unlock(&sdp->sd_depend_lock);
+
+	gd = gmalloc(sizeof(struct gfs_depend));
+	memset(gd, 0, sizeof(struct gfs_depend));
+
+	gd->gd_rgd = rgd;
+	gd->gd_formal_ino = formal_ino;
+	gd->gd_time = jiffies;
+
+	spin_lock(&sdp->sd_depend_lock);
+	list_add(&gd->gd_list_hash, head);
+	spin_unlock(&sdp->sd_depend_lock);
+	list_add(&gd->gd_list_rgd, &rgd->rd_depend);
+
+	atomic_inc(&sdp->sd_depend_count);
+
+	depend_sync_old(rgd);
+}
+
+/**
+ * gfs_depend_sync -
+ * @rgd:
+ *
+ */
+
+void
+gfs_depend_sync(struct gfs_rgrpd *rgd)
+{
+	struct gfs_sbd *sdp = rgd->rd_sbd;
+	struct gfs_depend *gd;
+
+	while (!list_empty(&rgd->rd_depend)) {
+		gd = list_entry(rgd->rd_depend.next,
+				struct gfs_depend,
+				gd_list_rgd);
+		depend_sync_one(sdp, gd);
+	}
+}
+
+/**
+ * rgrp_verify - Verify that a resource group is consistent
+ * @rgd: the rgrp
+ *
+ * Somebody should have already called gfs_glock_rg() on this RG.
+ */
+
+static void
+rgrp_verify(struct gfs_rgrpd *rgd)
+{
+	struct gfs_bitmap *bits = NULL;
+	uint32_t length = rgd->rd_ri.ri_length;
+	uint32_t count[4], tmp;
+	int buf, x;
+
+	memset(count, 0, 4 * sizeof(uint32_t));
+
+	for (buf = 0; buf < length; buf++) {
+		bits = &rgd->rd_bits[buf];
+		for (x = 0; x < 4; x++)
+			count[x] += gfs_bitcount(rgd,
+						 rgd->rd_bh[buf]->b_data +
+						 bits->bi_offset,
+						 bits->bi_len, x);
+	}
+
+	GFS_ASSERT_RGRPD(count[0] == rgd->rd_rg.rg_free, rgd,
+			 printk("free data mismatch: %u != %u\n",
+				count[0], rgd->rd_rg.rg_free););
+
+	tmp = rgd->rd_ri.ri_data -
+		(rgd->rd_rg.rg_usedmeta + rgd->rd_rg.rg_freemeta) -
+		(rgd->rd_rg.rg_useddi + rgd->rd_rg.rg_freedi) -
+		rgd->rd_rg.rg_free;
+	GFS_ASSERT_RGRPD(count[1] == tmp, rgd,
+			 printk("used data mismatch: %u != %u\n",
+				count[1], tmp););
+
+	GFS_ASSERT_RGRPD(count[2] == rgd->rd_rg.rg_freemeta, rgd,
+			 printk("free metadata mismatch: %u != %u\n",
+				count[2], rgd->rd_rg.rg_freemeta););
+
+	tmp = rgd->rd_rg.rg_usedmeta +
+		(rgd->rd_rg.rg_useddi + rgd->rd_rg.rg_freedi);
+	GFS_ASSERT_RGRPD(count[3] == tmp, rgd,
+			 printk("used metadata mismatch: %u != %u\n",
+				count[3], tmp););
+}
+
+/**
+ * gfs_blk2rgrpd - Find resource group for a given data block number
+ * @sdp: The GFS superblock
+ * @blk: The data block number
+ *
+ * Returns: The resource group, or NULL if not found
+ */
+
+struct gfs_rgrpd *
+gfs_blk2rgrpd(struct gfs_sbd *sdp, uint64_t blk)
+{
+	struct list_head *tmp, *head;
+	struct gfs_rgrpd *rgd = NULL;
+	struct gfs_rindex *ri;
+
+	spin_lock(&sdp->sd_rg_mru_lock);
+
+	for (head = &sdp->sd_rg_mru_list, tmp = head->next;
+	     tmp != head;
+	     tmp = tmp->next) {
+		rgd = list_entry(tmp, struct gfs_rgrpd, rd_list_mru);
+		ri = &rgd->rd_ri;
+
+		if (ri->ri_data1 <= blk && blk < ri->ri_data1 + ri->ri_data) {
+			list_move(&rgd->rd_list_mru, &sdp->sd_rg_mru_list);
+			spin_unlock(&sdp->sd_rg_mru_lock);
+			return rgd;
+		}
+	}
+
+	spin_unlock(&sdp->sd_rg_mru_lock);
+
+	return NULL;
+} + +/** + * gfs_rgrpd_get_first - get the first RG + * @sdp: The GFS superblock + * + * Returns: The first rgrp in the filesystem + */ + +struct gfs_rgrpd * +gfs_rgrpd_get_first(struct gfs_sbd *sdp) +{ + GFS_ASSERT_SBD(!list_empty(&sdp->sd_rglist), sdp,); + return list_entry(sdp->sd_rglist.next, struct gfs_rgrpd, rd_list); +} + +/** + * gfs_rgrpd_get_next - get the next RG + * @rgd: A RG + * + * Returns: The next rgrp + */ + +struct gfs_rgrpd * +gfs_rgrpd_get_next(struct gfs_rgrpd *rgd) +{ + if (rgd->rd_list.next == &rgd->rd_sbd->sd_rglist) + return NULL; + return list_entry(rgd->rd_list.next, struct gfs_rgrpd, rd_list); +} + +/** + * clear_rgrpdi - Clear up rgrps + * @sdp: The GFS superblock + * + */ + +void +clear_rgrpdi(struct gfs_sbd *sdp) +{ + struct gfs_rgrpd *rgd; + struct gfs_glock *gl; + + sdp->sd_rg_forward = NULL; + + while (!list_empty(&sdp->sd_rg_recent)) { + rgd = list_entry(sdp->sd_rg_recent.next, + struct gfs_rgrpd, rd_recent); + list_del(&rgd->rd_recent); + } + + while (!list_empty(&sdp->sd_rglist)) { + rgd = list_entry(sdp->sd_rglist.next, + struct gfs_rgrpd, rd_list); + gl = rgd->rd_gl; + + list_del(&rgd->rd_list); + list_del(&rgd->rd_list_mru); + + if (gl) { + gfs_glock_force_drop(gl); + if (atomic_read(&gl->gl_lvb_count)) + gfs_lvb_unhold(gl); + gl2rgd(gl) = NULL; + gfs_glock_put(gl); + } + + if (rgd->rd_bits) + kfree(rgd->rd_bits); + if (rgd->rd_bh) + kfree(rgd->rd_bh); + + kfree(rgd); + } +} + +/** + * gfs_clear_rgrpd - Clear up rgrps + * @sdp: The GFS superblock + * + */ + +void +gfs_clear_rgrpd(struct gfs_sbd *sdp) +{ + down(&sdp->sd_rindex_lock); + clear_rgrpdi(sdp); + up(&sdp->sd_rindex_lock); +} + +/** + * gfs_compute_bitstructs - Compute the bitmap sizes + * @rgd: The resource group descriptor + * + */ + +static void +compute_bitstructs(struct gfs_rgrpd *rgd) +{ + struct gfs_sbd *sdp = rgd->rd_sbd; + struct gfs_bitmap *bits; + uint32_t length = rgd->rd_ri.ri_length; + uint32_t bytes_left, bytes; + int x; + + rgd->rd_bits = gmalloc(length * sizeof(struct gfs_bitmap)); + memset(rgd->rd_bits, 0, length * sizeof(struct gfs_bitmap)); + + bytes_left = rgd->rd_ri.ri_bitbytes; + + for (x = 0; x < length; x++) { + bits = &rgd->rd_bits[x]; + + if (length == 1) { + bytes = bytes_left; + bits->bi_offset = sizeof(struct gfs_rgrp); + bits->bi_start = 0; + bits->bi_len = bytes; + } else if (x == 0) { + bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs_rgrp); + bits->bi_offset = sizeof(struct gfs_rgrp); + bits->bi_start = 0; + bits->bi_len = bytes; + } else if (x + 1 == length) { + bytes = bytes_left; + bits->bi_offset = sizeof(struct gfs_meta_header); + bits->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left; + bits->bi_len = bytes; + } else { + bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs_meta_header); + bits->bi_offset = sizeof(struct gfs_meta_header); + bits->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left; + bits->bi_len = bytes; + } + + bytes_left -= bytes; + } + + GFS_ASSERT_RGRPD(!bytes_left, rgd,); + GFS_ASSERT_RGRPD((rgd->rd_bits[length - 1].bi_start + + rgd->rd_bits[length - 1].bi_len) * GFS_NBBY == + rgd->rd_ri.ri_data, rgd, + printk("start=%u len=%u offset=%u\n", + rgd->rd_bits[length - 1].bi_start, + rgd->rd_bits[length - 1].bi_len, + rgd->rd_bits[length - 1].bi_offset); + gfs_rindex_print(&rgd->rd_ri);); + + rgd->rd_bh = gmalloc(length * sizeof(struct buffer_head *)); + memset(rgd->rd_bh, 0, length * sizeof(struct buffer_head *)); +} + +/** + * gfs_ri_update - Pull in a new resource index from the disk + * @gl: The glock covering the rindex inode + * + * Returns: 
0 on successful update, error code otherwise
+ */
+
+static int
+gfs_ri_update(struct gfs_inode *ip)
+{
+        struct gfs_sbd *sdp = ip->i_sbd;
+        struct gfs_rgrpd *rgd;
+        char buf[sizeof(struct gfs_rindex)];
+        int error;
+
+        GFS_ASSERT_SBD(!do_mod(ip->i_di.di_size, sizeof(struct gfs_rindex)),
+                       sdp,);
+
+        clear_rgrpdi(sdp);
+
+        for (sdp->sd_rgcount = 0;; sdp->sd_rgcount++) {
+                error = gfs_internal_read(ip, buf,
+                                          sdp->sd_rgcount *
+                                          sizeof(struct gfs_rindex),
+                                          sizeof(struct gfs_rindex));
+                if (!error)
+                        break;
+                if (error != sizeof(struct gfs_rindex)) {
+                        if (error > 0)
+                                error = -EIO;
+                        goto fail;
+                }
+
+                rgd = gmalloc(sizeof(struct gfs_rgrpd));
+                memset(rgd, 0, sizeof(struct gfs_rgrpd));
+
+                INIT_LIST_HEAD(&rgd->rd_mhc);
+                INIT_LIST_HEAD(&rgd->rd_depend);
+                rgd->rd_sbd = sdp;
+
+                list_add_tail(&rgd->rd_list, &sdp->sd_rglist);
+                list_add_tail(&rgd->rd_list_mru, &sdp->sd_rg_mru_list);
+
+                gfs_rindex_in(&rgd->rd_ri, buf);
+
+                compute_bitstructs(rgd);
+
+                error = gfs_glock_get(sdp, rgd->rd_ri.ri_addr, &gfs_rgrp_glops,
+                                      CREATE, &rgd->rd_gl);
+                if (error)
+                        goto fail;
+
+                error = gfs_lvb_hold(rgd->rd_gl);
+                if (error)
+                        goto fail;
+
+                gl2rgd(rgd->rd_gl) = rgd;
+                rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1;
+        }
+
+        sdp->sd_riinode_vn = ip->i_gl->gl_vn;
+
+        return 0;
+
+ fail:
+        clear_rgrpdi(sdp);
+
+        return error;
+}
+
+/**
+ * gfs_rindex_hold - Grab a lock on the rindex
+ * @sdp: The GFS superblock
+ * @ri_gh: the glock holder
+ *
+ * We grab a lock on the rindex inode to make sure that it doesn't
+ * change whilst we are performing an operation. We keep this lock
+ * for quite long periods of time compared to other locks. This
+ * doesn't matter, since it is shared and it is very, very rarely
+ * accessed in the exclusive mode.
+ *
+ * Returns: 0 on success, error code otherwise
+ */
+
+int
+gfs_rindex_hold(struct gfs_sbd *sdp, struct gfs_holder *ri_gh)
+{
+        struct gfs_inode *ip = sdp->sd_riinode;
+        struct gfs_glock *gl = ip->i_gl;
+        int error;
+
+        error = gfs_glock_nq_init(gl, LM_ST_SHARED, 0, ri_gh);
+        if (error)
+                return error;
+
+        if (sdp->sd_riinode_vn != gl->gl_vn) {
+                down(&sdp->sd_rindex_lock);
+                if (sdp->sd_riinode_vn != gl->gl_vn) {
+                        error = gfs_ri_update(ip);
+                        if (error)
+                                gfs_glock_dq_uninit(ri_gh);
+                }
+                up(&sdp->sd_rindex_lock);
+        }
+
+        return error;
+}
+
+/**
+ * gfs_rgrp_read - Read in a RG's bitmaps
+ * @rgd: the struct gfs_rgrpd describing the RG to read in
+ *
+ * Read in RG bitmaps. Must call gfs_rgrp_relse() to free the bitmaps.
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+int
+gfs_rgrp_read(struct gfs_rgrpd *rgd)
+{
+        struct gfs_sbd *sdp = rgd->rd_sbd;
+        struct gfs_glock *gl = rgd->rd_gl;
+        unsigned int x, length = rgd->rd_ri.ri_length;
+        int error;
+
+        for (x = 0; x < length; x++) {
+                GFS_ASSERT_RGRPD(!rgd->rd_bh[x], rgd,);
+                rgd->rd_bh[x] = gfs_dgetblk(sdp, rgd->rd_ri.ri_addr + x, gl);
+        }
+
+        for (x = 0; x < length; x++) {
+                error = gfs_dreread(sdp, rgd->rd_bh[x], DIO_START);
+                if (error)
+                        goto fail;
+        }
+
+        for (x = length; x--;) {
+                error = gfs_dreread(sdp, rgd->rd_bh[x], DIO_WAIT);
+                if (error)
+                        goto fail;
+                gfs_metatype_check(sdp, rgd->rd_bh[x],
+                                   (x) ?
GFS_METATYPE_RB : GFS_METATYPE_RG); + } + + if (rgd->rd_rg_vn != gl->gl_vn) { + gfs_rgrp_in(&rgd->rd_rg, (rgd->rd_bh[0])->b_data); + rgd->rd_rg_vn = gl->gl_vn; + } + + return 0; + + fail: + for (x = 0; x < length; x++) { + brelse(rgd->rd_bh[x]); + rgd->rd_bh[x] = NULL; + } + + return error; +} + +/** + * gfs_rgrp_relse - Release RG bitmaps read in with gfs_rgrp_read() + * @rgd: the struct gfs_rgrpd describing the RG to read in + * + */ + +void +gfs_rgrp_relse(struct gfs_rgrpd *rgd) +{ + int x, length = rgd->rd_ri.ri_length; + + for (x = 0; x < length; x++) { + brelse(rgd->rd_bh[x]); + rgd->rd_bh[x] = NULL; + } +} + +/** + * gfs_rgrp_lvb_fill - copy RG usage data out of the struct gfs_rgrp into the struct gfs_rgrp_lvb + * @rgd: the resource group data structure + * + */ + +void +gfs_rgrp_lvb_fill(struct gfs_rgrpd *rgd) +{ + struct gfs_rgrp *rg = &rgd->rd_rg; + struct gfs_rgrp_lvb *rb = (struct gfs_rgrp_lvb *)rgd->rd_gl->gl_lvb; + + rb->rb_magic = cpu_to_gfs32(GFS_MAGIC); + rb->rb_free = cpu_to_gfs32(rg->rg_free); + rb->rb_useddi = cpu_to_gfs32(rg->rg_useddi); + rb->rb_freedi = cpu_to_gfs32(rg->rg_freedi); + rb->rb_usedmeta = cpu_to_gfs32(rg->rg_usedmeta); + rb->rb_freemeta = cpu_to_gfs32(rg->rg_freemeta); + + clear_bit(GLF_LVB_INVALID, &rgd->rd_gl->gl_flags); +} + +/** + * gfs_rgrp_lvb_init - Init the data of a RG LVB + * @rgd: the resource group data structure + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_rgrp_lvb_init(struct gfs_rgrpd *rgd) +{ + struct gfs_glock *gl = rgd->rd_gl; + struct gfs_holder rgd_gh; + int error; + + error = gfs_glock_nq_init(gl, LM_ST_EXCLUSIVE, 0, &rgd_gh); + if (!error) { + gfs_rgrp_lvb_fill(rgd); + gfs_glock_dq_uninit(&rgd_gh); + } + + return error; +} + +/** + * gfs_alloc_get - allocate a struct gfs_alloc structure for an inode + * @ip: the inode + * + * Returns: the struct gfs_alloc + */ + +struct gfs_alloc * +gfs_alloc_get(struct gfs_inode *ip) +{ + struct gfs_alloc *al = ip->i_alloc; + + GFS_ASSERT_INODE(!al, ip,); + + al = gmalloc(sizeof(struct gfs_alloc)); + memset(al, 0, sizeof(struct gfs_alloc)); + + ip->i_alloc = al; + + return al; +} + +/** + * gfs_alloc_put - throw away the struct gfs_alloc for an inode + * @ip: the inode + * + */ + +void +gfs_alloc_put(struct gfs_inode *ip) +{ + struct gfs_alloc *al = ip->i_alloc; + + GFS_ASSERT_INODE(al, ip,); + + ip->i_alloc = NULL; + kfree(al); +} + +/** + * try_rgrp_fit - See if a given reservation will fit in a given RG + * @rgd: the RG data + * @al: the struct gfs_alloc structure describing the reservation + * + * Sets the $ir_datares field in @res. + * Sets the $ir_metares field in @res. 
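
The fit test implemented just below is easiest to see with concrete numbers: free data blocks can be converted into free metadata only in whole GFS_META_CLUMP-sized chunks, and the dinode blocks in the request are folded into the metadata count. A minimal userspace model of that arithmetic; CLUMP and the parameter names here are invented stand-ins for the real GFS constants and struct fields:

/* Userspace model of the clump-conversion fit test. */
#include <stdint.h>
#include <stdio.h>

#define CLUMP 64        /* assumed stand-in for GFS_META_CLUMP */

/* Returns 1 if a request for (data, meta) blocks fits in an RG
   holding `free` data blocks and `freemeta` metadata blocks. */
static int fits(uint32_t free, uint32_t freemeta,
                uint32_t data, uint32_t meta)
{
        if (free < data)
                return 0;
        free -= data;

        /* Convert free data blocks to metadata, a clump at a time. */
        while (freemeta < meta) {
                if (free < CLUMP)
                        return 0;
                free -= CLUMP;
                freemeta += CLUMP;
        }
        return 1;
}

int main(void)
{
        /* 100 free data, 3 free meta: 20 data + 10 meta triggers
           one clump conversion and still fits. */
        printf("%d\n", fits(100, 3, 20, 10));   /* prints 1 */
        printf("%d\n", fits(30, 0, 20, 10));    /* 10 < CLUMP: prints 0 */
        return 0;
}
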
+ * + * Returns: 1 on success, 0 on failure + */ + +static int +try_rgrp_fit(struct gfs_rgrpd *rgd, struct gfs_alloc *al) +{ + uint32_t freeblks = rgd->rd_rg.rg_free; + uint32_t freemeta = rgd->rd_rg.rg_freemeta; + uint32_t metares = al->al_requested_meta; + uint32_t datares = al->al_requested_data; + + /* First take care of the data blocks required */ + + if (freeblks < al->al_requested_data) + return 0; + + freeblks -= al->al_requested_data; + + /* Then take care of the dinodes */ + + metares += al->al_requested_di; + + /* Then take care of the metadata blocks */ + + while (freemeta < metares) { + if (freeblks < GFS_META_CLUMP) + return 0; + + freeblks -= GFS_META_CLUMP; + freemeta += GFS_META_CLUMP; + + datares += GFS_META_CLUMP; + } + + al->al_rgd = rgd; + al->al_reserved_meta = metares; + al->al_reserved_data = datares; + + return 1; +} + +/** + * recent_rgrp_first - get first RG from recent list + * @sdp: The GFS superblock + * @rglast: address of the rgrp used last + * + * Returns: The first rgrp in the recent list + */ + +static struct gfs_rgrpd * +recent_rgrp_first(struct gfs_sbd *sdp, uint64_t rglast) +{ + struct list_head *tmp, *head; + struct gfs_rgrpd *rgd = NULL; + + spin_lock(&sdp->sd_rg_recent_lock); + + if (list_empty(&sdp->sd_rg_recent)) + goto out; + + if (!rglast) + goto first; + + for (head = &sdp->sd_rg_recent, tmp = head->next; + tmp != head; + tmp = tmp->next) { + rgd = list_entry(tmp, struct gfs_rgrpd, rd_recent); + if (rgd->rd_ri.ri_addr == rglast) + goto out; + } + + first: + rgd = list_entry(sdp->sd_rg_recent.next, struct gfs_rgrpd, rd_recent); + + out: + spin_unlock(&sdp->sd_rg_recent_lock); + + return rgd; +} + +/** + * recent_rgrp_next - get next RG from recent list + * @cur_rgd: current rgrp + * + * Returns: The next rgrp in the recent list + */ + +static struct gfs_rgrpd * +recent_rgrp_next(struct gfs_rgrpd *cur_rgd) +{ + struct gfs_sbd *sdp = cur_rgd->rd_sbd; + struct list_head *tmp, *head; + struct gfs_rgrpd *rgd; + + spin_lock(&sdp->sd_rg_recent_lock); + + for (head = &sdp->sd_rg_recent, tmp = head->next; + tmp != head; + tmp = tmp->next) { + rgd = list_entry(tmp, struct gfs_rgrpd, rd_recent); + if (rgd == cur_rgd) { + if (cur_rgd->rd_recent.next != &sdp->sd_rg_recent) + rgd = list_entry(cur_rgd->rd_recent.next, + struct gfs_rgrpd, rd_recent); + else + rgd = NULL; + + goto out; + } + } + + rgd = NULL; + + out: + spin_unlock(&sdp->sd_rg_recent_lock); + + return rgd; +} + +/** + * recent_rgrp_remove - remove an RG from recent list + * @rgd: The rgrp to remove + * + */ + +static void +recent_rgrp_remove(struct gfs_rgrpd *rgd) +{ + spin_lock(&rgd->rd_sbd->sd_rg_recent_lock); + list_del(&rgd->rd_recent); + spin_unlock(&rgd->rd_sbd->sd_rg_recent_lock); +} + +/** + * recent_rgrp_add - add an RG to recent list + * @new_rgd: The rgrp to add + * + */ + +static void +recent_rgrp_add(struct gfs_rgrpd *new_rgd) +{ + struct gfs_sbd *sdp = new_rgd->rd_sbd; + struct list_head *tmp, *head; + struct gfs_rgrpd *rgd = NULL; + unsigned int count = 0; + unsigned int max = sdp->sd_rgcount / gfs_num_journals(sdp); + + spin_lock(&sdp->sd_rg_recent_lock); + + for (head = &sdp->sd_rg_recent, tmp = head->next; + tmp != head; + tmp = tmp->next) { + rgd = list_entry(tmp, struct gfs_rgrpd, rd_recent); + if (rgd == new_rgd) + goto out; + + if (++count >= max) + goto out; + } + list_add_tail(&new_rgd->rd_recent, &sdp->sd_rg_recent); + + out: + spin_unlock(&sdp->sd_rg_recent_lock); +} + +/** + * forward_rgrp_get - get an rgrp to try next from full list + * @sdp: The GFS superblock + * + 
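
The forward-rgrp seed computed by forward_rgrp_get() just below spreads the full-list sweeps of different cluster nodes across the filesystem: each journal id starts its sweep at a proportional offset. A small userspace sketch of just that arithmetic (the function and parameter names are illustrative, not the kernel API):

#include <stdio.h>

/* Node with journal id `jid` starts at a different offset, so nodes
   rarely begin contending for the same resource group. */
static unsigned int first_rgrp(unsigned int rgcount,
                               unsigned int jid,
                               unsigned int journals)
{
        if (rgcount >= journals)
                return rgcount * jid / journals;
        return 0;
}

int main(void)
{
        unsigned int jid;

        /* 128 rgrps, 4 journals: nodes start at 0, 32, 64, 96. */
        for (jid = 0; jid < 4; jid++)
                printf("jid %u starts at rgrp %u\n",
                       jid, first_rgrp(128, jid, 4));
        return 0;
}
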
* Returns: The rgrp to try next + */ + +static struct gfs_rgrpd * +forward_rgrp_get(struct gfs_sbd *sdp) +{ + struct gfs_rgrpd *rgd; + unsigned int journals = gfs_num_journals(sdp); + unsigned int rg = 0, x; + + spin_lock(&sdp->sd_rg_forward_lock); + + rgd = sdp->sd_rg_forward; + if (!rgd) { + if (sdp->sd_rgcount >= journals) + rg = sdp->sd_rgcount * + sdp->sd_lockstruct.ls_jid / + journals; + + for (x = 0, rgd = gfs_rgrpd_get_first(sdp); + x < rg; + x++, rgd = gfs_rgrpd_get_next(rgd)) + /* Do Nothing */; + + sdp->sd_rg_forward = rgd; + } + + spin_unlock(&sdp->sd_rg_forward_lock); + + return rgd; +} + +/** + * forward_rgrp_set - set the forward rgrp pointer + * @sdp: the filesystem + * @rgd: The new forward rgrp + * + */ + +static void +forward_rgrp_set(struct gfs_sbd *sdp, struct gfs_rgrpd *rgd) +{ + spin_lock(&sdp->sd_rg_forward_lock); + sdp->sd_rg_forward = rgd; + spin_unlock(&sdp->sd_rg_forward_lock); +} + +/** + * get_local_rgrp - Choose and lock a rgrp for allocation + * @ip: the inode to reserve space for + * @rgp: the chosen and locked rgrp + * + * Try to acquire rgrp in way which avoids contending with others. + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +get_local_rgrp(struct gfs_inode *ip) +{ + struct gfs_sbd *sdp = ip->i_sbd; + struct gfs_rgrpd *rgd, *begin, *next = NULL; + struct gfs_alloc *al = ip->i_alloc; + int flags = LM_FLAG_TRY; + int error = 0; + int skipped = 0; + int loops = 0; + int update_recent = FALSE; + + /* Try recently successful rgrps */ + + rgd = recent_rgrp_first(sdp, ip->i_last_rg_alloc); + + while (rgd) { + error = gfs_glock_nq_init(rgd->rd_gl, + LM_ST_EXCLUSIVE, LM_FLAG_TRY, + &al->al_rgd_gh); + switch (error) { + case 0: + if (try_rgrp_fit(rgd, al)) + goto out; + + next = recent_rgrp_next(rgd); + recent_rgrp_remove(rgd); + gfs_glock_dq_uninit(&al->al_rgd_gh); + rgd = next; + break; + + case GLR_TRYFAILED: + rgd = recent_rgrp_next(rgd); + break; + + default: + GFS_ASSERT_RGRPD(error < 0, rgd,); + return error; + } + } + + /* Go through full list of rgrps */ + + update_recent = TRUE; + begin = rgd = forward_rgrp_get(sdp); + + for (;;) { + error = gfs_glock_nq_init(rgd->rd_gl, + LM_ST_EXCLUSIVE, flags, + &al->al_rgd_gh); + switch (error) { + case 0: + if (try_rgrp_fit(rgd, al)) + goto out; + gfs_glock_dq_uninit(&al->al_rgd_gh); + break; + + case GLR_TRYFAILED: + GFS_ASSERT_RGRPD(flags == LM_FLAG_TRY, rgd,); + skipped++; + break; + + default: + GFS_ASSERT_RGRPD(error < 0, rgd,); + return error; + } + + rgd = gfs_rgrpd_get_next(rgd); + if (!rgd) + rgd = gfs_rgrpd_get_first(sdp); + + if (rgd == begin) { + if (++loops >= 2 || !skipped) { + return -ENOSPC; + } + flags = 0; + } + } + + out: + ip->i_last_rg_alloc = rgd->rd_ri.ri_addr; + + if (update_recent) { + recent_rgrp_add(rgd); + rgd = gfs_rgrpd_get_next(rgd); + forward_rgrp_set(sdp, rgd); + } + + return 0; +} + +/** + * gfs_inplace_reserve_i - Reserve space in the filesystem + * @ip: the inode to reserve space for + * + * Acquire resource group locks to allow for the maximum allocation + * described by "res". + * + * This should probably become more complex again, but for now, let's go + * for simple (one resource group) reservations. 
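
get_local_rgrp() above combines two passes: a try-lock pass that skips contended resource groups, then a blocking pass only if something was skipped. A self-contained userspace sketch of that policy; try_lock() and the rg array are hypothetical stand-ins for the glock calls, and the second pass simply proceeds where the kernel would wait:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct rg { bool busy; bool has_room; };

/* try-lock is modeled as "not currently held by another node" */
static bool try_lock(struct rg *r) { return !r->busy; }

static struct rg *sweep(struct rg *rgs, size_t n)
{
        bool use_try = true;
        size_t loops, i, skipped;

        for (loops = 0; loops < 2; loops++) {
                skipped = 0;
                for (i = 0; i < n; i++) {
                        struct rg *r = &rgs[i];

                        if (use_try && !try_lock(r)) {
                                skipped++;      /* contended: skip for now */
                                continue;
                        }
                        if (r->has_room)
                                return r;
                }
                if (!skipped)
                        break;          /* all rgrps really were full */
                use_try = false;        /* second pass: block on locks */
        }
        return NULL;                    /* maps to -ENOSPC above */
}

int main(void)
{
        struct rg rgs[3] = { {true, true}, {false, false}, {true, true} };

        /* First pass skips rgrps 0 and 2; second pass waits and wins. */
        printf("found room: %d\n", sweep(rgs, 3) != NULL);
        return 0;
}
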
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+int
+gfs_inplace_reserve_i(struct gfs_inode *ip,
+                      char *file, unsigned int line)
+{
+        struct gfs_sbd *sdp = ip->i_sbd;
+        struct gfs_alloc *al = ip->i_alloc;
+        int error;
+
+        GFS_ASSERT_INODE(al->al_requested_di ||
+                         al->al_requested_data ||
+                         al->al_requested_meta, ip,);
+
+        error = gfs_rindex_hold(sdp, &al->al_ri_gh);
+        if (error)
+                return error;
+
+        error = get_local_rgrp(ip);
+        if (error) {
+                gfs_glock_dq_uninit(&al->al_ri_gh);
+                return error;
+        }
+
+        gfs_depend_sync(al->al_rgd);
+
+        al->al_file = file;
+        al->al_line = line;
+
+        return 0;
+}
+
+/**
+ * gfs_inplace_release - release an inplace reservation
+ * @ip: the inode the reservation was taken out on
+ *
+ * Release a reservation made by gfs_inplace_reserve().
+ */
+
+void
+gfs_inplace_release(struct gfs_inode *ip)
+{
+        struct gfs_alloc *al = ip->i_alloc;
+
+        GFS_ASSERT_INODE(al->al_alloced_di <= al->al_requested_di, ip,
+                         printk("al_alloced_di = %u, al_requested_di = %u\n",
+                                al->al_alloced_di, al->al_requested_di);
+                         printk("al_file = %s, al_line = %u\n",
+                                al->al_file, al->al_line););
+        GFS_ASSERT_INODE(al->al_alloced_meta <= al->al_reserved_meta, ip,
+                         printk("al_alloced_meta = %u, al_reserved_meta = %u\n",
+                                al->al_alloced_meta, al->al_reserved_meta);
+                         printk("al_file = %s, al_line = %u\n",
+                                al->al_file, al->al_line););
+        GFS_ASSERT_INODE(al->al_alloced_data <= al->al_reserved_data, ip,
+                         printk("al_alloced_data = %u, al_reserved_data = %u\n",
+                                al->al_alloced_data, al->al_reserved_data);
+                         printk("al_file = %s, al_line = %u\n",
+                                al->al_file, al->al_line););
+
+        al->al_rgd = NULL;
+        gfs_glock_dq_uninit(&al->al_rgd_gh);
+        gfs_glock_dq_uninit(&al->al_ri_gh);
+}
+
+/**
+ * gfs_get_block_type - Check that a block in a RG is of the given type
+ * @rgd: the resource group holding the block
+ * @block: the block number
+ *
+ * Returns: The block type (GFS_BLKST_*)
+ */
+
+unsigned char
+gfs_get_block_type(struct gfs_rgrpd *rgd, uint64_t block)
+{
+        struct gfs_bitmap *bits = NULL;
+        uint32_t length, rgrp_block, buf_block;
+        unsigned int buf;
+        unsigned char type;
+
+        length = rgd->rd_ri.ri_length;
+        rgrp_block = block - rgd->rd_ri.ri_data1;
+
+        for (buf = 0; buf < length; buf++) {
+                bits = &rgd->rd_bits[buf];
+                if (rgrp_block < (bits->bi_start + bits->bi_len) * GFS_NBBY)
+                        break;
+        }
+
+        GFS_ASSERT_RGRPD(buf < length, rgd,);
+        buf_block = rgrp_block - bits->bi_start * GFS_NBBY;
+
+        type = gfs_testbit(rgd,
+                           rgd->rd_bh[buf]->b_data + bits->bi_offset,
+                           bits->bi_len, buf_block);
+
+        return type;
+}
+
+/**
+ * blkalloc_internal - allocate a single block
+ * @rgd: the resource group descriptor
+ * @goal: the goal block in the RG
+ * @old_state: the type of block to find
+ * @new_state: the resulting block type
+ *
+ * This function never fails.
+ *
+ * Returns: the block allocated
+ */
+
+static uint32_t
+blkalloc_internal(struct gfs_rgrpd *rgd,
+                  uint32_t goal,
+                  unsigned char old_state, unsigned char new_state)
+{
+        struct gfs_bitmap *bits = NULL;
+        uint32_t length = rgd->rd_ri.ri_length;
+        uint32_t blk = 0;
+        unsigned int buf, x;
+
+        for (buf = 0; buf < length; buf++) {
+                bits = &rgd->rd_bits[buf];
+                if (goal < (bits->bi_start + bits->bi_len) * GFS_NBBY)
+                        break;
+        }
+
+        GFS_ASSERT_RGRPD(buf < length, rgd,);
+        goal -= bits->bi_start * GFS_NBBY;
+
+        /* "x <= length" because we're skipping over some of the first
+           buffer when the goal is non-zero.
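
The surrounding blkalloc_internal() walks a two-bit-per-block bitmap starting from a goal block and wraps around once. The following standalone sketch models that search; NBBY, bitfit() and setbit() here are simplified stand-ins for the GFS bitmap helpers gfs_bitfit() and gfs_setbit():

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NBBY 4                  /* blocks per byte at 2 bits each */
#define NOENT 0xFFFFFFFFu

static unsigned int getbit(const uint8_t *map, uint32_t blk)
{
        return (map[blk / NBBY] >> ((blk % NBBY) * 2)) & 3;
}

static void setbit(uint8_t *map, uint32_t blk, unsigned int state)
{
        unsigned int shift = (blk % NBBY) * 2;
        map[blk / NBBY] = (map[blk / NBBY] & ~(3 << shift)) |
                          (state << shift);
}

/* Search [goal, nblk) then wrap to [0, goal) for a block in `old`. */
static uint32_t bitfit(const uint8_t *map, uint32_t nblk,
                       uint32_t goal, unsigned int old)
{
        uint32_t i, blk;

        for (i = 0; i < nblk; i++) {
                blk = (goal + i) % nblk;
                if (getbit(map, blk) == old)
                        return blk;
        }
        return NOENT;
}

int main(void)
{
        uint8_t map[4];
        uint32_t blk;

        memset(map, 0, sizeof(map));    /* all 16 blocks free (state 0) */
        setbit(map, 5, 1);              /* mark block 5 used */

        blk = bitfit(map, 16, 5, 0);    /* goal 5 is used: finds 6 */
        if (blk != NOENT)
                setbit(map, blk, 1);
        printf("allocated block %u\n", blk);
        return 0;
}
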
*/ + + for (x = 0; x <= length; x++) { + blk = gfs_bitfit(rgd, + rgd->rd_bh[buf]->b_data + bits->bi_offset, + bits->bi_len, goal, old_state); + if (blk != BFITNOENT) + break; + + buf = (buf + 1) % length; + bits = &rgd->rd_bits[buf]; + goal = 0; + } + + GFS_ASSERT_RGRPD(x <= length, rgd,); + + gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[buf]); + gfs_setbit(rgd, + rgd->rd_bh[buf]->b_data + bits->bi_offset, + bits->bi_len, blk, new_state); + + return bits->bi_start * GFS_NBBY + blk; +} + +/** + * blkfree_internal - Free a block + * @sdp: the filesystem + * @bstart: the start of a run of blocks to free + * @blen: the length of the block run + * @new_state: the new state of the block + * + */ + +static struct gfs_rgrpd * +blkfree_internal(struct gfs_sbd *sdp, uint64_t bstart, uint32_t blen, + unsigned char new_state) +{ + struct gfs_rgrpd *rgd; + struct gfs_bitmap *bits = NULL; + uint32_t length, rgrp_blk, buf_blk; + unsigned int buf; + + rgd = gfs_blk2rgrpd(sdp, bstart); + GFS_ASSERT_SBD(rgd, sdp, + printk("block = %"PRIu64"\n", bstart);); + + length = rgd->rd_ri.ri_length; + rgrp_blk = bstart - rgd->rd_ri.ri_data1; + + while (blen--) { + for (buf = 0; buf < length; buf++) { + bits = &rgd->rd_bits[buf]; + if (rgrp_blk < (bits->bi_start + bits->bi_len) * GFS_NBBY) + break; + } + + GFS_ASSERT_RGRPD(buf < length, rgd,); + buf_blk = rgrp_blk - bits->bi_start * GFS_NBBY; + rgrp_blk++; + + gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[buf]); + gfs_setbit(rgd, + rgd->rd_bh[buf]->b_data + bits->bi_offset, + bits->bi_len, buf_blk, new_state); + } + + return rgd; +} + +/** + * clump_alloc - Allocate a clump of metadata + * @rgd: the resource group descriptor + * @first: returns the first block allocated + * + * Returns: 0 on success, -EXXX on failure + */ + +static int +clump_alloc(struct gfs_rgrpd *rgd, uint32_t *first) +{ + struct gfs_sbd *sdp = rgd->rd_sbd; + struct gfs_meta_header mh; + struct buffer_head **bh; + uint32_t goal, blk; + unsigned int x; + int error = 0; + + memset(&mh, 0, sizeof(struct gfs_meta_header)); + mh.mh_magic = GFS_MAGIC; + mh.mh_type = GFS_METATYPE_NONE; + + bh = gmalloc(GFS_META_CLUMP * sizeof(struct buffer_head *)); + memset(bh, 0, sizeof(GFS_META_CLUMP * sizeof(struct buffer_head *))); + + goal = rgd->rd_last_alloc_data; + + for (x = 0; x < GFS_META_CLUMP; x++) { + blk = blkalloc_internal(rgd, goal, GFS_BLKST_FREE, + GFS_BLKST_FREEMETA); + if (!x) + *first = blk; + + bh[x] = gfs_dgetblk(sdp, rgd->rd_ri.ri_data1 + blk, rgd->rd_gl); + + gfs_prep_new_buffer(bh[x]); + + gfs_meta_header_out(&mh, bh[x]->b_data); + ((struct gfs_meta_header *)bh[x]->b_data)->mh_generation = 0; + + error = gfs_dwrite(sdp, bh[x], DIO_DIRTY | DIO_START); + if (error) + goto out; + + goal = blk; + } + + rgd->rd_last_alloc_data = goal; + + for (x = 0; x < GFS_META_CLUMP; x++) { + error = gfs_dwrite(sdp, bh[x], DIO_WAIT); + if (error) + goto out; + } + + gfs_mhc_add(rgd, bh, GFS_META_CLUMP); + + GFS_ASSERT_RGRPD(rgd->rd_rg.rg_free >= GFS_META_CLUMP, rgd,); + rgd->rd_rg.rg_free -= GFS_META_CLUMP; + rgd->rd_rg.rg_freemeta += GFS_META_CLUMP; + + out: + for (x = 0; x < GFS_META_CLUMP; x++) + if (bh[x]) { + gfs_dwrite(sdp, bh[x], DIO_WAIT); + brelse(bh[x]); + } + kfree(bh); + + return error; +} + +/** + * gfs_blkalloc - Allocate a data block + * @ip: the inode to allocate the data block for + * @block: the block allocated + * + */ + +void +gfs_blkalloc(struct gfs_inode *ip, uint64_t *block) +{ + struct gfs_sbd *sdp = ip->i_sbd; + struct gfs_alloc *al = ip->i_alloc; + struct gfs_rgrpd *rgd = al->al_rgd; + uint32_t 
goal, blk; + int same; + + GFS_ASSERT_INODE(rgd, ip,); + + same = (rgd->rd_ri.ri_addr == ip->i_di.di_goal_rgrp); + goal = (same) ? ip->i_di.di_goal_dblk : rgd->rd_last_alloc_data; + + blk = blkalloc_internal(rgd, goal, + GFS_BLKST_FREE, GFS_BLKST_USED); + rgd->rd_last_alloc_data = blk; + + if (!same) { + ip->i_di.di_goal_rgrp = rgd->rd_ri.ri_addr; + ip->i_di.di_goal_mblk = 0; + } + ip->i_di.di_goal_dblk = blk; + + *block = rgd->rd_ri.ri_data1 + blk; + + GFS_ASSERT_RGRPD(rgd->rd_rg.rg_free, rgd,); + rgd->rd_rg.rg_free--; + + gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[0]); + gfs_rgrp_out(&rgd->rd_rg, rgd->rd_bh[0]->b_data); + + al->al_alloced_data++; + + gfs_trans_add_quota(sdp, +1, ip->i_di.di_uid, ip->i_di.di_gid); +} + +/** + * gfs_metaalloc - Allocate a metadata block to a file + * @ip: the file + * @block: the block allocated + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_metaalloc(struct gfs_inode *ip, uint64_t *block) +{ + struct gfs_sbd *sdp = ip->i_sbd; + struct gfs_alloc *al = ip->i_alloc; + struct gfs_rgrpd *rgd = al->al_rgd; + uint32_t goal, blk; + int same; + int error; + + GFS_ASSERT_INODE(rgd, ip,); + + same = (rgd->rd_ri.ri_addr == ip->i_di.di_goal_rgrp); + + if (!rgd->rd_rg.rg_freemeta) { + error = clump_alloc(rgd, &goal); + if (error) + return error; + + al->al_alloced_data += GFS_META_CLUMP; + } else + goal = (same) ? ip->i_di.di_goal_mblk : rgd->rd_last_alloc_meta; + + blk = blkalloc_internal(rgd, goal, + GFS_BLKST_FREEMETA, GFS_BLKST_USEDMETA); + rgd->rd_last_alloc_meta = blk; + + if (!same) { + ip->i_di.di_goal_rgrp = rgd->rd_ri.ri_addr; + ip->i_di.di_goal_dblk = 0; + } + ip->i_di.di_goal_mblk = blk; + + *block = rgd->rd_ri.ri_data1 + blk; + + GFS_ASSERT_RGRPD(rgd->rd_rg.rg_freemeta, rgd,); + rgd->rd_rg.rg_freemeta--; + rgd->rd_rg.rg_usedmeta++; + + gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[0]); + gfs_rgrp_out(&rgd->rd_rg, rgd->rd_bh[0]->b_data); + + al->al_alloced_meta++; + + gfs_trans_add_quota(sdp, +1, ip->i_di.di_uid, ip->i_di.di_gid); + + return 0; +} + +/** + * gfs_dialloc - Allocate a dinode + * @dip: the directory that the inode is going in + * @block: the block + * + * Returns: errno + */ + +int +gfs_dialloc(struct gfs_inode *dip, uint64_t *block) +{ + struct gfs_alloc *al = dip->i_alloc; + struct gfs_rgrpd *rgd = al->al_rgd; + uint32_t goal, blk; + int error = 0; + + GFS_ASSERT_INODE(rgd, dip,); + + if (rgd->rd_rg.rg_freemeta) + goal = rgd->rd_last_alloc_meta; + else { + error = clump_alloc(rgd, &goal); + if (error) + return error; + + al->al_alloced_data += GFS_META_CLUMP; + } + + blk = blkalloc_internal(rgd, goal, + GFS_BLKST_FREEMETA, GFS_BLKST_USEDMETA); + rgd->rd_last_alloc_meta = blk; + + *block = rgd->rd_ri.ri_data1 + blk; + + GFS_ASSERT_RGRPD(rgd->rd_rg.rg_freemeta, rgd,); + rgd->rd_rg.rg_freemeta--; + rgd->rd_rg.rg_useddi++; + + gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[0]); + gfs_rgrp_out(&rgd->rd_rg, rgd->rd_bh[0]->b_data); + + al->al_alloced_di++; + al->al_alloced_meta++; + + return error; +} + +/** + * gfs_blkfree - free a piece of data + * @ip: the inode these blocks are being free from + * @bstart: the start of a run of blocks to free + * @blen: the length of the block run + * + */ + +void +gfs_blkfree(struct gfs_inode *ip, uint64_t bstart, uint32_t blen) +{ + struct gfs_sbd *sdp = ip->i_sbd; + struct gfs_rgrpd *rgd; + + rgd = blkfree_internal(sdp, bstart, blen, GFS_BLKST_FREE); + + rgd->rd_rg.rg_free += blen; + + gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[0]); + gfs_rgrp_out(&rgd->rd_rg, rgd->rd_bh[0]->b_data); + + 
gfs_trans_add_quota(sdp, -(int64_t)blen,
+                            ip->i_di.di_uid,
+                            ip->i_di.di_gid);
+}
+
+/**
+ * gfs_metafree - free a piece of metadata
+ * @ip: the inode these blocks are being freed from
+ * @bstart: the start of a run of blocks to free
+ * @blen: the length of the block run
+ *
+ */
+
+void
+gfs_metafree(struct gfs_inode *ip, uint64_t bstart, uint32_t blen)
+{
+        struct gfs_sbd *sdp = ip->i_sbd;
+        struct gfs_rgrpd *rgd;
+
+        rgd = blkfree_internal(sdp, bstart, blen, GFS_BLKST_FREEMETA);
+
+        GFS_ASSERT_RGRPD(rgd->rd_rg.rg_usedmeta >= blen, rgd,);
+        rgd->rd_rg.rg_usedmeta -= blen;
+        rgd->rd_rg.rg_freemeta += blen;
+
+        gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[0]);
+        gfs_rgrp_out(&rgd->rd_rg, rgd->rd_bh[0]->b_data);
+
+        gfs_trans_add_quota(sdp, -(int64_t)blen,
+                            ip->i_di.di_uid,
+                            ip->i_di.di_gid);
+        gfs_wipe_buffers(ip, rgd, bstart, blen);
+}
+
+/**
+ * gfs_difree_uninit - free a dinode block
+ * @rgd: the resource group that contains the dinode
+ * @addr: the dinode address
+ *
+ */
+
+void
+gfs_difree_uninit(struct gfs_rgrpd *rgd, uint64_t addr)
+{
+        struct gfs_sbd *sdp = rgd->rd_sbd;
+        struct gfs_rgrpd *tmp_rgd;
+
+        tmp_rgd = blkfree_internal(sdp, addr, 1,
+                                   GFS_BLKST_FREEMETA);
+        GFS_ASSERT_RGRPD(rgd == tmp_rgd, rgd,);
+
+        GFS_ASSERT_RGRPD(rgd->rd_rg.rg_useddi, rgd,);
+        rgd->rd_rg.rg_useddi--;
+        rgd->rd_rg.rg_freemeta++;
+
+        gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[0]);
+        gfs_rgrp_out(&rgd->rd_rg, rgd->rd_bh[0]->b_data);
+}
+
+/**
+ * gfs_difree - free a dinode block and its quota allocation
+ * @rgd: the resource group that contains the dinode
+ * @ip: the inode representing the dinode to free
+ *
+ */
+
+void
+gfs_difree(struct gfs_rgrpd *rgd, struct gfs_inode *ip)
+{
+        gfs_difree_uninit(rgd, ip->i_num.no_addr);
+
+        gfs_trans_add_quota(ip->i_sbd, -1, ip->i_di.di_uid, ip->i_di.di_gid);
+        gfs_wipe_buffers(ip, rgd, ip->i_num.no_addr, 1);
+}
+
+/**
+ * gfs_rlist_add - add a RG to a list of RGs
+ * @sdp: the filesystem
+ * @rlist: the list of resource groups
+ * @block: the block
+ *
+ * Figure out what RG a block belongs to and add that RG to the list
+ *
+ */
+
+void
+gfs_rlist_add(struct gfs_sbd *sdp, struct gfs_rgrp_list *rlist, uint64_t block)
+{
+        struct gfs_rgrpd *rgd;
+        struct gfs_rgrpd **tmp;
+        unsigned int new_space;
+        unsigned int x;
+
+        GFS_ASSERT_SBD(rlist->rl_rgrps <= rlist->rl_space, sdp,);
+        GFS_ASSERT_SBD(!rlist->rl_ghs, sdp,);
+
+        rgd = gfs_blk2rgrpd(sdp, block);
+        GFS_ASSERT_SBD(rgd, sdp,
+                       printk("block = %"PRIu64"\n", block););
+
+        for (x = 0; x < rlist->rl_rgrps; x++)
+                if (rlist->rl_rgd[x] == rgd)
+                        return;
+
+        if (rlist->rl_rgrps == rlist->rl_space) {
+                new_space = rlist->rl_space + 10;
+
+                tmp = gmalloc(new_space * sizeof(struct gfs_rgrpd *));
+
+                if (rlist->rl_rgd) {
+                        memcpy(tmp, rlist->rl_rgd,
+                               rlist->rl_space * sizeof(struct gfs_rgrpd *));
+                        kfree(rlist->rl_rgd);
+                }
+
+                rlist->rl_space = new_space;
+                rlist->rl_rgd = tmp;
+        }
+
+        rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
+}
+
+/**
+ * gfs_rlist_alloc - after all RGs have been added to the rlist, allocate holders for them
+ * @rlist: the list of resource groups
+ * @state: the lock state to acquire the RG lock in
+ * @flags: the modifier flags for the holder structures
+ *
+ */
+
+void
+gfs_rlist_alloc(struct gfs_rgrp_list *rlist, unsigned int state, int flags)
+{
+        unsigned int x;
+
+        rlist->rl_ghs = gmalloc(rlist->rl_rgrps * sizeof(struct gfs_holder));
+        for (x = 0; x < rlist->rl_rgrps; x++)
+                gfs_holder_init(rlist->rl_rgd[x]->rd_gl,
+                                state, flags,
+                                &rlist->rl_ghs[x]);
+}
+
+/**
+ * gfs_rlist_free - free a resource group list
+ *
@list: the list of resource groups + * + */ + +void +gfs_rlist_free(struct gfs_rgrp_list *rlist) +{ + unsigned int x; + + if (rlist->rl_rgd) + kfree(rlist->rl_rgd); + + if (rlist->rl_ghs) { + for (x = 0; x < rlist->rl_rgrps; x++) + gfs_holder_uninit(&rlist->rl_ghs[x]); + kfree(rlist->rl_ghs); + } +} + +/** + * gfs_reclaim_metadata - reclaims unused metadata + * @sdp: the file system + * @stats: stats on reclaimation + * + * This function will look through the resource groups and + * free the unused metadata. + * + * Returns: 0 on success, -EXXX on error + */ + +int +gfs_reclaim_metadata(struct gfs_sbd *sdp, struct gfs_reclaim_stats *stats) +{ + struct gfs_holder ji_gh, ri_gh, rgd_gh, t_gh; + struct gfs_rgrpd *rgd; + struct gfs_rgrp *rg; + struct gfs_dinode *di; + struct gfs_inum next; + struct buffer_head *bh; + uint32_t flags; + uint32_t goal; + unsigned int x; + int error = 0; + + /* Acquire the jindex lock here so we don't deadlock with a + process writing the the jindex inode. :-( */ + + error = gfs_jindex_hold(sdp, &ji_gh); + if (error) + goto fail; + + error = gfs_rindex_hold(sdp, &ri_gh); + if (error) + goto fail_jindex_relse; + + for (rgd = gfs_rgrpd_get_first(sdp); + rgd; + rgd = gfs_rgrpd_get_next(rgd)) { + error = gfs_glock_nq_init(rgd->rd_gl, + LM_ST_EXCLUSIVE, GL_NOCACHE, + &rgd_gh); + if (error) + goto fail_rindex_relse; + + rgrp_verify(rgd); + + rg = &rgd->rd_rg; + + if (!rg->rg_freedi && !rg->rg_freemeta) { + gfs_glock_dq_uninit(&rgd_gh); + continue; + } + + gfs_mhc_zap(rgd); + gfs_depend_sync(rgd); + + error = gfs_lock_fs_check_clean(sdp, LM_ST_EXCLUSIVE, &t_gh); + if (error) + goto fail_gunlock_rg; + + error = gfs_trans_begin(sdp, rgd->rd_ri.ri_length, 0); + if (error) + goto fail_unlock_fs; + + next = rg->rg_freedi_list; + + for (x = rg->rg_freedi; x--;) { + GFS_ASSERT_RGRPD(next.no_formal_ino && + next.no_addr, rgd,); + + blkfree_internal(sdp, next.no_addr, 1, GFS_BLKST_FREE); + + error = gfs_dread(sdp, next.no_addr, rgd->rd_gl, + DIO_FORCE | DIO_START | DIO_WAIT, &bh); + if (error) + goto fail_end_trans; + + di = (struct gfs_dinode *)bh->b_data; + flags = di->di_flags; + flags = gfs32_to_cpu(flags); + GFS_ASSERT_RGRPD(flags & GFS_DIF_UNUSED, rgd,); + + gfs_inum_in(&next, (char *)&di->di_next_unused); + + brelse(bh); + + rg->rg_freedi--; + rg->rg_free++; + stats->rc_inodes++; + } + + GFS_ASSERT_RGRPD(!next.no_formal_ino && !next.no_addr, rgd,); + rg->rg_freedi_list = next; + + goal = 0; + for (x = rg->rg_freemeta; x--;) { + goal = blkalloc_internal(rgd, goal, + GFS_BLKST_FREEMETA, GFS_BLKST_FREE); + rg->rg_freemeta--; + rg->rg_free++; + stats->rc_metadata++; + } + + gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[0]); + gfs_rgrp_out(rg, rgd->rd_bh[0]->b_data); + + gfs_trans_end(sdp); + + gfs_glock_dq_uninit(&t_gh); + + gfs_glock_dq_uninit(&rgd_gh); + } + + gfs_glock_dq_uninit(&ri_gh); + + gfs_glock_dq_uninit(&ji_gh); + + return 0; + + fail_end_trans: + gfs_trans_end(sdp); + + fail_unlock_fs: + gfs_glock_dq_uninit(&t_gh); + + fail_gunlock_rg: + gfs_glock_dq_uninit(&rgd_gh); + + fail_rindex_relse: + gfs_glock_dq_uninit(&ri_gh); + + fail_jindex_relse: + gfs_glock_dq_uninit(&ji_gh); + + fail: + return error; +} diff -urN linux-orig/fs/gfs/rgrp.h linux-patched/fs/gfs/rgrp.h --- linux-orig/fs/gfs/rgrp.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/rgrp.h 2004-06-30 13:27:49.358707651 -0500 @@ -0,0 +1,75 @@ +/****************************************************************************** +******************************************************************************* 
+** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __RGRP_DOT_H__ +#define __RGRP_DOT_H__ + +void gfs_mhc_add(struct gfs_rgrpd *rgd, struct buffer_head **bh, + unsigned int num); +int gfs_mhc_fish(struct gfs_sbd *sdp, struct buffer_head *bh); +void gfs_mhc_zap(struct gfs_rgrpd *rgd); + +void gfs_depend_add(struct gfs_rgrpd *rgd, uint64_t formal_ino); +void gfs_depend_sync(struct gfs_rgrpd *rgd); + +struct gfs_rgrpd *gfs_blk2rgrpd(struct gfs_sbd *sdp, uint64_t blk); +struct gfs_rgrpd *gfs_rgrpd_get_first(struct gfs_sbd *sdp); +struct gfs_rgrpd *gfs_rgrpd_get_next(struct gfs_rgrpd *rgd); + +void gfs_clear_rgrpd(struct gfs_sbd *sdp); + +int gfs_rindex_hold(struct gfs_sbd *sdp, struct gfs_holder *ri_gh); + +int gfs_rgrp_read(struct gfs_rgrpd *rgd); +void gfs_rgrp_relse(struct gfs_rgrpd *rgd); + +void gfs_rgrp_lvb_fill(struct gfs_rgrpd *rgd); +int gfs_rgrp_lvb_init(struct gfs_rgrpd *rgd); + +struct gfs_alloc *gfs_alloc_get(struct gfs_inode *ip); +void gfs_alloc_put(struct gfs_inode *ip); + +int gfs_inplace_reserve_i(struct gfs_inode *ip, + char *file, unsigned int line); +#define gfs_inplace_reserve(ip) \ +gfs_inplace_reserve_i((ip), __FILE__, __LINE__) + +void gfs_inplace_release(struct gfs_inode *ip); + +unsigned char gfs_get_block_type(struct gfs_rgrpd *rgd, uint64_t block); + +void gfs_blkalloc(struct gfs_inode *ip, uint64_t *block); +int gfs_metaalloc(struct gfs_inode *ip, uint64_t *block); +int gfs_dialloc(struct gfs_inode *dip, uint64_t *block); + +void gfs_blkfree(struct gfs_inode *ip, uint64_t bstart, uint32_t blen); +void gfs_metafree(struct gfs_inode *ip, uint64_t bstart, uint32_t blen); +void gfs_difree_uninit(struct gfs_rgrpd *rgd, uint64_t addr); +void gfs_difree(struct gfs_rgrpd *rgd, struct gfs_inode *ip); + +struct gfs_rgrp_list { + unsigned int rl_rgrps; + unsigned int rl_space; + struct gfs_rgrpd **rl_rgd; + struct gfs_holder *rl_ghs; +}; + +void gfs_rlist_add(struct gfs_sbd *sdp, struct gfs_rgrp_list *rlist, + uint64_t block); +void gfs_rlist_alloc(struct gfs_rgrp_list *rlist, unsigned int state, + int flags); +void gfs_rlist_free(struct gfs_rgrp_list *rlist); + +int gfs_reclaim_metadata(struct gfs_sbd *sdp, struct gfs_reclaim_stats *stats); + +#endif /* __RGRP_DOT_H__ */ diff -urN linux-orig/fs/gfs/super.c linux-patched/fs/gfs/super.c --- linux-orig/fs/gfs/super.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/super.c 2004-06-30 13:27:49.359707419 -0500 @@ -0,0 +1,1035 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
+** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include "gfs.h" +#include "dio.h" +#include "file.h" +#include "format.h" +#include "glock.h" +#include "glops.h" +#include "inode.h" +#include "log.h" +#include "quota.h" +#include "recovery.h" +#include "rgrp.h" +#include "super.h" +#include "unlinked.h" + +/** + * gfs_init_tune_data - Fill in the struct gfs_tune (sd_tune) in the struct gfs_sbd. + * @sdp: the filesystem + * + */ + +void +gfs_init_tune_data(struct gfs_sbd *sdp) +{ + struct gfs_tune *gt = &sdp->sd_tune; + + gt->gt_tune_version = GFS_TUNE_VERSION; + + gt->gt_ilimit1 = 100; + gt->gt_ilimit1_tries = 3; + gt->gt_ilimit1_min = 1; + gt->gt_ilimit2 = 500; + gt->gt_ilimit2_tries = 10; + gt->gt_ilimit2_min = 3; + gt->gt_demote_secs = 300; + gt->gt_incore_log_blocks = 1024; + gt->gt_jindex_refresh_secs = 60; + gt->gt_depend_secs = 60; + gt->gt_scand_secs = 5; + gt->gt_recoverd_secs = 60; + gt->gt_logd_secs = 1; + gt->gt_quotad_secs = 5; + gt->gt_inoded_secs = 15; + gt->gt_quota_simul_sync = 64; + gt->gt_quota_warn_period = 10; + gt->gt_atime_quantum = 3600; + gt->gt_quota_quantum = 60; + gt->gt_quota_scale_num = 1; + gt->gt_quota_scale_den = 1; + gt->gt_quota_enforce = 1; + gt->gt_quota_account = 1; + gt->gt_new_files_jdata = 0; + gt->gt_new_files_directio = 0; + gt->gt_max_atomic_write = 4 << 20; + gt->gt_max_readahead = 1 << 18; + gt->gt_lockdump_size = 131072; + gt->gt_stall_secs = 600; + gt->gt_complain_secs = 10; + gt->gt_reclaim_limit = 5000; + gt->gt_entries_per_readdir = 32; + gt->gt_prefetch_secs = 10; + gt->gt_statfs_slots = 64; + gt->gt_max_mhc = 10000; +} + +/** + * gfs_check_sb - Check superblock + * @sdp: the filesystem + * @sb: The superblock + * @silent: Don't print a message if the check fails + * + * Checks the version code of the FS is one that we understand how to + * read and that the sizes of the various on-disk structures have not + * changed. + */ + +int +gfs_check_sb(struct gfs_sbd *sdp, struct gfs_sb *sb, int silent) +{ + unsigned int x; + + if (sb->sb_header.mh_magic != GFS_MAGIC || + sb->sb_header.mh_type != GFS_METATYPE_SB) { + if (!silent) + printk("GFS: not a GFS filesystem\n"); + return -EINVAL; + } + + /* If format numbers match exactly, we're done. 
*/ + + if (sb->sb_fs_format == GFS_FORMAT_FS && + sb->sb_multihost_format == GFS_FORMAT_MULTI) + return 0; + + if (sb->sb_fs_format != GFS_FORMAT_FS) { + for (x = 0; gfs_old_fs_formats[x]; x++) + if (gfs_old_fs_formats[x] == sb->sb_fs_format) + break; + + if (!gfs_old_fs_formats[x]) { + printk("GFS: code version (%u, %u) is incompatible with ondisk format (%u, %u)\n", + GFS_FORMAT_FS, GFS_FORMAT_MULTI, + sb->sb_fs_format, sb->sb_multihost_format); + printk("GFS: I don't know how to upgrade this FS\n"); + return -EINVAL; + } + } + + if (sb->sb_multihost_format != GFS_FORMAT_MULTI) { + for (x = 0; gfs_old_multihost_formats[x]; x++) + if (gfs_old_multihost_formats[x] == sb->sb_multihost_format) + break; + + if (!gfs_old_multihost_formats[x]) { + printk("GFS: code version (%u, %u) is incompatible with ondisk format (%u, %u)\n", + GFS_FORMAT_FS, GFS_FORMAT_MULTI, + sb->sb_fs_format, sb->sb_multihost_format); + printk("GFS: I don't know how to upgrade this FS\n"); + return -EINVAL; + } + } + + if (!sdp->sd_args.ar_upgrade) { + printk("GFS: code version (%u, %u) is incompatible with ondisk format (%u, %u)\n", + GFS_FORMAT_FS, GFS_FORMAT_MULTI, + sb->sb_fs_format, sb->sb_multihost_format); + printk("GFS: Use the \"upgrade\" mount option to upgrade the FS\n"); + printk("GFS: See the manual for more details\n"); + return -EINVAL; + } + + return 0; +} + +/** + * gfs_read_sb - Read super block + * @sdp: The GFS superblock + * @gl: the glock for the superblock (assumed to be held) + * @silent: Don't print message if mount fails + * + */ + +int +gfs_read_sb(struct gfs_sbd *sdp, struct gfs_glock *gl, int silent) +{ + struct buffer_head *bh; + uint32_t hash_blocks, ind_blocks, leaf_blocks; + uint32_t tmp_blocks; + uint64_t space = 0; + unsigned int x; + int error; + + error = gfs_dread(sdp, GFS_SB_ADDR >> sdp->sd_fsb2bb_shift, + gl, DIO_FORCE | DIO_START | DIO_WAIT, &bh); + if (error) { + if (!silent) + printk("GFS: fsid=%s: can't read superblock\n", + sdp->sd_fsname); + return error; + } + + GFS_ASSERT_SBD(sizeof(struct gfs_sb) <= bh->b_size, sdp,); + + gfs_sb_in(&sdp->sd_sb, bh->b_data); + + brelse(bh); + + error = gfs_check_sb(sdp, &sdp->sd_sb, silent); + if (error) + return error; + + sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - + GFS_BASIC_BLOCK_SHIFT; + sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; + sdp->sd_diptrs = (sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode)) / + sizeof(uint64_t); + sdp->sd_inptrs = (sdp->sd_sb.sb_bsize - sizeof(struct gfs_indirect)) / + sizeof(uint64_t); + sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs_meta_header); + sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2; + sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1; + sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(uint64_t); + + /* Compute maximum reservation required to add a entry to a directory */ + + hash_blocks = DIV_RU(sizeof(uint64_t) * (1 << GFS_DIR_MAX_DEPTH), + sdp->sd_jbsize); + + ind_blocks = 0; + for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) { + tmp_blocks = DIV_RU(tmp_blocks, sdp->sd_inptrs); + ind_blocks += tmp_blocks; + } + + leaf_blocks = 2 + GFS_DIR_MAX_DEPTH; + + sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks; + + sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode); + sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs; + for (x = 2;; x++) { + uint64_t d; + uint32_t m; + space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs; + d = space; + m = do_div(d, sdp->sd_inptrs); + + if (d != sdp->sd_heightsize[x - 1] || m) + break; + 
sdp->sd_heightsize[x] = space; + } + sdp->sd_max_height = x; + GFS_ASSERT_SBD(sdp->sd_max_height <= GFS_MAX_META_HEIGHT, sdp,); + + sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode); + sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs; + for (x = 2;; x++) { + uint64_t d; + uint32_t m; + space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs; + d = space; + m = do_div(d, sdp->sd_inptrs); + + if (d != sdp->sd_jheightsize[x - 1] || m) + break; + sdp->sd_jheightsize[x] = space; + } + sdp->sd_max_jheight = x; + GFS_ASSERT_SBD(sdp->sd_max_jheight <= GFS_MAX_META_HEIGHT, sdp,); + + return 0; +} + +/** + * gfs_do_upgrade - upgrade a filesystem + * @sdp: The GFS superblock + * + */ + +int +gfs_do_upgrade(struct gfs_sbd *sdp, struct gfs_glock *sb_gl) +{ + struct gfs_holder ji_gh, t_gh, j_gh; + struct gfs_log_header lh; + struct buffer_head *bh; + unsigned int x; + int error; + + /* If format numbers match exactly, we're done. */ + + if (sdp->sd_sb.sb_fs_format == GFS_FORMAT_FS && + sdp->sd_sb.sb_multihost_format == GFS_FORMAT_MULTI) { + printk("GFS: fsid=%s: no upgrade necessary\n", + sdp->sd_fsname); + sdp->sd_args.ar_upgrade = FALSE; + return 0; + } + + error = gfs_jindex_hold(sdp, &ji_gh); + if (error) + goto fail; + + error = gfs_glock_nq_init(sdp->sd_trans_gl, + LM_ST_EXCLUSIVE, GL_NOCACHE, + &t_gh); + if (error) + goto fail_ji_relse; + + if (test_bit(SDF_ROFS, &sdp->sd_flags)) { + printk("GFS: fsid=%s: can't upgrade: read-only FS\n", + sdp->sd_fsname); + error = -EROFS; + goto fail_gunlock_tr; + } + + for (x = 0; x < sdp->sd_journals; x++) { + error = gfs_glock_nq_num(sdp, + sdp->sd_jindex[x].ji_addr, + &gfs_meta_glops, LM_ST_SHARED, + LM_FLAG_TRY | GL_NOCACHE, &j_gh); + switch (error) { + case 0: + break; + + case GLR_TRYFAILED: + printk("GFS: fsid=%s: journal %u is busy\n", + sdp->sd_fsname, x); + error = -EBUSY; + + default: + goto fail_gunlock_tr; + } + + error = gfs_find_jhead(sdp, &sdp->sd_jindex[x], + j_gh.gh_gl, &lh); + + gfs_glock_dq_uninit(&j_gh); + + if (error) + goto fail_gunlock_tr; + + if (!(lh.lh_flags & GFS_LOG_HEAD_UNMOUNT) || lh.lh_last_dump) { + printk("GFS: fsid=%s: journal %u is busy\n", + sdp->sd_fsname, x); + error = -EBUSY; + goto fail_gunlock_tr; + } + } + + /* We don't need to journal this change because we're changing + only one sector of one block. We definitely don't want to have + the journaling code running at this point. 
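
The height tables computed in gfs_read_sb() above grow geometrically by the indirect-pointer fanout per metadata-tree level and stop at the first 64-bit overflow, detected by dividing the product back (the kernel uses do_div() for the same test). A standalone sketch of that computation, with an assumed 4KB block geometry standing in for the real superblock fields:

#include <stdint.h>
#include <stdio.h>

#define MAX_HEIGHT 10

int main(void)
{
        uint64_t size[MAX_HEIGHT];
        uint64_t inptrs = 509;  /* assumed fanout for a 4KB block */
        unsigned int x;

        size[0] = 4096 - 232;   /* data that fits in the dinode itself */
        size[1] = 4096 * 488;   /* direct pointers' worth of data */

        for (x = 2; x < MAX_HEIGHT; x++) {
                uint64_t space = size[x - 1] * inptrs;

                /* overflow check: multiplying then dividing back must
                   return the original value */
                if (space / inptrs != size[x - 1])
                        break;
                size[x] = space;
        }

        printf("max height = %u (top level holds %llu bytes)\n",
               x, (unsigned long long)size[x - 1]);
        return 0;
}
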
*/ + + error = gfs_dread(sdp, GFS_SB_ADDR >> sdp->sd_fsb2bb_shift, sb_gl, + DIO_START | DIO_WAIT, &bh); + if (error) + goto fail_gunlock_tr; + + gfs_sb_in(&sdp->sd_sb, bh->b_data); + + error = gfs_check_sb(sdp, &sdp->sd_sb, FALSE); + GFS_ASSERT_SBD(!error, sdp,); + + sdp->sd_sb.sb_fs_format = GFS_FORMAT_FS; + sdp->sd_sb.sb_multihost_format = GFS_FORMAT_MULTI; + + gfs_sb_out(&sdp->sd_sb, bh->b_data); + + set_bit(GLF_DIRTY, &sb_gl->gl_flags); + error = gfs_dwrite(sdp, bh, DIO_DIRTY | DIO_START | DIO_WAIT); + + brelse(bh); + + gfs_glock_dq_uninit(&t_gh); + + gfs_glock_dq_uninit(&ji_gh); + + if (!error) { + printk("GFS: fsid=%s: upgrade successful\n", + sdp->sd_fsname); + sdp->sd_args.ar_upgrade = FALSE; + } + + return error; + + fail_gunlock_tr: + gfs_glock_dq_uninit(&t_gh); + + fail_ji_relse: + gfs_glock_dq_uninit(&ji_gh); + + fail: + if (error == -EBUSY) + printk("GFS: fsid=%s: can't upgrade: the FS is still busy or contains dirty journals\n", + sdp->sd_fsname); + else + printk("GFS: fsid=%s: can't upgrade: %d\n", + sdp->sd_fsname, error); + + return error; +} + +/** + * clear_journalsi - Clear all the journal index information (without locking) + * @sdp: The GFS superblock + * + */ + +static void +clear_journalsi(struct gfs_sbd *sdp) +{ + if (sdp->sd_jindex) { + kfree(sdp->sd_jindex); + sdp->sd_jindex = NULL; + } + sdp->sd_journals = 0; +} + +/** + * gfs_clear_journals - Clear all the journal index information + * @sdp: The GFS superblock + * + */ + +void +gfs_clear_journals(struct gfs_sbd *sdp) +{ + down(&sdp->sd_jindex_lock); + clear_journalsi(sdp); + up(&sdp->sd_jindex_lock); +} + +/** + * gfs_ji_update - Update the journal index information + * @ip: The journal index inode + * + * Returns: 0 on success, error code otherwise + */ + +static int +gfs_ji_update(struct gfs_inode *ip) +{ + struct gfs_sbd *sdp = ip->i_sbd; + char buf[sizeof(struct gfs_jindex)]; + unsigned int j; + int error; + + GFS_ASSERT_SBD(!do_mod(ip->i_di.di_size, sizeof(struct gfs_jindex)), + sdp,); + + clear_journalsi(sdp); + + sdp->sd_jindex = gmalloc(ip->i_di.di_size); + memset(sdp->sd_jindex, 0, ip->i_di.di_size); + + for (j = 0;; j++) { + error = gfs_internal_read(ip, buf, + j * sizeof(struct gfs_jindex), + sizeof(struct gfs_jindex)); + if (!error) + break; + if (error != sizeof(struct gfs_jindex)) { + if (error > 0) + error = -EIO; + goto fail; + } + + gfs_jindex_in(sdp->sd_jindex + j, buf); + } + + GFS_ASSERT_SBD(j * sizeof(struct gfs_jindex) == ip->i_di.di_size, + sdp,); + + sdp->sd_journals = j; + sdp->sd_jiinode_vn = ip->i_gl->gl_vn; + + return 0; + + fail: + clear_journalsi(sdp); + return error; +} + +/** + * gfs_jindex_hold - Grab a lock on the jindex + * @sdp: The GFS superblock + * @ji_gh: the holder for the jindex glock + * + * This is very similar to the gfs_rindex_hold() function, except that + * in general we hold the jindex lock for longer periods of time and + * we grab it far less frequently (in general) then the rgrp lock. 
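
gfs_jindex_hold() below, like gfs_rindex_hold() earlier, revalidates a cached table only when the glock version number has moved on, re-checking under a semaphore so a single holder does the rebuild. A userspace model of that double-checked pattern; the glock and version plumbing is simplified to a mutex and two counters:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int table_vn;           /* version the cached table reflects */
static unsigned int glock_vn = 1;       /* bumped when the contents change */

static void rebuild_table(void) { printf("rebuilding table\n"); }

static void hold_index(void)
{
        /* cheap unlocked check first... */
        if (table_vn != glock_vn) {
                pthread_mutex_lock(&table_lock);
                /* ...re-checked under the lock, as in the kernel code */
                if (table_vn != glock_vn) {
                        rebuild_table();
                        table_vn = glock_vn;
                }
                pthread_mutex_unlock(&table_lock);
        }
}

int main(void)
{
        hold_index();   /* rebuilds once */
        hold_index();   /* version unchanged: no work */
        return 0;
}
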
+ * + * Returns: 0 on success, error code otherwise + */ + +int +gfs_jindex_hold(struct gfs_sbd *sdp, struct gfs_holder *ji_gh) +{ + struct gfs_inode *ip = sdp->sd_jiinode; + struct gfs_glock *gl = ip->i_gl; + int error; + + error = gfs_glock_nq_init(gl, LM_ST_SHARED, 0, ji_gh); + if (error) + return error; + + if (sdp->sd_jiinode_vn != gl->gl_vn) { + down(&sdp->sd_jindex_lock); + if (sdp->sd_jiinode_vn != gl->gl_vn) + error = gfs_ji_update(ip); + up(&sdp->sd_jindex_lock); + } + + if (error) + gfs_glock_dq_uninit(ji_gh); + + return error; +} + +/** + * gfs_get_jiinode - Read in the jindex inode for the superblock + * @sdp: The GFS superblock + * + * Returns: 0 on success, error code otherwise + */ + +int +gfs_get_jiinode(struct gfs_sbd *sdp) +{ + struct gfs_holder ji_gh; + int error; + + error = gfs_glock_nq_num(sdp, + sdp->sd_sb.sb_jindex_di.no_formal_ino, + &gfs_inode_glops, + LM_ST_SHARED, GL_LOCAL_EXCL, + &ji_gh); + if (error) + return error; + + error = gfs_inode_get(ji_gh.gh_gl, &sdp->sd_sb.sb_jindex_di, + CREATE, &sdp->sd_jiinode); + if (!error) { + sdp->sd_jiinode_vn = ji_gh.gh_gl->gl_vn - 1; + set_bit(GLF_STICKY, &ji_gh.gh_gl->gl_flags); + } + + gfs_glock_dq_uninit(&ji_gh); + + return error; +} + +/** + * gfs_get_riinode - Read in the rindex inode for the superblock + * @sdp: The GFS superblock + * + * Returns: 0 on success, error code otherwise + */ + +int +gfs_get_riinode(struct gfs_sbd *sdp) +{ + struct gfs_holder ri_gh; + int error; + + error = gfs_glock_nq_num(sdp, + sdp->sd_sb.sb_rindex_di.no_formal_ino, + &gfs_inode_glops, + LM_ST_SHARED, GL_LOCAL_EXCL, + &ri_gh); + if (error) + return error; + + error = gfs_inode_get(ri_gh.gh_gl, &sdp->sd_sb.sb_rindex_di, + CREATE, &sdp->sd_riinode); + if (!error) { + sdp->sd_riinode_vn = ri_gh.gh_gl->gl_vn - 1; + set_bit(GLF_STICKY, &ri_gh.gh_gl->gl_flags); + } + + gfs_glock_dq_uninit(&ri_gh); + + return error; +} + +/** + * gfs_get_rootinode - Read in the root inode + * @sdp: The GFS superblock + * + * Returns: 0 on success, error code otherwise + */ + +int +gfs_get_rootinode(struct gfs_sbd *sdp) +{ + struct gfs_holder i_gh; + int error; + + error = gfs_glock_nq_num(sdp, + sdp->sd_sb.sb_root_di.no_formal_ino, + &gfs_inode_glops, + LM_ST_SHARED, GL_LOCAL_EXCL, + &i_gh); + if (error) + return error; + + error = gfs_inode_get(i_gh.gh_gl, &sdp->sd_sb.sb_root_di, + CREATE, &sdp->sd_rooti); + + gfs_glock_dq_uninit(&i_gh); + + return error; +} + +/** + * gfs_get_qinode - Read in the quota inode + * @sdp: The GFS superblock + * + * Returns: 0 on success, error code otherwise + */ + +int +gfs_get_qinode(struct gfs_sbd *sdp) +{ + struct gfs_holder i_gh; + int error; + + if (!sdp->sd_sb.sb_quota_di.no_formal_ino) { + error = gfs_alloc_qinode(sdp); + if (error) + return error; + } + + error = gfs_glock_nq_num(sdp, + sdp->sd_sb.sb_quota_di.no_formal_ino, + &gfs_inode_glops, + LM_ST_SHARED, GL_LOCAL_EXCL, + &i_gh); + if (error) + return error; + + error = gfs_inode_get(i_gh.gh_gl, &sdp->sd_sb.sb_quota_di, + CREATE, &sdp->sd_qinode); + + gfs_glock_dq_uninit(&i_gh); + + return error; +} + +/** + * gfs_get_linode - Read in the quota inode + * @sdp: The GFS superblock + * + * Returns: 0 on success, error code otherwise + */ + +int +gfs_get_linode(struct gfs_sbd *sdp) +{ + struct gfs_holder i_gh; + int error; + + if (!sdp->sd_sb.sb_license_di.no_formal_ino) { + error = gfs_alloc_linode(sdp); + if (error) + return error; + } + + error = gfs_glock_nq_num(sdp, + sdp->sd_sb.sb_license_di.no_formal_ino, + &gfs_inode_glops, + LM_ST_SHARED, GL_LOCAL_EXCL, + 
&i_gh); + if (error) + return error; + + error = gfs_inode_get(i_gh.gh_gl, &sdp->sd_sb.sb_license_di, + CREATE, &sdp->sd_linode); + + gfs_glock_dq_uninit(&i_gh); + + return error; +} + +/** + * gfs_make_fs_rw - Turn a RO FS into a RW one + * @sdp: the filesystem + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_make_fs_rw(struct gfs_sbd *sdp) +{ + struct gfs_glock *j_gl = sdp->sd_journal_gh.gh_gl; + struct gfs_holder t_gh; + struct gfs_log_header head; + int error; + + error = gfs_glock_nq_init(sdp->sd_trans_gl, + LM_ST_SHARED, + GL_LOCAL_EXCL | GL_EXACT, + &t_gh); + if (error) + return error; + + j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA); + + error = gfs_find_jhead(sdp, &sdp->sd_jdesc, j_gl, &head); + if (error) + goto fail; + + GFS_ASSERT_SBD(head.lh_flags & GFS_LOG_HEAD_UNMOUNT, sdp,); + + /* Initialize some head of the log stuff */ + sdp->sd_sequence = head.lh_sequence; + sdp->sd_log_head = head.lh_first + 1; + + error = gfs_recover_dump(sdp); + if (error) + goto fail; + + set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); + clear_bit(SDF_ROFS, &sdp->sd_flags); + + set_bit(GLF_DIRTY, &j_gl->gl_flags); + gfs_log_dump(sdp, TRUE); + + gfs_glock_dq_uninit(&t_gh); + + return 0; + + fail: + t_gh.gh_flags |= GL_NOCACHE; + gfs_glock_dq_uninit(&t_gh); + + return error; +} + +/** + * gfs_make_fs_ro - Turn a RW FS into a RO one + * @sdp: the filesystem + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_make_fs_ro(struct gfs_sbd *sdp) +{ + struct gfs_holder t_gh; + int error; + + error = gfs_glock_nq_init(sdp->sd_trans_gl, + LM_ST_SHARED, + GL_LOCAL_EXCL | GL_EXACT | GL_NOCACHE, + &t_gh); + if (error) + return error; + + gfs_sync_meta(sdp); + gfs_log_dump(sdp, TRUE); + + error = gfs_log_shutdown(sdp); + if (error) + gfs_io_error(sdp); + + set_bit(SDF_ROFS, &sdp->sd_flags); + clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); + + gfs_glock_dq_uninit(&t_gh); + + gfs_unlinked_cleanup(sdp); + gfs_quota_cleanup(sdp); + + return error; +} + +/** + * stat_gfs_async - Stat a filesystem using asynchronous locking + * @sdp: the filesystem + * @usage: the usage info that will be returned + * @interruptible: TRUE if we should look for signals. + * + * Any error (other than a signal) will cause this routine to fall back + * to the synchronous version. + * + * This really shouldn't busy wait like this. 
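
stat_gfs_async() below keeps a bounded number of lock requests in flight: it polls each slot, folds completed results into a running total, and refills freed slots until the rgrp list is exhausted. A simplified userspace model of that loop; poll_done() and result() stand in for the async glock API, and completion is immediate here:

#include <stdbool.h>
#include <stdio.h>

#define SLOTS 4
#define ITEMS 10

struct slot { bool busy; int item; };

static bool poll_done(struct slot *s) { return s->item >= 0; }
static int result(struct slot *s) { return s->item; }

int main(void)
{
        struct slot slots[SLOTS] = {{ false, 0 }};
        int next = 0, total = 0, x;
        bool done;

        do {
                done = true;
                for (x = 0; x < SLOTS; x++) {
                        struct slot *s = &slots[x];

                        if (s->busy && poll_done(s)) {
                                total += result(s);     /* harvest */
                                s->busy = false;
                        }
                        if (s->busy)
                                done = false;
                        else if (next < ITEMS) {        /* refill slot */
                                s->item = next++;
                                s->busy = true;
                                done = false;
                        }
                }
                /* the kernel yield()s here instead of spinning hard */
        } while (!done);

        printf("total = %d\n", total);  /* 0+1+...+9 = 45 */
        return 0;
}
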
+ * + * Returns: 0 on success, -EXXX on failure + */ + +static int +stat_gfs_async(struct gfs_sbd *sdp, struct gfs_usage *usage, int interruptible) +{ + struct gfs_rgrpd *rgd_next = gfs_rgrpd_get_first(sdp), *rgd; + struct gfs_holder *gha, *gh; + struct gfs_rgrp_lvb *rb; + unsigned int slots = sdp->sd_tune.gt_statfs_slots; + unsigned int x; + int done; + int error = 0, err; + + gha = gmalloc(slots * sizeof(struct gfs_holder)); + memset(gha, 0, slots * sizeof(struct gfs_holder)); + + for (;;) { + done = TRUE; + + for (x = 0; x < slots; x++) { + gh = gha + x; + + if (gh->gh_gl && gfs_glock_poll(gh)) { + err = gfs_glock_wait(gh); + if (err) { + gfs_holder_uninit(gh); + error = err; + } else { + rgd = gl2rgd(gh->gh_gl); + + rb = (struct gfs_rgrp_lvb *)rgd->rd_gl->gl_lvb; + if (gfs32_to_cpu(rb->rb_magic) == GFS_MAGIC && + !test_bit(GLF_LVB_INVALID, &rgd->rd_gl->gl_flags)) { + usage->gu_total_blocks += rgd->rd_ri.ri_data; + usage->gu_free += gfs32_to_cpu(rb->rb_free); + usage->gu_used_dinode += gfs32_to_cpu(rb->rb_useddi); + usage->gu_free_dinode += gfs32_to_cpu(rb->rb_freedi); + usage->gu_used_meta += gfs32_to_cpu(rb->rb_usedmeta); + usage->gu_free_meta += gfs32_to_cpu(rb->rb_freemeta); + } else + error = -EINVAL; + + gfs_glock_dq_uninit(gh); + } + } + + if (gh->gh_gl) + done = FALSE; + else if (rgd_next && !error) { + gfs_glock_nq_init(rgd_next->rd_gl, + LM_ST_SHARED, + GL_LOCAL_EXCL | GL_SKIP | GL_ASYNC, + gh); + rgd_next = gfs_rgrpd_get_next(rgd_next); + done = FALSE; + } + + if (interruptible && signal_pending(current)) + error = -ERESTARTSYS; + } + + if (done) + break; + + yield(); + } + + kfree(gha); + + return error; +} + +/** + * gfs_stat_gfs - Do a statfs + * @sdp: the filesystem + * @usage: the usage structure + * @interruptible: Stop if there is a signal pending + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_stat_gfs(struct gfs_sbd *sdp, struct gfs_usage *usage, int interruptible) +{ + struct gfs_holder ri_gh, rgd_gh; + struct gfs_rgrpd *rgd; + struct gfs_rgrp_lvb *rb; + int error; + + memset(usage, 0, sizeof(struct gfs_usage)); + usage->gu_block_size = sdp->sd_sb.sb_bsize; + + error = gfs_rindex_hold(sdp, &ri_gh); + if (error) + return error; + + if (GFS_ASYNC_LM(sdp)) { + error = stat_gfs_async(sdp, usage, interruptible); + if (!error || error == -ERESTARTSYS) + goto out; + + memset(usage, 0, sizeof(struct gfs_usage)); + usage->gu_block_size = sdp->sd_sb.sb_bsize; + } + + for (rgd = gfs_rgrpd_get_first(sdp); + rgd; + rgd = gfs_rgrpd_get_next(rgd)) { + for (;;) { + error = gfs_glock_nq_init(rgd->rd_gl, + LM_ST_SHARED, + GL_LOCAL_EXCL | GL_SKIP, + &rgd_gh); + if (error) + goto out; + + rb = (struct gfs_rgrp_lvb *)rgd->rd_gl->gl_lvb; + if (gfs32_to_cpu(rb->rb_magic) == GFS_MAGIC && + !test_bit(GLF_LVB_INVALID, &rgd->rd_gl->gl_flags)) { + usage->gu_total_blocks += rgd->rd_ri.ri_data; + usage->gu_free += gfs32_to_cpu(rb->rb_free); + usage->gu_used_dinode += gfs32_to_cpu(rb->rb_useddi); + usage->gu_free_dinode += gfs32_to_cpu(rb->rb_freedi); + usage->gu_used_meta += gfs32_to_cpu(rb->rb_usedmeta); + usage->gu_free_meta += gfs32_to_cpu(rb->rb_freemeta); + + gfs_glock_dq_uninit(&rgd_gh); + + break; + } else { + gfs_glock_dq_uninit(&rgd_gh); + + error = gfs_rgrp_lvb_init(rgd); + if (error) + goto out; + } + } + + if (interruptible && signal_pending(current)) { + error = -ERESTARTSYS; + goto out; + } + } + + out: + gfs_glock_dq_uninit(&ri_gh); + + return error; +} + +/** + * gfs_lock_fs_check_clean - Stop all writes to the FS and check that all journals are clean + * @sdp: 
the file system + * @state: the state to put the transaction lock into + * @t_gh: the hold on the transaction lock + * + * Returns: 0 on success, -EXXX on error + */ + +int +gfs_lock_fs_check_clean(struct gfs_sbd *sdp, unsigned int state, + struct gfs_holder *t_gh) +{ + struct gfs_holder ji_gh, cl_gh; + struct gfs_log_header lh; + unsigned int x; + int error; + + error = gfs_jindex_hold(sdp, &ji_gh); + if (error) + return error; + + error = gfs_glock_nq_num(sdp, + GFS_CRAP_LOCK, &gfs_meta_glops, + LM_ST_SHARED, GL_NOCACHE, + &cl_gh); + if (error) + goto fail; + + error = gfs_glock_nq_init(sdp->sd_trans_gl, state, + LM_FLAG_PRIORITY | GL_EXACT | GL_NOCACHE, + t_gh); + if (error) + goto fail_gunlock_craplock; + + for (x = 0; x < sdp->sd_journals; x++) { + error = gfs_find_jhead(sdp, &sdp->sd_jindex[x], + cl_gh.gh_gl, &lh); + if (error) + goto fail_gunlock_trans; + + if (!(lh.lh_flags & GFS_LOG_HEAD_UNMOUNT)) { + error = -EBUSY; + goto fail_gunlock_trans; + } + } + + gfs_glock_dq_uninit(&cl_gh); + gfs_glock_dq_uninit(&ji_gh); + + return 0; + + fail_gunlock_trans: + gfs_glock_dq_uninit(t_gh); + + fail_gunlock_craplock: + gfs_glock_dq_uninit(&cl_gh); + + fail: + gfs_glock_dq_uninit(&ji_gh); + + return error; +} + +/** + * gfs_freeze_fs - freezes the file system + * @sdp: the file system + * + * This function flushes data and meta data for all machines by + * acquiring the transaction lock exclusively. All journals are + * also verified to be in a clean state. + * + * Returns: 0 on success, -EXXX on error + */ + +int +gfs_freeze_fs(struct gfs_sbd *sdp) +{ + int error = 0; + + down(&sdp->sd_freeze_lock); + + if (!sdp->sd_freeze_count++) { + error = gfs_lock_fs_check_clean(sdp, LM_ST_DEFERRED, + &sdp->sd_freeze_gh); + if (error) + sdp->sd_freeze_count--; + else + sdp->sd_freeze_gh.gh_owner = NULL; + } + + up(&sdp->sd_freeze_lock); + + return error; +} + +/** + * gfs_unfreeze_fs - unfreezes the file system + * @sdp: the file system + * + * This function allows the file system to proceed by unlocking + * the exclusively held transaction lock. Other GFS nodes are + * now free to acquire the lock shared and go on with their lives. + * + */ + +void +gfs_unfreeze_fs(struct gfs_sbd *sdp) +{ + down(&sdp->sd_freeze_lock); + + if (sdp->sd_freeze_count && !--sdp->sd_freeze_count) + gfs_glock_dq_uninit(&sdp->sd_freeze_gh); + + up(&sdp->sd_freeze_lock); +} diff -urN linux-orig/fs/gfs/super.h linux-patched/fs/gfs/super.h --- linux-orig/fs/gfs/super.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/super.h 2004-06-30 13:27:49.359707419 -0500 @@ -0,0 +1,53 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2.
+** +******************************************************************************* +******************************************************************************/ + +#ifndef __SUPER_DOT_H__ +#define __SUPER_DOT_H__ + +void gfs_init_tune_data(struct gfs_sbd *sdp); + +int gfs_check_sb(struct gfs_sbd *sdp, struct gfs_sb *sb, int silent); +int gfs_read_sb(struct gfs_sbd *sdp, struct gfs_glock *gl, int silent); +int gfs_do_upgrade(struct gfs_sbd *sdp, struct gfs_glock *gl_sb); + +static __inline__ unsigned int +gfs_num_journals(struct gfs_sbd *sdp) +{ + unsigned int num; + down(&sdp->sd_jindex_lock); + num = sdp->sd_journals; + up(&sdp->sd_jindex_lock); + return num; +} + +int gfs_jindex_hold(struct gfs_sbd *sdp, struct gfs_holder *ji_gh); +void gfs_clear_journals(struct gfs_sbd *sdp); + +int gfs_get_jiinode(struct gfs_sbd *sdp); +int gfs_get_riinode(struct gfs_sbd *sdp); +int gfs_get_rootinode(struct gfs_sbd *sdp); +int gfs_get_qinode(struct gfs_sbd *sdp); +int gfs_get_linode(struct gfs_sbd *sdp); + +int gfs_make_fs_rw(struct gfs_sbd *sdp); +int gfs_make_fs_ro(struct gfs_sbd *sdp); + +int gfs_stat_gfs(struct gfs_sbd *sdp, struct gfs_usage *usage, + int interruptible); + +int gfs_lock_fs_check_clean(struct gfs_sbd *sdp, unsigned int state, + struct gfs_holder *t_gh); +int gfs_freeze_fs(struct gfs_sbd *sdp); +void gfs_unfreeze_fs(struct gfs_sbd *sdp); + +#endif /* __SUPER_DOT_H__ */ diff -urN linux-orig/fs/gfs/trans.c linux-patched/fs/gfs/trans.c --- linux-orig/fs/gfs/trans.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/trans.c 2004-06-30 13:27:49.359707419 -0500 @@ -0,0 +1,410 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
+** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include "gfs.h" +#include "dio.h" +#include "glock.h" +#include "log.h" +#include "lops.h" +#include "quota.h" +#include "trans.h" +#include "unlinked.h" + +/** + * gfs_trans_print - Print a transaction to the console + * @sdp: the filesystem + * @tr: The GFS transaction + * @where: Situation of transaction + * + */ + +void +gfs_trans_print(struct gfs_sbd *sdp, struct gfs_trans *tr, unsigned int where) +{ + struct gfs_log_element *le; + struct list_head *tmp, *head; + unsigned int mblks = 0, eblks = 0; + + LO_TRANS_SIZE(sdp, tr, &mblks, &eblks, NULL, NULL); + + printk("Transaction: (%s, %u)\n", tr->tr_file, tr->tr_line); + printk(" tr_mblks_asked = %u, tr_eblks_asked = %u, tr_seg_reserved = %u\n", + tr->tr_mblks_asked, tr->tr_eblks_asked, tr->tr_seg_reserved); + printk(" mblks = %u, eblks = %u\n", mblks, eblks); + printk(" tr_flags = 0x%.8X\n", tr->tr_flags); + + for (head = &tr->tr_elements, tmp = head->next; + tmp != head; + tmp = tmp->next) { + le = list_entry(tmp, struct gfs_log_element, le_list); + LO_PRINT(sdp, le, where); + } + + printk("End Trans\n"); +} + +/** + * gfs_trans_begin_i - Prepare to start a transaction + * @sdp: The GFS superblock + * @meta_blocks: Reserve this many metadata blocks in the log + * @extra_blocks: Number of non-metadata blocks to reserve + * + * Allocate the struct gfs_trans. Do in-place and + * log reservations. + * + * Returns: 0 on success, -EXXX on failure + */ + +int +gfs_trans_begin_i(struct gfs_sbd *sdp, + unsigned int meta_blocks, unsigned int extra_blocks, + char *file, unsigned int line) +{ + struct gfs_trans *tr; + unsigned int blocks; + int error; + + tr = gmalloc(sizeof(struct gfs_trans)); + memset(tr, 0, sizeof(struct gfs_trans)); + + INIT_LIST_HEAD(&tr->tr_elements); + INIT_LIST_HEAD(&tr->tr_free_bufs); + INIT_LIST_HEAD(&tr->tr_free_bmem); + INIT_LIST_HEAD(&tr->tr_bufs); + INIT_LIST_HEAD(&tr->tr_ail_bufs); + + tr->tr_file = file; + tr->tr_line = line; + tr->tr_t_gh = gfs_holder_get(sdp->sd_trans_gl, LM_ST_SHARED, 0); + + error = gfs_glock_nq(tr->tr_t_gh); + if (error) + goto fail; + + if (test_bit(SDF_ROFS, &sdp->sd_flags)) { + tr->tr_t_gh->gh_flags |= GL_NOCACHE; + error = -EROFS; + goto fail_gunlock; + } + + /* Do log reservation */ + + tr->tr_mblks_asked = meta_blocks; + tr->tr_eblks_asked = extra_blocks; + + blocks = 1; + if (meta_blocks) + blocks += gfs_struct2blk(sdp, meta_blocks, + sizeof(struct gfs_block_tag)) + + meta_blocks; + blocks += extra_blocks; + tr->tr_seg_reserved = gfs_blk2seg(sdp, blocks); + + error = gfs_log_reserve(sdp, tr->tr_seg_reserved, FALSE); + if (error) + goto fail_gunlock; + + GFS_ASSERT_SBD(!current_transaction, sdp,); + current_transaction = tr; + + return 0; + + fail_gunlock: + gfs_glock_dq(tr->tr_t_gh); + + fail: + gfs_holder_put(tr->tr_t_gh); + kfree(tr); + + return error; +} + +/** + * gfs_trans_end - End a transaction + * @sdp: The GFS superblock + * + * If buffers were actually added to the transaction, + * commit it.
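+ * + * A typical caller brackets its metadata updates like this (a sketch, + * not a verbatim quote of any one GFS caller): + * + * error = gfs_trans_begin(sdp, mblks, eblks); + * if (error) + * return error; + * gfs_trans_add_bh(gl, bh); + * (modify the buffer) + * gfs_trans_end(sdp);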
+ */ + +void +gfs_trans_end(struct gfs_sbd *sdp) +{ + struct gfs_trans *tr; + struct gfs_holder *t_gh; + struct list_head *tmp, *head; + struct gfs_log_element *le; + + tr = current_transaction; + GFS_ASSERT_SBD(tr, sdp,); + current_transaction = NULL; + + t_gh = tr->tr_t_gh; + tr->tr_t_gh = NULL; + + if (list_empty(&tr->tr_elements)) { + gfs_log_release(sdp, tr->tr_seg_reserved); + kfree(tr); + + gfs_glock_dq(t_gh); + gfs_holder_put(t_gh); + + return; + } + + for (head = &tr->tr_elements, tmp = head->next; + tmp != head; + tmp = tmp->next) { + le = list_entry(tmp, struct gfs_log_element, le_list); + LO_TRANS_END(sdp, le); + } + + gfs_log_commit(sdp, tr); + + gfs_glock_dq(t_gh); + gfs_holder_put(t_gh); + + if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS) + gfs_log_flush(sdp); +} + +/** + * gfs_trans_add_gl - Add a glock to a transaction + * @gl: the glock + * + * Add the given glock to this process's transaction. + */ + +void +gfs_trans_add_gl(struct gfs_glock *gl) +{ + if (!gl->gl_new_le.le_trans) { + GFS_ASSERT_GLOCK(gfs_glock_is_locked_by_me(gl) && + gfs_glock_is_held_excl(gl), gl,); + gfs_glock_hold(gl); /* Released in glock_trans_end() */ + + set_bit(GLF_DIRTY, &gl->gl_flags); + + LO_ADD(gl->gl_sbd, &gl->gl_new_le); + gl->gl_new_le.le_trans->tr_num_gl++; + } +} + +/** + * gfs_trans_add_bh - Add a buffer to the current transaction + * @gl: the glock the buffer belongs to + * @bh: The buffer to add + * + * Add a buffer to the current transaction. The glock for the buffer + * should be held. This pins the buffer as well. + * + * Call this as many times as you want during transaction formation. + * It only does its work once. + * + */ + +void +gfs_trans_add_bh(struct gfs_glock *gl, struct buffer_head *bh) +{ + struct gfs_sbd *sdp = gl->gl_sbd; + struct gfs_bufdata *bd; + + bd = bh2bd(bh); + if (!bd) { + gfs_attach_bufdata(bh, gl); + bd = bh2bd(bh); + } + + if (bd->bd_new_le.le_trans) + return; + + gfs_meta_check(sdp, bh); + + GFS_ASSERT_GLOCK(bd->bd_gl == gl, gl,); + + if (!gl->gl_new_le.le_trans) + gfs_trans_add_gl(gl); + + gfs_dpin(sdp, bh); + + LO_ADD(sdp, &bd->bd_new_le); + bd->bd_new_le.le_trans->tr_num_buf++; +} + +/** + * gfs_trans_add_unlinked - Add an unlinked/dealloced tag to the current transaction + * @sdp: the filesystem + * @type: the type of entry + * @inum: the inode number + * + * Returns: the unlinked structure + */ + +struct gfs_unlinked * +gfs_trans_add_unlinked(struct gfs_sbd *sdp, unsigned int type, + struct gfs_inum *inum) +{ + struct gfs_unlinked *ul; + + ul = gfs_unlinked_get(sdp, inum, CREATE); + + LO_ADD(sdp, &ul->ul_new_le); + + switch (type) { + case GFS_LOG_DESC_IUL: + set_bit(ULF_NEW_UL, &ul->ul_flags); + ul->ul_new_le.le_trans->tr_num_iul++; + break; + case GFS_LOG_DESC_IDA: + clear_bit(ULF_NEW_UL, &ul->ul_flags); + ul->ul_new_le.le_trans->tr_num_ida++; + break; + default: + GFS_ASSERT_SBD(FALSE, sdp,); + break; + } + + return ul; +} + +/** + * gfs_trans_add_quota - Add quota changes to a transaction + * @sdp: the filesystem + * @change: The number of blocks allocated (positive) or freed (negative) + * @uid: the user ID doing the change + * @gid: the group ID doing the change + * + */ + +void +gfs_trans_add_quota(struct gfs_sbd *sdp, int64_t change, + uint32_t uid, uint32_t gid) +{ + struct gfs_trans *tr; + struct list_head *tmp, *head, *next; + struct gfs_log_element *le; + struct gfs_quota_le *ql; + int found_uid, found_gid; + int error; + + if (!sdp->sd_tune.gt_quota_account) + return; + + GFS_ASSERT_SBD(change, sdp,); + + found_uid = (uid == NO_QUOTA_CHANGE); + 
found_gid = (gid == NO_QUOTA_CHANGE); + + GFS_ASSERT_SBD(!found_uid || !found_gid, sdp,); + + tr = current_transaction; + GFS_ASSERT_SBD(tr, sdp,); + + for (head = &tr->tr_elements, tmp = head->next, next = tmp->next; + tmp != head; + tmp = next, next = next->next) { + le = list_entry(tmp, struct gfs_log_element, le_list); + if (le->le_ops != &gfs_quota_lops) + continue; + + ql = container_of(le, struct gfs_quota_le, ql_le); + + if (test_bit(QDF_USER, &ql->ql_data->qd_flags)) { + if (ql->ql_data->qd_id == uid) { + ql->ql_change += change; + + spin_lock(&sdp->sd_quota_lock); + ql->ql_data->qd_change_new += change; + spin_unlock(&sdp->sd_quota_lock); + + list_del(&le->le_list); + + if (ql->ql_change) + list_add(&le->le_list, + &tr->tr_elements); + else { + gfs_quota_put(sdp, ql->ql_data); + kfree(ql); + tr->tr_num_q--; + } + + GFS_ASSERT_SBD(!found_uid, sdp,); + found_uid = TRUE; + if (found_gid) + break; + } + } else { + if (ql->ql_data->qd_id == gid) { + ql->ql_change += change; + + spin_lock(&sdp->sd_quota_lock); + ql->ql_data->qd_change_new += change; + spin_unlock(&sdp->sd_quota_lock); + + list_del(&le->le_list); + + if (ql->ql_change) + list_add(&le->le_list, + &tr->tr_elements); + else { + gfs_quota_put(sdp, ql->ql_data); + kfree(ql); + tr->tr_num_q--; + } + + GFS_ASSERT_SBD(!found_gid, sdp,); + found_gid = TRUE; + if (found_uid) + break; + } + } + } + + while (!found_uid || !found_gid) { + ql = gmalloc(sizeof(struct gfs_quota_le)); + memset(ql, 0, sizeof(struct gfs_quota_le)); + + INIT_LE(&ql->ql_le, &gfs_quota_lops); + + if (found_uid) { + error = gfs_quota_get(sdp, FALSE, gid, + NO_CREATE, + &ql->ql_data); + found_gid = TRUE; + } else { + error = gfs_quota_get(sdp, TRUE, uid, + NO_CREATE, + &ql->ql_data); + found_uid = TRUE; + } + + GFS_ASSERT_SBD(!error && ql->ql_data, sdp,); + + ql->ql_change = change; + + spin_lock(&sdp->sd_quota_lock); + ql->ql_data->qd_change_new += change; + spin_unlock(&sdp->sd_quota_lock); + + LO_ADD(sdp, &ql->ql_le); + tr->tr_num_q++; + } +} diff -urN linux-orig/fs/gfs/trans.h linux-patched/fs/gfs/trans.h --- linux-orig/fs/gfs/trans.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/trans.h 2004-06-30 13:27:49.359707419 -0500 @@ -0,0 +1,37 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
+** +******************************************************************************* +******************************************************************************/ + +#ifndef __TRANS_DOT_H__ +#define __TRANS_DOT_H__ + +#define TRANS_IS_NEW (53) +#define TRANS_IS_INCORE (54) +void gfs_trans_print(struct gfs_sbd *sdp, struct gfs_trans *tr, + unsigned int where); + +int gfs_trans_begin_i(struct gfs_sbd *sdp, + unsigned int meta_blocks, unsigned int extra_blocks, + char *file, unsigned int line); +#define gfs_trans_begin(sdp, mb, eb) \ +gfs_trans_begin_i((sdp), (mb), (eb), __FILE__, __LINE__) + +void gfs_trans_end(struct gfs_sbd *sdp); + +void gfs_trans_add_gl(struct gfs_glock *gl); +void gfs_trans_add_bh(struct gfs_glock *gl, struct buffer_head *bh); +struct gfs_unlinked *gfs_trans_add_unlinked(struct gfs_sbd *sdp, unsigned int type, + struct gfs_inum *inum); +void gfs_trans_add_quota(struct gfs_sbd *sdp, int64_t change, uint32_t uid, + uint32_t gid); + +#endif /* __TRANS_DOT_H__ */ diff -urN linux-orig/fs/gfs/unlinked.c linux-patched/fs/gfs/unlinked.c --- linux-orig/fs/gfs/unlinked.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/unlinked.c 2004-06-30 13:27:49.360707187 -0500 @@ -0,0 +1,427 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include "gfs.h" +#include "inode.h" +#include "log.h" +#include "lops.h" +#include "unlinked.h" + +/** + * gfs_unlinked_get - Get a structure to represent an unlinked inode + * @sdp: the filesystem + * @inum: the inode that's unlinked + * @create: if TRUE, create the structure, otherwise return NULL + * + * Returns: the structure, or NULL + */ + +struct gfs_unlinked * +gfs_unlinked_get(struct gfs_sbd *sdp, struct gfs_inum *inum, int create) +{ + struct gfs_unlinked *ul = NULL, *new_ul = NULL; + struct list_head *tmp, *head; + + for (;;) { + spin_lock(&sdp->sd_unlinked_lock); + + for (head = &sdp->sd_unlinked_list, tmp = head->next; + tmp != head; + tmp = tmp->next) { + ul = list_entry(tmp, struct gfs_unlinked, ul_list); + if (gfs_inum_equal(&ul->ul_inum, inum)) { + ul->ul_count++; + break; + } + } + + if (tmp == head) + ul = NULL; + + if (!ul && new_ul) { + ul = new_ul; + list_add(&ul->ul_list, &sdp->sd_unlinked_list); + new_ul = NULL; + } + + spin_unlock(&sdp->sd_unlinked_lock); + + if (ul || !create) { + if (new_ul) + kfree(new_ul); + return ul; + } + + new_ul = gmalloc(sizeof(struct gfs_unlinked)); + memset(new_ul, 0, sizeof(struct gfs_unlinked)); + + new_ul->ul_count = 1; + new_ul->ul_inum = *inum; + + INIT_LE(&new_ul->ul_new_le, &gfs_unlinked_lops); + INIT_LE(&new_ul->ul_incore_le, &gfs_unlinked_lops); + INIT_LE(&new_ul->ul_ondisk_le, &gfs_unlinked_lops); + } +} + +/** + * gfs_unlinked_hold - increment the usage count on a struct gfs_unlinked + * @sdp: the filesystem + * @ul: the structure + * + */ + +void +gfs_unlinked_hold(struct gfs_sbd *sdp, struct 
gfs_unlinked *ul) +{ + spin_lock(&sdp->sd_unlinked_lock); + ul->ul_count++; + spin_unlock(&sdp->sd_unlinked_lock); +} + +/** + * gfs_unlinked_put - decrement the usage count on a struct gfs_unlinked + * @sdp: the filesystem + * @ul: the structure + * + * Free the structure if its reference count hits zero. + * + */ + +void +gfs_unlinked_put(struct gfs_sbd *sdp, struct gfs_unlinked *ul) +{ + spin_lock(&sdp->sd_unlinked_lock); + + GFS_ASSERT_SBD(ul->ul_count, sdp,); + ul->ul_count--; + + if (!ul->ul_count) { + GFS_ASSERT_SBD(!test_bit(ULF_IC_LIST, &ul->ul_flags) && + !test_bit(ULF_OD_LIST, &ul->ul_flags) && + !test_bit(ULF_LOCK, &ul->ul_flags), + sdp,); + list_del(&ul->ul_list); + spin_unlock(&sdp->sd_unlinked_lock); + kfree(ul); + } else + spin_unlock(&sdp->sd_unlinked_lock); +} + +/** + * unlinked_find - Find an inode to try to deallocate + * @sdp: the filesystem + * + * The returned structure is locked and needs to be unlocked + * with gfs_unlinked_unlock(). + * + * Returns: An unlinked structure, or NULL + */ + +struct gfs_unlinked * +unlinked_find(struct gfs_sbd *sdp) +{ + struct list_head *tmp, *head; + struct gfs_unlinked *ul = NULL; + + if (test_bit(SDF_ROFS, &sdp->sd_flags)) + return NULL; + + gfs_log_lock(sdp); + spin_lock(&sdp->sd_unlinked_lock); + + if (!atomic_read(&sdp->sd_unlinked_ic_count)) + goto out; + + for (head = &sdp->sd_unlinked_list, tmp = head->next; + tmp != head; + tmp = tmp->next) { + ul = list_entry(tmp, struct gfs_unlinked, ul_list); + + if (test_bit(ULF_LOCK, &ul->ul_flags)) + continue; + if (!test_bit(ULF_IC_LIST, &ul->ul_flags)) + continue; + + list_move_tail(&ul->ul_list, &sdp->sd_unlinked_list); + + set_bit(ULF_LOCK, &ul->ul_flags); + ul->ul_count++; + + goto out; + } + + ul = NULL; + + out: + spin_unlock(&sdp->sd_unlinked_lock); + gfs_log_unlock(sdp); + + return ul; +} + +/** + * gfs_unlinked_lock - lock an unlinked structure + * @sdp: the filesystem + * @ul: the unlinked inode structure + * + */ + +void +gfs_unlinked_lock(struct gfs_sbd *sdp, struct gfs_unlinked *ul) +{ + spin_lock(&sdp->sd_unlinked_lock); + + GFS_ASSERT_SBD(!test_bit(ULF_LOCK, &ul->ul_flags), sdp,); + set_bit(ULF_LOCK, &ul->ul_flags); + + ul->ul_count++; + + spin_unlock(&sdp->sd_unlinked_lock); +} + +/** + * gfs_unlinked_unlock - unlock and drop a reference on an unlinked structure + * @sdp: the filesystem + * @ul: the unlinked inode structure + * + */ + +void +gfs_unlinked_unlock(struct gfs_sbd *sdp, struct gfs_unlinked *ul) +{ + spin_lock(&sdp->sd_unlinked_lock); + + GFS_ASSERT_SBD(test_bit(ULF_LOCK, &ul->ul_flags), sdp,); + clear_bit(ULF_LOCK, &ul->ul_flags); + + GFS_ASSERT_SBD(ul->ul_count, sdp,); + ul->ul_count--; + + if (!ul->ul_count) { + GFS_ASSERT_SBD(!test_bit(ULF_IC_LIST, &ul->ul_flags) && + !test_bit(ULF_OD_LIST, &ul->ul_flags), sdp,); + list_del(&ul->ul_list); + spin_unlock(&sdp->sd_unlinked_lock); + kfree(ul); + } else + spin_unlock(&sdp->sd_unlinked_lock); +} + +/** + * gfs_unlinked_merge - add/remove an unlinked inode from the in-memory list + * @sdp: the filesystem + * @type: is this an unlink tag or a dealloc tag + * @inum: the inode number + * + */ + +void +gfs_unlinked_merge(struct gfs_sbd *sdp, unsigned int type, + struct gfs_inum *inum) +{ + struct gfs_unlinked *ul; + + GFS_ASSERT_SBD(atomic_read(&sdp->sd_unlinked_ic_count) == + atomic_read(&sdp->sd_unlinked_od_count), sdp,); + + ul = gfs_unlinked_get(sdp, inum, CREATE); + + gfs_log_lock(sdp); + + switch (type) { + case GFS_LOG_DESC_IUL: + gfs_unlinked_hold(sdp, ul); + gfs_unlinked_hold(sdp, ul); + 
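/* Two extra references: one for membership on the in-core + unlinked list and one for the on-disk list; they pair with + the two gfs_unlinked_put() calls in the GFS_LOG_DESC_IDA + case below. */ +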
GFS_ASSERT_SBD(!test_bit(ULF_IC_LIST, &ul->ul_flags) && + !test_bit(ULF_OD_LIST, &ul->ul_flags), sdp,); + set_bit(ULF_IC_LIST, &ul->ul_flags); + set_bit(ULF_OD_LIST, &ul->ul_flags); + atomic_inc(&sdp->sd_unlinked_ic_count); + atomic_inc(&sdp->sd_unlinked_od_count); + + break; + + case GFS_LOG_DESC_IDA: + GFS_ASSERT_SBD(test_bit(ULF_IC_LIST, &ul->ul_flags) && + test_bit(ULF_OD_LIST, &ul->ul_flags), sdp,); + clear_bit(ULF_IC_LIST, &ul->ul_flags); + clear_bit(ULF_OD_LIST, &ul->ul_flags); + gfs_unlinked_put(sdp, ul); + gfs_unlinked_put(sdp, ul); + GFS_ASSERT_SBD(atomic_read(&sdp->sd_unlinked_ic_count), sdp,); + atomic_dec(&sdp->sd_unlinked_ic_count); + GFS_ASSERT_SBD(atomic_read(&sdp->sd_unlinked_od_count), sdp,); + atomic_dec(&sdp->sd_unlinked_od_count); + + break; + } + + gfs_log_unlock(sdp); + + gfs_unlinked_put(sdp, ul); +} + +/** + * gfs_unlinked_cleanup - get rid of any extra struct gfs_unlinked structures + * @sdp: the filesystem + * + */ + +void +gfs_unlinked_cleanup(struct gfs_sbd *sdp) +{ + struct gfs_unlinked *ul; + + restart: + gfs_log_lock(sdp); + + GFS_ASSERT_SBD(atomic_read(&sdp->sd_unlinked_ic_count) == + atomic_read(&sdp->sd_unlinked_od_count), sdp,); + + spin_lock(&sdp->sd_unlinked_lock); + + while (!list_empty(&sdp->sd_unlinked_list)) { + ul = list_entry(sdp->sd_unlinked_list.next, + struct gfs_unlinked, ul_list); + + if (ul->ul_count > 2) { + spin_unlock(&sdp->sd_unlinked_lock); + gfs_log_unlock(sdp); + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(HZ); + goto restart; + } + GFS_ASSERT_SBD(ul->ul_count == 2, sdp,); + + GFS_ASSERT_SBD(test_bit(ULF_IC_LIST, &ul->ul_flags) && + test_bit(ULF_OD_LIST, &ul->ul_flags) && + !test_bit(ULF_LOCK, &ul->ul_flags), sdp,); + + list_del(&ul->ul_list); + + atomic_dec(&sdp->sd_unlinked_ic_count); + atomic_dec(&sdp->sd_unlinked_od_count); + + spin_unlock(&sdp->sd_unlinked_lock); + kfree(ul); + spin_lock(&sdp->sd_unlinked_lock); + } + + spin_unlock(&sdp->sd_unlinked_lock); + + GFS_ASSERT_SBD(!atomic_read(&sdp->sd_unlinked_ic_count) && + !atomic_read(&sdp->sd_unlinked_od_count), sdp,); + + gfs_log_unlock(sdp); +} + +/** + * gfs_unlinked_limit - limit the number of inodes waiting to be deallocated + * @sdp: the filesystem + * + */ + +void +gfs_unlinked_limit(struct gfs_sbd *sdp) +{ + unsigned int tries = 0, min = 0; + int error; + + if (atomic_read(&sdp->sd_unlinked_ic_count) >= + sdp->sd_tune.gt_ilimit2) { + tries = sdp->sd_tune.gt_ilimit2_tries; + min = sdp->sd_tune.gt_ilimit2_min; + } else if (atomic_read(&sdp->sd_unlinked_ic_count) >= + sdp->sd_tune.gt_ilimit1) { + tries = sdp->sd_tune.gt_ilimit1_tries; + min = sdp->sd_tune.gt_ilimit1_min; + } + + while (tries--) { + struct gfs_unlinked *ul = unlinked_find(sdp); + if (!ul) + break; + + error = gfs_inode_dealloc(sdp, &ul->ul_inum); + + gfs_unlinked_unlock(sdp, ul); + + if (!error) { + if (!--min) + break; + } else if (error != 1) + break; + } +} + +/** + * gfs_unlinked_dealloc - Go through the list of inodes to be deallocated + * @sdp: the filesystem + * + */ + +void +gfs_unlinked_dealloc(struct gfs_sbd *sdp) +{ + unsigned int hits, strikes; + int error; + + for (;;) { + hits = 0; + strikes = 0; + + for (;;) { + struct gfs_unlinked *ul = unlinked_find(sdp); + if (!ul) + return; + + error = gfs_inode_dealloc(sdp, &ul->ul_inum); + + gfs_unlinked_unlock(sdp, ul); + + if (!error) { + hits++; + if (strikes) + strikes--; + } else if (error == 1) { + strikes++; + if (strikes >=
atomic_read(&sdp->sd_unlinked_ic_count)) { + error = 0; + break; + } + } else + goto out; + } + + if (!hits || !test_bit(SDF_INODED_RUN, &sdp->sd_flags)) + break; + + cond_resched(); + } + + out: + if (error && error != -EROFS) + printk("GFS: fsid=%s: error deallocating inodes: %d\n", + sdp->sd_fsname, error); +} diff -urN linux-orig/fs/gfs/unlinked.h linux-patched/fs/gfs/unlinked.h --- linux-orig/fs/gfs/unlinked.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/unlinked.h 2004-06-30 13:27:49.360707187 -0500 @@ -0,0 +1,32 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __UNLINKED_DOT_H__ +#define __UNLINKED_DOT_H__ + +struct gfs_unlinked *gfs_unlinked_get(struct gfs_sbd *sdp, + struct gfs_inum *inum, int create); +void gfs_unlinked_hold(struct gfs_sbd *sdp, struct gfs_unlinked *ul); +void gfs_unlinked_put(struct gfs_sbd *sdp, struct gfs_unlinked *ul); + +void gfs_unlinked_lock(struct gfs_sbd *sdp, struct gfs_unlinked *ul); +void gfs_unlinked_unlock(struct gfs_sbd *sdp, struct gfs_unlinked *ul); + +void gfs_unlinked_merge(struct gfs_sbd *sdp, unsigned int type, + struct gfs_inum *inum); +void gfs_unlinked_cleanup(struct gfs_sbd *sdp); + +void gfs_unlinked_limit(struct gfs_sbd *sdp); +void gfs_unlinked_dealloc(struct gfs_sbd *sdp); + +#endif /* __UNLINKED_DOT_H__ */ diff -urN linux-orig/fs/gfs/util.c linux-patched/fs/gfs/util.c --- linux-orig/fs/gfs/util.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/util.c 2004-06-30 13:27:49.360707187 -0500 @@ -0,0 +1,324 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include "gfs.h" +#include "glock.h" + +uint32_t gfs_random_number; + +volatile int gfs_in_panic = FALSE; + +kmem_cache_t *gfs_glock_cachep = NULL; +kmem_cache_t *gfs_inode_cachep = NULL; +kmem_cache_t *gfs_bufdata_cachep = NULL; +kmem_cache_t *gfs_mhc_cachep = NULL; + +/** + * gfs_random - Generate a random 32-bit number + * + * Generate a semi-crappy 32-bit pseudo-random number without using + * floating point. + * + * The PRNG is from "Numerical Recipes in C" (second edition), page 284. 
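+ * + * Written out, the recurrence below is + * x[n+1] = 1664525 * x[n] + 1013904223 (mod 2^32); + * the two hex constants are just those decimal values.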
+ * + * Returns: a 32-bit random number + */ + +uint32_t +gfs_random(void) +{ + gfs_random_number = 0x0019660D * gfs_random_number + 0x3C6EF35F; + return gfs_random_number; +} + +/** + * hash_more_internal - hash an array of data + * @data: the data to be hashed + * @len: the length of data to be hashed + * @hash: the hash from a previous call + * + * Take some data and convert it to a 32-bit hash. + * + * This is the 32-bit FNV-1a hash from: + * http://www.isthe.com/chongo/tech/comp/fnv/ + * + * The guts of the hash, shared by gfs_hash() and gfs_hash_more(). + * + * Returns: the hash + */ + +static __inline__ uint32_t +hash_more_internal(const void *data, unsigned int len, uint32_t hash) +{ + unsigned char *p = (unsigned char *)data; + unsigned char *e = p + len; + uint32_t h = hash; + + while (p < e) { + h ^= (uint32_t)(*p++); + h *= 0x01000193; + } + + return h; +} + +/** + * gfs_hash - hash an array of data + * @data: the data to be hashed + * @len: the length of data to be hashed + * + * Take some data and convert it to a 32-bit hash. + * + * This is the 32-bit FNV-1a hash from: + * http://www.isthe.com/chongo/tech/comp/fnv/ + * + * Returns: the hash + */ + +uint32_t +gfs_hash(const void *data, unsigned int len) +{ + uint32_t h = 0x811C9DC5; + h = hash_more_internal(data, len, h); + return h; +} + +/** + * gfs_hash_more - hash an array of data + * @data: the data to be hashed + * @len: the length of data to be hashed + * @hash: the hash from a previous call + * + * Take some data and convert it to a 32-bit hash. + * + * This is the 32-bit FNV-1a hash from: + * http://www.isthe.com/chongo/tech/comp/fnv/ + * + * This version lets you hash together discontinuous regions. + * For example, to compute the combined hash of the memory in + * (data1, len1), (data2, len2), and (data3, len3) you: + * + * h = gfs_hash(data1, len1); + * h = gfs_hash_more(data2, len2, h); + * h = gfs_hash_more(data3, len3, h); + * + * Returns: the hash + */ + +uint32_t +gfs_hash_more(const void *data, unsigned int len, uint32_t hash) +{ + uint32_t h; + h = hash_more_internal(data, len, hash); + return h; +} + +/* Byte-wise swap two items of size SIZE.
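Used by gfs_sort() below.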
*/ + +#define SWAP(a, b, size) \ +do { \ + register size_t __size = (size); \ + register char *__a = (a), *__b = (b); \ + do { \ + char __tmp = *__a; \ + *__a++ = *__b; \ + *__b++ = __tmp; \ + } while (__size-- > 1); \ +} while (0) + +/** + * gfs_sort - Sort base array using shell sort algorithm + * @base: the input array + * @num_elem: number of elements in array + * @size: size of each element in array + * @compar: function to compare array elements (returns negative + * for lt, 0 for eq, and positive for gt) + * + * Sorts the array passed in using the compar function to compare elements, + * using the shell sort algorithm + */ + +void +gfs_sort(void *base, unsigned int num_elem, unsigned int size, + int (*compar) (const void *, const void *)) +{ + register char *pbase = (char *)base; + int i, j, k, h; + int cols[16] = {1391376, 463792, 198768, 86961, 33936, 13776, 4592, + 1968, 861, 336, 112, 48, 21, 7, 3, 1}; + + for (k = 0; k < 16; k++) { + h = cols[k]; + for (i = h; i < num_elem; i++) { + j = i; + while (j >= h && + (*compar)((void *)(pbase + size * (j - h)), + (void *)(pbase + size * j)) > 0) { + SWAP(pbase + size * j, + pbase + size * (j - h), + size); + j = j - h; + } + } + } +} + +/** + * bitch_about - complain about something, but not too often + * @sdp: the filesystem + * @last: the last time we bitched + * @about: what we're complaining about + * + */ + +void +bitch_about(struct gfs_sbd *sdp, unsigned long *last, char *about) +{ + if (time_after_eq(jiffies, *last + sdp->sd_tune.gt_complain_secs * HZ)) { + printk("GFS: fsid=%s: %s by program \"%s\"\n", + sdp->sd_fsname, about, current->comm); + *last = jiffies; + } +} + +/** + * gfs_assert_i - Stop the machine + * @assertion: the assertion that failed + * @file: the file that called us + * @line: the line number of the file that called us + * + * Don't do ENTER() and EXIT() here. + * + */ + +void +gfs_assert_i(char *assertion, + unsigned int type, void *ptr, + char *file, unsigned int line) +{ + gfs_in_panic = TRUE; + + printk("\nGFS: Assertion failed on line %d of file %s\n" + "GFS: assertion: \"%s\"\n" + "GFS: time = %lu\n", + line, file, assertion, get_seconds()); + + switch (type) { + case GFS_ASSERT_TYPE_SBD: + { + struct gfs_sbd *sdp = (struct gfs_sbd *)ptr; + printk("GFS: fsid=%s\n", sdp->sd_fsname); + } + break; + + case GFS_ASSERT_TYPE_GLOCK: + { + struct gfs_glock *gl = (struct gfs_glock *)ptr; + struct gfs_sbd *sdp = gl->gl_sbd; + printk("GFS: fsid=%s: glock = (%u, %"PRIu64")\n", + sdp->sd_fsname, + gl->gl_name.ln_type, + gl->gl_name.ln_number); + } + break; + + case GFS_ASSERT_TYPE_INODE: + { + struct gfs_inode *ip = (struct gfs_inode *)ptr; + struct gfs_sbd *sdp = ip->i_sbd; + printk("GFS: fsid=%s: inode = %"PRIu64"/%"PRIu64"\n", + sdp->sd_fsname, + ip->i_num.no_formal_ino, ip->i_num.no_addr); + } + break; + + case GFS_ASSERT_TYPE_RGRPD: + { + struct gfs_rgrpd *rgd = (struct gfs_rgrpd *)ptr; + struct gfs_sbd *sdp = rgd->rd_sbd; + printk("GFS: fsid=%s: rgroup = %"PRIu64"\n", + sdp->sd_fsname, rgd->rd_ri.ri_addr); + } + break; + } + + printk("\n"); +#if 0 + printk("GFS: Record message above and reboot.\n"); + BUG(); +#endif + panic("GFS: Record message above and reboot.\n"); +} + +/** + * gfs_io_error_i - handle an I/O error + * @sdp: the filesystem + * @type: the type of object the error happened on + * @ptr: the object itself (can be NULL) + * + * This will do something other than panic, eventually.
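+ * For now it prints out what it can about where the error happened + * and then fails an assertion, which takes the machine down through + * gfs_assert_i().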
+ * + */ + +void gfs_io_error_i(struct gfs_sbd *sdp, + unsigned int type, void *ptr, + char *file, unsigned int line) +{ + switch (type) { + case GFS_IO_ERROR_TYPE_BH: + { + struct buffer_head *bh = (struct buffer_head *)ptr; + printk("GFS: fsid=%s: I/O error on block %"PRIu64"\n", + sdp->sd_fsname, (uint64_t)bh->b_blocknr); + } + break; + + case GFS_IO_ERROR_TYPE_INODE: + { + struct gfs_inode *ip = (struct gfs_inode *)ptr; + printk("GFS: fsid=%s: I/O error in inode %"PRIu64"/%"PRIu64"\n", + sdp->sd_fsname, + ip->i_num.no_formal_ino, ip->i_num.no_addr); + } + break; + + default: + printk("GFS: fsid=%s: I/O error\n", sdp->sd_fsname); + break; + } + + GFS_ASSERT_SBD(FALSE, sdp,); +} + +/** + * gmalloc - malloc a small amount of memory + * @size: the number of bytes to malloc + * + * Returns: the memory + */ + +void * +gmalloc(unsigned int size) +{ + void *p; + RETRY_MALLOC(p = kmalloc(size, GFP_KERNEL), p); + return p; +} + diff -urN linux-orig/fs/gfs/util.h linux-patched/fs/gfs/util.h --- linux-orig/fs/gfs/util.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs/util.h 2004-06-30 13:27:49.360707187 -0500 @@ -0,0 +1,156 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __UTIL_DOT_H__ +#define __UTIL_DOT_H__ + + +/* Utility functions */ + +extern uint32_t gfs_random_number; +uint32_t gfs_random(void); + +uint32_t gfs_hash(const void *data, unsigned int len); +uint32_t gfs_hash_more(const void *data, unsigned int len, uint32_t hash); + +void gfs_sort(void *base, unsigned int num_elem, unsigned int size, + int (*compar) (const void *, const void *)); + +void bitch_about(struct gfs_sbd *sdp, unsigned long *last, char *about); + + + +/* Assertion stuff */ + +#define GFS_ASSERT_TYPE_NONE (18) +#define GFS_ASSERT_TYPE_SBD (19) +#define GFS_ASSERT_TYPE_GLOCK (20) +#define GFS_ASSERT_TYPE_INODE (21) +#define GFS_ASSERT_TYPE_RGRPD (22) + +#define GFS_ASSERT(x, todo) \ +do \ +{ \ + if (!(x)) \ + { \ + {todo} \ + gfs_assert_i(#x, GFS_ASSERT_TYPE_NONE, NULL, __FILE__, __LINE__); \ + } \ +} \ +while (0) + +#define GFS_ASSERT_SBD(x, sdp, todo) \ +do \ +{ \ + if (!(x)) \ + { \ + struct gfs_sbd *gfs_assert_sbd = (sdp); \ + {todo} \ + gfs_assert_i(#x, GFS_ASSERT_TYPE_SBD, gfs_assert_sbd, __FILE__, __LINE__); \ + } \ +} \ +while (0) + +#define GFS_ASSERT_GLOCK(x, gl, todo) \ +do \ +{ \ + if (!(x)) \ + { \ + struct gfs_glock *gfs_assert_glock = (gl); \ + {todo} \ + gfs_assert_i(#x, GFS_ASSERT_TYPE_GLOCK, gfs_assert_glock, __FILE__, __LINE__); \ + } \ +} \ +while (0) + +#define GFS_ASSERT_INODE(x, ip, todo) \ +do \ +{ \ + if (!(x)) \ + { \ + struct gfs_inode *gfs_assert_inode = (ip); \ + {todo} \ + gfs_assert_i(#x, GFS_ASSERT_TYPE_INODE, gfs_assert_inode, __FILE__, __LINE__); \ + } \ +} \ +while (0) + +#define GFS_ASSERT_RGRPD(x, rgd, todo) \ +do \ +{ \ + if (!(x)) \ + { \ + struct gfs_rgrpd *gfs_assert_rgrpd = (rgd); \ + {todo} \ + gfs_assert_i(#x, GFS_ASSERT_TYPE_RGRPD, gfs_assert_rgrpd, 
__FILE__, __LINE__); \ + } \ +} \ +while (0) + +extern volatile int gfs_in_panic; +void gfs_assert_i(char *assertion, + unsigned int type, void *ptr, + char *file, unsigned int line) __attribute__ ((noreturn)); + + +/* I/O error stuff */ + +#define GFS_IO_ERROR_TYPE_NONE (118) +#define GFS_IO_ERROR_TYPE_BH (119) +#define GFS_IO_ERROR_TYPE_INODE (120) + +#define gfs_io_error(sdp) \ +gfs_io_error_i((sdp), GFS_IO_ERROR_TYPE_NONE, NULL, __FILE__, __LINE__); + +#define gfs_io_error_bh(sdp, bh) \ +do \ +{ \ + struct buffer_head *gfs_io_error_bh = (bh); \ + gfs_io_error_i((sdp), GFS_IO_ERROR_TYPE_BH, gfs_io_error_bh, __FILE__, __LINE__); \ +} \ +while (0) + +#define gfs_io_error_inode(ip) \ +do \ +{ \ + struct gfs_inode *gfs_io_error_inode = (ip); \ + gfs_io_error_i((ip)->i_sbd, GFS_IO_ERROR_TYPE_INODE, gfs_io_error_inode, __FILE__, __LINE__); \ +} \ +while (0) + +void gfs_io_error_i(struct gfs_sbd *sdp, + unsigned int type, void *ptr, + char *file, unsigned int line); + + +/* Memory stuff */ + +#define RETRY_MALLOC(do_this, until_this) \ +for (;;) \ +{ \ + do { do_this; } while (0); \ + if (until_this) \ + break; \ + printk("GFS: out of memory: %s, %u\n", __FILE__, __LINE__); \ + yield(); \ +} + +extern kmem_cache_t *gfs_glock_cachep; +extern kmem_cache_t *gfs_inode_cachep; +extern kmem_cache_t *gfs_bufdata_cachep; +extern kmem_cache_t *gfs_mhc_cachep; + +void *gmalloc(unsigned int size); + + +#endif /* __UTIL_DOT_H__ */ diff -urN linux-orig/include/linux/gfs_ioctl.h linux-patched/include/linux/gfs_ioctl.h --- linux-orig/include/linux/gfs_ioctl.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/include/linux/gfs_ioctl.h 2004-06-30 13:27:49.340711826 -0500 @@ -0,0 +1,218 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2.
+** +******************************************************************************* +******************************************************************************/ + +#ifndef __GFS_IOCTL_DOT_H__ +#define __GFS_IOCTL_DOT_H__ + +#define GFS_IOCTL_VERSION (0) + +#define _GFSC_(x) (('G' << 8) | (x)) + +/* + Ioctls implemented + + Reserved Ioctls: 3, 7, 8, 9, 10, 4, 13 + Next Ioctl: 44 + */ + +#define GFS_STACK_PRINT _GFSC_(40) + +#define GFS_GET_META _GFSC_(31) +#define GFS_FILE_STAT _GFSC_(30) + +#define GFS_SHRINK _GFSC_(5) + +#define GFS_GET_ARGS _GFSC_(29) +#define GFS_GET_LOCKSTRUCT _GFSC_(39) +#define GFS_GET_SUPER _GFSC_(19) +#define GFS_JREAD _GFSC_(23) +#define GFS_JWRITE _GFSC_(24) +#define GFS_JSTAT _GFSC_(20) +#define GFS_JTRUNC _GFSC_(33) + +#define GFS_LOCK_DUMP _GFSC_(11) + +#define GFS_STATGFS _GFSC_(12) + +#define GFS_FREEZE _GFSC_(14) +#define GFS_UNFREEZE _GFSC_(15) + +#define GFS_RECLAIM_METADATA _GFSC_(16) + +#define GFS_QUOTA_SYNC _GFSC_(17) +#define GFS_QUOTA_REFRESH _GFSC_(18) +#define GFS_QUOTA_READ _GFSC_(32) + +#define GFS_GET_TUNE _GFSC_(21) +#define GFS_SET_TUNE _GFSC_(22) + +#define GFS_EATTR_GET _GFSC_(26) +#define GFS_EATTR_SET _GFSC_(27) + +#define GFS_WHERE_ARE_YOU _GFSC_(35) + +#define GFS_SET_FLAG _GFSC_(36) +#define GFS_CLEAR_FLAG _GFSC_(37) + +#define GFS_GET_COUNTERS _GFSC_(43) + +#define GFS_FILE_FLUSH _GFSC_(42) + +struct gfs_user_buffer { + char *ub_data; + unsigned int ub_size; + unsigned int ub_count; +}; + +/* Structure for jread/jwrite */ + +#define GFS_HIDDEN_JINDEX (0x10342345) +#define GFS_HIDDEN_RINDEX (0x10342346) +#define GFS_HIDDEN_QUOTA (0x10342347) +#define GFS_HIDDEN_LICENSE (0x10342348) + +struct gfs_jio { + unsigned int jio_file; + + uint32_t jio_size; + uint64_t jio_offset; + char *jio_data; + + uint32_t jio_count; +}; + +/* Structure for better GFS-specific df */ + +struct gfs_usage { + unsigned int gu_block_size; + uint64_t gu_total_blocks; + uint64_t gu_free; + uint64_t gu_used_dinode; + uint64_t gu_free_dinode; + uint64_t gu_used_meta; + uint64_t gu_free_meta; +}; + +struct gfs_reclaim_stats { + uint64_t rc_inodes; + uint64_t rc_metadata; +}; + +struct gfs_quota_name { + int qn_user; + uint32_t qn_id; +}; + +/* + * You can tune a filesystem, but you can't tune a yak. 
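+ * + * The gt_tune_version field is checked against GFS_TUNE_VERSION below, + * presumably so the kernel and user space can detect a mismatch in the + * layout of this structure before acting on a GFS_SET_TUNE request.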
+ */ + +#define GFS_TUNE_VERSION ((GFS_IOCTL_VERSION << 16) | (138)) + +struct gfs_tune { + unsigned int gt_tune_version; + + unsigned int gt_ilimit1; + unsigned int gt_ilimit1_tries; + unsigned int gt_ilimit1_min; + unsigned int gt_ilimit2; + unsigned int gt_ilimit2_tries; + unsigned int gt_ilimit2_min; + unsigned int gt_demote_secs; + unsigned int gt_incore_log_blocks; + unsigned int gt_jindex_refresh_secs; + unsigned int gt_depend_secs; + unsigned int gt_scand_secs; + unsigned int gt_recoverd_secs; + unsigned int gt_logd_secs; + unsigned int gt_quotad_secs; + unsigned int gt_inoded_secs; + unsigned int gt_quota_simul_sync; + unsigned int gt_quota_warn_period; + unsigned int gt_atime_quantum; + unsigned int gt_quota_quantum; + unsigned int gt_quota_scale_num; + unsigned int gt_quota_scale_den; + unsigned int gt_quota_enforce; + unsigned int gt_quota_account; + unsigned int gt_new_files_jdata; + unsigned int gt_new_files_directio; + unsigned int gt_max_atomic_write; + unsigned int gt_max_readahead; + unsigned int gt_lockdump_size; + unsigned int gt_stall_secs; + unsigned int gt_complain_secs; + unsigned int gt_reclaim_limit; + unsigned int gt_entries_per_readdir; + unsigned int gt_prefetch_secs; + unsigned int gt_statfs_slots; + unsigned int gt_max_mhc; +}; + +/* + * Extended Attribute Ioctl structures + * + * Note: The name_len does not include a null character. + * + * Getting and setting EAs return the following errors that aren't + * what they seem + * + * ENODATA - No such extended attribute + * ERANGE - Extended attribute data is too large for the buffer + * ENOSPC - No space left for extended attributes + * EEXIST - Extended attribute already exists + */ + +#define GFS_EACMD_SET (0) +#define GFS_EACMD_CREATE (1) +#define GFS_EACMD_REPLACE (2) +#define GFS_EACMD_REMOVE (3) + +struct gfs_eaget_io { + char *eg_data; + char *eg_name; + char *eg_len; + uint32_t eg_data_len; + uint8_t eg_name_len; + uint8_t eg_type; /* GFS_EATYPE_... */ +}; + +struct gfs_easet_io { + const char *es_data; + char *es_name; + uint16_t es_data_len; + uint8_t es_name_len; /* not counting the NULL */ + uint8_t es_cmd; /* GFS_EACMD_... */ + uint8_t es_type; /* GFS_EATYPE_... */ +}; + +#define GFS_GLOCKD_DEFAULT (1) +#define GFS_GLOCKD_MAX (32) + +struct gfs_args { + char ar_lockproto[256]; /* The name of the Lock Protocol */ + char ar_locktable[256]; /* The name of the Lock Table */ + char ar_hostdata[256]; /* The host specific data */ + + int ar_ignore_local_fs; /* Ignore the local_fs field in the struct lm_lockops */ + int ar_localflocks; /* let the VFS do flock|fcntl locks for us */ + int ar_localcaching; /* Local-style caching (dangerous on multihost) */ + + int ar_upgrade; /* Upgrade ondisk/multihost format */ + + unsigned int ar_num_glockd; + + int ar_posixacls; /* Enable posix acls */ +}; + +#endif /* __GFS_IOCTL_DOT_H__ */ diff -urN linux-orig/include/linux/gfs_ondisk.h linux-patched/include/linux/gfs_ondisk.h --- linux-orig/include/linux/gfs_ondisk.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/include/linux/gfs_ondisk.h 2004-06-30 13:27:49.341711594 -0500 @@ -0,0 +1,1720 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +/* +* NOTE: +* If you add 8 byte fields to these structures, they must be 8 byte +* aligned. 4 byte fields must be 4 byte aligned, etc... +* +* All structures must be a multiple of 8 bytes long. +* +* GRIPES: +* We should have forgotten about supporting 512B FS block sizes +* and made the di_reserved field in the struct gfs_dinode structure +* much bigger. +* +* de_rec_len in struct gfs_dirent should really have been a 32-bit value +* as it now limits us to a 64k FS block size (with the current code +* in dir.c). +*/ + +#ifndef __GFS_ONDISK_DOT_H__ +#define __GFS_ONDISK_DOT_H__ + +#define GFS_MAGIC (0x01161970) +#define GFS_BASIC_BLOCK (512) +#define GFS_BASIC_BLOCK_SHIFT (9) +#define GFS_DUMPS_PER_LOG (4) + +/* Lock numbers of the LM_TYPE_NONDISK type */ + +#define GFS_MOUNT_LOCK (0) +#define GFS_LIVE_LOCK (1) +#define GFS_TRANS_LOCK (2) +#define GFS_RENAME_LOCK (3) + +/* Format numbers for various metadata types */ + +#define GFS_FORMAT_SB (100) +#define GFS_FORMAT_RG (200) +#define GFS_FORMAT_RB (300) +#define GFS_FORMAT_DI (400) +#define GFS_FORMAT_IN (500) +#define GFS_FORMAT_LF (600) +#define GFS_FORMAT_JD (700) +#define GFS_FORMAT_LH (800) +#define GFS_FORMAT_LD (900) +/* These don't have actual struct gfs_meta_header structures to go with them */ +#define GFS_FORMAT_JI (1000) +#define GFS_FORMAT_RI (1100) +#define GFS_FORMAT_DE (1200) +#define GFS_FORMAT_QU (1500) +#define GFS_FORMAT_EA (1600) +/* These are part of the superblock */ +#define GFS_FORMAT_FS (1309) +#define GFS_FORMAT_MULTI (1401) + +/* + * An on-disk inode number + */ + +#define gfs_inum_equal(ino1, ino2) \ +(((ino1)->no_formal_ino == (ino2)->no_formal_ino) && \ + ((ino1)->no_addr == (ino2)->no_addr)) + +struct gfs_inum { + uint64_t no_formal_ino; + uint64_t no_addr; +}; + +/* + * Generic metadata header structure + * + * Every inplace buffer logged in the journal must start with this.
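+ * A buffer is recognized as GFS metadata by mh_magic == GFS_MAGIC; + * mh_type (one of the GFS_METATYPE_* values below) says which metadata + * structure the block holds.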
+ */ + +#define GFS_METATYPE_NONE (0) +#define GFS_METATYPE_SB (1) +#define GFS_METATYPE_RG (2) +#define GFS_METATYPE_RB (3) +#define GFS_METATYPE_DI (4) +#define GFS_METATYPE_IN (5) +#define GFS_METATYPE_LF (6) +#define GFS_METATYPE_JD (7) +#define GFS_METATYPE_LH (8) +#define GFS_METATYPE_LD (9) +#define GFS_METATYPE_EA (10) + +#define GFS_META_CLUMP (64) + +struct gfs_meta_header { + uint32_t mh_magic; /* Magic number */ + uint32_t mh_type; /* GFS_METATYPE_XX */ + uint64_t mh_generation; /* Generation number */ + uint32_t mh_format; /* GFS_FORMAT_XX */ + uint32_t mh_incarn; +}; + +/* + * super-block structure + * + * It's probably good if SIZEOF_SB <= GFS_BASIC_BLOCK + */ + +/* Address of SuperBlock in GFS basic blocks */ +#define GFS_SB_ADDR (128) +/* The lock number for the superblock (must be zero) */ +#define GFS_SB_LOCK (0) +#define GFS_CRAP_LOCK (1) + +/* Requirement: GFS_LOCKNAME_LEN % 8 == 0 + Includes: the fencing zero at the end */ +#define GFS_LOCKNAME_LEN (64) + +struct gfs_sb { + /* Order is important */ + struct gfs_meta_header sb_header; + + uint32_t sb_fs_format; + uint32_t sb_multihost_format; + uint32_t sb_flags; + + /* Important information */ + uint32_t sb_bsize; /* fundamental fs block size in bytes */ + uint32_t sb_bsize_shift; /* log2(sb_bsize) */ + uint32_t sb_seg_size; /* Journal segment size in FS blocks */ + + struct gfs_inum sb_jindex_di; /* journal index inode number (GFS_SB_LOCK) */ + struct gfs_inum sb_rindex_di; /* resource index inode number (GFS_SB_LOCK) */ + struct gfs_inum sb_root_di; /* root directory inode number (GFS_ROOT_LOCK) */ + + char sb_lockproto[GFS_LOCKNAME_LEN]; /* Type of locking this FS uses */ + char sb_locktable[GFS_LOCKNAME_LEN]; /* Name of lock table for this FS */ + + struct gfs_inum sb_quota_di; + struct gfs_inum sb_license_di; + + char sb_reserved[96]; +}; + +/* + * journal index structure + */ + +struct gfs_jindex { + uint64_t ji_addr; /* starting block of the journal */ + uint32_t ji_nsegment; /* number of segments in journal */ + uint32_t ji_pad; + + char ji_reserved[64]; +}; + +/* + * resource index structure + */ + +struct gfs_rindex { + uint64_t ri_addr; /* rgrp block disk address */ + uint32_t ri_length; /* length of rgrp header in fs blocks */ + uint32_t ri_pad; + + uint64_t ri_data1; /* first data location */ + uint32_t ri_data; /* num of data blocks in rgrp */ + + uint32_t ri_bitbytes; /* number of bytes in data bitmaps */ + + char ri_reserved[64]; +}; + +/* + * resource group header structure + * + */ + +/* Number of blocks per byte in rgrp */ +#define GFS_NBBY (4) +#define GFS_BIT_SIZE (2) +#define GFS_BIT_MASK (0x00000003) + +#define GFS_BLKST_FREE (0) +#define GFS_BLKST_USED (1) +#define GFS_BLKST_FREEMETA (2) +#define GFS_BLKST_USEDMETA (3) + +struct gfs_rgrp { + struct gfs_meta_header rg_header; + + uint32_t rg_flags; /* flags */ + + uint32_t rg_free; /* number of free data blocks */ + + uint32_t rg_useddi; /* number of dinodes */ + uint32_t rg_freedi; /* number of unused dinodes */ + struct gfs_inum rg_freedi_list; /* list of free dinodes */ + + uint32_t rg_usedmeta; /* number of used metadata blocks (not including dinodes) */ + uint32_t rg_freemeta; /* number of unused metadata blocks */ + + char rg_reserved[64]; +}; + +/* + * Quota Structures + */ + +struct gfs_quota { + uint64_t qu_limit; + uint64_t qu_warn; + int64_t qu_value; + + char qu_reserved[64]; +}; + +/* + * dinode structure + */ + +#define GFS_MAX_META_HEIGHT (10) +#define GFS_DIR_MAX_DEPTH (17) + +/* Dinode types */ +#define GFS_FILE_NON (0) +#define 
GFS_FILE_REG (1) +#define GFS_FILE_DIR (2) +#define GFS_FILE_LNK (5) +#define GFS_FILE_BLK (7) +#define GFS_FILE_CHR (8) +#define GFS_FILE_FIFO (101) +#define GFS_FILE_SOCK (102) + +/* Dinode flags */ +#define GFS_DIF_JDATA (0x00000001) +#define GFS_DIF_EXHASH (0x00000002) +#define GFS_DIF_UNUSED (0x00000004) +#define GFS_DIF_EA_INDIRECT (0x00000008) +#define GFS_DIF_DIRECTIO (0x00000010) +#define GFS_DIF_IMMUTABLE (0x00000020) +#define GFS_DIF_APPENDONLY (0x00000040) +#define GFS_DIF_NOATIME (0x00000080) +#define GFS_DIF_SYNC (0x00000100) +#define GFS_DIF_INHERIT_DIRECTIO (0x40000000) +#define GFS_DIF_INHERIT_JDATA (0x80000000) + +struct gfs_dinode { + struct gfs_meta_header di_header; + + struct gfs_inum di_num; + + uint32_t di_mode; /* mode of file */ + uint32_t di_uid; /* owner's user id */ + uint32_t di_gid; /* owner's group id */ + uint32_t di_nlink; /* number of links to this file */ + uint64_t di_size; /* number of bytes in file */ + uint64_t di_blocks; /* number of blocks in file */ + int64_t di_atime; /* time last accessed */ + int64_t di_mtime; /* time last modified */ + int64_t di_ctime; /* time last changed */ + uint32_t di_major; /* device major number */ + uint32_t di_minor; /* device minor number */ + + uint64_t di_rgrp; /* dinode rgrp block number */ + uint64_t di_goal_rgrp; /* rgrp to alloc from next */ + uint32_t di_goal_dblk; /* data block goal */ + uint32_t di_goal_mblk; /* metadata block goal */ + uint32_t di_flags; /* flags */ + uint32_t di_payload_format; /* struct gfs_rindex, struct gfs_jindex, or struct gfs_dirent */ + uint16_t di_type; /* type of file */ + uint16_t di_height; /* height of metadata */ + uint32_t di_incarn; /* incarnation number */ + uint16_t di_pad; + + /* These only apply to directories */ + uint16_t di_depth; /* Number of bits in the table */ + uint32_t di_entries; /* The number of entries in the directory */ + + /* This only applies to unused inodes */ + struct gfs_inum di_next_unused; + + uint64_t di_eattr; /* extended attribute block number */ + + char di_reserved[56]; +}; + +/* + * indirect block header + */ + +struct gfs_indirect { + struct gfs_meta_header in_header; + + char in_reserved[64]; +}; + +/* + * directory structure - many of these per directory file + */ + +#define GFS_FNAMESIZE (255) +#define GFS_DIRENT_SIZE(name_len) ((sizeof(struct gfs_dirent) + (name_len) + 7) & ~7) + +struct gfs_dirent { + struct gfs_inum de_inum; /* Inode number */ + uint32_t de_hash; /* hash of the filename */ + uint16_t de_rec_len; /* the length of the dirent */ + uint16_t de_name_len; /* the length of the name */ + uint16_t de_type; /* type of dinode this points to */ + + char de_reserved[14]; +}; + +/* + * Header of leaf directory nodes + */ + +struct gfs_leaf { + struct gfs_meta_header lf_header; + + uint16_t lf_depth; /* Depth of leaf */ + uint16_t lf_entries; /* Number of dirents in leaf */ + uint32_t lf_dirent_format; /* Format of the dirents */ + uint64_t lf_next; /* Next leaf, if overflow */ + + char lf_reserved[64]; +}; + +/* + * Log header structure + */ + +#define GFS_LOG_HEAD_UNMOUNT (0x00000001) + +struct gfs_log_header { + struct gfs_meta_header lh_header; + + uint32_t lh_flags; /* Flags */ + uint32_t lh_pad; + + uint64_t lh_first; /* Block number of first header in this trans */ + uint64_t lh_sequence; /* Sequence number of this transaction */ + + uint64_t lh_tail; /* Block number of log tail */ + uint64_t lh_last_dump; /* block number of last dump */ + + char lh_reserved[64]; +}; + +/* + * Log type descriptor + */ + +#define 
GFS_LOG_DESC_METADATA (300) +/* ld_data1 is the number of metadata blocks in the descriptor. + ld_data2 is unused. + */ + +#define GFS_LOG_DESC_IUL (400) +/* ld_data1 is TRUE if this is a dump. + ld_data2 is unused. + FixMe!!! ld_data1 should be the number of entries. + ld_data2 should be "TRUE if this is a dump". + */ + +#define GFS_LOG_DESC_IDA (401) +/* ld_data1 is unused. + ld_data2 is unused. + FixMe!!! ld_data1 should be the number of entries. + */ + +#define GFS_LOG_DESC_Q (402) +/* ld_data1 is the number of quota changes in the descriptor. + ld_data2 is TRUE if this is a dump. + */ + +#define GFS_LOG_DESC_LAST (500) +/* ld_data1 is unused. + ld_data2 is unused. + */ + +struct gfs_log_descriptor { + struct gfs_meta_header ld_header; + + uint32_t ld_type; /* Type of data in this log chunk */ + uint32_t ld_length; /* Number of buffers in this chunk */ + uint32_t ld_data1; /* descriptor specific field */ + uint32_t ld_data2; /* descriptor specific field */ + + char ld_reserved[64]; +}; + +/* + * Metadata block tags + */ + +struct gfs_block_tag { + uint64_t bt_blkno; /* inplace block number */ + uint32_t bt_flags; /* flags */ + uint32_t bt_pad; +}; + +/* + * Quota Journal Tag + */ + +#define GFS_QTF_USER (0x00000001) + +struct gfs_quota_tag { + int64_t qt_change; + uint32_t qt_flags; + uint32_t qt_id; +}; + +/* + * Extended attribute header format + */ + +#define GFS_EA_MAX_NAME_LEN (255) +#define GFS_EA_MAX_DATA_LEN (65535) + +#define GFS_EATYPE_LAST (2) + +#define GFS_EATYPE_UNUSED (0) +#define GFS_EATYPE_USR (1) +#define GFS_EATYPE_SYS (2) +#define GFS_EATYPE_VALID(x) ((x) && (x) <= GFS_EATYPE_LAST) /* this is only + for requests */ + +#define GFS_EAFLAG_LAST (0x01) /* last ea in block */ + +struct gfs_ea_header { + uint32_t ea_rec_len; + uint32_t ea_data_len; + uint8_t ea_name_len; /* no NULL pointer after the string */ + uint8_t ea_type; /* GFS_EATYPE_... 
*/ + uint8_t ea_flags; + uint8_t ea_num_ptrs; + uint32_t ea_pad; +}; + +/* Endian functions */ + +#define GFS_ENDIAN_BIG + +#ifdef GFS_ENDIAN_BIG + +#define gfs16_to_cpu be16_to_cpu +#define gfs32_to_cpu be32_to_cpu +#define gfs64_to_cpu be64_to_cpu + +#define cpu_to_gfs16 cpu_to_be16 +#define cpu_to_gfs32 cpu_to_be32 +#define cpu_to_gfs64 cpu_to_be64 + +#else /* GFS_ENDIAN_BIG */ + +#define gfs16_to_cpu le16_to_cpu +#define gfs32_to_cpu le32_to_cpu +#define gfs64_to_cpu le64_to_cpu + +#define cpu_to_gfs16 cpu_to_le16 +#define cpu_to_gfs32 cpu_to_le32 +#define cpu_to_gfs64 cpu_to_le64 + +#endif /* GFS_ENDIAN_BIG */ + +/* Translation functions */ + +void gfs_inum_in(struct gfs_inum *no, char *buf); +void gfs_inum_out(struct gfs_inum *no, char *buf); +void gfs_meta_header_in(struct gfs_meta_header *mh, char *buf); +void gfs_meta_header_out(struct gfs_meta_header *mh, char *buf); +void gfs_sb_in(struct gfs_sb *sb, char *buf); +void gfs_sb_out(struct gfs_sb *sb, char *buf); +void gfs_jindex_in(struct gfs_jindex *jindex, char *buf); +void gfs_jindex_out(struct gfs_jindex *jindex, char *buf); +void gfs_rindex_in(struct gfs_rindex *rindex, char *buf); +void gfs_rindex_out(struct gfs_rindex *rindex, char *buf); +void gfs_rgrp_in(struct gfs_rgrp *rgrp, char *buf); +void gfs_rgrp_out(struct gfs_rgrp *rgrp, char *buf); +void gfs_quota_in(struct gfs_quota *quota, char *buf); +void gfs_quota_out(struct gfs_quota *quota, char *buf); +void gfs_dinode_in(struct gfs_dinode *dinode, char *buf); +void gfs_dinode_out(struct gfs_dinode *dinode, char *buf); +void gfs_indirect_in(struct gfs_indirect *indirect, char *buf); +void gfs_indirect_out(struct gfs_indirect *indirect, char *buf); +void gfs_dirent_in(struct gfs_dirent *dirent, char *buf); +void gfs_dirent_out(struct gfs_dirent *dirent, char *buf); +void gfs_leaf_in(struct gfs_leaf *leaf, char *buf); +void gfs_leaf_out(struct gfs_leaf *leaf, char *buf); +void gfs_log_header_in(struct gfs_log_header *head, char *buf); +void gfs_log_header_out(struct gfs_log_header *head, char *buf); +void gfs_desc_in(struct gfs_log_descriptor *desc, char *buf); +void gfs_desc_out(struct gfs_log_descriptor *desc, char *buf); +void gfs_block_tag_in(struct gfs_block_tag *btag, char *buf); +void gfs_block_tag_out(struct gfs_block_tag *btag, char *buf); +void gfs_quota_tag_in(struct gfs_quota_tag *qtag, char *buf); +void gfs_quota_tag_out(struct gfs_quota_tag *qtag, char *buf); +void gfs_ea_header_in(struct gfs_ea_header *qtag, char *buf); +void gfs_ea_header_out(struct gfs_ea_header *qtag, char *buf); + +/* Printing functions */ + +void gfs_inum_print(struct gfs_inum *no); +void gfs_meta_header_print(struct gfs_meta_header *mh); +void gfs_sb_print(struct gfs_sb *sb); +void gfs_jindex_print(struct gfs_jindex *jindex); +void gfs_rindex_print(struct gfs_rindex *rindex); +void gfs_rgrp_print(struct gfs_rgrp *rgrp); +void gfs_quota_print(struct gfs_quota *quota); +void gfs_dinode_print(struct gfs_dinode *dinode); +void gfs_indirect_print(struct gfs_indirect *indirect); +void gfs_dirent_print(struct gfs_dirent *dirent, char *name); +void gfs_leaf_print(struct gfs_leaf *leaf); +void gfs_log_header_print(struct gfs_log_header *head); +void gfs_desc_print(struct gfs_log_descriptor *desc); +void gfs_block_tag_print(struct gfs_block_tag *tag); +void gfs_quota_tag_print(struct gfs_quota_tag *tag); +void gfs_ea_header_print(struct gfs_ea_header *tag); + +/* The hash function for ExHash directories */ + +uint32_t gfs_dir_hash(const char *data, int len); + +#endif /* __GFS_ONDISK_DOT_H__ */ + 
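+/* + * A minimal sketch (not part of the GFS sources; the helper name and + * error handling are illustrative only) of how the translation functions + * declared above are meant to be used: byte-swap a disk-order buffer + * into a CPU-order structure, then sanity-check it. + */ +#if 0 +static int example_check_sb(char *buf) /* disk-order superblock buffer */ +{ + struct gfs_sb sb; + + gfs_sb_in(&sb, buf); /* convert to CPU byte order */ + + if (sb.sb_header.mh_magic != GFS_MAGIC || + sb.sb_header.mh_type != GFS_METATYPE_SB) + return -EINVAL; /* not a GFS superblock */ + + if (sb.sb_bsize != (1 << sb.sb_bsize_shift)) + return -EINVAL; /* inconsistent block-size fields */ + + return 0; +} +#endif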
+ + +#ifdef WANT_GFS_CONVERSION_FUNCTIONS + +#define CPIN_08(s1, s2, member, count) {memcpy((s1->member), (s2->member), (count));} +#define CPOUT_08(s1, s2, member, count) {memcpy((s2->member), (s1->member), (count));} +#define CPIN_16(s1, s2, member) {(s1->member) = gfs16_to_cpu((s2->member));} +#define CPOUT_16(s1, s2, member) {(s2->member) = cpu_to_gfs16((s1->member));} +#define CPIN_32(s1, s2, member) {(s1->member) = gfs32_to_cpu((s2->member));} +#define CPOUT_32(s1, s2, member) {(s2->member) = cpu_to_gfs32((s1->member));} +#define CPIN_64(s1, s2, member) {(s1->member) = gfs64_to_cpu((s2->member));} +#define CPOUT_64(s1, s2, member) {(s2->member) = cpu_to_gfs64((s1->member));} + +#define pa(struct, member, count) print_array(#member, struct->member, count); + +/** + * print_array - Print out an array of bytes + * @title: what to print before the array + * @buf: the array + * @count: the number of bytes + * + */ + +static void +print_array(char *title, char *buf, int count) +{ + int x; + + printk(" %s =\n", title); + for (x = 0; x < count; x++) { + printk("%.2X ", (unsigned char)buf[x]); + if (x % 16 == 15) + printk("\n"); + } + if (x % 16) + printk("\n"); +} + +/** + * gfs_inum_in - Read in an inode number + * @no: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_inum_in(struct gfs_inum *no, char *buf) +{ + struct gfs_inum *str = (struct gfs_inum *)buf; + + CPIN_64(no, str, no_formal_ino); + CPIN_64(no, str, no_addr); +} + +/** + * gfs_inum_out - Write out an inode number + * @no: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_inum_out(struct gfs_inum *no, char *buf) +{ + struct gfs_inum *str = (struct gfs_inum *)buf; + + CPOUT_64(no, str, no_formal_ino); + CPOUT_64(no, str, no_addr); +} + +/** + * gfs_inum_print - Print out an inode number + * @no: the cpu-order buffer + * + */ + +void +gfs_inum_print(struct gfs_inum *no) +{ + pv(no, no_formal_ino, "%"PRIu64); + pv(no, no_addr, "%"PRIu64); +} + +/** + * gfs_meta_header_in - Read in a metadata header + * @mh: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_meta_header_in(struct gfs_meta_header *mh, char *buf) +{ + struct gfs_meta_header *str = (struct gfs_meta_header *)buf; + + CPIN_32(mh, str, mh_magic); + CPIN_32(mh, str, mh_type); + CPIN_64(mh, str, mh_generation); + CPIN_32(mh, str, mh_format); + CPIN_32(mh, str, mh_incarn); +} + +/** + * gfs_meta_header_out - Write out a metadata header + * @mh: the cpu-order structure + * @buf: the disk-order buffer + * + * Don't ever change the generation number in this routine. + * It's done manually in increment_generation(). + */ + +void +gfs_meta_header_out(struct gfs_meta_header *mh, char *buf) +{ + struct gfs_meta_header *str = (struct gfs_meta_header *)buf; + + CPOUT_32(mh, str, mh_magic); + CPOUT_32(mh, str, mh_type); +#if 0 + /* Don't do this! + Mh_generation should only be changed manually. 
*/ + CPOUT_64(mh, str, mh_generation); +#endif + CPOUT_32(mh, str, mh_format); + CPOUT_32(mh, str, mh_incarn); +} + +/** + * gfs_meta_header_print - Print out a metadata header + * @mh: the cpu-order buffer + * + */ + +void +gfs_meta_header_print(struct gfs_meta_header *mh) +{ + pv(mh, mh_magic, "0x%.8X"); + pv(mh, mh_type, "%u"); + pv(mh, mh_generation, "%"PRIu64); + pv(mh, mh_format, "%u"); + pv(mh, mh_incarn, "%u"); +} + +/** + * gfs_sb_in - Read in a superblock + * @sb: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_sb_in(struct gfs_sb *sb, char *buf) +{ + struct gfs_sb *str = (struct gfs_sb *)buf; + + gfs_meta_header_in(&sb->sb_header, buf); + + CPIN_32(sb, str, sb_fs_format); + CPIN_32(sb, str, sb_multihost_format); + CPIN_32(sb, str, sb_flags); + + CPIN_32(sb, str, sb_bsize); + CPIN_32(sb, str, sb_bsize_shift); + CPIN_32(sb, str, sb_seg_size); + + gfs_inum_in(&sb->sb_jindex_di, (char *)&str->sb_jindex_di); + gfs_inum_in(&sb->sb_rindex_di, (char *)&str->sb_rindex_di); + gfs_inum_in(&sb->sb_root_di, (char *)&str->sb_root_di); + + CPIN_08(sb, str, sb_lockproto, GFS_LOCKNAME_LEN); + CPIN_08(sb, str, sb_locktable, GFS_LOCKNAME_LEN); + + gfs_inum_in(&sb->sb_quota_di, (char *)&str->sb_quota_di); + gfs_inum_in(&sb->sb_license_di, (char *)&str->sb_license_di); + + CPIN_08(sb, str, sb_reserved, 96); +} + +/** + * gfs_sb_out - Write out a superblock + * @sb: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_sb_out(struct gfs_sb *sb, char *buf) +{ + struct gfs_sb *str = (struct gfs_sb *)buf; + + gfs_meta_header_out(&sb->sb_header, buf); + + CPOUT_32(sb, str, sb_fs_format); + CPOUT_32(sb, str, sb_multihost_format); + CPOUT_32(sb, str, sb_flags); + + CPOUT_32(sb, str, sb_bsize); + CPOUT_32(sb, str, sb_bsize_shift); + CPOUT_32(sb, str, sb_seg_size); + + gfs_inum_out(&sb->sb_jindex_di, (char *)&str->sb_jindex_di); + gfs_inum_out(&sb->sb_rindex_di, (char *)&str->sb_rindex_di); + gfs_inum_out(&sb->sb_root_di, (char *)&str->sb_root_di); + + CPOUT_08(sb, str, sb_lockproto, GFS_LOCKNAME_LEN); + CPOUT_08(sb, str, sb_locktable, GFS_LOCKNAME_LEN); + + gfs_inum_out(&sb->sb_quota_di, (char *)&str->sb_quota_di); + gfs_inum_out(&sb->sb_license_di, (char *)&str->sb_license_di); + + CPOUT_08(sb, str, sb_reserved, 96); +} + +/** + * gfs_sb_print - Print out a superblock + * @sb: the cpu-order buffer + * + */ + +void +gfs_sb_print(struct gfs_sb *sb) +{ + gfs_meta_header_print(&sb->sb_header); + + pv(sb, sb_fs_format, "%u"); + pv(sb, sb_multihost_format, "%u"); + pv(sb, sb_flags, "%u"); + + pv(sb, sb_bsize, "%u"); + pv(sb, sb_bsize_shift, "%u"); + pv(sb, sb_seg_size, "%u"); + + gfs_inum_print(&sb->sb_jindex_di); + gfs_inum_print(&sb->sb_rindex_di); + gfs_inum_print(&sb->sb_root_di); + + pv(sb, sb_lockproto, "%s"); + pv(sb, sb_locktable, "%s"); + + gfs_inum_print(&sb->sb_quota_di); + gfs_inum_print(&sb->sb_license_di); + + pa(sb, sb_reserved, 96); +} + +/** + * gfs_jindex_in - Read in a journal index structure + * @jindex: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_jindex_in(struct gfs_jindex *jindex, char *buf) +{ + struct gfs_jindex *str = (struct gfs_jindex *)buf; + + CPIN_64(jindex, str, ji_addr); + CPIN_32(jindex, str, ji_nsegment); + CPIN_32(jindex, str, ji_pad); + + CPIN_08(jindex, str, ji_reserved, 64); +} + +/** + * gfs_jindex_out - Write out a journal index structure + * @jindex: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_jindex_out(struct gfs_jindex *jindex, char *buf) +{ + 
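+ /* mirror of gfs_jindex_in: the CPOUT_* macros copy each cpu-order field into the disk-order buffer, byte-swapping the scalar fields */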
struct gfs_jindex *str = (struct gfs_jindex *)buf; + + CPOUT_64(jindex, str, ji_addr); + CPOUT_32(jindex, str, ji_nsegment); + CPOUT_32(jindex, str, ji_pad); + + CPOUT_08(jindex, str, ji_reserved, 64); +} + +/** + * gfs_jindex_print - Print out a journal index structure + * @ji: the cpu-order buffer + * + */ + +void +gfs_jindex_print(struct gfs_jindex *ji) +{ + pv(ji, ji_addr, "%"PRIu64); + pv(ji, ji_nsegment, "%u"); + pv(ji, ji_pad, "%u"); + + pa(ji, ji_reserved, 64); +} + +/** + * gfs_rindex_in - Read in a resource index structure + * @rindex: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_rindex_in(struct gfs_rindex *rindex, char *buf) +{ + struct gfs_rindex *str = (struct gfs_rindex *)buf; + + CPIN_64(rindex, str, ri_addr); + CPIN_32(rindex, str, ri_length); + CPIN_32(rindex, str, ri_pad); + + CPIN_64(rindex, str, ri_data1); + CPIN_32(rindex, str, ri_data); + + CPIN_32(rindex, str, ri_bitbytes); + + CPIN_08(rindex, str, ri_reserved, 64); +} + +/** + * gfs_rindex_out - Write out a resource index structure + * @rindex: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_rindex_out(struct gfs_rindex *rindex, char *buf) +{ + struct gfs_rindex *str = (struct gfs_rindex *)buf; + + CPOUT_64(rindex, str, ri_addr); + CPOUT_32(rindex, str, ri_length); + CPOUT_32(rindex, str, ri_pad); + + CPOUT_64(rindex, str, ri_data1); + CPOUT_32(rindex, str, ri_data); + + CPOUT_32(rindex, str, ri_bitbytes); + + CPOUT_08(rindex, str, ri_reserved, 64); +} + +/** + * gfs_rindex_print - Print out a resource index structure + * @ri: the cpu-order buffer + * + */ + +void +gfs_rindex_print(struct gfs_rindex *ri) +{ + pv(ri, ri_addr, "%"PRIu64); + pv(ri, ri_length, "%u"); + pv(ri, ri_pad, "%u"); + + pv(ri, ri_data1, "%"PRIu64); + pv(ri, ri_data, "%u"); + + pv(ri, ri_bitbytes, "%u"); + + pa(ri, ri_reserved, 64); +} + +/** + * gfs_rgrp_in - Read in a resource group header + * @rgrp: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_rgrp_in(struct gfs_rgrp *rgrp, char *buf) +{ + struct gfs_rgrp *str = (struct gfs_rgrp *)buf; + + gfs_meta_header_in(&rgrp->rg_header, buf); + + CPIN_32(rgrp, str, rg_flags); + + CPIN_32(rgrp, str, rg_free); + + CPIN_32(rgrp, str, rg_useddi); + CPIN_32(rgrp, str, rg_freedi); + gfs_inum_in(&rgrp->rg_freedi_list, (char *)&str->rg_freedi_list); + + CPIN_32(rgrp, str, rg_usedmeta); + CPIN_32(rgrp, str, rg_freemeta); + + CPIN_08(rgrp, str, rg_reserved, 64); +} + +/** + * gfs_rgrp_out - Write out a resource group header + * @rgrp: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_rgrp_out(struct gfs_rgrp *rgrp, char *buf) +{ + struct gfs_rgrp *str = (struct gfs_rgrp *)buf; + + gfs_meta_header_out(&rgrp->rg_header, buf); + + CPOUT_32(rgrp, str, rg_flags); + + CPOUT_32(rgrp, str, rg_free); + + CPOUT_32(rgrp, str, rg_useddi); + CPOUT_32(rgrp, str, rg_freedi); + gfs_inum_out(&rgrp->rg_freedi_list, (char *)&str->rg_freedi_list); + + CPOUT_32(rgrp, str, rg_usedmeta); + CPOUT_32(rgrp, str, rg_freemeta); + + CPOUT_08(rgrp, str, rg_reserved, 64); +} + +/** + * gfs_rgrp_print - Print out a resource group header + * @rg: the cpu-order buffer + * + */ + +void +gfs_rgrp_print(struct gfs_rgrp *rg) +{ + gfs_meta_header_print(&rg->rg_header); + + pv(rg, rg_flags, "%u"); + + pv(rg, rg_free, "%u"); + + pv(rg, rg_useddi, "%u"); + pv(rg, rg_freedi, "%u"); + gfs_inum_print(&rg->rg_freedi_list); + + pv(rg, rg_usedmeta, "%u"); + pv(rg, rg_freemeta, "%u"); + + pa(rg, rg_reserved, 64); +} + +/** + * gfs_quota_in 
- Read in a quota structure + * @quota: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_quota_in(struct gfs_quota *quota, char *buf) +{ + struct gfs_quota *str = (struct gfs_quota *)buf; + + CPIN_64(quota, str, qu_limit); + CPIN_64(quota, str, qu_warn); + CPIN_64(quota, str, qu_value); + + CPIN_08(quota, str, qu_reserved, 64); +} + +/** + * gfs_quota_out - Write out a quota structure + * @quota: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_quota_out(struct gfs_quota *quota, char *buf) +{ + struct gfs_quota *str = (struct gfs_quota *)buf; + + CPOUT_64(quota, str, qu_limit); + CPOUT_64(quota, str, qu_warn); + CPOUT_64(quota, str, qu_value); + + CPOUT_08(quota, str, qu_reserved, 64); +} + +/** + * gfs_quota_print - Print out a quota structure + * @quota: the cpu-order buffer + * + */ + +void +gfs_quota_print(struct gfs_quota *quota) +{ + pv(quota, qu_limit, "%"PRIu64); + pv(quota, qu_warn, "%"PRIu64); + pv(quota, qu_value, "%"PRId64); + + pa(quota, qu_reserved, 64); +} + +/** + * gfs_dinode_in - Read in a dinode + * @dinode: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_dinode_in(struct gfs_dinode *dinode, char *buf) +{ + struct gfs_dinode *str = (struct gfs_dinode *)buf; + + gfs_meta_header_in(&dinode->di_header, buf); + + gfs_inum_in(&dinode->di_num, (char *)&str->di_num); + + CPIN_32(dinode, str, di_mode); + CPIN_32(dinode, str, di_uid); + CPIN_32(dinode, str, di_gid); + CPIN_32(dinode, str, di_nlink); + CPIN_64(dinode, str, di_size); + CPIN_64(dinode, str, di_blocks); + CPIN_64(dinode, str, di_atime); + CPIN_64(dinode, str, di_mtime); + CPIN_64(dinode, str, di_ctime); + CPIN_32(dinode, str, di_major); + CPIN_32(dinode, str, di_minor); + + CPIN_64(dinode, str, di_rgrp); + CPIN_64(dinode, str, di_goal_rgrp); + CPIN_32(dinode, str, di_goal_dblk); + CPIN_32(dinode, str, di_goal_mblk); + CPIN_32(dinode, str, di_flags); + CPIN_32(dinode, str, di_payload_format); + CPIN_16(dinode, str, di_type); + CPIN_16(dinode, str, di_height); + CPIN_32(dinode, str, di_incarn); + CPIN_16(dinode, str, di_pad); + + CPIN_16(dinode, str, di_depth); + CPIN_32(dinode, str, di_entries); + + gfs_inum_in(&dinode->di_next_unused, (char *)&str->di_next_unused); + + CPIN_64(dinode, str, di_eattr); + + CPIN_08(dinode, str, di_reserved, 56); +} + +/** + * gfs_dinode_out - Write out a dinode + * @dinode: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_dinode_out(struct gfs_dinode *dinode, char *buf) +{ + struct gfs_dinode *str = (struct gfs_dinode *)buf; + + gfs_meta_header_out(&dinode->di_header, buf); + + gfs_inum_out(&dinode->di_num, (char *)&str->di_num); + + CPOUT_32(dinode, str, di_mode); + CPOUT_32(dinode, str, di_uid); + CPOUT_32(dinode, str, di_gid); + CPOUT_32(dinode, str, di_nlink); + CPOUT_64(dinode, str, di_size); + CPOUT_64(dinode, str, di_blocks); + CPOUT_64(dinode, str, di_atime); + CPOUT_64(dinode, str, di_mtime); + CPOUT_64(dinode, str, di_ctime); + CPOUT_32(dinode, str, di_major); + CPOUT_32(dinode, str, di_minor); + + CPOUT_64(dinode, str, di_rgrp); + CPOUT_64(dinode, str, di_goal_rgrp); + CPOUT_32(dinode, str, di_goal_dblk); + CPOUT_32(dinode, str, di_goal_mblk); + CPOUT_32(dinode, str, di_flags); + CPOUT_32(dinode, str, di_payload_format); + CPOUT_16(dinode, str, di_type); + CPOUT_16(dinode, str, di_height); + CPOUT_32(dinode, str, di_incarn); + CPOUT_16(dinode, str, di_pad); + + CPOUT_16(dinode, str, di_depth); + CPOUT_32(dinode, str, di_entries); + + 
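+ /* embedded gfs_inum structs go through their own out routine rather than the scalar CPOUT_* macros */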
gfs_inum_out(&dinode->di_next_unused, (char *)&str->di_next_unused); + + CPOUT_64(dinode, str, di_eattr); + + CPOUT_08(dinode, str, di_reserved, 56); +} + +/** + * gfs_dinode_print - Print out a dinode + * @di: the cpu-order buffer + * + */ + +void +gfs_dinode_print(struct gfs_dinode *di) +{ + gfs_meta_header_print(&di->di_header); + + gfs_inum_print(&di->di_num); + + pv(di, di_mode, "0%o"); + pv(di, di_uid, "%u"); + pv(di, di_gid, "%u"); + pv(di, di_nlink, "%u"); + pv(di, di_size, "%"PRIu64); + pv(di, di_blocks, "%"PRIu64); + pv(di, di_atime, "%"PRId64); + pv(di, di_mtime, "%"PRId64); + pv(di, di_ctime, "%"PRId64); + pv(di, di_major, "%u"); + pv(di, di_minor, "%u"); + + pv(di, di_rgrp, "%"PRIu64); + pv(di, di_goal_rgrp, "%"PRIu64); + pv(di, di_goal_dblk, "%u"); + pv(di, di_goal_mblk, "%u"); + pv(di, di_flags, "0x%.8X"); + pv(di, di_payload_format, "%u"); + pv(di, di_type, "%u"); + pv(di, di_height, "%u"); + pv(di, di_incarn, "%u"); + pv(di, di_pad, "%u"); + + pv(di, di_depth, "%u"); + pv(di, di_entries, "%u"); + + gfs_inum_print(&di->di_next_unused); + + pv(di, di_eattr, "%"PRIu64); + + pa(di, di_reserved, 56); +} + +/** + * gfs_indirect_in - copy in the header of an indirect block + * @indirect: the in memory copy + * @buf: the buffer copy + * + */ + +void +gfs_indirect_in(struct gfs_indirect *indirect, char *buf) +{ + struct gfs_indirect *str = (struct gfs_indirect *)buf; + + gfs_meta_header_in(&indirect->in_header, buf); + + CPIN_08(indirect, str, in_reserved, 64); +} + +/** + * gfs_indirect_out - copy out the header of an indirect block + * @indirect: the in memory copy + * @buf: the buffer copy + * + */ + +void +gfs_indirect_out(struct gfs_indirect *indirect, char *buf) +{ + struct gfs_indirect *str = (struct gfs_indirect *)buf; + + gfs_meta_header_out(&indirect->in_header, buf); + + CPOUT_08(indirect, str, in_reserved, 64); +} + +/** + * gfs_indirect_print - Print out an indirect block header + * @indirect: the cpu-order buffer + * + */ + +void +gfs_indirect_print(struct gfs_indirect *indirect) +{ + gfs_meta_header_print(&indirect->in_header); + + pa(indirect, in_reserved, 64); +} + +/** + * gfs_dirent_in - Read in a directory entry + * @dirent: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_dirent_in(struct gfs_dirent *dirent, char *buf) +{ + struct gfs_dirent *str = (struct gfs_dirent *)buf; + + gfs_inum_in(&dirent->de_inum, (char *)&str->de_inum); + CPIN_32(dirent, str, de_hash); + CPIN_16(dirent, str, de_rec_len); + CPIN_16(dirent, str, de_name_len); + CPIN_16(dirent, str, de_type); + + CPIN_08(dirent, str, de_reserved, 14); +} + +/** + * gfs_dirent_out - Write out a directory entry + * @dirent: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_dirent_out(struct gfs_dirent *dirent, char *buf) +{ + struct gfs_dirent *str = (struct gfs_dirent *)buf; + + gfs_inum_out(&dirent->de_inum, (char *)&str->de_inum); + CPOUT_32(dirent, str, de_hash); + CPOUT_16(dirent, str, de_rec_len); + CPOUT_16(dirent, str, de_name_len); + CPOUT_16(dirent, str, de_type); + + CPOUT_08(dirent, str, de_reserved, 14); +} + +/** + * gfs_dirent_print - Print out a directory entry + * @de: the cpu-order buffer + * @name: the filename + * + */ + +void +gfs_dirent_print(struct gfs_dirent *de, char *name) +{ + char buf[GFS_FNAMESIZE + 1]; + + gfs_inum_print(&de->de_inum); + pv(de, de_hash, "0x%.8X"); + pv(de, de_rec_len, "%u"); + pv(de, de_name_len, "%u"); + pv(de, de_type, "%u"); + + pa(de, de_reserved, 14); + + memset(buf, 0, GFS_FNAMESIZE + 1); + 
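+ /* buf was zero-filled above, so a name of up to GFS_FNAMESIZE bytes is NUL-terminated for printk */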
memcpy(buf, name, de->de_name_len); + printk(" name = %s\n", buf); +} + +/** + * gfs_leaf_in - Read in a directory leaf header + * @leaf: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_leaf_in(struct gfs_leaf *leaf, char *buf) +{ + struct gfs_leaf *str = (struct gfs_leaf *)buf; + + gfs_meta_header_in(&leaf->lf_header, buf); + + CPIN_16(leaf, str, lf_depth); + CPIN_16(leaf, str, lf_entries); + CPIN_32(leaf, str, lf_dirent_format); + CPIN_64(leaf, str, lf_next); + + CPIN_08(leaf, str, lf_reserved, 64); +} + +/** + * gfs_leaf_out - Write out a directory leaf header + * @leaf: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_leaf_out(struct gfs_leaf *leaf, char *buf) +{ + struct gfs_leaf *str = (struct gfs_leaf *)buf; + + gfs_meta_header_out(&leaf->lf_header, buf); + + CPOUT_16(leaf, str, lf_depth); + CPOUT_16(leaf, str, lf_entries); + CPOUT_32(leaf, str, lf_dirent_format); + CPOUT_64(leaf, str, lf_next); + + CPOUT_08(leaf, str, lf_reserved, 64); +} + +/** + * gfs_leaf_print - Print out a directory leaf header + * @lf: the cpu-order buffer + * + */ + +void +gfs_leaf_print(struct gfs_leaf *lf) +{ + gfs_meta_header_print(&lf->lf_header); + + pv(lf, lf_depth, "%u"); + pv(lf, lf_entries, "%u"); + pv(lf, lf_dirent_format, "%u"); + pv(lf, lf_next, "%"PRIu64); + + pa(lf, lf_reserved, 64); +} + +/** + * gfs_log_header_in - Read in a log header + * @head: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_log_header_in(struct gfs_log_header *head, char *buf) +{ + struct gfs_log_header *str = (struct gfs_log_header *)buf; + + gfs_meta_header_in(&head->lh_header, buf); + + CPIN_32(head, str, lh_flags); + CPIN_32(head, str, lh_pad); + + CPIN_64(head, str, lh_first); + CPIN_64(head, str, lh_sequence); + + CPIN_64(head, str, lh_tail); + CPIN_64(head, str, lh_last_dump); + + CPIN_08(head, str, lh_reserved, 64); +} + +/** + * gfs_log_header_out - Write out a log header + * @head: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_log_header_out(struct gfs_log_header *head, char *buf) +{ + struct gfs_log_header *str = (struct gfs_log_header *)buf; + + gfs_meta_header_out(&head->lh_header, buf); + + CPOUT_32(head, str, lh_flags); + CPOUT_32(head, str, lh_pad); + + CPOUT_64(head, str, lh_first); + CPOUT_64(head, str, lh_sequence); + + CPOUT_64(head, str, lh_tail); + CPOUT_64(head, str, lh_last_dump); + + CPOUT_08(head, str, lh_reserved, 64); +} + +/** + * gfs_log_header_print - Print out a log header + * @lh: the cpu-order buffer + * + */ + +void +gfs_log_header_print(struct gfs_log_header *lh) +{ + gfs_meta_header_print(&lh->lh_header); + + pv(lh, lh_flags, "0x%.8X"); + pv(lh, lh_pad, "%u"); + + pv(lh, lh_first, "%"PRIu64); + pv(lh, lh_sequence, "%"PRIu64); + + pv(lh, lh_tail, "%"PRIu64); + pv(lh, lh_last_dump, "%"PRIu64); + + pa(lh, lh_reserved, 64); +} + +/** + * gfs_desc_in - Read in a log descriptor + * @desc: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_desc_in(struct gfs_log_descriptor *desc, char *buf) +{ + struct gfs_log_descriptor *str = (struct gfs_log_descriptor *)buf; + + gfs_meta_header_in(&desc->ld_header, buf); + + CPIN_32(desc, str, ld_type); + CPIN_32(desc, str, ld_length); + CPIN_32(desc, str, ld_data1); + CPIN_32(desc, str, ld_data2); + + CPIN_08(desc, str, ld_reserved, 64); +} + +/** + * gfs_desc_out - Write out a log descriptor + * @desc: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_desc_out(struct 
gfs_log_descriptor *desc, char *buf) +{ + struct gfs_log_descriptor *str = (struct gfs_log_descriptor *)buf; + + gfs_meta_header_out(&desc->ld_header, buf); + + CPOUT_32(desc, str, ld_type); + CPOUT_32(desc, str, ld_length); + CPOUT_32(desc, str, ld_data1); + CPOUT_32(desc, str, ld_data2); + + CPOUT_08(desc, str, ld_reserved, 64); +} + +/** + * gfs_desc_print - Print out a log descriptor + * @ld: the cpu-order buffer + * + */ + +void +gfs_desc_print(struct gfs_log_descriptor *ld) +{ + gfs_meta_header_print(&ld->ld_header); + + pv(ld, ld_type, "%u"); + pv(ld, ld_length, "%u"); + pv(ld, ld_data1, "%u"); + pv(ld, ld_data2, "%u"); + + pa(ld, ld_reserved, 64); +} + +/** + * gfs_block_tag_in - Read in a block tag + * @tag: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_block_tag_in(struct gfs_block_tag *tag, char *buf) +{ + struct gfs_block_tag *str = (struct gfs_block_tag *)buf; + + CPIN_64(tag, str, bt_blkno); + CPIN_32(tag, str, bt_flags); + CPIN_32(tag, str, bt_pad); +} + +/** + * gfs_block_tag_out - Write out a block tag + * @tag: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_block_tag_out(struct gfs_block_tag *tag, char *buf) +{ + struct gfs_block_tag *str = (struct gfs_block_tag *)buf; + + CPOUT_64(tag, str, bt_blkno); + CPOUT_32(tag, str, bt_flags); + CPOUT_32(tag, str, bt_pad); +} + +/** + * gfs_block_tag_print - Print out a block tag + * @tag: the cpu-order buffer + * + */ + +void +gfs_block_tag_print(struct gfs_block_tag *tag) +{ + pv(tag, bt_blkno, "%"PRIu64); + pv(tag, bt_flags, "%u"); + pv(tag, bt_pad, "%u"); +} + +/** + * gfs_quota_tag_in - Read in a quota tag + * @tag: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_quota_tag_in(struct gfs_quota_tag *tag, char *buf) +{ + struct gfs_quota_tag *str = (struct gfs_quota_tag *)buf; + + CPIN_64(tag, str, qt_change); + CPIN_32(tag, str, qt_flags); + CPIN_32(tag, str, qt_id); +} + +/** + * gfs_quota_tag_out - Write out a quota tag + * @tag: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_quota_tag_out(struct gfs_quota_tag *tag, char *buf) +{ + struct gfs_quota_tag *str = (struct gfs_quota_tag *)buf; + + CPOUT_64(tag, str, qt_change); + CPOUT_32(tag, str, qt_flags); + CPOUT_32(tag, str, qt_id); +} + +/** + * gfs_quota_tag_print - Print out a quota tag + * @tag: the cpu-order buffer + * + */ + +void +gfs_quota_tag_print(struct gfs_quota_tag *tag) +{ + pv(tag, qt_change, "%"PRId64); + pv(tag, qt_flags, "0x%.8X"); + pv(tag, qt_id, "%u"); +} + +/** + * gfs_ea_header_in - Read in an Extended Attribute header + * @ea: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_ea_header_in(struct gfs_ea_header *ea, char *buf) +{ + struct gfs_ea_header *str = (struct gfs_ea_header *)buf; + + CPIN_32(ea, str, ea_rec_len); + CPIN_32(ea, str, ea_data_len); + ea->ea_name_len = str->ea_name_len; + ea->ea_type = str->ea_type; + ea->ea_flags = str->ea_flags; + ea->ea_num_ptrs = str->ea_num_ptrs; + CPIN_32(ea, str, ea_pad); +} + +/** + * gfs_ea_header_out - Write out an Extended Attribute header + * @ea: the cpu-order structure + * @buf: the disk-order buffer + * + */ + +void +gfs_ea_header_out(struct gfs_ea_header *ea, char *buf) +{ + struct gfs_ea_header *str = (struct gfs_ea_header *)buf; + + CPOUT_32(ea, str, ea_rec_len); + CPOUT_32(ea, str, ea_data_len); + str->ea_name_len = ea->ea_name_len; + str->ea_type = ea->ea_type; + str->ea_flags = ea->ea_flags; + str->ea_num_ptrs = ea->ea_num_ptrs; + CPOUT_32(ea, str, 
ea_pad); +} + +/** + * gfs_ea_header_print - Print out an Extended Attribute header + * @ea: the cpu-order buffer + * + */ + +void +gfs_ea_header_print(struct gfs_ea_header *ea) +{ + pv(ea, ea_rec_len, "%u"); + pv(ea, ea_data_len, "%u"); + pv(ea, ea_name_len, "%u"); + pv(ea, ea_type, "%u"); + pv(ea, ea_flags, "%u"); + pv(ea, ea_num_ptrs, "%u"); + pv(ea, ea_pad, "%u"); +} + +static const uint32_t crc_32_tab[] = +{ + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, + 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, + 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, + 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, + 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, + 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, + 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, + 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, + 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, + 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, + 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, + 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, + 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, + 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, + 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, + 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, + 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, + 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d 
+}; + +/** + * gfs_dir_hash - hash an array of data + * @data: the data to be hashed + * @len: the length of data to be hashed + * + * Take some data and convert it to a 32-bit hash. + * + * The hash function is a 32-bit CRC of the data. The algorithm uses + * the crc_32_tab table above. + * + * This may not be the fastest hash function, but it does a fair bit better + * at providing uniform results than the others I've looked at. That's + * really important for efficient directories. + * + * Returns: the hash + */ + +uint32_t +gfs_dir_hash(const char *data, int len) +{ + uint32_t hash = 0xFFFFFFFF; + + for (; len--; data++) + hash = crc_32_tab[(hash ^ *data) & 0xFF] ^ (hash >> 8); + + hash = ~hash; + + return hash; +} + +#endif /* WANT_GFS_CONVERSION_FUNCTIONS */ + diff -urN linux-orig/fs/gfs_locking/lock_dlm/group.c linux-patched/fs/gfs_locking/lock_dlm/group.c --- linux-orig/fs/gfs_locking/lock_dlm/group.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_dlm/group.c 2004-06-16 12:03:17.967822065 -0500 @@ -0,0 +1,776 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include + +#include "lock_dlm.h" +#include +#include + + +struct kcl_service_ops mg_ops; + +/* + * Get the node struct for a given nodeid. + */ + +static dlm_node_t *find_node_by_nodeid(dlm_t *dlm, uint32_t nodeid) +{ + dlm_node_t *node; + + list_for_each_entry(node, &dlm->mg_nodes, list) { + if (node->nodeid == nodeid) + return node; + } + return NULL; +} + +/* + * Get the node struct for a given journalid. + */ + +static dlm_node_t *find_node_by_jid(dlm_t *dlm, uint32_t jid) +{ + dlm_node_t *node; + + list_for_each_entry(node, &dlm->mg_nodes, list) { + if (node->jid == jid) + return node; + } + return NULL; +} + +/* + * If the given ID is clear, get it, setting to the given VALUE. The ID is a + * journalid, the VALUE is our nodeid. When successful, the held ID-lock is + * returned (in shared mode). As long as this ID-lock is held, the journalid + * is owned. + */ + +static int id_test_and_set(dlm_t *dlm, uint32_t id, uint32_t val, + dlm_lock_t **lp_set) +{ + dlm_lock_t *lp = NULL; + struct lm_lockname name; + lm_lock_t *lock; + char *lvb; + uint32_t exist_val, beval; + int error; + + name.ln_type = LM_TYPE_JID; + name.ln_number = id; + + error = lm_dlm_get_lock(dlm, &name, &lock); + if (error) + goto fail; + + error = lm_dlm_hold_lvb(lock, &lvb); + if (error) + goto fail_put; + + lp = (dlm_lock_t *) lock; + set_bit(LFL_IDLOCK, &lp->flags); + + retry: + + error = lm_dlm_lock_sync(lock, LM_ST_UNLOCKED, LM_ST_SHARED, + LM_FLAG_TRY | LM_FLAG_NOEXP); + if (error == -EAGAIN) { + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(HZ); + goto retry; + } + if (error) + goto fail_unhold; + + memcpy(&beval, lvb, sizeof(beval)); + exist_val = be32_to_cpu(beval); + + if (!exist_val) { + /* + * This id is unused. Attempt to claim it by getting EX mode + * and writing our nodeid into the lvb. 
+ */ + error = lm_dlm_lock_sync(lock, LM_ST_SHARED, LM_ST_EXCLUSIVE, + LM_FLAG_TRY | LM_FLAG_NOEXP); + if (error == -EAGAIN) { + lm_dlm_unlock_sync(lock, LM_ST_SHARED); + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(HZ); + goto retry; + } + if (error) + goto fail_unlock; + + beval = cpu_to_be32(val); + memcpy(lvb, &beval, sizeof(beval)); + + error = lm_dlm_lock_sync(lock, LM_ST_EXCLUSIVE, LM_ST_SHARED, + LM_FLAG_NOEXP); + DLM_ASSERT(!error,); + + *lp_set = lp; + error = 0; + } else { + /* + * This id is already used. It has a non-zero nodeid in the lvb + */ + lm_dlm_unlock_sync(lock, LM_ST_SHARED); + lm_dlm_unhold_lvb(lock, lvb); + lm_dlm_put_lock(lock); + error = exist_val; + } + + return error; + + fail_unlock: + lm_dlm_unlock_sync(lock, LM_ST_SHARED); + + fail_unhold: + lm_dlm_unhold_lvb(lock, lvb); + + fail_put: + lm_dlm_put_lock(lock); + + fail: + return error; +} + +/* + * Release a held ID-lock clearing its VALUE. We have to acquire the lock in + * EX again so we can write out a zeroed lvb. + */ + +static void id_clear(dlm_t *dlm, dlm_lock_t *lp) +{ + lm_lock_t *lock = (lm_lock_t *) lp; + int error; + + /* + * This flag means that DLM_LKF_CONVDEADLK should not be used. + */ + set_bit(LFL_FORCE_PROMOTE, &lp->flags); + + retry: + + error = lm_dlm_lock_sync(lock, LM_ST_SHARED, LM_ST_EXCLUSIVE, + LM_FLAG_TRY | LM_FLAG_NOEXP); + if (error == -EAGAIN) { + schedule(); + goto retry; + } + if (error) + goto end; + + memset(lp->lvb, 0, DLM_LVB_LEN); + lm_dlm_unlock_sync(lock, LM_ST_EXCLUSIVE); + + end: + lm_dlm_unhold_lvb(lock, lp->lvb); + lm_dlm_put_lock(lock); +} + +/* + * Get the VALUE for a given ID. The ID is a journalid, the VALUE is a nodeid. + */ + +static int id_value(dlm_t *dlm, uint32_t id, uint32_t *val) +{ + dlm_lock_t *lp = NULL; + struct lm_lockname name; + lm_lock_t *lock; + char *lvb; + uint32_t beval; + int error; + + name.ln_type = LM_TYPE_JID; + name.ln_number = id; + + error = lm_dlm_get_lock(dlm, &name, &lock); + if (error) + goto out; + + error = lm_dlm_hold_lvb(lock, &lvb); + if (error) + goto out_put; + + lp = (dlm_lock_t *) lock; + set_bit(LFL_IDLOCK, &lp->flags); + + retry: + + error = lm_dlm_lock_sync(lock, LM_ST_UNLOCKED, LM_ST_SHARED, + LM_FLAG_TRY | LM_FLAG_NOEXP); + if (error == -EAGAIN) { + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(HZ); + goto retry; + } + if (error) + goto out_unhold; + + memcpy(&beval, lvb, sizeof(beval)); + *val = be32_to_cpu(beval); + + lm_dlm_unlock_sync(lock, LM_ST_SHARED); + + error = 0; + + out_unhold: + lm_dlm_unhold_lvb(lock, lvb); + + out_put: + lm_dlm_put_lock(lock); + + out: + return error; +} + +/* + * Find an ID with a given VALUE. The ID is a journalid, the VALUE is a + * nodeid. + */ + +static int id_find(dlm_t *dlm, uint32_t value, uint32_t *id_out) +{ + uint32_t val, id; + int error = 0, found = FALSE; + + for (id = 0; id < dlm->max_nodes; id++) { + error = id_value(dlm, id, &val); + if (error) + break; + + if (val == value) { + *id_out = id; + error = 0; + found = TRUE; + break; + } + } + + if (!error && !found) + error = -ENOENT; + + return error; +} + +/* + * Get a journalid to use. The journalid must be owned exclusively as long as + * this fs is mounted. Other nodes must be able to discover our nodeid as the + * owner of the journalid. The journalid we claim should have the lowest value + * of all unused journalids. 
+ */ + +static int claim_jid(dlm_t *dlm) +{ + dlm_node_t *node; + uint32_t id; + int error = 0; + + DLM_ASSERT(dlm->our_nodeid,); + + /* + * Search an arbitrary number (8) past max_nodes so we're sure to find + * a free one; GFS can then handle the "too big jid" error and fail + * the mount. + */ + + for (id = 0; id < dlm->max_nodes + 8; id++) { + error = id_test_and_set(dlm, id, dlm->our_nodeid, &dlm->jid_lock); + if (error < 0) + break; + if (error > 0) + continue; + + dlm->jid = id; + node = find_node_by_nodeid(dlm, dlm->our_nodeid); + node->jid = id; + set_bit(NFL_HAVE_JID, &node->flags); + break; + } + + /* + * If we have a problem getting a jid, pick a bogus one which should + * cause GFS to complain and fail to mount. + */ + + if (error) { + printk("lock_dlm: %s: no journal id available (%d)\n", + dlm->fsname, error); + dlm->jid = dlm->max_nodes + dlm->our_nodeid; + } + + log_debug("claim_jid %u", dlm->jid); + return 0; +} + +/* + * Release our journalid, allowing it to be used by a node subsequently + * mounting the fs. + */ + +static void release_jid(dlm_t *dlm) +{ + id_clear(dlm, dlm->jid_lock); + dlm->jid_lock = NULL; +} + +/* + * For all nodes in the mountgroup, find the journalid being used by each. + */ + +static int discover_jids(dlm_t *dlm) +{ + dlm_node_t *node; + uint32_t id; + int error, notfound = 0; + + list_for_each_entry(node, &dlm->mg_nodes, list) { + if (test_bit(NFL_HAVE_JID, &node->flags)) + continue; + + error = id_find(dlm, node->nodeid, &id); + if (error) { + log_debug("jid for node %d not found", node->nodeid); + notfound++; + continue; + } + + node->jid = id; + set_bit(NFL_HAVE_JID, &node->flags); + } + + return notfound; +} + +/* + * Discover the nodeid that we've been assigned by the cluster manager. + */ + +static int get_our_nodeid(dlm_t *dlm) +{ + LIST_HEAD(cur_memb); + struct kcl_cluster_node *cur_node; + + kcl_get_members(&cur_memb); + + list_for_each_entry(cur_node, &cur_memb, list) { + if (cur_node->us) { + dlm->our_nodeid = cur_node->node_id; + break; + } + } + + while (!list_empty(&cur_memb)) { + cur_node = list_entry(cur_memb.next, struct kcl_cluster_node, + list); + list_del(&cur_node->list); + kfree(cur_node); + } + + return 0; +} + +/* + * Run in dlm_async thread + */ + +void process_start(dlm_t *dlm, dlm_start_t *ds) +{ + dlm_node_t *node; + uint32_t nodeid; + int last_stop, last_start, error, i, new = FALSE, found; + + + log_debug("start c %d type %d e %d", ds->count, ds->type, ds->event_id); + + /* + * gfs won't do journal recoveries once it's sent us an unmount + */ + + if (test_bit(DFL_UMOUNT, &dlm->flags)) { + log_debug("process_start %d skip for umount", ds->event_id); + kcl_start_done(dlm->mg_local_id, ds->event_id); + goto out; + } + + /* + * check if first start + */ + + if (!test_and_set_bit(DFL_GOT_NODEID, &dlm->flags)) { + get_our_nodeid(dlm); + if (ds->count == 1) + set_bit(DFL_FIRST_MOUNT, &dlm->flags); + } + + down(&dlm->mg_nodes_lock); + + /* + * find nodes which are gone + */ + + list_for_each_entry(node, &dlm->mg_nodes, list) { + found = FALSE; + for (i = 0; i < ds->count; i++) { + if (node->nodeid != ds->nodeids[i]) + continue; + found = TRUE; + break; + } + + /* node is still a member */ + if (found) + continue; + + set_bit(NFL_NOT_MEMBER, &node->flags); + + /* no gfs recovery needed for nodes that left cleanly */ + if (ds->type != SERVICE_NODE_FAILED) + continue; + + /* callbacks sent only for nodes in last completed MG */ + if (!test_bit(NFL_LAST_FINISH, &node->flags)) + continue; + + /* only send a single callback per 
node */ + if (test_and_set_bit(NFL_SENT_CB, &node->flags)) + continue; + + dlm->fscb(dlm->fsdata, LM_CB_NEED_RECOVERY, &node->jid); + set_bit(DFL_NEED_STARTDONE, &dlm->flags); + log_debug("cb_need_recovery jid %u", node->jid); + } + + /* + * add new nodes + */ + + for (i = 0; i < ds->count; i++) { + nodeid = ds->nodeids[i]; + + node = find_node_by_nodeid(dlm, nodeid); + if (node) + continue; + + DLM_RETRY(node = kmalloc(sizeof(dlm_node_t), GFP_KERNEL), node); + + memset(node, 0, sizeof(dlm_node_t)); + + node->nodeid = nodeid; + list_add(&node->list, &dlm->mg_nodes); + new = TRUE; + } + + up(&dlm->mg_nodes_lock); + + /* + * get a jid for ourselves when started for the first time + */ + + if (!test_and_set_bit(DFL_HAVE_JID, &dlm->flags)) + claim_jid(dlm); + else if (new) { + /* give new nodes a little time to claim a jid */ + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(HZ); + } + + /* + * find jids of new nodes + */ + + for (;;) { + /* we don't need to do these jid lookups if this start has been + followed by a stop event (and thus cancelled) */ + + spin_lock(&dlm->async_lock); + last_stop = dlm->mg_last_stop; + last_start = dlm->mg_last_start; + spin_unlock(&dlm->async_lock); + + if (last_stop >= ds->event_id) + break; + + error = discover_jids(dlm); + if (error) { + /* Not all jids were found. Wait a while to let all + new nodes claim_jid, then try to scan for jids + again. */ + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(HZ); + continue; + } + break; + } + + /* + * tell SM we're done if there are no GFS recoveries to wait for + */ + + if (last_start > last_stop) { + error = 0; + down(&dlm->mg_nodes_lock); + + list_for_each_entry(node, &dlm->mg_nodes, list) { + if (!test_bit(NFL_SENT_CB, &node->flags)) + continue; + error = 1; + break; + } + up(&dlm->mg_nodes_lock); + + if (!error) + kcl_start_done(dlm->mg_local_id, ds->event_id); + } + + out: + kfree(ds->nodeids); + kfree(ds); +} + +void process_finish(dlm_t *dlm) +{ + struct list_head *tmp, *tmpsafe; + dlm_node_t *node; + dlm_lock_t *lp; + + spin_lock(&dlm->async_lock); + clear_bit(DFL_BLOCK_LOCKS, &dlm->flags); + + list_for_each_safe(tmp, tmpsafe, &dlm->delayed) { + lp = list_entry(tmp, dlm_lock_t, dlist); + + if (lp->type != QUEUE_LOCKS_BLOCKED) + continue; + + lp->type = 0; + list_del(&lp->dlist); + list_add_tail(&lp->slist, &dlm->submit); + + clear_bit(LFL_DLIST, &lp->flags); + set_bit(LFL_SLIST, &lp->flags); + } + spin_unlock(&dlm->async_lock); + + down(&dlm->mg_nodes_lock); + + list_for_each_safe(tmp, tmpsafe, &dlm->mg_nodes) { + node = list_entry(tmp, dlm_node_t, list); + + if (test_bit(NFL_NOT_MEMBER, &node->flags)) { + list_del(&node->list); + kfree(node); + } else + set_bit(NFL_LAST_FINISH, &node->flags); + } + up(&dlm->mg_nodes_lock); + + wake_up(&dlm->wait); +} + +/* + * Run in user process + */ + +int init_mountgroup(dlm_t *dlm) +{ + int error; + int id; + + error = kcl_register_service(dlm->fsname, dlm->fnlen, SERVICE_LEVEL_GFS, + &mg_ops, TRUE, (void *) dlm, &id); + if (error) + goto out; + + dlm->mg_local_id = id; + + /* BLOCK_LOCKS is cleared when the join is finished */ + set_bit(DFL_BLOCK_LOCKS, &dlm->flags); + + error = kcl_join_service(id); + if (error) + goto out_unreg; + + if (test_bit(DFL_START_ERROR, &dlm->flags)) + goto out_leave; + + return 0; + + out_leave: + kcl_leave_service(dlm->mg_local_id); + + out_unreg: + kcl_unregister_service(id); + + out: + printk("lock_dlm: service error %d\n", error); + return error; +} + +void release_mountgroup(dlm_t *dlm) +{ + int last_start, last_stop; + + 
/* this flag causes a kcl_start_done() to be sent right away for + any start callbacks we get from SM */ + + log_debug("umount flags %lx", dlm->flags); + set_bit(DFL_UMOUNT, &dlm->flags); + + /* gfs has done an unmount and will not call jid_recovery_done() + any longer, so make the necessary kcl_start_done() calls so + kcl_leave_service() will complete */ + + spin_lock(&dlm->async_lock); + last_start = dlm->mg_last_start; + last_stop = dlm->mg_last_stop; + spin_unlock(&dlm->async_lock); + + if ((last_start > last_stop) && + test_and_clear_bit(DFL_NEED_STARTDONE, &dlm->flags)) { + log_debug("umount doing start_done %d", last_start); + kcl_start_done(dlm->mg_local_id, last_start); + } + + kcl_leave_service(dlm->mg_local_id); + kcl_unregister_service(dlm->mg_local_id); + release_jid(dlm); +} + +/* + * Run in GFS thread + */ + +void jid_recovery_done(dlm_t *dlm, unsigned int jid, unsigned int message) +{ + dlm_node_t *node; + int last_start, last_stop; + int remain = 0; + + log_debug("recovery_done jid %u msg %u", jid, message); + + node = find_node_by_jid(dlm, jid); + if (!node) + goto out; + + log_debug("recovery_done %u,%u f %lx", jid, node->nodeid, node->flags); + + if (!test_bit(NFL_SENT_CB, &node->flags)) + goto out; + + if (!test_bit(NFL_NOT_MEMBER, &node->flags)) + goto out; + + set_bit(NFL_RECOVERY_DONE, &node->flags); + + /* + * when recovery is done for all nodes, we're done with the start + */ + + down(&dlm->mg_nodes_lock); + + list_for_each_entry(node, &dlm->mg_nodes, list) { + if (test_bit(NFL_SENT_CB, &node->flags) && + !test_bit(NFL_RECOVERY_DONE, &node->flags)) + remain++; + } + up(&dlm->mg_nodes_lock); + + if (!remain) { + /* don't send a start_done if there's since been a stop which + * cancels this start */ + + spin_lock(&dlm->async_lock); + last_start = dlm->mg_last_start; + last_stop = dlm->mg_last_stop; + spin_unlock(&dlm->async_lock); + + if (last_start > last_stop) { + log_debug("recovery_done start_done %d", last_start); + kcl_start_done(dlm->mg_local_id, last_start); + clear_bit(DFL_NEED_STARTDONE, &dlm->flags); + } + } + + out: + return; +} + +/* + * Run in CMAN SM thread + */ + +static void queue_start(dlm_t *dlm, uint32_t *nodeids, int count, + int event_id, int type) +{ + dlm_start_t *ds; + + DLM_RETRY(ds = kmalloc(sizeof(dlm_start_t), GFP_KERNEL), ds); + + memset(ds, 0, sizeof(dlm_start_t)); + + ds->nodeids = nodeids; + ds->count = count; + ds->event_id = event_id; + ds->type = type; + + spin_lock(&dlm->async_lock); + dlm->mg_last_start = event_id; + list_add_tail(&ds->list, &dlm->starts); + spin_unlock(&dlm->async_lock); + + wake_up(&dlm->wait); +} + +static int mg_stop(void *data) +{ + dlm_t *dlm = (dlm_t *) data; + + spin_lock(&dlm->async_lock); + set_bit(DFL_BLOCK_LOCKS, &dlm->flags); + dlm->mg_last_stop = dlm->mg_last_start; + spin_unlock(&dlm->async_lock); + + return 0; +} + +static int mg_start(void *data, uint32_t *nodeids, int count, int event_id, + int type) +{ + dlm_t *dlm = (dlm_t *) data; + + queue_start(dlm, nodeids, count, event_id, type); + + return 0; +} + +static void mg_finish(void *data, int event_id) +{ + dlm_t *dlm = (dlm_t *) data; + + spin_lock(&dlm->async_lock); + dlm->mg_last_finish = event_id; + set_bit(DFL_MG_FINISH, &dlm->flags); + spin_unlock(&dlm->async_lock); + + wake_up(&dlm->wait); +} + +struct kcl_service_ops mg_ops = { + .stop = mg_stop, + .start = mg_start, + .finish = mg_finish +}; diff -urN linux-orig/fs/gfs_locking/lock_dlm/lock.c linux-patched/fs/gfs_locking/lock_dlm/lock.c --- linux-orig/fs/gfs_locking/lock_dlm/lock.c 
1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_dlm/lock.c 2004-06-16 12:03:17.967822065 -0500 @@ -0,0 +1,561 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include "lock_dlm.h" + +/* + * Run in DLM thread + */ + +static void queue_complete(dlm_lock_t *lp) +{ + dlm_t *dlm = lp->dlm; + + clear_bit(LFL_WAIT_COMPLETE, &lp->flags); + + spin_lock(&dlm->async_lock); + list_add_tail(&lp->clist, &dlm->complete); + set_bit(LFL_CLIST, &lp->flags); + spin_unlock(&dlm->async_lock); + wake_up(&dlm->wait); +} + +static void queue_blocking(dlm_lock_t *lp, int mode) +{ + dlm_t *dlm = lp->dlm; + + if (test_bit(LFL_WAIT_COMPLETE, &lp->flags)) { + /* We often receive basts for EX while we're promoting + from SH to EX. */ + /* printk("lock_dlm: bast before complete %x,%"PRIx64" " + "gr=%d rq=%d bast=%d\n", lp->lockname.ln_type, + lp->lockname.ln_number, lp->cur, lp->req, mode); */ + return; + } + + spin_lock(&dlm->async_lock); + + if (!lp->bast_mode) { + list_add_tail(&lp->blist, &dlm->blocking); + set_bit(LFL_BLIST, &lp->flags); + lp->bast_mode = mode; + } else if (lp->bast_mode < mode) + lp->bast_mode = mode; + + spin_unlock(&dlm->async_lock); + wake_up(&dlm->wait); +} + +static __inline__ void lock_ast(void *astargs) +{ + dlm_lock_t *lp = (dlm_lock_t *) astargs; + queue_complete(lp); +} + +static __inline__ void lock_bast(void *astargs, int mode) +{ + dlm_lock_t *lp = (dlm_lock_t *) astargs; + queue_blocking(lp, mode); +} + +/* + * Run in GFS or user thread + */ + +/** + * queue_delayed - add request to queue to be submitted later + * @lp: DLM lock + * @type: the reason the lock is blocked + * + * Queue of locks which need submitting sometime later. Locks here + * due to BLOCKED_LOCKS are moved to request queue when recovery is + * done. Locks here due to an ERROR are moved to request queue after + * some delay. This could also be called from dlm_async thread. 
+ */ + +void queue_delayed(dlm_lock_t *lp, int type) +{ + dlm_t *dlm = lp->dlm; + + lp->type = type; + + spin_lock(&dlm->async_lock); + list_add_tail(&lp->dlist, &dlm->delayed); + set_bit(LFL_DLIST, &lp->flags); + spin_unlock(&dlm->async_lock); +} + +/** + * make_mode - convert to DLM_LOCK_ + * @lmstate: GFS lock state + * + * Returns: DLM lock mode + */ + +static int16_t make_mode(int16_t lmstate) +{ + switch (lmstate) { + case LM_ST_UNLOCKED: + return DLM_LOCK_NL; + case LM_ST_EXCLUSIVE: + return DLM_LOCK_EX; + case LM_ST_DEFERRED: + return DLM_LOCK_CW; + case LM_ST_SHARED: + return DLM_LOCK_PR; + default: + DLM_ASSERT(0, printk("unknown LM state %d\n", lmstate);); + } +} + +/** + * make_lmstate - convert to LM_ST_ + * @dlmmode: DLM lock mode + * + * Returns: GFS lock state + */ + +int16_t make_lmstate(int16_t dlmmode) +{ + switch (dlmmode) { + case DLM_LOCK_IV: + case DLM_LOCK_NL: + return LM_ST_UNLOCKED; + case DLM_LOCK_EX: + return LM_ST_EXCLUSIVE; + case DLM_LOCK_CW: + return LM_ST_DEFERRED; + case DLM_LOCK_PR: + return LM_ST_SHARED; + default: + DLM_ASSERT(0, printk("unknown DLM mode %d\n", dlmmode);); + } +} + +/** + * check_cur_state - verify agreement with GFS on the current lock state + * @lp: the DLM lock + * @cur_state: the current lock state from GFS + * + * NB: DLM_LOCK_NL and DLM_LOCK_IV are both considered + * LM_ST_UNLOCKED by GFS. + * + */ + +static void check_cur_state(dlm_lock_t *lp, unsigned int cur_state) +{ + int16_t cur = make_mode(cur_state); + if (lp->cur != DLM_LOCK_IV) + DLM_ASSERT(lp->cur == cur, printk("%d, %d\n", lp->cur, cur);); +} + +/** + * make_flags - put together necessary DLM flags + * @lp: DLM lock + * @gfs_flags: GFS flags + * @cur: current DLM lock mode + * @req: requested DLM lock mode + * + * Returns: DLM flags + */ + +static unsigned int make_flags(dlm_lock_t *lp, unsigned int gfs_flags, + int16_t cur, int16_t req) +{ + unsigned int lkf = 0; + + if (gfs_flags & LM_FLAG_TRY) + lkf |= DLM_LKF_NOQUEUE; + + if (gfs_flags & LM_FLAG_TRY_1CB) { + lkf |= DLM_LKF_NOQUEUE; + lkf |= DLM_LKF_NOQUEUEBAST; + } + + if (lp->lksb.sb_lkid != 0) { + lkf |= DLM_LKF_CONVERT; + + if (gfs_flags & LM_FLAG_PRIORITY) + lkf |= DLM_LKF_EXPEDITE; + else if (req > cur) + lkf |= DLM_LKF_QUECVT; + + /* Conversion deadlock avoidance by DLM */ + + if (!test_bit(LFL_FORCE_PROMOTE, &lp->flags) && + cur > DLM_LOCK_NL && req > DLM_LOCK_NL && cur != req) + lkf |= DLM_LKF_CONVDEADLK; + } + + if (lp->lvb) + lkf |= DLM_LKF_VALBLK; + + return lkf; +} + +/** + * make_strname - convert GFS lock numbers to string + * @lockname: the lock type/number + * @str: the lock string/length + * + */ + +static __inline__ void make_strname(struct lm_lockname *lockname, + strname_t *str) +{ + sprintf(str->name, "%8x%16"PRIx64, lockname->ln_type, + lockname->ln_number); + str->namelen = LOCK_DLM_STRNAME_BYTES; +} + +int create_lp(dlm_t *dlm, struct lm_lockname *name, dlm_lock_t **lpp) +{ + dlm_lock_t *lp; + + lp = kmalloc(sizeof(dlm_lock_t), GFP_KERNEL); + if (!lp) + return -ENOMEM; + + memset(lp, 0, sizeof(dlm_lock_t)); + lp->lockname = *name; + lp->dlm = dlm; + lp->cur = DLM_LOCK_IV; + init_completion(&lp->uast_wait); + *lpp = lp; + return 0; +} + +/** + * dlm_get_lock - get an lm_lock_t given a description of the lock + * @lockspace: the lockspace the lock lives in + * @name: the name of the lock + * @lockp: return the lm_lock_t here + * + * Returns: 0 on success, -EXXX on failure + */ + +int lm_dlm_get_lock(lm_lockspace_t *lockspace, struct lm_lockname *name, + lm_lock_t **lockp) +{ + dlm_lock_t *lp; + 
int error; + + error = create_lp((dlm_t *) lockspace, name, &lp); + + *lockp = (lm_lock_t *) lp; + return error; +} + +int do_unlock(dlm_lock_t *lp) +{ + int error; + + init_completion(&lp->uast_wait); + + set_bit(LFL_DLM_UNLOCK, &lp->flags); + + error = dlm_unlock(lp->dlm->gdlm_lsp, lp->lksb.sb_lkid, 0, &lp->lksb, + (void *) lp); + + DLM_ASSERT(!error, printk("%s: error=%d num=%x,%"PRIx64"\n", + lp->dlm->fsname, error, lp->lockname.ln_type, + lp->lockname.ln_number);); + + wait_for_completion(&lp->uast_wait); + + spin_lock(&lp->dlm->async_lock); + if (test_bit(LFL_CLIST, &lp->flags)) { + printk("lock_dlm: dlm_put_lock lp on clist num=%x,%"PRIx64"\n", lp->lockname.ln_type, lp->lockname.ln_number); + list_del(&lp->clist); + } + if (test_bit(LFL_BLIST, &lp->flags)) { + printk("lock_dlm: dlm_put_lock lp on blist num=%x,%"PRIx64"\n", + lp->lockname.ln_type, lp->lockname.ln_number); + list_del(&lp->blist); + } + if (test_bit(LFL_DLIST, &lp->flags)) { + printk("lock_dlm: dlm_put_lock lp on dlist num=%x,%"PRIx64"\n", + lp->lockname.ln_type, lp->lockname.ln_number); + list_del(&lp->dlist); + } + if (test_bit(LFL_SLIST, &lp->flags)) { + printk("lock_dlm: dlm_put_lock lp on slist num=%x,%"PRIx64"\n", + lp->lockname.ln_type, lp->lockname.ln_number); + list_del(&lp->slist); + } + spin_unlock(&lp->dlm->async_lock); + + return 0; +} + +/** + * dlm_put_lock - get rid of a lock structure + * @lock: the lock to throw away + * + */ + +void lm_dlm_put_lock(lm_lock_t *lock) +{ + dlm_lock_t *lp = (dlm_lock_t *) lock; + + if (lp->cur != DLM_LOCK_IV) { + do_unlock(lp); + kfree(lp); + } +} + +/** + * do_lock - acquire a lock + * @lp: the DLM lock + * @range: optional range + */ + +void do_lock(dlm_lock_t *lp, struct dlm_range *range) +{ + dlm_t *dlm = lp->dlm; + strname_t str; + int error; + + /* + * When recovery is in progress, delay lock requests so they can be + * submitted once recovery is done. Requests for recovery (NOEXP) and + * unlocks can pass. + */ + + if (test_bit(DFL_BLOCK_LOCKS, &dlm->flags) && + !test_bit(LFL_NOBLOCK, &lp->flags) && lp->req != DLM_LOCK_NL) { + queue_delayed(lp, QUEUE_LOCKS_BLOCKED); + return; + } + + /* + * Submit the actual lock request. + */ + + make_strname(&lp->lockname, &str); + + set_bit(LFL_WAIT_COMPLETE, &lp->flags); + + error = dlm_lock(dlm->gdlm_lsp, lp->req, &lp->lksb, lp->lkf, str.name, + str.namelen, 0, lock_ast, (void *) lp, + lp->posix ? 
NULL : lock_bast, range); + + if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) { + lp->lksb.sb_status = -EAGAIN; + queue_complete(lp); + error = 0; + } + + DLM_ASSERT(!error, + printk("%s: num=%x,%"PRIx64" err=%d cur=%d req=%d lkf=%x\n", + dlm->fsname, lp->lockname.ln_type, + lp->lockname.ln_number, error, lp->cur, lp->req, + lp->lkf);); +} + +/** + * lm_dlm_lock - acquire a lock + * @lock: the lock to manipulate + * @cur_state: the current state + * @req_state: the requested state + * @flags: modifier flags + * + * Returns: A bitmap of LM_OUT_* on success, -EXXX on failure + */ + +unsigned int lm_dlm_lock(lm_lock_t *lock, unsigned int cur_state, + unsigned int req_state, unsigned int flags) +{ + dlm_lock_t *lp = (dlm_lock_t *) lock; + + if (flags & LM_FLAG_NOEXP) + set_bit(LFL_NOBLOCK, &lp->flags); + + check_cur_state(lp, cur_state); + lp->req = make_mode(req_state); + lp->lkf = make_flags(lp, flags, lp->cur, lp->req); + + do_lock(lp, NULL); + return LM_OUT_ASYNC; +} + +int lm_dlm_lock_sync(lm_lock_t *lock, unsigned int cur_state, + unsigned int req_state, unsigned int flags) +{ + dlm_lock_t *lp = (dlm_lock_t *) lock; + + init_completion(&lp->uast_wait); + lm_dlm_lock(lock, cur_state, req_state, flags); + wait_for_completion(&lp->uast_wait); + + return lp->lksb.sb_status; +} + +/** + * lm_dlm_unlock - unlock a lock + * @lock: the lock to manipulate + * @cur_state: the current state + * + * Returns: 0 on success, -EXXX on failure + */ + +unsigned int lm_dlm_unlock(lm_lock_t *lock, unsigned int cur_state) +{ + dlm_lock_t *lp = (dlm_lock_t *) lock; + + check_cur_state(lp, cur_state); + lp->req = DLM_LOCK_NL; + lp->lkf = make_flags(lp, 0, lp->cur, lp->req); + + do_lock(lp, NULL); + + return LM_OUT_ASYNC; +} + +void lm_dlm_unlock_sync(lm_lock_t *lock, unsigned int cur_state) +{ + dlm_lock_t *lp = (dlm_lock_t *) lock; + + init_completion(&lp->uast_wait); + lm_dlm_unlock(lock, cur_state); + wait_for_completion(&lp->uast_wait); +} + +/** + * dlm_cancel - cancel a request that is blocked due to DFL_BLOCK_LOCKS + * @lock: the lock to cancel request for + * + */ + +void lm_dlm_cancel(lm_lock_t *lock) +{ + dlm_lock_t *lp = (dlm_lock_t *) lock; + int dlist = FALSE; + + printk("lock_dlm: cancel num=%x,%"PRIx64"\n", + lp->lockname.ln_type, lp->lockname.ln_number); + + spin_lock(&lp->dlm->async_lock); + if (test_and_clear_bit(LFL_DLIST, &lp->flags)) { + list_del(&lp->dlist); + lp->type = 0; + dlist = TRUE; + } + spin_unlock(&lp->dlm->async_lock); + + if (dlist) { + set_bit(LFL_CANCEL, &lp->flags); + queue_complete(lp); + } +} + +/** + * dlm_hold_lvb - hold on to a lock value block + * @lock: the lock the LVB is associated with + * @lvbp: return the lvb memory here + * + * Returns: 0 on success, -EXXX on failure + */ + +int lm_dlm_hold_lvb(lm_lock_t *lock, char **lvbp) +{ + dlm_lock_t *lp = (dlm_lock_t *) lock; + char *lvb; + + lvb = kmalloc(DLM_LVB_SIZE, GFP_KERNEL); + if (!lvb) + return -ENOMEM; + + memset(lvb, 0, DLM_LVB_SIZE); + + lp->lksb.sb_lvbptr = lvb; + lp->lvb = lvb; + *lvbp = lvb; + + return 0; +} + +/** + * dlm_unhold_lvb - release a LVB + * @lock: the lock the LVB is associated with + * @lvb: the lock value block + * + */ + +void lm_dlm_unhold_lvb(lm_lock_t *lock, char *lvb) +{ + dlm_lock_t *lp = (dlm_lock_t *) lock; + kfree(lvb); + lp->lvb = NULL; + lp->lksb.sb_lvbptr = NULL; +} + +/** + * dlm_sync_lvb - sync out the value of a lvb + * @lock: the lock the LVB is associated with + * @lvb: the lock value block + * + */ + +void lm_dlm_sync_lvb(lm_lock_t *lock, char *lvb) +{ + dlm_lock_t 
*lp = (dlm_lock_t *) lock;
+
+ if (lp->cur != DLM_LOCK_EX)
+ return;
+
+ init_completion(&lp->uast_wait);
+ set_bit(LFL_SYNC_LVB, &lp->flags);
+
+ lp->req = DLM_LOCK_EX;
+ lp->lkf = make_flags(lp, 0, lp->cur, lp->req);
+
+ do_lock(lp, NULL);
+ wait_for_completion(&lp->uast_wait);
+}
+
+/**
+ * dlm_recovery_done - reset the expired locks for a given jid
+ * @lockspace: the lockspace
+ * @jid: the jid
+ *
+ */
+
+void lm_dlm_recovery_done(lm_lockspace_t *lockspace, unsigned int jid,
+ unsigned int message)
+{
+ jid_recovery_done((dlm_t *) lockspace, jid, message);
+}
+
+/*
+ * Run in dlm_async
+ */
+
+/**
+ * process_submit - make DLM lock requests from dlm_async thread
+ * @lp: DLM Lock
+ *
+ */
+
+void process_submit(dlm_lock_t *lp)
+{
+ struct dlm_range range, *r = NULL;
+
+ if (lp->posix) {
+ range.ra_start = lp->posix->start;
+ range.ra_end = lp->posix->end;
+ r = &range;
+ }
+
+ do_lock(lp, r);
+}
diff -urN linux-orig/fs/gfs_locking/lock_dlm/lock_dlm.h linux-patched/fs/gfs_locking/lock_dlm/lock_dlm.h
--- linux-orig/fs/gfs_locking/lock_dlm/lock_dlm.h 1969-12-31 18:00:00.000000000 -0600
+++ linux-patched/fs/gfs_locking/lock_dlm/lock_dlm.h 2004-06-16 12:03:17.967822065 -0500
@@ -0,0 +1,323 @@
+/******************************************************************************
+*******************************************************************************
+**
+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+**
+** This copyrighted material is made available to anyone wishing to use,
+** modify, copy, or redistribute it subject to the terms and conditions
+** of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#ifndef LOCK_DLM_DOT_H
+#define LOCK_DLM_DOT_H
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* We take a shortcut and use lm_lockname structs for internal locks. This
+ means we must be careful to keep these types different from those used in
+ lm_interface.h. */
+
+#define LM_TYPE_JID (0x10)
+#define LM_TYPE_PLOCK_UPDATE (0x11)
+
+#define DLM_LVB_SIZE (DLM_LVB_LEN)
+
+/* GFS uses 12 bytes to identify a resource (32 bit type + 64 bit number).
+ We sprintf these numbers into a 24 byte string of hex values to make them
+ human-readable (to make debugging simpler).
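+ For example, a lock of type 0x2 and number 0x11 would presumably come out
+ as the 24 hex digits "000000020000000000000011" -- 8 digits for the 32 bit
+ type followed by 16 for the 64 bit number (make_strname() is not shown
+ here, so the exact digit layout is an assumption).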
*/ + +#define LOCK_DLM_STRNAME_BYTES (24) + +#define LOCK_DLM_MAX_NODES (128) + +struct dlm; +struct dlm_lock; +struct dlm_node; +struct dlm_start; +struct strname; + +typedef struct dlm dlm_t; +typedef struct dlm_lock dlm_lock_t; +typedef struct dlm_node dlm_node_t; +typedef struct dlm_start dlm_start_t; +typedef struct strname strname_t; + +#define DFL_FIRST_MOUNT 0 +#define DFL_THREAD_STOP 1 +#define DFL_GOT_NODEID 2 +#define DFL_MG_FINISH 3 +#define DFL_HAVE_JID 4 +#define DFL_BLOCK_LOCKS 5 +#define DFL_START_ERROR 6 +#define DFL_UMOUNT 7 +#define DFL_NEED_STARTDONE 8 + +struct dlm { + uint32_t jid; + uint32_t our_nodeid; + unsigned long flags; + + int cnlen; + char * clustername; + int fnlen; + char * fsname; + int max_nodes; + + dlm_lockspace_t * gdlm_lsp; + + lm_callback_t fscb; + lm_fsdata_t * fsdata; + dlm_lock_t * jid_lock; + + spinlock_t async_lock; + struct list_head complete; + struct list_head blocking; + struct list_head delayed; + struct list_head submit; + struct list_head starts; + + wait_queue_head_t wait; + atomic_t threads; + + int mg_local_id; + int mg_last_start; + int mg_last_stop; + int mg_last_finish; + struct list_head mg_nodes; + struct semaphore mg_nodes_lock; + + struct list_head resources; + struct semaphore res_lock; +}; + +struct dlm_resource { + dlm_t * dlm; + struct list_head list; /* list of resources */ + struct lm_lockname name; /* the resource name */ + struct semaphore sema; + struct list_head locks; /* one lock for each range */ + int count; + dlm_lock_t * update; + struct list_head async_locks; + spinlock_t async_spin; +}; + +struct posix_lock { + struct list_head list; /* resource locks list */ + struct list_head async_list; /* resource async_locks list */ + struct dlm_resource * resource; + dlm_lock_t * lp; + unsigned long owner; + uint64_t start; + uint64_t end; + int count; + int ex; +}; + +#define LFL_NOBLOCK 0 +#define LFL_NOCACHE 1 +#define LFL_UNLOCK_RECOVERY 2 +#define LFL_DLM_UNLOCK 3 +#define LFL_TRYFAILED 4 +#define LFL_SYNC_LVB 5 +#define LFL_FORCE_PROMOTE 6 +#define LFL_REREQUEST 7 +#define LFL_WAIT_COMPLETE 8 +#define LFL_CLIST 9 +#define LFL_BLIST 10 +#define LFL_DLIST 11 +#define LFL_SLIST 12 +#define LFL_IDLOCK 13 +#define LFL_CANCEL 14 + +struct dlm_lock { + dlm_t * dlm; + struct lm_lockname lockname; + char * lvb; + struct dlm_lksb lksb; + + int16_t cur; + int16_t req; + int16_t prev_req; + unsigned int lkf; + unsigned int type; + unsigned long flags; + + int bast_mode; /* protected by async_lock */ + struct completion uast_wait; + + struct list_head clist; /* complete */ + struct list_head blist; /* blocking */ + struct list_head dlist; /* delayed */ + struct list_head slist; /* submit */ + + struct posix_lock * posix; +}; + +#define NFL_SENT_CB 0 +#define NFL_NOT_MEMBER 1 +#define NFL_RECOVERY_DONE 2 +#define NFL_LAST_FINISH 3 +#define NFL_HAVE_JID 4 + +struct dlm_node { + uint32_t nodeid; + uint32_t jid; + unsigned long flags; + struct list_head list; +}; + +#define QUEUE_LOCKS_BLOCKED 1 +#define QUEUE_ERROR_UNLOCK 2 +#define QUEUE_ERROR_LOCK 3 +#define QUEUE_ERROR_RETRY 4 + +struct strname { + unsigned char name[LOCK_DLM_STRNAME_BYTES]; + unsigned short namelen; +}; + +struct dlm_start { + uint32_t * nodeids; + int count; + int type; + int event_id; + struct list_head list; +}; + +#ifndef TRUE +#define TRUE (1) +#endif + +#ifndef FALSE +#define FALSE (0) +#endif + +#if (BITS_PER_LONG == 64) +#define PRIu64 "lu" +#define PRId64 "ld" +#define PRIo64 "lo" +#define PRIx64 "lx" +#define PRIX64 "lX" +#define SCNu64 "lu" +#define 
SCNd64 "ld" +#define SCNo64 "lo" +#define SCNx64 "lx" +#define SCNX64 "lX" +#else +#define PRIu64 "Lu" +#define PRId64 "Ld" +#define PRIo64 "Lo" +#define PRIx64 "Lx" +#define PRIX64 "LX" +#define SCNu64 "Lu" +#define SCNd64 "Ld" +#define SCNo64 "Lo" +#define SCNx64 "Lx" +#define SCNX64 "LX" +#endif + +extern struct lm_lockops lock_dlm_ops; + +/* group.c */ + +int init_mountgroup(dlm_t * dlm); +void release_mountgroup(dlm_t * dlm); +void process_start(dlm_t * dlm, dlm_start_t * ds); +void process_finish(dlm_t * dlm); +void jid_recovery_done(dlm_t * dlm, unsigned int jid, unsigned int message); + +/* thread.c */ + +int init_async_thread(dlm_t * dlm); +void release_async_thread(dlm_t * dlm); + +/* lock.c */ + +int16_t make_lmstate(int16_t dlmmode); +void queue_delayed(dlm_lock_t * lp, int type); +void process_submit(dlm_lock_t * lp); +int create_lp(dlm_t *dlm, struct lm_lockname *name, dlm_lock_t **lpp); +void do_lock(dlm_lock_t *lp, struct dlm_range *range); +int do_unlock(dlm_lock_t *lp); + +int lm_dlm_get_lock(lm_lockspace_t * lockspace, struct lm_lockname * name, + lm_lock_t ** lockp); +void lm_dlm_put_lock(lm_lock_t * lock); +unsigned int lm_dlm_lock(lm_lock_t * lock, unsigned int cur_state, + unsigned int req_state, unsigned int flags); +int lm_dlm_lock_sync(lm_lock_t * lock, unsigned int cur_state, + unsigned int req_state, unsigned int flags); +unsigned int lm_dlm_unlock(lm_lock_t * lock, unsigned int cur_state); +void lm_dlm_unlock_sync(lm_lock_t * lock, unsigned int cur_state); +void lm_dlm_cancel(lm_lock_t * lock); +int lm_dlm_hold_lvb(lm_lock_t * lock, char **lvbp); +void lm_dlm_unhold_lvb(lm_lock_t * lock, char *lvb); +void lm_dlm_sync_lvb(lm_lock_t * lock, char *lvb); +void lm_dlm_recovery_done(lm_lockspace_t * lockspace, unsigned int jid, + unsigned int message); + +/* plock.c */ + +int lm_dlm_plock(lm_lockspace_t *lockspace, struct lm_lockname *name, + unsigned long owner, int wait, int ex, uint64_t start, + uint64_t end); + +int lm_dlm_punlock(lm_lockspace_t *lockspace, struct lm_lockname *name, + unsigned long owner, uint64_t start, uint64_t end); + +int lm_dlm_plock_get(lm_lockspace_t *lockspace, struct lm_lockname *name, + unsigned long owner, uint64_t *start, uint64_t *end, + int *ex, unsigned long *rowner); + +/* main.c */ + +void lock_dlm_debug_log(const char *fmt, ...); +void lock_dlm_debug_dump(void); + + +#define LOCK_DLM_DEBUG + +#ifdef LOCK_DLM_DEBUG +#define log_debug(fmt, args...) lock_dlm_debug_log(fmt, ##args) +#else +#define log_debug(fmt, args...) 
+#endif + +#define DLM_ASSERT(x, do) \ +{ \ + if (!(x)) \ + { \ + lock_dlm_debug_dump(); \ + printk("\nlock_dlm: Assertion failed on line %d of file %s\n" \ + "lock_dlm: assertion: \"%s\"\n" \ + "lock_dlm: time = %lu\n", \ + __LINE__, __FILE__, #x, jiffies); \ + {do} \ + printk("\n"); \ + panic("lock_dlm: Record message above and reboot.\n"); \ + } \ +} + +#define DLM_RETRY(do_this, until_this) \ +for (;;) \ +{ \ + do { do_this; } while (0); \ + if (until_this) \ + break; \ + printk("lock_dlm: out of memory: %s, %u\n", __FILE__, __LINE__); \ + schedule();\ +} + +#endif diff -urN linux-orig/fs/gfs_locking/lock_dlm/main.c linux-patched/fs/gfs_locking/lock_dlm/main.c --- linux-orig/fs/gfs_locking/lock_dlm/main.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_dlm/main.c 2004-06-16 12:03:17.967822065 -0500 @@ -0,0 +1,192 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include "lock_dlm.h" +#include +#include + +#if defined(LOCK_DLM_DEBUG) +#define LOCK_DLM_DEBUG_SIZE (1024) +#define MAX_DEBUG_MSG_LEN (64) +#else +#define LOCK_DLM_DEBUG_SIZE (0) +#define MAX_DEBUG_MSG_LEN (0) +#endif + +static char * debug_buf; +static unsigned int debug_size; +static unsigned int debug_point; +static int debug_wrap; +static spinlock_t debug_lock; +static struct proc_dir_entry * debug_proc_entry = NULL; + + +void lock_dlm_debug_log(const char *fmt, ...) 
+{ + va_list va; + int i, n, size, len; + char buf[MAX_DEBUG_MSG_LEN+1]; + + spin_lock(&debug_lock); + + if (!debug_buf) + goto out; + + size = MAX_DEBUG_MSG_LEN; + memset(buf, 0, size+1); + + n = 0; + /* n = snprintf(buf, size, "%s ", dlm->fsname); */ + size -= n; + + va_start(va, fmt); + vsnprintf(buf+n, size, fmt, va); + va_end(va); + + len = strlen(buf); + if (len > MAX_DEBUG_MSG_LEN-1) + len = MAX_DEBUG_MSG_LEN-1; + buf[len] = '\n'; + buf[len+1] = '\0'; + + for (i = 0; i < strlen(buf); i++) { + debug_buf[debug_point++] = buf[i]; + + if (debug_point == debug_size) { + debug_point = 0; + debug_wrap = 1; + } + } + out: + spin_unlock(&debug_lock); +} + +static void debug_setup(int size) +{ + char *b = NULL; + + if (size > PAGE_SIZE) + size = PAGE_SIZE; + if (size) + b = kmalloc(size, GFP_KERNEL); + + spin_lock(&debug_lock); + if (debug_buf) + kfree(debug_buf); + if (!size || !b) + goto out; + debug_size = size; + debug_point = 0; + debug_wrap = 0; + debug_buf = b; + memset(debug_buf, 0, debug_size); + out: + spin_unlock(&debug_lock); +} + +static void debug_init(void) +{ + debug_buf = NULL; + debug_size = 0; + debug_point = 0; + debug_wrap = 0; + spin_lock_init(&debug_lock); + debug_setup(LOCK_DLM_DEBUG_SIZE); +} + +void lock_dlm_debug_dump(void) +{ + int i; + + spin_lock(&debug_lock); + + if (debug_wrap) { + for (i = debug_point; i < debug_size; i++) + printk("%c", debug_buf[i]); + } + for (i = 0; i < debug_point; i++) + printk("%c", debug_buf[i]); + + spin_unlock(&debug_lock); +} + +#ifdef CONFIG_PROC_FS +int lock_dlm_debug_info(char *b, char **start, off_t offset, int length) +{ + int i, n = 0; + + spin_lock(&debug_lock); + + if (debug_wrap) { + for (i = debug_point; i < debug_size; i++) + n += sprintf(b + n, "%c", debug_buf[i]); + } + for (i = 0; i < debug_point; i++) + n += sprintf(b + n, "%c", debug_buf[i]); + + spin_unlock(&debug_lock); + + return n; +} +#endif + +/** + * init_dlm - Initialize the dlm module + * + * Returns: 0 on success, -EXXX on failure + */ + +int __init init_lock_dlm(void) +{ + int error; + + error = lm_register_proto(&lock_dlm_ops); + if (error) { + printk("lock_dlm: can't register protocol: (%d)\n", error); + return error; + } + +#ifdef CONFIG_PROC_FS + debug_proc_entry = create_proc_entry("cluster/lock_dlm_debug", S_IRUGO, + NULL); + if (debug_proc_entry) + debug_proc_entry->get_info = &lock_dlm_debug_info; +#endif + debug_init(); + + printk("Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__); + return 0; +} + +/** + * exit_dlm - cleanup the dlm module + * + */ + +void __exit exit_lock_dlm(void) +{ + lm_unregister_proto(&lock_dlm_ops); + +#ifdef CONFIG_PROC_FS + if (debug_proc_entry) + remove_proc_entry("cluster/lock_dlm_debug", NULL); +#endif + debug_setup(0); +} + +module_init(init_lock_dlm); +module_exit(exit_lock_dlm); + +MODULE_DESCRIPTION("GFS DLM Locking Module"); +MODULE_AUTHOR("Red Hat, Inc."); +MODULE_LICENSE("GPL"); diff -urN linux-orig/fs/gfs_locking/lock_dlm/mount.c linux-patched/fs/gfs_locking/lock_dlm/mount.c --- linux-orig/fs/gfs_locking/lock_dlm/mount.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_dlm/mount.c 2004-06-16 12:03:17.967822065 -0500 @@ -0,0 +1,335 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. 
+** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include + +#include "lock_dlm.h" +#include +#include + +static int init_cman(dlm_t *dlm) +{ + int error = -1; + char *name = NULL; + + if (!dlm->clustername) + goto fail; + + error = kcl_addref_cluster(); + if (error) { + printk("lock_dlm: cannot get cman reference %d\n", error); + goto fail; + } + + error = kcl_cluster_name(&name); + if (error) { + printk("lock_dlm: cannot get cman cluster name %d\n", error); + goto fail_ref; + } + + if (strcmp(name, dlm->clustername)) { + error = -1; + printk("lock_dlm: cman cluster name \"%s\" does not match " + "file system cluster name \"%s\"\n", + name, dlm->clustername); + goto fail_ref; + } + + kfree(name); + return 0; + + fail_ref: + kcl_releaseref_cluster(); + fail: + if (name) + kfree(name); + return error; +} + +static int release_cman(dlm_t *dlm) +{ + return kcl_releaseref_cluster(); +} + +static int init_cluster(dlm_t *dlm, char *table_name) +{ + char *buf, *c, *clname, *fsname; + int len, error = -1; + + /* + * Parse superblock lock table : + */ + + len = strlen(table_name) + 1; + buf = kmalloc(len, GFP_KERNEL); + if (!buf) + goto out; + memset(buf, 0, len); + memcpy(buf, table_name, strlen(table_name)); + + c = strstr(buf, ":"); + if (!c) + goto out_buf; + + *c = '\0'; + clname = buf; + fsname = ++c; + + dlm->max_nodes = LOCK_DLM_MAX_NODES; + + len = strlen(clname) + 1; + c = kmalloc(len, GFP_KERNEL); + if (!c) + goto out_buf; + memset(c, 0, len); + memcpy(c, clname, len-1); + dlm->cnlen = len-1; + dlm->clustername = c; + + len = strlen(fsname) + 1; + c = kmalloc(len, GFP_KERNEL); + if (!c) + goto out_cn; + memset(c, 0, len); + memcpy(c, fsname, len-1); + dlm->fnlen = len-1; + dlm->fsname = c; + + error = init_cman(dlm); + if (error) + goto out_fn; + + kfree(buf); + return 0; + + out_fn: + kfree(dlm->fsname); + out_cn: + kfree(dlm->clustername); + out_buf: + kfree(buf); + out: + printk("lock_dlm: init_cluster error %d\n", error); + return error; +} + +static int release_cluster(dlm_t *dlm) +{ + release_cman(dlm); + kfree(dlm->clustername); + kfree(dlm->fsname); + return 0; +} + +static int init_fence(dlm_t *dlm) +{ + LIST_HEAD(head); + struct kcl_service *s, *safe; + int error, found = FALSE; + + error = kcl_get_services(&head, SERVICE_LEVEL_FENCE); + if (error < 0) + goto out; + + list_for_each_entry_safe(s, safe, &head, list) { + list_del(&s->list); + if (!found && !strcmp(s->name, "default")) + found = TRUE; + kfree(s); + } + + if (found) + return 0; + + error = -1; + out: + printk("lock_dlm: init_fence error %d\n", error); + return error; +} + +static int release_fence(dlm_t *dlm) +{ + return 0; +} + +static int init_gdlm(dlm_t *dlm) +{ + int error; + + error = dlm_new_lockspace(dlm->fsname, dlm->fnlen, &dlm->gdlm_lsp, + DLM_LSF_NOTIMERS); + if (error) + printk("lock_dlm: new lockspace error %d\n", error); + + return error; +} + +static int release_gdlm(dlm_t *dlm) +{ + dlm_release_lockspace(dlm->gdlm_lsp, 1); + return 0; +} + +static dlm_t *init_dlm(lm_callback_t cb, lm_fsdata_t *fsdata) +{ + dlm_t *dlm; + + dlm = kmalloc(sizeof(dlm_t), GFP_KERNEL); + if (!dlm) + return NULL; + + memset(dlm, 0, sizeof(dlm_t)); + + dlm->fscb = cb; + dlm->fsdata = fsdata; + + 
spin_lock_init(&dlm->async_lock); + + INIT_LIST_HEAD(&dlm->complete); + INIT_LIST_HEAD(&dlm->blocking); + INIT_LIST_HEAD(&dlm->delayed); + INIT_LIST_HEAD(&dlm->submit); + INIT_LIST_HEAD(&dlm->starts); + INIT_LIST_HEAD(&dlm->resources); + + init_waitqueue_head(&dlm->wait); + + INIT_LIST_HEAD(&dlm->mg_nodes); + init_MUTEX(&dlm->mg_nodes_lock); + init_MUTEX(&dlm->res_lock); + + return dlm; +} + +/** + * dlm_mount - mount a dlm lockspace + * @table_name: the name of the space to mount + * @host_data: host specific data + * @cb: the callback + * @lockstruct: the structure of crap to fill in + * + * Returns: 0 on success, -EXXX on failure + */ + +static int lm_dlm_mount(char *table_name, char *host_data, + lm_callback_t cb, lm_fsdata_t *fsdata, + unsigned int min_lvb_size, + struct lm_lockstruct *lockstruct) +{ + dlm_t *dlm; + int error = -ENOMEM; + + if (min_lvb_size > DLM_LVB_SIZE) + goto out; + + dlm = init_dlm(cb, fsdata); + if (!dlm) + goto out; + + error = init_cluster(dlm, table_name); + if (error) + goto out_free; + + error = init_fence(dlm); + if (error) + goto out_cluster; + + error = init_gdlm(dlm); + if (error) + goto out_fence; + + error = init_async_thread(dlm); + if (error) + goto out_gdlm; + + error = init_mountgroup(dlm); + if (error) + goto out_thread; + + lockstruct->ls_jid = dlm->jid; + lockstruct->ls_first = test_bit(DFL_FIRST_MOUNT, &dlm->flags); + lockstruct->ls_lockspace = dlm; + lockstruct->ls_ops = &lock_dlm_ops; + lockstruct->ls_flags = LM_LSFLAG_ASYNC; + lockstruct->ls_lvb_size = DLM_LVB_SIZE; + return 0; + + out_thread: + release_async_thread(dlm); + + out_gdlm: + release_gdlm(dlm); + + out_fence: + release_fence(dlm); + + out_cluster: + release_cluster(dlm); + + out_free: + kfree(dlm); + + out: + return error; +} + +/** + * dlm_others_may_mount + * @lockspace: the lockspace to unmount + * + */ + +static void lm_dlm_others_may_mount(lm_lockspace_t *lockspace) +{ + /* Do nothing. The first node to join the Mount Group will complete + * before Service Manager allows another node to join. */ +} + +/** + * dlm_unmount - unmount a lock space + * @lockspace: the lockspace to unmount + * + */ + +static void lm_dlm_unmount(lm_lockspace_t *lockspace) +{ + dlm_t *dlm = (dlm_t *) lockspace; + + release_mountgroup(dlm); + release_async_thread(dlm); + release_gdlm(dlm); + release_fence(dlm); + release_cluster(dlm); + kfree(dlm); +} + +struct lm_lockops lock_dlm_ops = { + lm_proto_name:"lock_dlm", + lm_mount:lm_dlm_mount, + lm_others_may_mount:lm_dlm_others_may_mount, + lm_unmount:lm_dlm_unmount, + lm_get_lock:lm_dlm_get_lock, + lm_put_lock:lm_dlm_put_lock, + lm_lock:lm_dlm_lock, + lm_unlock:lm_dlm_unlock, + lm_plock:lm_dlm_plock, + lm_punlock:lm_dlm_punlock, + lm_plock_get:lm_dlm_plock_get, + lm_cancel:lm_dlm_cancel, + lm_hold_lvb:lm_dlm_hold_lvb, + lm_unhold_lvb:lm_dlm_unhold_lvb, + lm_sync_lvb:lm_dlm_sync_lvb, + lm_recovery_done:lm_dlm_recovery_done, + lm_owner:THIS_MODULE, +}; diff -urN linux-orig/fs/gfs_locking/lock_dlm/plock.c linux-patched/fs/gfs_locking/lock_dlm/plock.c --- linux-orig/fs/gfs_locking/lock_dlm/plock.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_dlm/plock.c 2004-06-16 12:03:17.967822065 -0500 @@ -0,0 +1,1037 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. 
+**
+** This copyrighted material is made available to anyone wishing to use,
+** modify, copy, or redistribute it subject to the terms and conditions
+** of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#include "lock_dlm.h"
+
+/* parenthesize the whole expansion so MIN()/MAX() bind correctly inside
+ larger expressions */
+#define MIN(a,b) (((a) <= (b)) ? (a) : (b))
+#define MAX(a,b) (((a) >= (b)) ? (a) : (b))
+
+#define CREATE 1
+#define NO_CREATE 0
+
+#define WAIT 1
+#define NO_WAIT 0
+#define X_WAIT -1
+
+#define EX 1
+#define NO_EX 0
+#define SH NO_EX
+
+
+static int check_conflict(dlm_t *dlm, struct dlm_resource *r,
+ struct lm_lockname *name, unsigned long owner,
+ uint64_t start, uint64_t end, int ex);
+
+
+static int lock_resource(struct dlm_resource *r)
+{
+ dlm_lock_t *lp;
+ struct lm_lockname name;
+ int error;
+
+ name.ln_type = LM_TYPE_PLOCK_UPDATE;
+ name.ln_number = r->name.ln_number;
+
+ error = create_lp(r->dlm, &name, &lp);
+ if (error)
+ return error;
+
+ set_bit(LFL_IDLOCK, &lp->flags);
+ lp->req = DLM_LOCK_EX;
+ do_lock(lp, NULL);
+ wait_for_completion(&lp->uast_wait);
+
+ error = lp->lksb.sb_status;
+ if (error) {
+ kfree(lp);
+ lp = NULL;
+ }
+
+ r->update = lp;
+ return error;
+}
+
+static void unlock_resource(struct dlm_resource *r)
+{
+ do_unlock(r->update);
+ kfree(r->update);
+}
+
+static struct dlm_resource *search_resource(dlm_t *dlm, struct lm_lockname *name)
+{
+ struct dlm_resource *r;
+
+ list_for_each_entry(r, &dlm->resources, list) {
+ if (lm_name_equal(&r->name, name))
+ return r;
+ }
+ return NULL;
+}
+
+static int get_resource(dlm_t *dlm, struct lm_lockname *name, int create,
+ struct dlm_resource **res)
+{
+ struct dlm_resource *r, *r2;
+ int error = -ENOMEM;
+
+ down(&dlm->res_lock);
+ r = search_resource(dlm, name);
+ if (r)
+ r->count++;
+ up(&dlm->res_lock);
+
+ if (r)
+ goto out;
+
+ if (create == NO_CREATE) {
+ error = -ENOENT;
+ goto fail;
+ }
+
+ r = kmalloc(sizeof(struct dlm_resource), GFP_KERNEL);
+ if (!r)
+ goto fail;
+
+ memset(r, 0, sizeof(struct dlm_resource));
+ r->dlm = dlm;
+ r->name = *name;
+ r->count = 1;
+ INIT_LIST_HEAD(&r->locks);
+ INIT_LIST_HEAD(&r->async_locks);
+ init_MUTEX(&r->sema);
+ spin_lock_init(&r->async_spin);
+
+ down(&dlm->res_lock);
+ r2 = search_resource(dlm, name);
+ if (r2) {
+ r2->count++;
+ up(&dlm->res_lock);
+ kfree(r);
+ r = r2;
+ goto out;
+ }
+
+ list_add_tail(&r->list, &dlm->resources);
+ up(&dlm->res_lock);
+
+ out:
+ *res = r;
+ return 0;
+ fail:
+ return error;
+}
+
+static void put_resource(struct dlm_resource *r)
+{
+ dlm_t *dlm = r->dlm;
+
+ down(&dlm->res_lock);
+ r->count--;
+ if (r->count == 0) {
+ DLM_ASSERT(list_empty(&r->locks), );
+ DLM_ASSERT(list_empty(&r->async_locks), );
+ list_del(&r->list);
+ kfree(r);
+ }
+ up(&dlm->res_lock);
+}
+
+static inline void hold_resource(struct dlm_resource *r)
+{
+ down(&r->dlm->res_lock);
+ r->count++;
+ up(&r->dlm->res_lock);
+}
+
+static inline int ranges_overlap(uint64_t start1, uint64_t end1,
+ uint64_t start2, uint64_t end2)
+{
+ if (end1 < start2 || start1 > end2)
+ return FALSE;
+ return TRUE;
+}
+
+/**
+ * overlap_type - returns a value based on the type of overlap
+ * @s1 - start of new lock range
+ * @e1 - end of new lock range
+ * @s2 - start of existing lock range
+ * @e2 - end of existing lock range
+ *
+ */
+
+static int overlap_type(uint64_t s1, uint64_t e1, uint64_t s2, uint64_t e2)
+{
+ int ret;
+
+ /*
+ * ---r1---
+ * ---r2---
+ */
+
+ if (s1 == s2 && e1 == e2)
+ ret = 0;
+
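+ /* e.g. a new range 3-5 against an existing 1-7 has s1 > s2 and
+ e1 < e2, so it falls through to the "2" case below (RN strictly
+ inside RE) */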
/* + * --r1-- + * ---r2--- + */ + + else if (s1 == s2 && e1 < e2) + ret = 1; + + /* + * --r1-- + * ---r2--- + */ + + else if (s1 > s2 && e1 == e2) + ret = 1; + + /* + * --r1-- + * ---r2--- + */ + + else if (s1 > s2 && e1 < e2) + ret = 2; + + /* + * ---r1--- or ---r1--- or ---r1--- + * --r2-- --r2-- --r2-- + */ + + else if (s1 <= s2 && e1 >= e2) + ret = 3; + + /* + * ---r1--- + * ---r2--- + */ + + else if (s1 > s2 && e1 > e2) + ret = 4; + + /* + * ---r1--- + * ---r2--- + */ + + else if (s1 < s2 && e1 < e2) + ret = 4; + + else + ret = -1; + + return ret; +} + +/* shrink the range start2:end2 by the partially overlapping start:end */ + +static int shrink_range2(uint64_t *start2, uint64_t *end2, + uint64_t start, uint64_t end) +{ + int error = 0; + + if (*start2 < start) + *end2 = start - 1; + else if (*end2 > end) + *start2 = end + 1; + else + error = -1; + return error; +} + +static int shrink_range(struct posix_lock *po, uint64_t start, uint64_t end) +{ + return shrink_range2(&po->start, &po->end, start, end); +} + +static void put_lock(dlm_lock_t *lp) +{ + struct posix_lock *po = lp->posix; + + po->count--; + if (po->count == 0) { + kfree(po); + kfree(lp); + } +} + +static int create_lock(struct dlm_resource *r, unsigned long owner, int ex, + uint64_t start, uint64_t end, dlm_lock_t **lpp) +{ + dlm_lock_t *lp; + struct posix_lock *po; + int error; + + error = create_lp(r->dlm, &r->name, &lp); + if (error) + return error; + + po = kmalloc(sizeof(struct posix_lock), GFP_KERNEL); + if (!po) { + kfree(lp); + return -ENOMEM; + } + memset(po, 0, sizeof(struct posix_lock)); + + lp->posix = po; + po->lp = lp; + po->resource = r; + po->count = 1; + po->start = start; + po->end = end; + po->owner = owner; + po->ex = ex; + list_add_tail(&po->list, &r->locks); + + *lpp = lp; + return 0; +} + +static unsigned int make_flags_posix(dlm_lock_t *lp, int wait) +{ + unsigned int lkf = 0; + + if (wait == NO_WAIT || wait == X_WAIT) + lkf |= DLM_LKF_NOQUEUE; + + if (lp->lksb.sb_lkid != 0) { + lkf |= DLM_LKF_CONVERT; + if (wait == WAIT) + lkf |= DLM_LKF_EXPEDITE; + } + return lkf; +} + +static void do_range_lock(dlm_lock_t *lp) +{ + struct dlm_range range = { lp->posix->start, lp->posix->end }; + do_lock(lp, &range); +} + +static void request_lock(dlm_lock_t *lp, int wait) +{ + log_debug("req %x,%"PRIx64" %s %"PRIx64"-%"PRIx64" %u w %u", + lp->lockname.ln_type, lp->lockname.ln_number, + lp->posix->ex ? "ex" : "sh", lp->posix->start, + lp->posix->end, current->pid, wait); + + set_bit(LFL_IDLOCK, &lp->flags); + lp->req = lp->posix->ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; + lp->lkf = make_flags_posix(lp, wait); + + do_range_lock(lp); +} + +static void add_async(struct posix_lock *po, struct dlm_resource *r) +{ + spin_lock(&r->async_spin); + list_add_tail(&po->async_list, &r->async_locks); + spin_unlock(&r->async_spin); +} + +static void del_async(struct posix_lock *po, struct dlm_resource *r) +{ + spin_lock(&r->async_spin); + list_del(&po->async_list); + spin_unlock(&r->async_spin); +} + +static int wait_async(dlm_lock_t *lp) +{ + wait_for_completion(&lp->uast_wait); + del_async(lp->posix, lp->posix->resource); + return lp->lksb.sb_status; +} + +static void wait_async_list(struct dlm_resource *r, unsigned long owner) +{ + struct posix_lock *po; + int error, found; + + restart: + found = FALSE; + spin_lock(&r->async_spin); + list_for_each_entry(po, &r->async_locks, async_list) { + if (po->owner != owner) + continue; + found = TRUE; + break; + } + spin_unlock(&r->async_spin); + + if (found) { + DLM_ASSERT(po->lp, ); + error = wait_async(po->lp); + DLM_ASSERT(!error, ); + goto restart; + } +} + +static void update_lock(dlm_lock_t *lp, int wait) +{ + request_lock(lp, wait); + add_async(lp->posix, lp->posix->resource); + + if (wait == NO_WAIT || wait == X_WAIT) { + int error = wait_async(lp); + DLM_ASSERT(!error, printk("error=%d\n", error);); + } +} + +static void add_lock(struct dlm_resource *r, unsigned long owner, int wait, + int ex, uint64_t start, uint64_t end) +{ + dlm_lock_t *lp; + int error; + + error = create_lock(r, owner, ex, start, end, &lp); + DLM_ASSERT(!error, ); + + hold_resource(r); + update_lock(lp, wait); +} + +static int remove_lock(dlm_lock_t *lp) +{ + struct dlm_resource *r = lp->posix->resource; + + log_debug("remove %x,%"PRIx64" %u", + r->name.ln_type, r->name.ln_number, current->pid); + + do_unlock(lp); + put_lock(lp); + put_resource(r); + return 0; +} + +/* RN within RE (and starts or ends on RE boundary) + 1. add new lock for non-overlap area of RE, orig mode + 2. convert RE to RN range and mode */ + +static int lock_case1(struct posix_lock *po, struct dlm_resource *r, + unsigned long owner, int wait, int ex, uint64_t start, + uint64_t end) +{ + uint64_t start2, end2; + + /* non-overlapping area start2:end2 */ + start2 = po->start; + end2 = po->end; + shrink_range2(&start2, &end2, start, end); + + po->start = start; + po->end = end; + po->ex = ex; + + if (ex) { + add_lock(r, owner, X_WAIT, SH, start2, end2); + update_lock(po->lp, wait); + } else { + add_lock(r, owner, WAIT, EX, start2, end2); + update_lock(po->lp, X_WAIT); + } + return 0; +} + +/* RN within RE (RE overlaps RN on both sides) + 1. add new lock for front fragment, orig mode + 2. add new lock for back fragment, orig mode + 3. 
convert RE to RN range and mode */ + +static int lock_case2(struct posix_lock *po, struct dlm_resource *r, + unsigned long owner, int wait, int ex, uint64_t start, + uint64_t end) +{ + if (ex) { + add_lock(r, owner, X_WAIT, SH, po->start, start-1); + add_lock(r, owner, X_WAIT, SH, end+1, po->end); + + po->start = start; + po->end = end; + po->ex = ex; + + update_lock(po->lp, wait); + } else { + add_lock(r, owner, WAIT, EX, po->start, start-1); + add_lock(r, owner, WAIT, EX, end+1, po->end); + + po->start = start; + po->end = end; + po->ex = ex; + + update_lock(po->lp, X_WAIT); + } + return 0; +} + +/* returns ranges from exist list in order of their start values */ + +static int next_exist(struct list_head *exist, uint64_t *start, uint64_t *end) +{ + struct posix_lock *po; + int first = TRUE, first_call = FALSE; + + if (!*start && !*end) + first_call = TRUE; + + list_for_each_entry(po, exist, list) { + if (!first_call && (po->start <= *start)) + continue; + + if (first) { + *start = po->start; + *end = po->end; + first = FALSE; + } else if (po->start < *start) { + *start = po->start; + *end = po->end; + } + } + + return (first ? -1 : 0); +} + +/* adds locks in gaps between existing locks from start to end */ + +static int fill_gaps(struct list_head *exist, struct dlm_resource *r, + unsigned long owner, int wait, int ex, uint64_t start, + uint64_t end) +{ + uint64_t exist_start = 0, exist_end = 0; + + /* cover gaps in front of each existing lock */ + for (;;) { + if (next_exist(exist, &exist_start, &exist_end)) + break; + if (start < exist_start) + add_lock(r, owner, wait, ex, start, exist_start-1); + start = exist_end + 1; + } + + /* cover gap after last existing lock */ + if (exist_end < end) + add_lock(r, owner, wait, ex, exist_end+1, end); + + return 0; +} + +/* RE within RN (possibly more than one RE lock, all within RN) */ + +static int lock_case3(struct list_head *exist, struct dlm_resource *r, + unsigned long owner, int wait, int ex, uint64_t start, + uint64_t end) +{ + struct posix_lock *po, *safe; + + fill_gaps(exist, r, owner, wait, ex, start, end); + + if (!ex) + wait = X_WAIT; + + /* update existing locks to new mode and put back in locks list */ + list_for_each_entry_safe(po, safe, exist, list) { + list_move_tail(&po->list, &r->locks); + if (po->ex == ex) + continue; + po->ex = ex; + update_lock(po->lp, wait); + } + + return 0; +} + +/* RE within RN (possibly more than one RE lock, one RE partially overlaps RN) + 1. add new locks with new mode for RN gaps not covered by RE's + 2. 
convert RE locks' mode to new mode
+ other steps deal with the partial-overlap fragment and depend on whether
+ the request is sh->ex or ex->sh */
+
+static int lock_case4(struct posix_lock *opo, struct list_head *exist,
+ struct dlm_resource *r, unsigned long owner, int wait,
+ int ex, uint64_t start, uint64_t end)
+{
+ struct posix_lock *po, *safe;
+ uint64_t over_start = 0, over_end = 0;
+ uint64_t frag_start = 0, frag_end = 0;
+
+ /* fragment (non-overlap) range of opo */
+ if (opo->start < start) {
+ frag_start = opo->start;
+ frag_end = start - 1;
+ } else {
+ frag_start = end + 1;
+ frag_end = opo->end;
+ }
+
+ /* overlap range of opo */
+ if (opo->start < start) {
+ over_start = start;
+ over_end = opo->end;
+ } else {
+ over_start = opo->start;
+ over_end = end;
+ }
+
+ /* cut off the non-overlap portion of opo so fill_gaps will work */
+ opo->start = over_start;
+ opo->end = over_end;
+
+ fill_gaps(exist, r, owner, wait, ex, start, end);
+
+ /* update existing locks to new mode and put back in locks list */
+ list_for_each_entry_safe(po, safe, exist, list) {
+ list_move_tail(&po->list, &r->locks);
+ if (po == opo)
+ continue;
+ if (po->ex == ex)
+ continue;
+ po->ex = ex;
+ update_lock(po->lp, wait);
+ }
+
+ /* deal with the RE that partially overlaps the requested range */
+
+ if (ex == opo->ex)
+ return 0;
+
+ if (ex) {
+ /* 1. add a shared lock in the non-overlap range
+ 2. convert RE to overlap range and requested mode */
+
+ add_lock(r, owner, X_WAIT, SH, frag_start, frag_end);
+
+ opo->start = over_start;
+ opo->end = over_end;
+ opo->ex = ex;
+
+ update_lock(opo->lp, wait);
+ } else {
+ /* 1. request a shared lock in the overlap range
+ 2. convert RE to non-overlap range
+ 3. wait for shared lock to complete */
+
+ add_lock(r, owner, WAIT, SH, over_start, over_end);
+
+ opo->start = frag_start;
+ opo->end = frag_end;
+
+ update_lock(opo->lp, X_WAIT);
+ }
+
+ return 0;
+}
+
+/* go through r->locks to find what needs to be done to extend,
+ shrink, shift, split, etc. existing locks (this often involves adding new
+ locks in addition to modifying existing locks).
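+ For example: if the owner holds a shared lock on 0-9 and requests an
+ exclusive lock on 0-4, overlap_type() reports case 1, and lock_case1()
+ first adds a shared lock covering the leftover 5-9, then converts the
+ original lock down to 0-4 in exclusive mode.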
*/ + +static int plock_internal(struct dlm_resource *r, unsigned long owner, + int wait, int ex, uint64_t start, uint64_t end) +{ + LIST_HEAD(exist); + struct posix_lock *po, *safe, *case4_po = NULL; + int error = 0; + + list_for_each_entry_safe(po, safe, &r->locks, list) { + if (po->owner != owner) + continue; + if (!ranges_overlap(po->start, po->end, start, end)) + continue; + + /* existing range (RE) overlaps new range (RN) */ + + switch(overlap_type(start, end, po->start, po->end)) { + + case 0: + if (po->ex == ex) + goto out; + + /* ranges the same - just update the existing lock */ + po->ex = ex; + update_lock(po->lp, wait); + goto out; + + case 1: + if (po->ex == ex) + goto out; + + error = lock_case1(po, r, owner, wait, ex, start, end); + goto out; + + case 2: + if (po->ex == ex) + goto out; + + error = lock_case2(po, r, owner, wait, ex, start, end); + goto out; + + case 3: + list_move_tail(&po->list, &exist); + break; + + case 4: + DLM_ASSERT(!case4_po, ); + case4_po = po; + list_move_tail(&po->list, &exist); + break; + + default: + error = -1; + goto out; + } + } + + if (case4_po) + error = lock_case4(case4_po, &exist, r, owner, wait, ex, + start, end); + else if (!list_empty(&exist)) + error = lock_case3(&exist, r, owner, wait, ex, start, end); + else + add_lock(r, owner, wait, ex, start, end); + + out: + return error; +} + +static int punlock_internal(struct dlm_resource *r, unsigned long owner, + uint64_t start, uint64_t end) +{ + struct posix_lock *po, *safe; + int error = 0; + + list_for_each_entry_safe(po, safe, &r->locks, list) { + if (po->owner != owner) + continue; + if (!ranges_overlap(po->start, po->end, start, end)) + continue; + + /* existing range (RE) overlaps new range (RN) */ + + switch(overlap_type(start, end, po->start, po->end)) { + + case 0: + /* ranges the same - just remove the existing lock */ + + list_del(&po->list); + remove_lock(po->lp); + goto out; + + case 1: + /* RN within RE and starts or ends on RE boundary - + * shrink and update RE */ + + shrink_range(po, start, end); + update_lock(po->lp, X_WAIT); + goto out; + + case 2: + /* RN within RE - shrink and update RE to be front + * fragment, and add a new lock for back fragment */ + + add_lock(r, owner, po->ex ? WAIT : X_WAIT, po->ex, + end+1, po->end); + + po->end = start - 1; + update_lock(po->lp, X_WAIT); + goto out; + + case 3: + /* RE within RN - remove RE, then continue checking + * because RN could cover other locks */ + + list_del(&po->list); + remove_lock(po->lp); + continue; + + case 4: + /* front of RE in RN, or end of RE in RN - shrink and + * update RE, then continue because RN could cover + * other locks */ + + shrink_range(po, start, end); + update_lock(po->lp, X_WAIT); + continue; + + default: + error = -1; + goto out; + } + } + + out: + return error; +} + +int lm_dlm_plock(lm_lockspace_t *lockspace, struct lm_lockname *name, + unsigned long owner, int wait, int ex, uint64_t start, + uint64_t end) +{ + dlm_t *dlm = (dlm_t *) lockspace; + struct dlm_resource *r; + int error; + + log_debug("en plock %u %x,%"PRIx64"", current->pid, + name->ln_type, name->ln_number); + + error = get_resource(dlm, name, CREATE, &r); + if (error) + goto out; + +#if 0 + /* Wait, without holding any locks, until this plock request is not + blocked by plocks of *other* *local* processes. Then, none of the + dlm requests below will wait on a lock from a local process. + + This should not be necessary since we wait for completion after + up(). 
This means a local process p1 can unlock lkb X while local p2
+ is waiting for X (in wait_async_list). */
+ error = wait_local(r, owner, wait, ex, start, end);
+ if (error)
+ goto out_put;
+#endif
+
+ down(&r->sema);
+ error = lock_resource(r);
+ if (error)
+ goto out_up;
+
+ /* check_conflict() checks for conflicts with plocks from other local
+ processes and other nodes. */
+
+ if (!wait && check_conflict(dlm, r, name, owner, start, end, ex)) {
+ error = -1;
+ unlock_resource(r);
+ goto out_up;
+ }
+
+ /* If NO_WAIT all requests should return immediately.
+ If WAIT all requests go on r->async_locks which we wait on in
+ wait_async_locks(). This means DLM should not return -EAGAIN and we
+ should never block waiting for a plock to be released (by a local or
+ remote process) until we call wait_async_list(). */
+
+ error = plock_internal(r, owner, wait, ex, start, end);
+ unlock_resource(r);
+
+ /* wait_async_list() must follow the up() because we must be able
+ to punlock a range on this resource while there's a blocked plock
+ request to prevent deadlock between nodes (and processes). */
+
+ out_up:
+ up(&r->sema);
+ wait_async_list(r, owner);
+ put_resource(r);
+ out:
+ log_debug("ex plock %u error %d", current->pid, error);
+ return error;
+}
+
+int lm_dlm_punlock(lm_lockspace_t *lockspace, struct lm_lockname *name,
+ unsigned long owner, uint64_t start, uint64_t end)
+{
+ dlm_t *dlm = (dlm_t *) lockspace;
+ struct dlm_resource *r;
+ int error;
+
+ log_debug("en punlock %u %x,%"PRIx64"", current->pid,
+ name->ln_type, name->ln_number);
+
+ error = get_resource(dlm, name, NO_CREATE, &r);
+ if (error)
+ goto out;
+
+ down(&r->sema);
+ error = lock_resource(r);
+ if (error)
+ goto out_up;
+
+ error = punlock_internal(r, owner, start, end);
+ unlock_resource(r);
+
+ out_up:
+ up(&r->sema);
+ wait_async_list(r, owner);
+ put_resource(r);
+ out:
+ log_debug("ex punlock %u error %d", current->pid, error);
+ return error;
+}
+
+static void query_ast(void *astargs)
+{
+ dlm_lock_t *lp = (dlm_lock_t *) astargs;
+ complete(&lp->uast_wait);
+}
+
+static int get_conflict_global(dlm_t *dlm, struct lm_lockname *name,
+ unsigned long owner, uint64_t *start,
+ uint64_t *end, int *ex, unsigned long *rowner)
+{
+ dlm_lock_t *lp;
+ struct dlm_queryinfo qinfo;
+ struct dlm_lockinfo *lki;
+ int query = 0, s, error;
+
+ /* acquire a null lock on which to base the query */
+
+ error = create_lp(dlm, name, &lp);
+ if (error)
+ goto ret;
+
+ lp->req = DLM_LOCK_NL;
+ set_bit(LFL_IDLOCK, &lp->flags);
+ do_lock(lp, NULL);
+ wait_for_completion(&lp->uast_wait);
+
+ /* do query, repeating if insufficient space */
+
+ query = DLM_LOCK_THIS | DLM_QUERY_QUEUE_GRANTED |
+ DLM_QUERY_LOCKS_HIGHER;
+
+ for (s = 16; s < dlm->max_nodes + 1; s += 16) {
+
+ lki = kmalloc(s * sizeof(struct dlm_lockinfo), GFP_KERNEL);
+ if (!lki) {
+ error = -ENOMEM;
+ goto out;
+ }
+ memset(lki, 0, s * sizeof(struct dlm_lockinfo));
+ memset(&qinfo, 0, sizeof(qinfo));
+ qinfo.gqi_locksize = s;
+ qinfo.gqi_lockinfo = lki;
+
+ init_completion(&lp->uast_wait);
+ error = dlm_query(dlm->gdlm_lsp, &lp->lksb, query, &qinfo,
+ query_ast, (void *) lp);
+ if (error) {
+ kfree(lki);
+ goto out;
+ }
+ wait_for_completion(&lp->uast_wait);
+ error = lp->lksb.sb_status;
+
+ if (!error)
+ break;
+ kfree(lki);
+ if (error != -E2BIG)
+ goto out;
+ }
+
+ /* check query results for blocking locks */
+
+ for (s = 0; s < qinfo.gqi_lockcount; s++) {
+
+ lki = &qinfo.gqi_lockinfo[s];
+
+ if (!ranges_overlap(*start, *end, lki->lki_grrange.ra_start,
lki->lki_grrange.ra_end)) + continue; + + if (lki->lki_node == dlm->our_nodeid) + continue; + + if (lki->lki_grmode == DLM_LOCK_EX || *ex) { + *start = lki->lki_grrange.ra_start; + *end = lki->lki_grrange.ra_end; + *ex = (lki->lki_grmode == DLM_LOCK_EX) ? 1 : 0; + *rowner = lki->lki_node; + error = -EAGAIN; + break; + } + } + + kfree(qinfo.gqi_lockinfo); + + out: + do_unlock(lp); + kfree(lp); + ret: + return error; +} + +static int get_conflict_local(dlm_t *dlm, struct dlm_resource *r, + struct lm_lockname *name, unsigned long owner, + uint64_t *start, uint64_t *end, int *ex, + unsigned long *rowner) +{ + struct posix_lock *po; + int found = FALSE; + + list_for_each_entry(po, &r->locks, list) { + if (po->owner == owner) + continue; + if (!ranges_overlap(po->start, po->end, *start, *end)) + continue; + + if (*ex || po->ex) { + *start = po->start; + *end = po->end; + *ex = po->ex; + *rowner = po->owner; + found = TRUE; + break; + } + } + return found; +} + +int lm_dlm_plock_get(lm_lockspace_t *lockspace, struct lm_lockname *name, + unsigned long owner, uint64_t *start, uint64_t *end, + int *ex, unsigned long *rowner) +{ + dlm_t *dlm = (dlm_t *) lockspace; + struct dlm_resource *r; + int error, found; + + error = get_resource(dlm, name, NO_CREATE, &r); + if (!error) { + down(&r->sema); + found = get_conflict_local(dlm, r, name, owner, start, end, ex, + rowner); + up(&r->sema); + put_resource(r); + if (found) + goto out; + } + + error = get_conflict_global(dlm, name, owner, start, end, ex, rowner); + out: + return error; +} + +static int check_conflict(dlm_t *dlm, struct dlm_resource *r, + struct lm_lockname *name, unsigned long owner, + uint64_t start, uint64_t end, int ex) +{ + uint64_t get_start = start, get_end = end; + unsigned long get_owner = 0; + int get_ex = ex, error; + + error = get_conflict_local(dlm, r, name, owner, + &get_start, &get_end, &get_ex, &get_owner); + if (error) + goto out; + + error = get_conflict_global(dlm, name, owner, + &get_start, &get_end, &get_ex, &get_owner); + out: + log_debug("check_conflict %d %"PRIx64"-%"PRIx64" %"PRIx64"-%"PRIx64" " + "ex %d %d own %lu %lu pid %u", error, start, end, + get_start, get_end, ex, get_ex, owner, get_owner, + current->pid); + return error; +} + diff -urN linux-orig/fs/gfs_locking/lock_dlm/thread.c linux-patched/fs/gfs_locking/lock_dlm/thread.c --- linux-orig/fs/gfs_locking/lock_dlm/thread.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_dlm/thread.c 2004-06-16 12:03:17.967822065 -0500 @@ -0,0 +1,388 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include "lock_dlm.h" + +/* + * Run in dlm_async thread + */ + +/** + * queue_submit - add lock request to queue for dlm_async thread + * @lp: DLM lock + * + * A lock placed on this queue is re-submitted to DLM as soon as + * dlm_async thread gets to it. 
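+ * process_complete() also uses this queue to re-issue a request on a
+ * later pass of the dlm_async loop, e.g. when a lock granted during
+ * recovery has been demoted to NL and must be reacquired in its
+ * originally requested mode.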
+ */
+
+static void queue_submit(dlm_lock_t *lp)
+{
+ dlm_t *dlm = lp->dlm;
+
+ spin_lock(&dlm->async_lock);
+ list_add_tail(&lp->slist, &dlm->submit);
+ set_bit(LFL_SLIST, &lp->flags);
+ spin_unlock(&dlm->async_lock);
+ wake_up(&dlm->wait);
+}
+
+/**
+ * process_blocking - processing of blocking callback
+ * @lp: DLM lock
+ *
+ */
+
+static void process_blocking(dlm_lock_t *lp, int bast_mode)
+{
+ dlm_t *dlm = lp->dlm;
+ unsigned int cb;
+
+ switch (make_lmstate(bast_mode)) {
+ case LM_ST_EXCLUSIVE:
+ cb = LM_CB_NEED_E;
+ break;
+ case LM_ST_DEFERRED:
+ cb = LM_CB_NEED_D;
+ break;
+ case LM_ST_SHARED:
+ cb = LM_CB_NEED_S;
+ break;
+ default:
+ /* lp->bast_mode has already been cleared by the caller,
+ so report the argument */
+ DLM_ASSERT(0, printk("unknown bast mode %u\n", bast_mode););
+ }
+
+ dlm->fscb(dlm->fsdata, cb, &lp->lockname);
+}
+
+/**
+ * process_complete - processing of completion callback for a lock request
+ * @lp: DLM lock
+ *
+ */
+
+static void process_complete(dlm_lock_t *lp)
+{
+ dlm_t *dlm = lp->dlm;
+ struct lm_async_cb acb;
+ int16_t prev_mode = lp->cur;
+
+ memset(&acb, 0, sizeof(acb));
+
+ /*
+ * This is an AST for an unlock.
+ */
+
+ if (test_and_clear_bit(LFL_DLM_UNLOCK, &lp->flags)) {
+
+ /* FIXME: Add an assertion to catch NOFAIL promotions from
+ * non-NL modes? */
+
+ if (lp->lksb.sb_status == -DLM_ECANCEL) {
+
+ /* lp->cur remains the same, is there anything to clear
+ * or reset to put this lp into an "ordinary" state? */
+
+ printk("lock_dlm: -DLM_ECANCEL num=%x,%"PRIx64"\n",
+ lp->lockname.ln_type, lp->lockname.ln_number);
+ } else {
+ DLM_ASSERT(lp->lksb.sb_status == -DLM_EUNLOCK,
+ printk("num=%x,%"PRIx64" status=%d\n",
+ lp->lockname.ln_type,
+ lp->lockname.ln_number,
+ lp->lksb.sb_status););
+ lp->cur = DLM_LOCK_IV;
+ }
+
+ complete(&lp->uast_wait);
+ return;
+ }
+
+ /*
+ * A canceled lock request. The lock was just taken off the delayed
+ * list and was never even submitted to dlm.
+ */
+
+ if (test_and_clear_bit(LFL_CANCEL, &lp->flags)) {
+ lp->req = lp->cur;
+ acb.lc_ret |= LM_OUT_CANCELED;
+ goto out;
+ }
+
+ /*
+ * An error occurred.
+ */
+
+ if (lp->lksb.sb_status) {
+ lp->req = lp->cur;
+ if (lp->cur == DLM_LOCK_IV)
+ lp->lksb.sb_lkid = 0;
+
+ if ((lp->lksb.sb_status == -EAGAIN) &&
+ (lp->lkf & DLM_LKF_NOQUEUE)) {
+ /* a "normal" error */
+ } else
+ printk("lock_dlm: process_complete error id=%x "
+ "status=%d\n", lp->lksb.sb_lkid,
+ lp->lksb.sb_status);
+ goto out;
+ }
+
+ /*
+ * This is an AST for an EX->EX conversion for sync_lvb from GFS.
+ */
+
+ if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
+ complete(&lp->uast_wait);
+ return;
+ }
+
+ /*
+ * A lock has been demoted to NL because it initially completed during
+ * BLOCK_LOCKS. Now it must be requested in the originally requested
+ * mode.
+ */
+
+ if (test_and_clear_bit(LFL_REREQUEST, &lp->flags)) {
+
+ DLM_ASSERT(lp->req == DLM_LOCK_NL,);
+ DLM_ASSERT(lp->prev_req > DLM_LOCK_NL,);
+
+ lp->cur = DLM_LOCK_NL;
+ lp->req = lp->prev_req;
+ lp->prev_req = DLM_LOCK_IV;
+ lp->lkf &= ~DLM_LKF_CONVDEADLK;
+ lp->lkf |= DLM_LKF_QUECVT;
+
+ set_bit(LFL_NOCACHE, &lp->flags);
+
+ if (test_bit(DFL_BLOCK_LOCKS, &dlm->flags) &&
+ !test_bit(LFL_NOBLOCK, &lp->flags))
+ queue_delayed(lp, QUEUE_LOCKS_BLOCKED);
+ else
+ queue_submit(lp);
+ return;
+ }
+
+ /*
+ * A request is granted during dlm recovery. It may be granted
+ * because the locks of a failed node were cleared. In that case,
+ * there may be inconsistent data beneath this lock and we must wait
+ * for recovery to complete to use it.
When gfs recovery is done, this
+ * granted lock will be converted to NL and then reacquired in this
+ * granted state.
+ */
+
+ if (test_bit(DFL_BLOCK_LOCKS, &dlm->flags) &&
+ !test_bit(LFL_NOBLOCK, &lp->flags) &&
+ lp->req != DLM_LOCK_NL) {
+
+ lp->cur = lp->req;
+ lp->prev_req = lp->req;
+ lp->req = DLM_LOCK_NL;
+ lp->lkf |= DLM_LKF_CONVERT;
+ lp->lkf &= ~DLM_LKF_CONVDEADLK;
+ lp->lkf &= ~DLM_LKF_QUECVT;
+
+ set_bit(LFL_REREQUEST, &lp->flags);
+ queue_submit(lp);
+ return;
+ }
+
+ /*
+ * DLM demoted the lock to NL before it was granted so GFS must be
+ * told it cannot cache data for this lock. sb_flags may carry other
+ * bits as well, so test the bit rather than compare the whole field.
+ */
+
+ if (lp->lksb.sb_flags & DLM_SBF_DEMOTED)
+ set_bit(LFL_NOCACHE, &lp->flags);
+
+ out:
+
+ /*
+ * This is an internal lock_dlm lock used for managing JIDs.
+ */
+
+ if (test_bit(LFL_IDLOCK, &lp->flags)) {
+ clear_bit(LFL_NOBLOCK, &lp->flags);
+ lp->cur = lp->req;
+ complete(&lp->uast_wait);
+ return;
+ }
+
+ /*
+ * Normal completion of a lock request. Tell GFS it now has the lock.
+ */
+
+ clear_bit(LFL_NOBLOCK, &lp->flags);
+ lp->cur = lp->req;
+
+ acb.lc_name = lp->lockname;
+ acb.lc_ret |= make_lmstate(lp->cur);
+
+ if (!test_and_clear_bit(LFL_NOCACHE, &lp->flags) &&
+ (lp->cur > DLM_LOCK_NL) && (prev_mode > DLM_LOCK_NL))
+ acb.lc_ret |= LM_OUT_CACHEABLE;
+
+ dlm->fscb(dlm->fsdata, LM_CB_ASYNC, &acb);
+}
+
+/**
+ * no_work - determine if there's work for the dlm_async thread
+ * @dlm:
+ *
+ * Returns: 1 if no work, 0 otherwise
+ */
+
+static __inline__ int no_work(dlm_t * dlm)
+{
+ int ret;
+
+ spin_lock(&dlm->async_lock);
+
+ ret = list_empty(&dlm->complete) &&
+ list_empty(&dlm->blocking) &&
+ list_empty(&dlm->submit) &&
+ list_empty(&dlm->starts) && !test_bit(DFL_MG_FINISH, &dlm->flags);
+
+ spin_unlock(&dlm->async_lock);
+
+ return ret;
+}
+
+/**
+ * dlm_async - thread for a variety of asynchronous processing
+ * @data:
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+static int dlm_async(void *data)
+{
+ dlm_t *dlm = (dlm_t *) data;
+ dlm_lock_t *lp = NULL;
+ dlm_start_t *ds = NULL;
+ uint8_t complete, blocking, submit, start, finish;
+ DECLARE_WAITQUEUE(wait, current);
+
+ daemonize("lock_dlm");
+ atomic_inc(&dlm->threads);
+
+ do {
+ current->state = TASK_INTERRUPTIBLE;
+ add_wait_queue(&dlm->wait, &wait);
+ if (no_work(dlm))
+ schedule();
+ remove_wait_queue(&dlm->wait, &wait);
+ current->state = TASK_RUNNING;
+
+ complete = blocking = submit = start = finish = 0;
+
+ spin_lock(&dlm->async_lock);
+
+ if (!list_empty(&dlm->complete)) {
+ lp = list_entry(dlm->complete.next, dlm_lock_t, clist);
+ list_del(&lp->clist);
+ clear_bit(LFL_CLIST, &lp->flags);
+ complete = 1;
+ } else if (!list_empty(&dlm->blocking)) {
+ lp = list_entry(dlm->blocking.next, dlm_lock_t, blist);
+ list_del(&lp->blist);
+ clear_bit(LFL_BLIST, &lp->flags);
+ blocking = lp->bast_mode;
+ lp->bast_mode = 0;
+ } else if (!list_empty(&dlm->submit)) {
+ lp = list_entry(dlm->submit.next, dlm_lock_t, slist);
+ list_del(&lp->slist);
+ clear_bit(LFL_SLIST, &lp->flags);
+ submit = 1;
+ } else if (!list_empty(&dlm->starts)) {
+ ds = list_entry(dlm->starts.next, dlm_start_t, list);
+ list_del(&ds->list);
+ start = 1;
+ } else if (test_and_clear_bit(DFL_MG_FINISH, &dlm->flags)) {
+ finish = 1;
+ }
+
+ spin_unlock(&dlm->async_lock);
+
+ if (complete)
+ process_complete(lp);
+
+ else if (blocking)
+ process_blocking(lp, blocking);
+
+ else if (submit)
+ process_submit(lp);
+
+ else if (start)
+ process_start(dlm, ds);
+
+ else if (finish)
+ process_finish(dlm);
+
+ schedule();
+ }
+ while (!test_bit(DFL_THREAD_STOP,
&dlm->flags));
+
+ atomic_dec(&dlm->threads);
+ return 0;
+}
+
+/**
+ * init_async_thread
+ * @dlm:
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+int init_async_thread(dlm_t * dlm)
+{
+ int error;
+
+ clear_bit(DFL_THREAD_STOP, &dlm->flags);
+ atomic_set(&dlm->threads, 0);
+
+ error = kernel_thread(dlm_async, dlm, 0);
+ if (error < 0)
+ goto out;
+
+ error = kernel_thread(dlm_async, dlm, 0);
+ if (error < 0) {
+ release_async_thread(dlm);
+ goto out;
+ }
+
+ while (atomic_read(&dlm->threads) != 2)
+ schedule();
+ error = 0;
+
+ out:
+ if (error)
+ printk("lock_dlm: can't start async thread %d\n", error);
+ return error;
+}
+
+/**
+ * release_async_thread
+ * @dlm:
+ *
+ */
+
+void release_async_thread(dlm_t * dlm)
+{
+ set_bit(DFL_THREAD_STOP, &dlm->flags);
+ while (atomic_read(&dlm->threads)) {
+ wake_up(&dlm->wait);
+ schedule();
+ }
+}
diff -urN linux-orig/fs/gfs_locking/lock_gulm/gio_wiretypes.h linux-patched/fs/gfs_locking/lock_gulm/gio_wiretypes.h
--- linux-orig/fs/gfs_locking/lock_gulm/gio_wiretypes.h 1969-12-31 18:00:00.000000000 -0600
+++ linux-patched/fs/gfs_locking/lock_gulm/gio_wiretypes.h 2004-06-16 12:03:21.956895230 -0500
@@ -0,0 +1,404 @@
+/******************************************************************************
+*******************************************************************************
+**
+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+**
+** This copyrighted material is made available to anyone wishing to use,
+** modify, copy, or redistribute it subject to the terms and conditions
+** of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+#ifndef __gio_wiretypes_h__
+#define __gio_wiretypes_h__
+
+/* an attempt to do something about tracking changes to the protocol over
+ * the wires.
+ * If I were really cute, this would effectively be a checksum of this file.
+ */
+#define GIO_WIREPROT_VERS (0x67000010)
+
+/*****************Error codes.
+ * everyone uses these same error codes.
+ */
+#define gio_Err_Ok (0)
+#define gio_Err_BadLogin (1001)
+#define gio_Err_BadCluster (1003)
+#define gio_Err_BadConfig (1004)
+#define gio_Err_BadGeneration (1005)
+#define gio_Err_BadWireProto (1019)
+
+#define gio_Err_NotAllowed (1006)
+#define gio_Err_Unknown_Cs (1007)
+#define gio_Err_BadStateChg (1008)
+#define gio_Err_MemoryIssues (1009)
+
+#define gio_Err_PushQu (1010) /* client should never see this one */
+#define gio_Err_TryFailed (1011)
+#define gio_Err_AlreadyPend (1013)
+#define gio_Err_Canceled (1015)
+
+#define gio_Err_NoSuchFS (1016)
+#define gio_Err_NoSuchJID (1017)
+#define gio_Err_NoSuchName (1018)
+
+/* next free error code: 1002 1012 1014 1020 */
+
+/*
+ * Error: just sort of a generic error code thing.
+ * uint32: gERR
+ * uint32: opcode that this is in reply to.
(can be zeros) + * uint32: error code + */ +#define gulm_err_reply (0x67455252) /* gERR */ + +#define gulm_nop (0x674e4f50) /* gNOP */ + +/********************* Core *****************/ +/* + * login request + * uint32: gCL0 + * uint32: proto version + * string: cluster ID + * string: My Name + * uint64: generation number + * uint32: config CRC + * uint32: rank + * login reply + * uint32: gCL1 + * uint64: generation number + * uint32: error code + * uint32: rank + * uint8: ama + * If I am the Master or Arbitrating and there are no errors, A + * serialization of the current nodelist follows. And a client or slave + * is connecting (not resources). + * + * logout request: + * uint32: gCL2 + * string: node name + * uint8: S/P/A/M/R + * logout reply: Don't seem to use this.... + * uint32: gCL3 + * uint32: error code + * + * resource login request: + * uint32: gCL4 + * uint32: proto version + * string: cluster ID + * string: resource name + * uint32: options + * login reply (gCL1) is sent in return. + * + * beat req + * uint32: gCB0 + * string: My Name + * beat rpl + * uint32: gCB1 + * uint32: error code + * + * Membership Request + * uint32: gCMA + * string: node name + * + * Membership update + * uint32: gCMU + * string: node name + * IPv6: IP + * uint8: Current State + * + * Membership list request info. + * uint32: gCMl + * + * Membership list info. + * uint32: gCML + * list_start_marker + * string: node name + * IPv6: IP + * uint8: state + * uint8: laststate + * uint8: mode (S/P/A/M/C) + * uint32: missed beats + * uint64: last beat + * uint64: delay avg + * uint64: max delay + * list_stop_marker + * + * Request Resource info + * uint32: gCR0 + * + * Resource list info + * uint32: gCR1 + * list_start_marker + * string: name + * list_stop_marker + * + * Force node into Expired: + * uint32: gCFE + * string: node name + * + * Core state request: + * uint32: gCSR + * + * Core state changes: + * uint32: gCSC + * uint8: state (slave, pending, arbitrating, master) + * If state == Slave, then the next two will follow. 
+ * IPv6: MasterIP
+ * string: MasterName
+ *
+ * Core shutdown req:
+ * uint32: gCSD
+ *
+ * Switch core from current state into Pending:
+ * uint32: gCSP
+ *
+ */
+#define gulm_core_login_req (0x67434c00) /* gCL0 */
+#define gulm_core_login_rpl (0x67434c01) /* gCL1 */
+#define gulm_core_logout_req (0x67434c02) /* gCL2 */
+#define gulm_core_logout_rpl (0x67434c03) /* gCL3 */
+#define gulm_core_reslgn_req (0x67434c04) /* gCL4 */
+#define gulm_core_beat_req (0x67434200) /* gCB0 */
+#define gulm_core_beat_rpl (0x67434201) /* gCB1 */
+#define gulm_core_mbr_req (0x67434d41) /* gCMA */
+#define gulm_core_mbr_updt (0x67434d55) /* gCMU */
+#define gulm_core_mbr_lstreq (0x67434d6c) /* gCMl */
+#define gulm_core_mbr_lstrpl (0x67434d4c) /* gCML */
+#define gulm_core_mbr_force (0x67434645) /* gCFE */
+#define gulm_core_res_req (0x67435200) /* gCR0 */
+#define gulm_core_res_list (0x67435201) /* gCR1 */
+#define gulm_core_state_req (0x67435352) /* gCSR */
+#define gulm_core_state_chgs (0x67435343) /* gCSC */
+#define gulm_core_shutdown (0x67435344) /* gCSD */
+#define gulm_core_forcepend (0x67435350) /* gCSP */
+
+/* in the st field */
+#define gio_Mbr_Logged_in (0x05)
+#define gio_Mbr_Logged_out (0x06)
+#define gio_Mbr_Expired (0x07)
+#define gio_Mbr_Killed (0x08)
+#define gio_Mbr_OM_lgin (0x09)
+
+/* in the ama field */
+#define gio_Mbr_ama_Slave (0x01)
+#define gio_Mbr_ama_Master (0x02)
+#define gio_Mbr_ama_Pending (0x03)
+#define gio_Mbr_ama_Arbitrating (0x04)
+#define gio_Mbr_ama_Resource (0x05)
+#define gio_Mbr_ama_Client (0x06)
+/* the Client entry is ONLY for mode tracking.
+ * nodelist reply is the only place it is used.
+ */
+
+/* options that affect behaviors on services. (resources) */
+#define gulm_svc_opt_important (0x00000001)
+
+/********************* Info Traffic *****************
+ *
+ * Note that for many of these, they can be sent to all of the servers and
+ * will get sane replies. Some of these can only be sent to specific
+ * servers.
+ *
+ * stats req:
+ * uint32: gIS0
+ * stats rpl:
+ * uint32: gIS1
+ * list start:
+ * string: key
+ * string: value
+ * list stop:
+ * Notes:
+ * The stats reply is a set of string pairs. This way the server can send
+ * whatever things it wants, and the same client code will work for
+ * anything.
+ *
+ * set verbosity:
+ * uint32: gIV0
+ * string: verb flags (with -/+) to [un]set
+ * Note:
+ * We don't bother with a reply for this. If the server got it, it works.
+ * If it didn't, it cannot send an error back anyway.
+ *
+ * close socket:
+ * uint32: gSC0
+ * Note:
+ * Tells the server to close this connection cleanly. We're done with
+ * it. This is *not* the same as logging out. You must log in before you
+ * can log out. And many commands sent from gulm_tool happen without
+ * logging in. These commands would be useful for clients in many cases,
+ * so I don't want to put a close at the end of them, but if I don't,
+ * there will be error messages printed on the console when gulm_tool
+ * calls them.
+ * So we need a way to close a connection cleanly that has not
+ * logged in.
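+ *
+ * (An aside, not part of the wire spec itself: every opcode in this file
+ * is just its four tag bytes packed big-endian into a uint32 -- letters
+ * as their ASCII bytes, a trailing digit as the raw number. Roughly:
+ *
+ *    #define GIO_OP(a,b,c,d) (((a)<<24)|((b)<<16)|((c)<<8)|(d))
+ *    GIO_OP('g','C','L',0)   == 0x67434c00 == gulm_core_login_req
+ *    GIO_OP('g','C','M','A') == 0x67434d41 == gulm_core_mbr_req
+ *
+ * GIO_OP is a made-up helper; the header just spells the values out.)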
+ * + * request slave list: + * uint32: gIL0 + * slave list replay: + * uint32: gIL1 + * list start: + * string: name + * uint32: poller idx + * list stop: + */ +#define gulm_info_stats_req (0x67495300) /* gIS0 */ +#define gulm_info_stats_rpl (0x67495301) /* gIS1 */ +#define gulm_info_set_verbosity (0x67495600) /* gIV0 */ +#define gulm_socket_close (0x67534300) /* gSC0 */ +#define gulm_info_slave_list_req (0x67494c00) /* gIL0 */ +#define gulm_info_slave_list_rpl (0x67494c01) /* gIL1 */ + +/********************* Lock Traffic ***************** + * All lock traffic. + * + * login req: + * uint32: gLL0 + * uint32: proto version + * string: node name + * uint8: Client/Slave + * login rpl: + * uint32: gLL1 + * uint32: error code + * uint8: Slave/Master + * xdr of current lock state if no errors and master sending reply + * and you're a slave. + * + * logout req: + * uint32: gLL2 + * logout rpl: + * uint32: gLL3 + * + * select lockspace: + * uint32: gLS0 + * raw: usually just four bytes for lockspace name. + * but can be most anything. + * + * lock req: + * uint32: gLR0 + * raw: key + * uint8: state + * uint32: flags + * raw: lvb -- Only exists if hasLVB flag is true. + * lock rpl: + * uint32: gLR1 + * raw: key + * uint8: state + * uint32: flags + * uint32: error code + * raw: lvb -- Only exists if hasLVB flag is true. + * + * lock state update: + * uint32: gLRU + * string: node name + * raw: key + * uint8: state + * uint32: flags + * raw: lvb -- Only exists if hasLVB flag is true. + * + * Action req: + * uint32: gLA0 + * raw: key + * uint8: action + * raw: lvb -- Only exists if action is SyncLVB + * Action Rpl: + * uint32: gLA1 + * raw: key + * uint8: action + * uint32: error code + * + * Action update: + * uint32: gLAU + * string: node name + * raw: key + * uint8: action + * raw: lvb -- Only exists if action is SyncLVB + * + * Slave Update Rply: -- for both actions and requests. + * uint32: gLUR + * raw: key + * + * Drop lock Callback: + * uint32: gLC0 + * raw: key + * uint8: state + * + * Drop all locks callback: This is the highwater locks thing + * uint32: gLC2 + * + * Drop expired locks: + * uint32: gLEO + * string: node name if NULL, then drap all exp for mask. + * raw: keymask if keymask & key == key, then dropexp on this lock. 
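+ *
+ * (Illustrative sketch, not from the original text: my reading of the
+ * keymask rule above is a byte-wise AND, i.e. a lock matches when every
+ * bit set in its key is also set in the mask. Something like:
+ *
+ *    int dropexp_match (uint8_t * mask, uint8_t * key, uint16_t len)
+ *    {
+ *       uint16_t i;
+ *       for (i = 0; i < len; i++)
+ *          if ((mask[i] & key[i]) != key[i])
+ *             return 0;
+ *       return 1;
+ *    }
+ *
+ * The real test lives in the gulm server, which is not part of this
+ * patch.)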
+ * + * Lock list req: + * uint32: gLD0 + * Lock list rpl: + * uint32: gLD1 + * list start mark + * uint8: key length + * raw: key + * uint8: state + * uint8: lvb length + * if lvb length > 0, raw: LVB + * uint32: Holder count + * list start mark + * string: holders + * list stop mark + * uint32: LVB holder count + * list start mark + * string: LVB Holders + * list stop mark + * uint32: Expired holder count + * list start mark + * string: ExpHolders + * list stop mark + * list stop mark + * + */ +#define gulm_lock_login_req (0x674C4C00) /* gLL0 */ +#define gulm_lock_login_rpl (0x674C4C01) /* gLL1 */ +#define gulm_lock_logout_req (0x674C4C02) /* gLL2 */ +#define gulm_lock_logout_rpl (0x674C4C03) /* gLL3 */ +#define gulm_lock_sel_lckspc (0x674C5300) /* gLS0 */ +#define gulm_lock_state_req (0x674C5200) /* gLR0 */ +#define gulm_lock_state_rpl (0x674C5201) /* gLR1 */ +#define gulm_lock_state_updt (0x674C5255) /* gLRU */ +#define gulm_lock_action_req (0x674C4100) /* gLA0 */ +#define gulm_lock_action_rpl (0x674C4101) /* gLA1 */ +#define gulm_lock_action_updt (0x674C4155) /* gLAU */ +#define gulm_lock_update_rpl (0x674c5552) /* gLUR */ +#define gulm_lock_cb_state (0x674C4300) /* gLC0 */ +#define gulm_lock_cb_dropall (0x674C4302) /* gLC2 */ +#define gulm_lock_drop_exp (0x674C454F) /* gLEO */ +#define gulm_lock_dump_req (0x674c4400) /* gLD0 */ +#define gulm_lock_dump_rpl (0x674c4401) /* gLD1 */ +#define gulm_lock_rerunqueues (0x674c5152) /* gLQR */ + +/* marks for the login */ +#define gio_lck_st_Slave (0x00) +#define gio_lck_st_Client (0x01) + +/* state change requests */ +#define gio_lck_st_Unlock (0x00) +#define gio_lck_st_Exclusive (0x01) +#define gio_lck_st_Deferred (0x02) +#define gio_lck_st_Shared (0x03) +/* actions */ +#define gio_lck_st_Cancel (0x09) +#define gio_lck_st_HoldLVB (0x0b) +#define gio_lck_st_UnHoldLVB (0x0c) +#define gio_lck_st_SyncLVB (0x0d) + +/* flags */ +#define gio_lck_fg_Do_CB (0x00000001) +#define gio_lck_fg_Try (0x00000002) +#define gio_lck_fg_Any (0x00000004) +#define gio_lck_fg_NoExp (0x00000008) +#define gio_lck_fg_hasLVB (0x00000010) +#define gio_lck_fg_Cachable (0x00000020) +#define gio_lck_fg_Piority (0x00000040) + +#endif /*__gio_wiretypes_h__*/ +/* vim: set ai cin et sw=3 ts=3 : */ diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm.h linux-patched/fs/gfs_locking/lock_gulm/gulm.h --- linux-orig/fs/gfs_locking/lock_gulm/gulm.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm.h 2004-06-16 12:03:21.957894998 -0500 @@ -0,0 +1,288 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
+**
+*******************************************************************************
+******************************************************************************/
+
+#ifndef GULM_DOT_H
+#define GULM_DOT_H
+
+#define GULM_RELEASE_NAME "v6.0.0"
+
+#ifdef MODVERSIONS
+#include
+#endif /* MODVERSIONS */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifndef TRUE
+#define TRUE (1)
+#endif
+
+#ifndef FALSE
+#define FALSE (0)
+#endif
+
+#if (BITS_PER_LONG == 64)
+#define PRIu64 "lu"
+#define PRId64 "ld"
+#define PRIo64 "lo"
+#define PRIx64 "lx"
+#define PRIX64 "lX"
+#define SCNu64 "lu"
+#define SCNd64 "ld"
+#define SCNo64 "lo"
+#define SCNx64 "lx"
+#define SCNX64 "lX"
+#else
+#define PRIu64 "Lu"
+#define PRId64 "Ld"
+#define PRIo64 "Lo"
+#define PRIx64 "Lx"
+#define PRIX64 "LX"
+#define SCNu64 "Lu"
+#define SCNd64 "Ld"
+#define SCNo64 "Lo"
+#define SCNx64 "Lx"
+#define SCNX64 "LX"
+#endif
+
+#include
+
+#undef MAX
+#define MAX(a,b) ((a>b)?a:b)
+
+#undef MIN
+#define MIN(a,b) ((a<b)?a:b)
+
+#include
+
+#include "gulm_prints.h"
+
+#include "libgulm.h"
+
+#include "handler.h"
+
+/* Some fixed length constants.
+ * Some of these should be made dynamic in size in the future.
+ */
+#define GIO_KEY_SIZE (46)
+#define GIO_LVB_SIZE (32)
+#define GIO_NAME_SIZE (32)
+#define GIO_NAME_LEN (GIO_NAME_SIZE-1)
+
+/* What we know about this filesystem */
+struct gulm_fs_s {
+ struct list_head fs_list;
+ char fs_name[GIO_NAME_SIZE]; /* lock table name */
+
+ lm_callback_t cb; /* file system callback function */
+ lm_fsdata_t *fsdata; /* private file system data */
+
+ callback_qu_t cq;
+
+ uint32_t fsJID;
+ uint32_t lvb_size;
+
+ struct semaphore get_lock; /* I am not 100% sure this is needed.
+ * But it only hurts performance,
+ * not correctness if it is
+ * useless. Sometime post52, need
+ * to investigate.
+ */
+
+ /* Stuff for the first mounter lock and state */
+ int firstmounting;
+ /* the recovery done func needs to behave slightly differently when we are
+ * the first node in an fs.
+ */
+
+ void *mountlock; /* this lock holds the Firstmounter state of the FS */
+ /* this is because all lock traffic is async, and really at this point
+ * in time we want a sync behavior, so I'm left with doing something to
+ * achieve that.
+ *
+ * this works, but it is crufty, and I don't want to build a huge
+ * queuing system for one lock that we touch twice at the beginning and
+ * once on the end.
+ *
+ * I should change the firstmounter lock to work like the journal locks
+ * and the node locks do. Things are a lot cleaner now with the libgulm
+ * interface than before. (when the firstmounter lock code was written)
+ */
+ struct completion sleep;
+
+ /* Stuff for JID mapping locks */
+ uint32_t JIDcount; /* how many JID locks are there. */
+};
+typedef struct gulm_fs_s gulm_fs_t;
+
+/* What we know about each locktable.
+ * only one nowadays. (the LTPX)
+ * */
+typedef struct lock_table_s {
+ uint32_t magic_one;
+
+ int running;
+ struct task_struct *recver_task;
+ struct completion startup;
+ struct semaphore sender;
+
+ struct task_struct *sender_task;
+ wait_queue_head_t send_wchan;
+ spinlock_t queue_sender;
+ struct list_head to_be_sent;
+
+ int hashbuckets;
+ spinlock_t *hshlk;
+ struct list_head *lkhsh;
+
+ /* stats
+ * it may be wise to make some of these into atomic numbers.
+ * or something. or not.
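+ * (one observation: unmark_and_release_lock() in gulm_lt.c decrements
+ * locks_total and locks_unl after dropping the bucket spinlock, so
+ * these counters can drift under load; they look like reporting
+ * counters, not something correctness depends on.)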
+ * */ + uint32_t locks_total; + uint32_t locks_unl; + uint32_t locks_exl; + uint32_t locks_shd; + uint32_t locks_dfr; + uint32_t locks_lvbs; + atomic_t locks_pending; + /* cannot count expired here. clients don't know this */ + + uint32_t lops; /* just incr on each op */ + +} lock_table_t; + +typedef struct gulm_cm_s { + uint8_t myName[64]; + uint8_t clusterID[256]; /* doesn't need to be 256. */ + uint8_t loaded; /* True|False whether we grabbed the config data */ + uint8_t starts; + + uint32_t handler_threads; /* howmany to have */ + uint32_t verbosity; + + uint64_t GenerationID; + + lock_table_t ltpx; + + gulm_interface_p hookup; + +} gulm_cm_t; + +/* things about each lock. */ +typedef struct gulm_lock_s { + struct list_head gl_list; + atomic_t count; + + uint32_t magic_one; + gulm_fs_t *fs; /* which filesystem we belong to. */ + uint8_t key[GIO_KEY_SIZE]; + uint16_t keylen; + uint8_t last_suc_state; /* last state we succesfully got. */ + char *lvb; + + /* this is true when there is a lock request sent out for this lock. + * All it really means is that if we've lost the master, and reconnect + * to another, this lock needs to have it's request resent. + * + * This now has two stages. Since a lock could be pending, but still in + * the send queue. So we don't want to resend requests that haven't + * been sent yet. + * + * we don't handle the master losses here any more. LTPX does that for + * us. Should consider removing the dupicated code then. + */ + int actuallypending; /* may need to be atomic */ + int in_to_be_sent; + + enum { glck_nothing, glck_action, glck_state } req_type; + /* these three for the lock req. We save them here so we can rebuild + * the lock request if there was a server failover. (?still needed?) + */ + unsigned int cur_state; + unsigned int req_state; + unsigned int flags; + + /* these three for actions. First is the action, next is result, last is + * what threads wait on for the reply. + */ + int action; + int result; /* ok, both are using this. 
*/ + struct completion actsleep; + +} gulm_lock_t; + +/*****************************************************************************/ +/* cross pollenate prototypes */ + +/* from gulm_lt.c */ +void lt_logout (void); +int lt_login (void); +int get_mount_lock (gulm_fs_t * fs, int *first); +int downgrade_mount_lock (gulm_fs_t * fs); +int drop_mount_lock (gulm_fs_t * fs); +int send_drop_all_exp (lock_table_t * lt); +int send_drop_exp (gulm_fs_t * fs, lock_table_t * lt, char *name); + +/*from gulm_core.c */ +void cm_logout (void); +int cm_login (void); +void delete_ipnames (struct list_head *namelist); + +/* from gulm_fs.c */ +void init_gulm_fs (void); +void request_journal_replay (uint8_t * name); +void passup_droplocks (void); +gulm_fs_t *get_fs_by_name (uint8_t * name); +void dump_internal_lists (void); +void gulm_recovery_done (lm_lockspace_t * lockspace, + unsigned int jid, unsigned int message); +void gulm_unmount (lm_lockspace_t * lockspace); +void gulm_others_may_mount (lm_lockspace_t * lockspace); +int gulm_mount (char *table_name, char *host_data, + lm_callback_t cb, lm_fsdata_t * fsdata, + unsigned int min_lvb_size, struct lm_lockstruct *lockstruct); + +extern struct lm_lockops gulm_ops; + +#endif /* GULM_DOT_H */ +/* vim: set ai cin noet sw=8 ts=8 : */ diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_core.c linux-patched/fs/gfs_locking/lock_gulm/gulm_core.c --- linux-orig/fs/gfs_locking/lock_gulm/gulm_core.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm_core.c 2004-06-16 12:03:21.957894998 -0500 @@ -0,0 +1,255 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include "gulm.h" + +#include +#include +#include +#include +#define __KERNEL_SYSCALLS__ +#include + +#include "util.h" +#include "utils_tostr.h" + +extern gulm_cm_t gulm_cm; + +/* private vars. */ +int cm_thd_running; +struct completion cm_thd_startup; +struct task_struct *cm_thd_task; + +/** + */ +int +gulm_core_login_reply (void *misc, uint64_t gen, uint32_t error, + uint32_t rank, uint8_t corestate) +{ + if (error != 0) { + log_err ("Core returned error %d:%s.\n", error, + gio_Err_to_str (error)); + cm_thd_running = FALSE; + return error; + } + + if( gulm_cm.GenerationID != 0 ) { + GULM_ASSERT(gulm_cm.GenerationID == gen, + printk("us: %"PRIu64" them: %"PRIu64"\n", + gulm_cm.GenerationID,gen); + ); + } + gulm_cm.GenerationID = gen; + + error = lt_login (); + if (error != 0) { + log_err ("lt_login failed. %d\n", error); + lg_core_logout (gulm_cm.hookup); /* XXX is this safe? 
*/ + return error; + } + + log_msg (lgm_Network2, "Logged into local core.\n"); + + return 0; +} + +/** + * gulm_core_logout_reply - + * @misc: + * + * + * Returns: int + */ +int +gulm_core_logout_reply (void *misc) +{ + log_msg (lgm_Network2, "Logged out of local core.\n"); + return 0; +} + +/** + */ +int +gulm_core_nodechange (void *misc, char *nodename, + struct in6_addr *nodeip, uint8_t nodestate) +{ + if (nodestate == lg_core_Fenced) { + request_journal_replay (nodename); + } + /* if me and state is logout, Need to close out things if we can. + */ + if (gulm_cm.starts && nodestate == lg_core_Logged_out && + strcmp(gulm_cm.myName, nodename) == 0 ) { + lt_logout(); + cm_thd_running = FALSE; + lg_core_logout (gulm_cm.hookup); + return -1; + } + return 0; +} + +int gulm_core_statechange (void *misc, uint8_t corestate, + struct in6_addr *masterip, char *mastername) +{ + int *cst = (int *)misc; + if( misc != NULL ) { + if( corestate != lg_core_Slave && + corestate != lg_core_Master ) { + *cst = TRUE; + }else{ + *cst = FALSE; + } + } + return 0; +} + +/** + */ +int +gulm_core_error (void *misc, uint32_t err) +{ + log_err ("Got error code %d %#x back fome some reason!\n", err, err); + return 0; +} + +static lg_core_callbacks_t core_cb = { + login_reply:gulm_core_login_reply, + logout_reply:gulm_core_logout_reply, + nodechange:gulm_core_nodechange, + statechange:gulm_core_statechange, + error:gulm_core_error +}; + +/** + * cm_io_recving_thread - + * @data: + * + * + * Returns: int + */ +int +cm_io_recving_thread (void *data) +{ + int err; + + daemonize ("gulm_res_recvd"); + cm_thd_task = current; + complete (&cm_thd_startup); + + while (cm_thd_running) { + err = lg_core_handle_messages (gulm_cm.hookup, &core_cb, NULL); + if (err != 0) { + log_err + ("Got an error in gulm_res_recvd err: %d\n", err); + if (!cm_thd_running) + break; + /* + * Pause a bit, then try to log back into the local + * lock_gulmd. Keep doing this until an outside force + * stops us. (which I don't think there is any at this + * point. forceunmount would be one, if we ever do + * that.) + * + * If we are still in the gulm_mount() function, we + * should not retry. We should just exit. + */ + current->state = TASK_INTERRUPTIBLE; + schedule_timeout (3 * HZ); + + while ((err = + lg_core_login (gulm_cm.hookup, TRUE)) != 0) { + log_err + ("Got a %d trying to login to lock_gulmd. Is it running?\n", + err); + current->state = TASK_INTERRUPTIBLE; + schedule_timeout (3 * HZ); + } + } + } /* while( gulm_cm.cm_thd_running ) */ + + complete (&cm_thd_startup); + return 0; +} + +/** + * cm_logout - + */ +void +cm_logout (void) +{ + + if (cm_thd_running) { + cm_thd_running = FALSE; + lg_core_logout (gulm_cm.hookup); + + /* wait for thread to finish */ + wait_for_completion (&cm_thd_startup); + } + +} + +/** + * cm_login - + * + * Returns: int + */ +int +cm_login (void) +{ + int err = -1; + int cst=TRUE; + + cm_thd_running = FALSE; + init_completion (&cm_thd_startup); + + err = lg_core_login (gulm_cm.hookup, TRUE); + if (err != 0) { + log_err + ("Got a %d trying to login to lock_gulmd. Is it running?\n", + err); + goto exit; + } + /* handle login reply. which will start the lt thread. 
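+	 * (That call handles messages until the gCL1 login reply has
+	 * been processed; gulm_core_login_reply() above then runs
+	 * lt_login(), so a clean return here should mean both the core
+	 * and lock-table logins finished.)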
*/ + err = lg_core_handle_messages (gulm_cm.hookup, &core_cb, NULL); + if (err != 0) { + goto exit; + } + + /* do not pass go until Slave(client) or Master */ + while(cst) { + lg_core_corestate(gulm_cm.hookup); + err = lg_core_handle_messages (gulm_cm.hookup, &core_cb, &cst); + if (err != 0) { + goto exit; + } + if(cst) { + current->state = TASK_INTERRUPTIBLE; + schedule_timeout (3 * HZ); + /* if interrupted, exit */ + } + } + + /* start recver thread. */ + cm_thd_running = TRUE; + err = kernel_thread (cm_io_recving_thread, NULL, 0); + if (err < 0) { + log_err ("Failed to start gulm_res_recvd. (%d)\n", err); + goto exit; + } + wait_for_completion (&cm_thd_startup); + + err = 0; + exit: + return err; +} +/* vim: set ai cin noet sw=8 ts=8 : */ diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_fs.c linux-patched/fs/gfs_locking/lock_gulm/gulm_fs.c --- linux-orig/fs/gfs_locking/lock_gulm/gulm_fs.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm_fs.c 2004-06-16 12:03:21.957894998 -0500 @@ -0,0 +1,613 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include "gulm.h" + +#include +#include +#include +#include +#define __KERNEL_SYSCALLS__ +#include + +#include "util.h" +#include "load_info.h" +#include "handler.h" +#include "gulm_procinfo.h" +#include "gulm_jid.h" + +/* things about myself */ +extern gulm_cm_t gulm_cm; + +/* globals for this file.*/ +uint32_t filesystems_count = 0; +LIST_HEAD (filesystems_list); +struct semaphore filesystem_lck; /* we use a sema instead of a spin here because + * all of the interruptible things we do inside + * of it. + * If i stop doing nasty things within this it doesn't need + * to be a sema. + */ +struct semaphore start_stop_lock; +atomic_t start_stop_cnt; + +/** + * init_gulm_fs - + */ +void +init_gulm_fs (void) +{ + init_MUTEX (&filesystem_lck); + init_MUTEX (&start_stop_lock); + atomic_set (&start_stop_cnt, 0); +} + +/*****************************************************************************/ +struct rjrpf_s { + gulm_fs_t *fs; + uint8_t *name; +}; + +void +request_journal_replay_per_fs (void *d) +{ + struct rjrpf_s *rf = (struct rjrpf_s *) d; + uint32_t jid; + unsigned int ujid; + + /* lookup jid <=> name mapping */ + if (find_jid_by_name_and_mark_replay (rf->fs, rf->name, &jid) != 0) { + log_msg (lgm_JIDMap, + "In fs (%s), no jid for name (%s) was found.\n", + rf->fs->fs_name, rf->name); + } else { + log_msg (lgm_JIDMap, + "In fs (%s), jid %d was found for name (%s).\n", + rf->fs->fs_name, jid, rf->name); + + /* all that the replay journal call back into gfs does is malloc + * some memory and add it to a list. So we really don't need to + * queue that action. Since that is what gfs is doing. + * + * This will need to change if gfs changes. + * + * Basically, we assume that the callback is non-blocking. 
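+	 *
+	 * To illustrate (sketch only -- the real callback lives in GFS
+	 * proper, not in this patch, and note_jid_for_replay is a
+	 * made-up name): a conforming callback does nothing more than
+	 * stash the jid and return, roughly:
+	 *
+	 *    void cb (lm_fsdata_t * fsdata, unsigned int type, void *data)
+	 *    {
+	 *       if (type == LM_CB_NEED_RECOVERY)
+	 *          note_jid_for_replay (fsdata, *(unsigned int *) data);
+	 *    }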
+ */ + ujid = jid; + rf->fs->cb (rf->fs->fsdata, LM_CB_NEED_RECOVERY, &ujid); + } + + kfree (rf->name); + kfree (rf); + +} + +/** + * request_journal_replay - give a journal replay request to mounted filesystems + * @name: < the name of the node that died. + * + * + * Returns: void + */ +void +request_journal_replay (uint8_t * name) +{ + struct list_head *tmp; + gulm_fs_t *fs; + struct rjrpf_s *rf; + + log_msg (lgm_Always, "Checking for journals for node \"%s\"\n", + name); + + down (&filesystem_lck); + + list_for_each (tmp, &filesystems_list) { + fs = list_entry (tmp, gulm_fs_t, fs_list); + + /* we don't want to process replay requests when we are + * still in the first mounter state. All the journals are + * getting replayed anyways, and there could be some issue + * with stuff happening twice. + */ + if (fs->firstmounting) + continue; + + /* due to the way the new jid mapping code works, we had to + * move it out of here. + */ + + rf = kmalloc (sizeof (struct rjrpf_s), GFP_KERNEL); + GULM_ASSERT (rf != NULL,); + + rf->fs = fs; + rf->name = kmalloc (strlen (name) + 1, GFP_KERNEL); + GULM_ASSERT (rf->name != NULL,); + memcpy (rf->name, name, strlen (name) + 1); + + qu_function_call (&fs->cq, request_journal_replay_per_fs, rf); + + } + up (&filesystem_lck); +} + +/** + * passup_droplocks - + */ +void +passup_droplocks (void) +{ + struct list_head *tmp; + gulm_fs_t *fs; + down (&filesystem_lck); + list_for_each (tmp, &filesystems_list) { + fs = list_entry (tmp, gulm_fs_t, fs_list); + qu_drop_req (&fs->cq, fs->cb, fs->fsdata, LM_CB_DROPLOCKS, 0, + 0); + /* If this decides to block someday, we need to change this function. + */ + } + up (&filesystem_lck); +} + +/** + * dump_internal_lists - + * + */ +void +dump_internal_lists (void) +{ + struct list_head *tmp; + gulm_fs_t *fs; + down (&filesystem_lck); + list_for_each (tmp, &filesystems_list) { + fs = list_entry (tmp, gulm_fs_t, fs_list); + log_msg (lgm_Always, "Handler queue for %s\n", fs->fs_name); + display_handler_queue (&fs->cq); + /* other lists? */ + } + up (&filesystem_lck); +} + +/** + * get_fs_by_name - + * @name: + * + * + * Returns: gulm_fs_t + */ +gulm_fs_t * +get_fs_by_name (uint8_t * name) +{ + struct list_head *tmp; + gulm_fs_t *fs = NULL; + down (&filesystem_lck); + list_for_each (tmp, &filesystems_list) { + fs = list_entry (tmp, gulm_fs_t, fs_list); + if (strcmp (name, fs->fs_name) == 0) { + up (&filesystem_lck); + return fs; + } + } + up (&filesystem_lck); + return NULL; +} + +/*****************************************************************************/ + +/** + * clear_locks - + * + * quick check to see if there was leaking + * should I panic on these? or just complain? + * + * Returns: void + */ +void +clear_locks (void) +{ + int i; + lock_table_t *lt = &gulm_cm.ltpx; + + for (i = 0; i < lt->hashbuckets; i++) { + struct list_head *lcktmp, *lckfoo; + spin_lock (<->hshlk[i]); + list_for_each_safe (lcktmp, lckfoo, <->lkhsh[i]) { + gulm_lock_t *lck = NULL; + lck = list_entry (lcktmp, gulm_lock_t, gl_list); + /* need to relelase it. umm, should any even exist? */ + log_err ("AH! Rogue lock buffer! refcount:%d\n", + atomic_read (&lck->count)); + + if (lck->lvb) { + log_err ("AH! 
Rogue lock buffer with LVB!\n"); + kfree (lck->lvb); + } + + list_del (lcktmp); + kfree (lck); + + } + spin_unlock (<->hshlk[i]); + } + kfree (lt->hshlk); + lt->hshlk = NULL; + kfree (lt->lkhsh); + lt->lkhsh = NULL; +} + +/*****************************************************************************/ +/** + * start_gulm_threads - + * @host_data: + * + * + * Returns: int + */ +int +start_gulm_threads (char *csnm, char *host_data) +{ + int error = 0; + + down (&start_stop_lock); + atomic_inc (&start_stop_cnt); + if (atomic_read (&start_stop_cnt) == 1) { + /* first one. get stuff going */ + strncpy (gulm_cm.clusterID, csnm, 255); + gulm_cm.clusterID[255] = '\0'; + + error = lg_initialize (&gulm_cm.hookup, gulm_cm.clusterID, + "GFS Kernel Interface"); + if (error != 0) { + log_err ("lg_initialize failed, %d\n", error); + goto fail; + } + gulm_cm.starts = TRUE; + + error = load_info (host_data); + if (error != 0) { + log_err ("load_info failed. %d\n", error); + goto fail; + } + + jid_init (); + + error = cm_login (); + if (error != 0) { + log_err ("cm_login failed. %d\n", error); + goto fail; + } + + /* lt_login() is called after the success packet for cm_login() + * returns. + */ + } + fail: + up (&start_stop_lock); + return error; +} + +/** + * stop_gulm_threads - + */ +void +stop_gulm_threads (void) +{ + down (&start_stop_lock); + atomic_dec (&start_stop_cnt); + if (atomic_read (&start_stop_cnt) == 0) { + /* last one, put it all away. */ + lt_logout (); + cm_logout (); + clear_locks (); + lg_release (gulm_cm.hookup); + gulm_cm.hookup = NULL; + gulm_cm.loaded = FALSE; + gulm_cm.GenerationID = 0; + } + up (&start_stop_lock); +} + +/*****************************************************************************/ + +/** + * gulm_mount + * @table_name: clusterID:FS_Name + * @host_data: + * @cb: GFS callback function + * @fsdata: opaque GFS handle + * @lockstruct: the structure of crap to fill in + * + * Returns: 0 on success, -EXXX on failure + */ +int +gulm_mount (char *table_name, char *host_data, + lm_callback_t cb, lm_fsdata_t * fsdata, + unsigned int min_lvb_size, struct lm_lockstruct *lockstruct) +{ + gulm_fs_t *gulm; + char work[256], *tbln; + int first; + int error = -1; + struct list_head *lltmp; + + strncpy (work, table_name, 256); + + tbln = strstr (work, ":"); + if (tbln == NULL) { + log_err + ("Malformed table name. Couldn't find separator ':' between " + "clusterID and lockspace name.\n"); + error = -1; + goto fail; + } + *tbln++ = '\0'; + + /* make sure that the cluster name exists. */ + if (strlen (work) <= 0) { + log_err ("Cluster name \"%s\" is too short.\n", work); + error = -EPROTO; + goto fail; + } + if (strlen (work) > 16) { + log_err ("Cluster name \"%s\" is too long.\n", work); + error = -EPROTO; + goto fail; + } + + /* the second one is an artifact of the way I use the name. + * A better fix to this will happen when I actually get dynamic key + * lengths working. + */ + if (strlen (tbln) > MIN (GIO_NAME_LEN, (GIO_KEY_SIZE - 13))) { + log_err + ("Warning! 
lockspace name (%s) is longer than %d chars!\n", + tbln, MIN (GIO_NAME_LEN, (GIO_KEY_SIZE - 13))); + error = -EPROTO; + goto fail; + } + if (strlen (tbln) <= 0) { + log_err ("Table name \"%s\" is too short.\n", tbln); + error = -EPROTO; + goto fail; + } + + /* Check to make sure this lock table isn't already being used */ + down (&filesystem_lck); + list_for_each (lltmp, &filesystems_list) { + gulm = list_entry (lltmp, gulm_fs_t, fs_list); + if (!strncmp (gulm->fs_name, tbln, GIO_NAME_LEN)) { + log_err ("\"%s\" is already in use\n", tbln); + error = -EEXIST; + up (&filesystem_lck); + goto fail; + } + } + up (&filesystem_lck); + + /* Set up our main structure */ + + gulm = kmalloc (sizeof (gulm_fs_t), GFP_KERNEL); + if (!gulm) { + log_err ("out of memory\n"); + error = -ENOMEM; + goto fail; + } + memset (gulm, 0, sizeof (gulm_fs_t)); + + INIT_LIST_HEAD (&gulm->fs_list); + + strncpy (gulm->fs_name, tbln, GIO_NAME_LEN); + gulm->cb = cb; + gulm->fsdata = fsdata; + gulm->lvb_size = min_lvb_size; + init_completion (&gulm->sleep); + init_MUTEX (&gulm->get_lock); + + if ((error = start_gulm_threads (work, host_data)) != 0) { + log_err ("Got a %d trying to start the threads.\n", error); + goto fail_free_gulm; + } + + if ((error = + start_callback_qu (&gulm->cq, gulm_cm.handler_threads)) < 0) { + log_err ("fsid=%s: Failed to start the callback handler.\n", + gulm->fs_name); + goto fail_free_gulm; + } + + /* the mount lock HAS to be the first thing done in the LTs for this fs. */ + error = get_mount_lock (gulm, &first); + if (error != 0) { + log_err + ("fsid=%s: Error %d while trying to get the mount lock\n", + gulm->fs_name, error); + goto fail_callback; + } + + jid_lockstate_reserve (gulm, first); + jid_fs_init (gulm); + get_journalID (gulm); + + /* things act a bit different until the first mounter is finished. + */ + if (first) + gulm->firstmounting = TRUE; + + /* Success */ + down (&filesystem_lck); + list_add (&gulm->fs_list, &filesystems_list); + filesystems_count++; + up (&filesystem_lck); + + log_msg (lgm_JIDMap, "fsid=%s: We will be using jid %d\n", + gulm->fs_name, gulm->fsJID); + + if (add_to_proc (gulm) != 0) { + /* ignored for now */ + } + + lockstruct->ls_jid = gulm->fsJID; + lockstruct->ls_first = first; + lockstruct->ls_lvb_size = gulm->lvb_size; + lockstruct->ls_lockspace = gulm; + lockstruct->ls_ops = &gulm_ops; +#ifdef USE_SYNC_LOCKING + lockstruct->ls_flags = 0; + + log_msg (lgm_Network2, "Done: %s, sync mode\n", table_name); +#else + lockstruct->ls_flags = LM_LSFLAG_ASYNC; + + log_msg (lgm_Network2, "Done: %s, async mode\n", table_name); +#endif + + gulm_cm.starts = FALSE; + return 0; + + fail_callback: + stop_callback_qu (&gulm->cq); + + fail_free_gulm: + kfree (gulm); + stop_gulm_threads (); + + fail: + + gulm_cm.starts = FALSE; + log_msg (lgm_Always, "fsid=%s: Exiting gulm_mount with errors %d\n", + table_name, error); + return error; +} + +/** + * gulm_others_may_mount + * @lockspace: handle to specific lock space + * + * GFS calls this function if it was the first mounter after it's done + * checking all the journals. + * + */ +void +gulm_others_may_mount (lm_lockspace_t * lockspace) +{ + gulm_fs_t *fs = (gulm_fs_t *) lockspace; + int err = 0; + lock_table_t *lt = &gulm_cm.ltpx; + + /* first send the drop all exp message. + * */ + err = send_drop_exp (fs, lt, NULL); + if (err < 0) + log_err + ("fsid=%s: Problems sending DropExp request to LTPX: %d\n", + fs->fs_name, err); + + /* then move the FirstMountLock to shared so others can mount. 
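+	 * (Presumably the other nodes' gulm_mount() calls sit in
+	 * get_mount_lock() waiting for our exclusive hold to go away;
+	 * the downgrade to shared is the cluster-wide signal that the
+	 * first mounter has finished replaying all of the journals.)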
*/ + err = downgrade_mount_lock (fs); + + if (err < 0) { + log_err ("fsid=%s: error sending Fs_FinMount_Req.(%d)\n", + fs->fs_name, err); + } + + /* first mounter is all done. let the gulm_recovery_done function + * behave as normal now. + */ + fs->firstmounting = FALSE; +} + +/** + * gulm_umount + * @lockspace: handle to specific lock space + * + */ +void +gulm_unmount (lm_lockspace_t * lockspace) +{ + gulm_fs_t *gulm_fs = (gulm_fs_t *) lockspace; + + down (&filesystem_lck); + list_del (&gulm_fs->fs_list); + --filesystems_count; + up (&filesystem_lck); + + /* close and release stuff */ + drop_mount_lock (gulm_fs); + put_journalID (gulm_fs); + jid_fs_release (gulm_fs); + jid_lockstate_release (gulm_fs); + + stop_callback_qu (&gulm_fs->cq); + + remove_from_proc (gulm_fs); + + kfree (gulm_fs); + + stop_gulm_threads (); + +} + +/** + * gulm_recovery_done - + * @lockspace: + * @jid: + * + * Returns: void + */ +void +gulm_recovery_done (lm_lockspace_t * lockspace, unsigned int jid, + unsigned int message) +{ + gulm_fs_t *fs = (gulm_fs_t *) lockspace; + int err; + uint8_t name[256]; + + if (message != LM_RD_SUCCESS) { + /* Need to start thinking about how I want to use this... */ + return; + } + + if (jid == fs->fsJID) { /* this may be drifting crud through. */ + /* hey! its me! */ + strncpy (name, gulm_cm.myName, 256); + } else if (lookup_name_by_jid (fs, jid, name) != 0) { + log_msg (lgm_JIDMap, + "fsid=%s: Could not find a client for jid %d\n", + fs->fs_name, jid); + return; + } + if (strlen (name) == 0) { + log_msg (lgm_JIDMap, "fsid=%s: No one mapped to jid %d\n", + fs->fs_name, jid); + return; + } + log_msg (lgm_JIDMap, "fsid=%s: Found %s for jid %d\n", + fs->fs_name, name, jid); + + err = send_drop_exp (fs, &gulm_cm.ltpx, name); + + if (jid != fs->fsJID) { + /* rather dumb to do this to ourselves right after we mount... */ + log_msg (lgm_JIDMap, + "fsid=%s: Clearing JID %d for use by others\n", + fs->fs_name, jid); + release_JID (fs, jid, FALSE); + } + + /* If someone died while replaying someoneelse's journal, there will be + * stale expired jids. + */ + check_for_stale_expires (fs); + +} +/* vim: set ai cin noet sw=8 ts=8 : */ diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_jid.c linux-patched/fs/gfs_locking/lock_gulm/gulm_jid.c --- linux-orig/fs/gfs_locking/lock_gulm/gulm_jid.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm_jid.c 2004-06-16 12:03:21.957894998 -0500 @@ -0,0 +1,806 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
+** +******************************************************************************* +******************************************************************************/ + +#include "gulm.h" + +#include +#include +#include +#include +#define __KERNEL_SYSCALLS__ +#include + +#include "util.h" + +extern gulm_cm_t gulm_cm; + +/****************************************************************************/ + +/* jid locks: + * + * Header lock: "JHeader" + \0\0\0 + fsname + * lvb: :number of JIDs + * Mappinglock: "JM" + + \0\0\0\0 + fsname + * lvb: [012] + + * 0: unused + * 1: replaying journal + * 2: Mounted + * list lock : "JL" + "listlock" + fsname + * Node Locks : "JN" + + fsname + * + */ +#define jid_header_lvb_size (8) + +struct jid_lookup_item_s { + struct list_head jp_list; + uint8_t *key; + uint16_t keylen; + uint8_t *lvb; + uint16_t lvblen; + struct completion waitforit; +}; +typedef struct jid_lookup_item_s jid_lookup_item_t; + +LIST_HEAD (jid_pending_locks); +spinlock_t jid_pending; +struct semaphore jid_listlock; + +/** + * jid_init - + */ +void +jid_init (void) +{ + spin_lock_init (&jid_pending); + init_MUTEX (&jid_listlock); +} + +/** + * jid_get_header_name - + * @fs: < + * @key: <> + * @keylen: <> + * + * key is buffer to write to, keylen is size of buffer on input, and real + * length on output. + * + * Returns: int + */ +int +jid_get_header_name (uint8_t * fsname, uint8_t * key, uint16_t * keylen) +{ + int len; + len = strlen (fsname); + if ((len + 11) > *keylen) + return -EINVAL; + memcpy (key, "JHeader\0\0\0", 10); + memcpy (&key[10], fsname, len + 1); + *keylen = len + 11; + return 0; +} + +int +jid_get_listlock_name (uint8_t * fsname, uint8_t * key, uint16_t * keylen) +{ + int len; + len = strlen (fsname); + if ((len + 11) > *keylen) + return -EINVAL; + memcpy (key, "JLlistlock", 10); + memcpy (&key[10], fsname, len + 1); + *keylen = len + 11; + return 0; +} + +/** + * jid_get_lock_name - + * @fs: < + * @jid: < + * @key: <> + * @keylen: <> + * + * key is buffer to write to, keylen is size of buffer on input, and real + * length on output. 
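+ *
+ * For a hypothetical fs "gfs1" and jid 3, the key built below is laid
+ * out like this (offsets on top; the jid sits little-endian in bytes
+ * 2..5):
+ *
+ *    0    1    2  3  4  5    6  7  8  9    10...
+ *    'J'  'M'  03 00 00 00   00 00 00 00   "gfs1" '\0'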
+ * + * Returns: int + */ +int +jid_get_lock_name (uint8_t * fsname, uint32_t jid, uint8_t * key, + uint16_t * keylen) +{ + int len; + len = strlen (fsname); + if ((len + 11) > *keylen) + return -EINVAL; + key[0] = 'J'; + key[1] = 'M'; + key[5] = (jid >> 24) & 0xff; + key[4] = (jid >> 16) & 0xff; + key[3] = (jid >> 8) & 0xff; + key[2] = (jid >> 0) & 0xff; + key[6] = 0; + key[7] = 0; + key[8] = 0; + key[9] = 0; + memcpy (&key[10], fsname, len + 1); + *keylen = len + 11; + return 0; +} + +/** + * jid_hold_lvb - + * @key: + * @keylen: + * + * + */ +void +jid_hold_lvb (uint8_t * key, uint16_t keylen) +{ + jid_lookup_item_t jp; + GULM_ASSERT (keylen > 6,); + jp.key = key; + jp.keylen = keylen; + jp.lvb = NULL; + jp.lvblen = 0; + INIT_LIST_HEAD (&jp.jp_list); + init_completion (&jp.waitforit); + + spin_lock (&jid_pending); + list_add (&jp.jp_list, &jid_pending_locks); + spin_unlock (&jid_pending); + + lg_lock_action_req (gulm_cm.hookup, key, keylen, lg_lock_act_HoldLVB, + NULL, 0); + + wait_for_completion (&jp.waitforit); +} + +void +jid_unhold_lvb (uint8_t * key, uint16_t keylen) +{ + jid_lookup_item_t jp; + GULM_ASSERT (keylen > 6,); + jp.key = key; + jp.keylen = keylen; + jp.lvb = NULL; + jp.lvblen = 0; + INIT_LIST_HEAD (&jp.jp_list); + init_completion (&jp.waitforit); + + spin_lock (&jid_pending); + list_add (&jp.jp_list, &jid_pending_locks); + spin_unlock (&jid_pending); + + lg_lock_action_req (gulm_cm.hookup, key, keylen, lg_lock_act_UnHoldLVB, + NULL, 0); + + wait_for_completion (&jp.waitforit); +} + +void +jid_sync_lvb (uint8_t * key, uint16_t keylen, uint8_t * lvb, uint16_t lvblen) +{ + jid_lookup_item_t jp; + GULM_ASSERT (keylen > 6,); + jp.key = key; + jp.keylen = keylen; + jp.lvb = NULL; + jp.lvblen = 0; + INIT_LIST_HEAD (&jp.jp_list); + init_completion (&jp.waitforit); + + spin_lock (&jid_pending); + list_add (&jp.jp_list, &jid_pending_locks); + spin_unlock (&jid_pending); + + lg_lock_action_req (gulm_cm.hookup, key, keylen, lg_lock_act_SyncLVB, + lvb, lvblen); + + wait_for_completion (&jp.waitforit); +} + +/** + * jid_action_reply - + * @key: + * @keylen: + * + * called from the lock handler callback. 
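+ *
+ * (This is the async-to-sync rendezvous used by the jid_* helpers
+ * above: the requester queues a jid_lookup_item_t keyed by lock name,
+ * fires the async request, and sleeps; this handler matches the key,
+ * unlinks the item and completes it. Schematically:
+ *
+ *    requester                        reply handler
+ *    ---------                        -------------
+ *    list_add (&jp.jp_list, ...)      find jp with matching key
+ *    lg_lock_action_req (...)         list_del (...)
+ *    wait_for_completion (...)        complete (&jp->waitforit)
+ * )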
+ * + * Returns: void + */ +void +jid_action_reply (uint8_t * key, uint16_t keylen) +{ + struct list_head *tmp, *nxt; + jid_lookup_item_t *jp, *fnd = NULL; + spin_lock (&jid_pending); + list_for_each_safe (tmp, nxt, &jid_pending_locks) { + jp = list_entry (tmp, jid_lookup_item_t, jp_list); + if (memcmp (key, jp->key, MIN (keylen, jp->keylen)) == 0) { + fnd = jp; + list_del (tmp); + break; + } + } + spin_unlock (&jid_pending); + + if (fnd != NULL) + complete (&fnd->waitforit); +} + +/** + * jid_get_lock_state_inr - + * @key: + * @keylen: + * @state: + * @flags: + * @lvb: + * @lvblen: + * + * + */ +void +jid_get_lock_state_inr (uint8_t * key, uint16_t keylen, uint8_t state, + uint32_t flags, uint8_t * lvb, uint16_t lvblen) +{ + jid_lookup_item_t jp; + GULM_ASSERT (keylen > 6,); + jp.key = key; + jp.keylen = keylen; + jp.lvb = lvb; + jp.lvblen = lvblen; + INIT_LIST_HEAD (&jp.jp_list); + init_completion (&jp.waitforit); + + spin_lock (&jid_pending); + list_add (&jp.jp_list, &jid_pending_locks); + spin_unlock (&jid_pending); + + lg_lock_state_req (gulm_cm.hookup, key, keylen, state, flags, lvb, lvblen); + + wait_for_completion (&jp.waitforit); +} + +/** + * jid_get_lock_state_lvb - + * @key: + * @keylen: + * @state: + * @lvb: + * @lvblen: + * + * + */ +void +jid_get_lock_state_lvb (uint8_t * key, uint16_t keylen, uint8_t state, + uint8_t * lvb, uint16_t lvblen) +{ + jid_get_lock_state_inr (key, keylen, state, 0, lvb, lvblen); +} +/** + * jid_get_lock_state - + * @key: + * @keylen: + * @state: + * + * + */ +void +jid_get_lock_state (uint8_t * key, uint16_t keylen, uint8_t state) +{ + jid_get_lock_state_inr (key, keylen, state, 0, NULL, 0); +} + +/** + * jid_state_reply - + * @key: + * @keylen: + * @lvb: + * @lvblen: + * + * + */ +void +jid_state_reply (uint8_t * key, uint16_t keylen, uint8_t * lvb, uint16_t lvblen) +{ + struct list_head *tmp, *nxt; + jid_lookup_item_t *jp, *fnd = NULL; + spin_lock (&jid_pending); + list_for_each_safe (tmp, nxt, &jid_pending_locks) { + jp = list_entry (tmp, jid_lookup_item_t, jp_list); + if (memcmp (key, jp->key, MIN (keylen, jp->keylen)) == 0) { + fnd = jp; + list_del (tmp); + break; + } + } + spin_unlock (&jid_pending); + + if (fnd != NULL) { + if (lvb != NULL && fnd->lvb != NULL) + memcpy (fnd->lvb, lvb, MIN (fnd->lvblen, lvblen)); + complete (&fnd->waitforit); + } +} + +/****************************************************************************/ + +/** + * jid_hold_list_lock - + * @fs: + * + * only make one call to this per node. 
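+ *
+ * Typical usage (see the callers below):
+ *
+ *    jid_hold_list_lock (fs);
+ *    ... scan or update the per-jid mapping locks ...
+ *    jid_release_list_lock (fs);
+ *
+ * The jid_listlock semaphore enforces the one-call-per-node rule
+ * locally; holding the "JL" lock exclusive is what serializes nodes
+ * cluster-wide.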
+ * + * Returns: void + */ +void +jid_hold_list_lock (gulm_fs_t * fs) +{ + uint8_t key[GIO_KEY_SIZE]; + uint16_t keylen; + + down (&jid_listlock); + + keylen = sizeof (key); + jid_get_listlock_name (fs->fs_name, key, &keylen); + jid_get_lock_state (key, keylen, lg_lock_state_Exclusive); + +} + +/** + * jid_release_list_lock - + * @fs: + * + * + * Returns: void + */ +void +jid_release_list_lock (gulm_fs_t * fs) +{ + uint8_t key[GIO_KEY_SIZE]; + uint16_t keylen; + + keylen = sizeof (key); + jid_get_listlock_name (fs->fs_name, key, &keylen); + jid_get_lock_state (key, keylen, lg_lock_state_Unlock); + + up (&jid_listlock); +} + +/** + * jid_rehold_lvbs - + * @fs: + * + * + */ +void +jid_rehold_lvbs (gulm_fs_t * fs) +{ + int i; + uint32_t oldjcnt; + uint8_t key[GIO_KEY_SIZE], lvb[jid_header_lvb_size]; + uint16_t keylen = GIO_KEY_SIZE; + + oldjcnt = fs->JIDcount; + + jid_get_header_name (fs->fs_name, key, &keylen); + jid_get_lock_state_lvb (key, keylen, lg_lock_state_Shared, lvb, + jid_header_lvb_size); + fs->JIDcount = (uint32_t) (lvb[0]) << 0; + fs->JIDcount |= (uint32_t) (lvb[1]) << 8; + fs->JIDcount |= (uint32_t) (lvb[2]) << 16; + fs->JIDcount |= (uint32_t) (lvb[3]) << 24; + + for (i = oldjcnt; i < fs->JIDcount; i++) { + keylen = sizeof (key); + jid_get_lock_name (fs->fs_name, i, key, &keylen); + jid_hold_lvb (key, keylen); + } + +} + +void +jid_grow_space (gulm_fs_t * fs) +{ + uint8_t key[GIO_KEY_SIZE], lvb[jid_header_lvb_size]; + uint16_t keylen = GIO_KEY_SIZE; + uint32_t jidc; + + keylen = sizeof (key); + jid_get_header_name (fs->fs_name, key, &keylen); + jid_get_lock_state_lvb (key, keylen, lg_lock_state_Exclusive, lvb, + jid_header_lvb_size); + jidc = (uint32_t) (lvb[0]) << 0; + jidc |= (uint32_t) (lvb[1]) << 8; + jidc |= (uint32_t) (lvb[2]) << 16; + jidc |= (uint32_t) (lvb[3]) << 24; + jidc += 10; + lvb[3] = (jidc >> 24) & 0xff; + lvb[2] = (jidc >> 16) & 0xff; + lvb[1] = (jidc >> 8) & 0xff; + lvb[0] = (jidc >> 0) & 0xff; + jid_sync_lvb (key, keylen, lvb, jid_header_lvb_size); + jid_get_lock_state (key, keylen, lg_lock_state_Unlock); + /* do an unlock here, so that when rehold grabs it shared, there is no + * lvb writing. + */ + + jid_rehold_lvbs (fs); +} + +/** + * lookup_name_by_jid - + * @fs: + * @jid: + * @name: + * + * + * Returns: int + */ +int +lookup_name_by_jid (gulm_fs_t * fs, uint32_t jid, uint8_t * name) +{ + uint8_t key[GIO_KEY_SIZE], lvb[64]; + uint16_t keylen = 64; + int err = 0; + + if (jid >= fs->JIDcount) { + err = -1; + goto exit; + } + + jid_hold_list_lock (fs); + + jid_get_lock_name (fs->fs_name, jid, key, &keylen); + jid_get_lock_state_lvb (key, keylen, lg_lock_state_Shared, lvb, 64); + + if (lvb[0] != 0) { + memcpy (name, &lvb[1], strlen (&lvb[1]) + 1); + } else { + err = -1; + } + + jid_get_lock_state (key, keylen, lg_lock_state_Unlock); + + jid_release_list_lock (fs); + + exit: + return err; +} + +/** + * Release_JID - + * @fs: + * @jid: + * + * actually may only need to et first byte to zero + * + * Returns: int + */ +int +release_JID (gulm_fs_t * fs, uint32_t jid, int nop) +{ + uint8_t key[GIO_KEY_SIZE], lvb[64]; + uint16_t keylen = 64; + + /* there is no such, so this becomes a nop. 
*/ + if (jid >= fs->JIDcount) + goto exit; + + jid_hold_list_lock (fs); + + jid_get_lock_name (fs->fs_name, jid, key, &keylen); + jid_get_lock_state_lvb (key, keylen, lg_lock_state_Exclusive, lvb, 64); + lvb[0] = 0; + jid_sync_lvb (key, keylen, lvb, strlen (&lvb[1]) + 2); + jid_get_lock_state (key, keylen, lg_lock_state_Unlock); + + jid_release_list_lock (fs); + + exit: + return 0; +} + +void +put_journalID (gulm_fs_t * fs) +{ + release_JID (fs, fs->fsJID, TRUE); +} + +/** + * get_journalID - + * @fs: + * @jid: + * + * This is broken. + * + * Returns: int + */ +void +get_journalID (gulm_fs_t * fs) +{ + uint32_t i = 0; + uint8_t key[GIO_KEY_SIZE], lvb[64]; + uint16_t keylen; + int first_clear = -1; + + retry: + jid_hold_list_lock (fs); + + /* find an empty space, or ourselves again */ + for (i = 0; i < fs->JIDcount; i++) { + keylen = sizeof (key); + jid_get_lock_name (fs->fs_name, i, key, &keylen); + jid_get_lock_state_lvb (key, keylen, lg_lock_state_Exclusive, + lvb, 64); + jid_get_lock_state (key, keylen, lg_lock_state_Unlock); + if (first_clear == -1 && lvb[0] == 0 ) { + first_clear = i; + } else if (strcmp (gulm_cm.myName, &lvb[1]) == 0) { + first_clear = i; + break; + } + } + if (first_clear >= 0) { + /* take the jid we have found */ + keylen = sizeof (key); + jid_get_lock_name (fs->fs_name, first_clear, key, &keylen); + jid_get_lock_state_lvb (key, keylen, lg_lock_state_Exclusive, + lvb, 64); + lvb[0] = 2; + memcpy (&lvb[1], gulm_cm.myName, strlen (gulm_cm.myName) + 1); + jid_sync_lvb (key, keylen, lvb, strlen (gulm_cm.myName) + 2); + jid_get_lock_state (key, keylen, lg_lock_state_Unlock); + + fs->fsJID = first_clear; + } + + /* unlock the header lock */ + jid_release_list_lock (fs); + + if (first_clear < 0) { + /* nothing found, grow and try again. 
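+		 * (jid_grow_space() above bumps the count in the
+		 * JHeader LVB by ten and holds LVBs for the new
+		 * mapping locks, so the retry should find a clear
+		 * slot -- unless other mounters race us to all ten,
+		 * in which case we just grow again.)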
*/ + jid_grow_space (fs); + goto retry; + } + +} + +/** + * find_jid_by_name_and_mark_replay - + * @fs: + * @name: + * @jid: + * + * + * Returns: int + */ +int +find_jid_by_name_and_mark_replay (gulm_fs_t * fs, uint8_t * name, + uint32_t * jid) +{ + uint32_t i, found = -1; + uint8_t key[GIO_KEY_SIZE], lvb[64]; + uint16_t keylen; + + /* grab list lock */ + jid_hold_list_lock (fs); + + for (i = 0; i < fs->JIDcount; i++) { + keylen = sizeof (key); + jid_get_lock_name (fs->fs_name, i, key, &keylen); + jid_get_lock_state_lvb (key, keylen, lg_lock_state_Exclusive, + lvb, 64); + if (strcmp (name, &lvb[1]) == 0) { + *jid = i; + found = 0; + lvb[0] = 1; + jid_sync_lvb (key, keylen, lvb, strlen (&lvb[1]) + 2); + jid_get_lock_state (key, keylen, lg_lock_state_Unlock); + break; + } + jid_get_lock_state (key, keylen, lg_lock_state_Unlock); + + } + /* unlock the list lock */ + jid_release_list_lock (fs); + + return found; +} + +/** + * Check_for_replays - + * @fs: + * + * + * Returns: int + */ +void +check_for_stale_expires (gulm_fs_t * fs) +{ + uint32_t i; + uint8_t key[GIO_KEY_SIZE], lvb[64]; + uint16_t keylen; + unsigned int ujid; + + /* grab list lock */ + jid_hold_list_lock (fs); + + for (i = 0; i < fs->JIDcount; i++) { + keylen = sizeof (key); + jid_get_lock_name (fs->fs_name, i, key, &keylen); + jid_get_lock_state_lvb (key, keylen, lg_lock_state_Shared, lvb, + 64); + jid_get_lock_state (key, keylen, lg_lock_state_Unlock); + + if (lvb[0] == 1) { + log_msg (lgm_JIDMap, + "fsid=%s: stale JID %d found\n", + fs->fs_name, i); + ujid = i; + fs->cb (fs->fsdata, LM_CB_NEED_RECOVERY, &ujid); + } + } + + /* unlock the list lock */ + jid_release_list_lock (fs); +} + +/** + * jid_fs_init - + * @fs: + * + */ +void +jid_fs_init (gulm_fs_t * fs) +{ + uint8_t key[GIO_KEY_SIZE]; + uint16_t keylen = GIO_KEY_SIZE; + + fs->JIDcount = 0; + + jid_get_header_name (fs->fs_name, key, &keylen); + jid_hold_lvb (key, keylen); + jid_rehold_lvbs (fs); +} + +/** + * jid_fs_release - + * @fs: + * + */ +void +jid_fs_release (gulm_fs_t * fs) +{ + uint32_t i; + uint8_t key[GIO_KEY_SIZE]; + uint16_t keylen; + for (i = 0; i < fs->JIDcount; i++) { + keylen = sizeof (key); + jid_get_lock_name (fs->fs_name, i, key, &keylen); + jid_unhold_lvb (key, keylen); + } + keylen = sizeof (key); + jid_get_header_name (fs->fs_name, key, &keylen); + jid_unhold_lvb (key, keylen); + jid_get_lock_state (key, keylen, lg_lock_state_Unlock); +} + +/** + * jid_unlock_callback - + * @d: + * + * *MUST* be called from a Handler thread. + * + * Returns: int + */ +void +jid_unlock_callback (void *d) +{ + gulm_fs_t *fs = (gulm_fs_t *) d; + jid_rehold_lvbs (fs); +} + +/** + * jid_header_lock_drop - + * @key: + * @keylen: + * + * Returns: void + */ +void +jid_header_lock_drop (uint8_t * key, uint16_t keylen) +{ + gulm_fs_t *fs; + /* make sure this is the header lock.... */ + if (key[1] == 'H' && (fs = get_fs_by_name (&key[10])) != NULL) { + qu_function_call (&fs->cq, jid_unlock_callback, fs); + } +} + +/****************************************************************************/ +/** + * jid_get_lsresv_name - + * @fsname: + * @key: + * @keylen: + * + * + * Returns: int + */ +int +jid_get_lsresv_name (char *fsname, uint8_t * key, uint16_t * keylen) +{ + int len; + + key[0] = 'J'; + key[1] = 'N'; + len = strlen (gulm_cm.myName) + 1; + memset (&key[2], 0, 8); + memcpy ((&key[2]), gulm_cm.myName, MIN (len, 8)); + /* fsname starts at byte 10 so the dropexp pattern will find it. 
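+	 * (layout: byte 0 'J', byte 1 'N', bytes 2..9 the node name
+	 * zero-padded or truncated to eight bytes, then fsname + NUL
+	 * from byte 10 -- the same offset fsname sits at in the
+	 * JHeader/JM/JL keys above.)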
*/ + memcpy ((&key[10]), fsname, strlen (fsname) + 1); + + *keylen = 10 + strlen (fsname) + 1; + + return 0; +} + +/** + * jid_lockstate_reserve - + * @fs: + * + * + * Returns: void + */ +void +jid_lockstate_reserve (gulm_fs_t * fs, int first) +{ + uint8_t key[GIO_KEY_SIZE]; + uint16_t keylen; + + jid_get_lsresv_name (fs->fs_name, key, &keylen); + + /* if we are expired, this will block until someone else has cleaned our + * last mess up. + * + * Will very well may need to put in some kind of timeout otherwise this + * may do a forever lockup much like the FirstMounter lock had. + */ + jid_get_lock_state_inr (key, keylen, lg_lock_state_Exclusive, + first?lg_lock_flag_IgnoreExp:0, NULL, 0); + +} + +/** + * jid_lockstate_release - + * @fs: + * + * + * Returns: void + */ +void +jid_lockstate_release (gulm_fs_t * fs) +{ + uint8_t key[GIO_KEY_SIZE]; + uint16_t keylen; + + jid_get_lsresv_name (fs->fs_name, key, &keylen); + + jid_get_lock_state (key, keylen, lg_lock_state_Unlock); + +} + + +/* vim: set ai cin noet sw=8 ts=8 : */ diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_jid.h linux-patched/fs/gfs_locking/lock_gulm/gulm_jid.h --- linux-orig/fs/gfs_locking/lock_gulm/gulm_jid.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm_jid.h 2004-06-16 12:03:21.957894998 -0500 @@ -0,0 +1,41 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __GULM_JID_H__ +#define __GULM_JID_H__ +#include "gulm.h" +void jid_init (void); +void jid_fs_init (gulm_fs_t * fs); +void jid_fs_release (gulm_fs_t * fs); +int get_journalID (gulm_fs_t * fs); +int lookup_jid_by_name (gulm_fs_t * fs, uint8_t * name, uint32_t * injid); +int lookup_name_by_jid (gulm_fs_t * fs, uint32_t jid, uint8_t * name); +void release_JID (gulm_fs_t * fs, uint32_t jid, int owner); +void put_journalID (gulm_fs_t * fs); +void check_for_stale_expires (gulm_fs_t * fs); + +int + find_jid_by_name_and_mark_replay (gulm_fs_t * fs, uint8_t * name, uint32_t * jid); + +void jid_start_journal_reply (gulm_fs_t * fs, uint32_t jid); +void jid_finish_journal_reply (gulm_fs_t * fs, uint32_t jid); + +void jid_lockstate_reserve (gulm_fs_t * fs, int first); +void jid_lockstate_release (gulm_fs_t * fs); + +/* to be called from the lg_lock callbacks. 
*/ +void jid_state_reply (uint8_t * key, uint16_t keylen, uint8_t * lvb, + uint16_t lvblen); +void jid_action_reply (uint8_t * key, uint16_t keylen); +void jid_header_lock_drop (uint8_t * key, uint16_t keylen); +#endif /*__GULM_JID_H__*/ diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_log_msg_bits.h linux-patched/fs/gfs_locking/lock_gulm/gulm_log_msg_bits.h --- linux-orig/fs/gfs_locking/lock_gulm/gulm_log_msg_bits.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm_log_msg_bits.h 2004-06-16 12:03:21.957894998 -0500 @@ -0,0 +1,40 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __gulm_log_msg_bits_h__ +#define __gulm_log_msg_bits_h__ +/* log_msg bit flags + * These got thier own file so I can easily include them in both user and + * kernel space. + * */ +#define lgm_Always (0x00000000) /*Print Message no matter what */ +#define lgm_Network (0x00000001) +#define lgm_Network2 (0x00000002) +#define lgm_Stomith (0x00000004) +#define lgm_Heartbeat (0x00000008) +#define lgm_locking (0x00000010) +#define lgm_FuncDebug (0x00000020) +#define lgm_Forking (0x00000040) +#define lgm_JIDMap (0x00000080) +#define lgm_Subscribers (0x00000100) +#define lgm_LockUpdates (0x00000200) +#define lgm_LoginLoops (0x00000400) +#define lgm_Network3 (0x00000800) +#define lgm_JIDUpdates (0x00001000) +#define lgm_ServerState (0x00002000) + +#define lgm_ReallyAll (0xffffffff) + +#define lgm_BitFieldSize (32) + +#endif /*__gulm_log_msg_bits_h__*/ diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_lt.c linux-patched/fs/gfs_locking/lock_gulm/gulm_lt.c --- linux-orig/fs/gfs_locking/lock_gulm/gulm_lt.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm_lt.c 2004-06-16 12:03:21.957894998 -0500 @@ -0,0 +1,1937 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include "gulm.h" + +#include +#include +#include +#include +#define __KERNEL_SYSCALLS__ +#include + +#include "util.h" +#include "handler.h" +#include "utils_tostr.h" +#include "gulm_jid.h" + +extern gulm_cm_t gulm_cm; + +/****************************************************************************/ +/* A bunch of prints that hopefully contain more information that is also + * useful + * + * these are a mess. 
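+ *
+ * (One concrete note: lck_key_to_hex() below emits two hex digits per
+ * key byte plus a trailing NUL, which is why every scratch buffer in
+ * this file is sized GIO_KEY_SIZE * 2 + 3.)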
+ */ + +/** + * lck_key_to_hex - + * @key: + * @len: + * @workspace: <> place to put string. !! better be 2x len !! + * + * + * Returns: char + */ +static char * +lck_key_to_hex (uint8_t * key, uint16_t len, char *workspace) +{ + int i; + for (i = 0; i < len; i++) + sprintf (&workspace[i * 2], "%02x", (key[i] & 0xff)); + return workspace; +} + +static void __inline__ +db_lck_entered (gulm_lock_t * lck) +{ + char bb[GIO_KEY_SIZE * 2 + 3]; + lck_key_to_hex (lck->key, lck->keylen, bb); + printk ("Started lock 0x%s cur:%#x req:%#x flags:%#x\n", bb, + lck->cur_state, lck->req_state, lck->flags); +} +static void __inline__ +db_lck_exited (gulm_lock_t * lck) +{ + char bb[GIO_KEY_SIZE * 2 + 3]; + lck_key_to_hex (lck->key, lck->keylen, bb); + printk ("Finished lock 0x%s result:%#x\n", bb, lck->result); +} + +static void __inline__ +dump_gulm_lock_t (gulm_lock_t * lck) +{ + char bb[GIO_KEY_SIZE * 2 + 3]; + + lck_key_to_hex (lck->key, lck->keylen, bb); + log_msg (lgm_Always, " key = 0x%s\n", bb); + log_msg (lgm_Always, " req_type = %#x\n", lck->req_type); + log_msg (lgm_Always, " last_suc_state = %#x\n", lck->last_suc_state); + log_msg (lgm_Always, " actuallypending = %d\n", lck->actuallypending); + log_msg (lgm_Always, " in_to_be_sent = %d\n", lck->in_to_be_sent); + log_msg (lgm_Always, " cur_state = %d\n", lck->cur_state); + log_msg (lgm_Always, " req_state = %d\n", lck->req_state); + log_msg (lgm_Always, " flags = %#x\n", lck->flags); + log_msg (lgm_Always, " action = %d\n", lck->action); + log_msg (lgm_Always, " result = %d\n", lck->result); +} + +/* DEBUG_BY_LOCK is gone. I may later add something back if needed. + * + * I love the idea of being able to log only certain locks, I just cannot + * think of an easy way to do it. The best I can come up with is some + * pattern (or set of) that are used to decide which locks get logged. But + * that could be expensive if the pattern is checked everytime, and won't + * behave as expected if only applied in get_lock. + * */ + +/* The old log functions. + * These need their own sort of clean up someday as well. + * */ +#define log_msg_lk(key, keylen, fmt, args...) {\ + uint8_t bb[GIO_KEY_SIZE*2 +3]; \ + lck_key_to_hex( key, keylen, bb); \ + printk(PROTO_NAME ": On lock 0x%s " fmt , bb , ## args ); \ + } + +#define log_err_lk(key, keylen, fmt, args...) {\ + uint8_t bb[GIO_KEY_SIZE*2 +3]; \ + lck_key_to_hex( key, keylen, bb); \ + printk(KERN_ERR PROTO_NAME ": ERROR On lock 0x%s " fmt , bb , ## args ); \ + } + +#define log_msg_lck(lck, fmt, args...) {\ + uint8_t bb[GIO_KEY_SIZE*2 +3]; \ + lck_key_to_hex( (lck)->key, (lck)->keylen, bb); \ + printk(PROTO_NAME ": On lock 0x%s " fmt , bb , ## args ); \ + } + +#define log_err_lck(lck, fmt, args...) {\ + uint8_t bb[GIO_KEY_SIZE*2 +3]; \ + lck_key_to_hex( (lck)->key, (lck)->keylen, bb); \ + printk(KERN_ERR PROTO_NAME ": ERROR On lock 0x%s " fmt , bb , ## args ); \ + } + +#ifdef DEBUG_LVB +static void __inline__ +print_lk_lvb (uint8_t * key, uint8_t * lvb, uint8_t st, uint8_t * dir) +{ + uint8_t bk[GIO_KEY_SIZE * 2 + 3]; + uint8_t bl[GIO_LVB_SIZE * 2 + 3]; + int i; + for (i = 0; i < GIO_KEY_SIZE; i++) + sprintf (&bk[(i * 2)], "%02x", (key[i]) & 0xff); + for (i = 0; i < GIO_LVB_SIZE; i++) + sprintf (&bl[(i * 2)], "%02x", (lvb[i]) & 0xff); + printk (PROTO_NAME ": On lock 0x%s with state %d\n\t%s LVB 0x%s\n", + bk, st, dir, bl); +} + +#define lvb_log_msg_lk(k, fmt, args...) log_msg_lk( k , fmt , ## args ) +#define lvb_log_msg(fmt, args...) 
log_msg(lgm_Always , fmt , ## args ) +#else /*DEBUG_LVB */ +#define print_lk_lvb(k,l,s,d) +#define lvb_log_msg_lk(k, fmt, args...) +#define lvb_log_msg(fmt, args...) +#endif /*DEBUG_LVB */ + +/****************************************************************************/ +/** + * find_and_mark_lock - + * @key: + * @keylen: + * @lockp: + * + * looks for a lock struct of key. If found, marks it. + * + * Returns: TRUE or FALSE + */ +int +find_and_mark_lock (uint8_t * key, uint8_t keylen, gulm_lock_t ** lockp) +{ + int found = FALSE; + uint32_t bkt; + gulm_lock_t *lck = NULL; + struct list_head *tmp; + + /* now find the lock */ + bkt = hash_lock_key (key, keylen); + bkt %= gulm_cm.ltpx.hashbuckets; + + spin_lock (&gulm_cm.ltpx.hshlk[bkt]); + list_for_each (tmp, &gulm_cm.ltpx.lkhsh[bkt]) { + lck = list_entry (tmp, gulm_lock_t, gl_list); + if (memcmp (lck->key, key, keylen) == 0) { + found = TRUE; + atomic_inc (&lck->count); + break; + } + } + spin_unlock (&gulm_cm.ltpx.hshlk[bkt]); + + if (found) + *lockp = lck; + + return found; +} + +/** + * mark_lock - + * @lck: + * + * like above, but since we have the lock, don't search for it. + * + * Returns: int + */ +void __inline__ +mark_lock (gulm_lock_t * lck) +{ + atomic_inc (&lck->count); +} + +/** + * unmark_and_release_lock - + * @lck: + * + * decrement the counter on a lock, freeing it if it reaches 0. + * (also removes it from the hash table) + * + * TRUE if lock was freed. + * + * Returns: TRUE or FALSE + */ +int +unmark_and_release_lock (gulm_lock_t * lck) +{ + uint32_t bkt; + int deld = FALSE; + + bkt = hash_lock_key (lck->key, lck->keylen); + bkt %= gulm_cm.ltpx.hashbuckets; + spin_lock (&gulm_cm.ltpx.hshlk[bkt]); + if (atomic_dec_and_test (&lck->count)) { + list_del (&lck->gl_list); + deld = TRUE; + } + spin_unlock (&gulm_cm.ltpx.hshlk[bkt]); + if (deld) { + gulm_cm.ltpx.locks_total--; + gulm_cm.ltpx.locks_unl--; + if (lck->lvb != NULL) { + kfree (lck->lvb); + } + kfree (lck); + } + + return deld; +} + +/****************************************************************************/ + +void +gulm_key_to_lm_lockname (uint8_t * key, struct lm_lockname *lockname) +{ + (*lockname).ln_number = (u64) (key[9]) << 0; + (*lockname).ln_number |= (u64) (key[8]) << 8; + (*lockname).ln_number |= (u64) (key[7]) << 16; + (*lockname).ln_number |= (u64) (key[6]) << 24; + (*lockname).ln_number |= (u64) (key[5]) << 32; + (*lockname).ln_number |= (u64) (key[4]) << 40; + (*lockname).ln_number |= (u64) (key[3]) << 48; + (*lockname).ln_number |= (u64) (key[2]) << 56; + (*lockname).ln_type = key[1]; +} + +void +do_drop_lock_req (gulm_fs_t * fs, uint8_t state, uint8_t key[GIO_KEY_SIZE]) +{ + unsigned int type; + struct lm_lockname lockname; + /* i might want to shove most of this function into the new lockcallback + * handing queue. + * later. + */ + + /* don't do callbacks on the gulm mount lock. + * I need to someday come up with a cleaner way of seperating the + * firstmounter lock and the rest of gfs's locks. + * i duno, this first byte is pretty clean. 
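+ *
+ * For reference, the key layout implied by gulm_key_to_lm_lockname()
+ * above and gulm_get_lock() below:
+ *
+ *	key[0]		'G' fs lock, 'F' first mounter, 'J' JID map
+ *	key[1]		GFS lock type (ln_type)
+ *	key[2..9]	ln_number, packed big-endian
+ *	key[10..]	fs name, NUL terminated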
+ * */ + if (key[0] != 'G') { + return; + } + + switch (state) { + case lg_lock_state_Unlock: + type = LM_CB_DROPLOCKS; + break; + case lg_lock_state_Exclusive: + type = LM_CB_NEED_E; + break; + case lg_lock_state_Shared: + type = LM_CB_NEED_S; + break; + case lg_lock_state_Deferred: + type = LM_CB_NEED_D; + break; + default: + type = LM_CB_DROPLOCKS; + break; + } + gulm_key_to_lm_lockname (key, &lockname); + + qu_drop_req (&fs->cq, fs->cb, fs->fsdata, type, + lockname.ln_type, lockname.ln_number); +} + +/** + * send_async_reply - + * @lck: + * + * + * Returns: void + */ +void +send_async_reply (gulm_lock_t * lck) +{ + gulm_fs_t *fs = lck->fs; + struct lm_lockname lockname; + + if (lck->key[0] == 'F') { + /* whee! it is the first mounter lock. two things: + * A: gfs could care less about this. + * B: we need to up the sleeper in the fs. (hack) + */ + complete (&fs->sleep); + return; + } + + gulm_key_to_lm_lockname (lck->key, &lockname); + + qu_async_rpl (&fs->cq, fs->cb, fs->fsdata, &lockname, lck->result); +} + +/** + * send_drop_exp_inter - + * @lt: + * @name: + * + * + * Returns: int + */ +int +send_drop_exp_inter (gulm_fs_t * fs, lock_table_t * lt, char *name) +{ + int err, len; + uint8_t mask[GIO_KEY_SIZE]; + + memset (mask, 0, GIO_KEY_SIZE); + /* pack key mask */ + mask[0] = 0xff; /* minor lock type. 'G', 'F', 'J'. */ + mask[1] = 0xff; /* GFS lock type. */ + mask[2] = 0xff; /* next 8 are lock number */ + mask[3] = 0xff; + mask[4] = 0xff; + mask[5] = 0xff; + mask[6] = 0xff; + mask[7] = 0xff; + mask[8] = 0xff; + mask[9] = 0xff; + /* Now stick the fsname into the remaining space. */ + len = strlen (fs->fs_name); + strncpy (&mask[10], fs->fs_name, GIO_KEY_SIZE - 16); + len += 11; /* 10 for the encoded buf, 1 for the '\0' after the fs name */ + + err = lg_lock_drop_exp (gulm_cm.hookup, name, mask, len); + + return err; +} + +/** + * send_lock_action - + * @lck: + * + * + * Returns: int + */ +int +send_lock_action (gulm_lock_t * lck, uint8_t action) +{ + int err; + + GULM_ASSERT (lck->req_type == glck_action, dump_gulm_lock_t (lck);); + + err = lg_lock_action_req (gulm_cm.hookup, lck->key, lck->keylen, action, + lck->lvb, lck->fs->lvb_size); + if (err != 0) + log_err ("Issues sending action request. 
%d\n", err); + + return err; +} + +/** + * send_lock_req - + * @lck: + * + * + * Returns: int + */ +int +send_lock_req (gulm_lock_t * lck) +{ + gulm_fs_t *fs = lck->fs; + int err; + uint32_t flags = 0; + uint8_t state; + + GULM_ASSERT (lck->req_type == glck_state, dump_gulm_lock_t (lck);); + + switch (lck->req_state) { + case LM_ST_EXCLUSIVE: + state = lg_lock_state_Exclusive; + break; + case LM_ST_DEFERRED: + state = lg_lock_state_Deferred; + break; + case LM_ST_SHARED: + state = lg_lock_state_Shared; + break; + case LM_ST_UNLOCKED: + state = lg_lock_state_Unlock; + break; + default: + GULM_ASSERT (0, log_err ("fsid=%s: Anit no lock state %d.\n", + fs->fs_name, lck->req_state);); + break; + } + if (lck->flags & LM_FLAG_TRY) { + flags |= lg_lock_flag_Try; + } + if (lck->flags & LM_FLAG_TRY_1CB) { + flags |= lg_lock_flag_Try | lg_lock_flag_DoCB; + } + if (lck->flags & LM_FLAG_NOEXP) { + flags |= lg_lock_flag_IgnoreExp; + } + if (lck->flags & LM_FLAG_ANY) { + flags |= lg_lock_flag_Any; + } + if (lck->flags & LM_FLAG_PRIORITY) { + flags |= lg_lock_flag_Piority; + } + if (lck->lvb != NULL) { + print_lk_lvb (lck->key, lck->lvb, lck->req_state, "Sending"); + } + + err = lg_lock_state_req (gulm_cm.hookup, lck->key, lck->keylen, + state, flags, lck->lvb, lck->fs->lvb_size); + if (err != 0) + log_err ("Issues sending state request. %d\n", err); + + return err; +} + +/** + * toggle_lock_counters - + * + * called after a succesful request to change lock state. Decrements + * counts for what the lock was, and increments for what it is now. + */ +void +toggle_lock_counters (lock_table_t * lt, int old, int new) +{ + /* what we had it in */ + switch (old) { + case LM_ST_EXCLUSIVE: + lt->locks_exl--; + break; + case LM_ST_DEFERRED: + lt->locks_dfr--; + break; + case LM_ST_SHARED: + lt->locks_shd--; + break; + case LM_ST_UNLOCKED: + lt->locks_unl--; + break; + } + /* what we have it in */ + switch (new) { + case LM_ST_EXCLUSIVE: + lt->locks_exl++; + break; + case LM_ST_DEFERRED: + lt->locks_dfr++; + break; + case LM_ST_SHARED: + lt->locks_shd++; + break; + case LM_ST_UNLOCKED: + lt->locks_unl++; + break; + } +} + +/** + * calc_lock_result - + * @lck: + * @state: + * @error: + * @flags: + * + * This calculates the correct result to return for gfs lock requests. + * + * Returns: int + */ +int +calc_lock_result (gulm_lock_t * lck, + uint8_t state, uint32_t error, uint32_t flags) +{ + gulm_fs_t *fs = lck->fs; + lock_table_t *lt = &gulm_cm.ltpx; + int result = -69; + + /* adjust result based on success status. */ + switch (error) { + case lg_err_Ok: + /* set result to current lock state. */ + if (!(lck->flags & LM_FLAG_ANY)) { + /* simple case, we got what we asked for. */ + result = lck->req_state; + } else { + /* complex case, we got something else, but we said that was ok */ + switch (state) { + case lg_lock_state_Shared: + result = LM_ST_SHARED; + break; + case lg_lock_state_Deferred: + result = LM_ST_DEFERRED; + break; + + case lg_lock_state_Exclusive: + case lg_lock_state_Unlock: + GULM_ASSERT (0, + dump_gulm_lock_t (lck); + log_err + ("fsid=%s: lock state %d is invalid on " + "ANY flag return\n", fs->fs_name, + state); + ); + break; + + default: + GULM_ASSERT (0, + dump_gulm_lock_t (lck); + log_err_lck (lck, + "fsid=%s: Anit no lock state %d.\n", + fs->fs_name, state); + ); + break; + } + } + + /* toggle counters. + * due to ANY flag, new state may not be req_state. + * */ + toggle_lock_counters (lt, lck->cur_state, result); + + /* if no internal unlocks, it is cachable. 
*/ + if (result != LM_ST_UNLOCKED && (flags & lg_lock_flag_Cachable)) + result |= LM_OUT_CACHEABLE; + + /* record and move on + * */ + lck->last_suc_state = result & LM_OUT_ST_MASK; + break; + case lg_err_Canceled: + result = LM_OUT_CANCELED | lck->cur_state; + break; + case lg_err_TryFailed: + result = lck->cur_state; /* if we didn't get it. */ + break; + default: + result = -error; + break; + } + + return result; +} + +/** + * my_strdup - + * @s: + * + * + * Returns: char + */ +char * +my_strdup (char *s) +{ + char *tmp; + int len; + len = strlen (s) + 1; + tmp = kmalloc (len, GFP_KERNEL); + if (tmp == NULL) + return NULL; + memcpy (tmp, s, len); + return tmp; +} + +/* Instead of directly calling the send function below, the functions will + * create of of these. + * Which exist only because I cannot stick the lock_t onto two lists + * at once. + * + * this could use some clean up. + */ +typedef struct send_req_s { + struct list_head sr_list; + enum { sr_lock, sr_act, sr_cancel, sr_drop } type; + gulm_lock_t *who; + gulm_fs_t *fs; + lock_table_t *lt; + char *name; +} send_req_t; + +/** + * alloc_send_req - + * @oid: + * + * + * Returns: send_req_t + */ +send_req_t * +alloc_send_req (void) +{ + send_req_t *tmp; + tmp = kmalloc (sizeof (send_req_t), GFP_KERNEL); + GULM_ASSERT (tmp != NULL,); /* so evil.... */ + return tmp; +} + +/** + * send_drop_exp - + * @fs: + * @lt: + * @name: + * + * + * Returns: int + */ +int +send_drop_exp (gulm_fs_t * fs, lock_table_t * lt, char *name) +{ + send_req_t *sr; + + sr = alloc_send_req (); + INIT_LIST_HEAD (&sr->sr_list); + sr->type = sr_drop; + sr->who = NULL; + sr->fs = fs; + sr->lt = lt; + if (name != NULL) { + sr->name = my_strdup (name); + } else { + sr->name = NULL; + } + + spin_lock (<->queue_sender); + list_add (&sr->sr_list, <->to_be_sent); + spin_unlock (<->queue_sender); + + wake_up (<->send_wchan); + return 0; +} + +/** + * add_lock_to_send_req_queue - + * @lt: + * @lck: + * + * + * Returns: void + */ +void +add_lock_to_send_req_queue (lock_table_t * lt, gulm_lock_t * lck, int type) +{ + send_req_t *sr; + + sr = alloc_send_req (); + INIT_LIST_HEAD (&sr->sr_list); + sr->type = type; + sr->who = lck; + sr->fs = NULL; + sr->lt = NULL; + sr->name = NULL; + if (type != sr_cancel) + lck->in_to_be_sent = TRUE; + + mark_lock (lck); + + spin_lock (<->queue_sender); + list_add (&sr->sr_list, <->to_be_sent); + spin_unlock (<->queue_sender); + + wake_up (<->send_wchan); +} + +/** + * queue_empty - + * @lt: + * + * + * Returns: int + */ +static __inline__ int +queue_empty (lock_table_t * lt) +{ + int ret; + spin_lock (<->queue_sender); + ret = list_empty (<->to_be_sent); + spin_unlock (<->queue_sender); + return ret; +} + +/** + * lt_io_sender_thread - + * @data: + * + * Right now, only gfs lock requests should go through this thread. + * Must look, May not even need this. + * well, it is nice to get the socket io off of what ever process the user + * is running that is going through gfs into here. ?is it? 
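+ *
+ * Producer side, a minimal hedged sketch (this is what gulm_lock()
+ * below does; error handling elided):
+ *
+ *	lck->req_type = glck_state;
+ *	lck->actuallypending = TRUE;
+ *	atomic_inc (&lt->locks_pending);
+ *	add_lock_to_send_req_queue (lt, lck, sr_lock);
+ *	-- the reply arrives later via gulm_lt_lock_state()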
+ * + * + * Returns: int + */ +int +lt_io_sender_thread (void *data) +{ + lock_table_t *lt = (lock_table_t *) data; + struct list_head *tmp; + send_req_t *sr = NULL; + int err = 0; + + daemonize ("gulm_LT_sender"); + lt->sender_task = current; + complete (<->startup); + + while (lt->running) { + do { + DECLARE_WAITQUEUE (__wait_chan, current); + current->state = TASK_INTERRUPTIBLE; + add_wait_queue (<->send_wchan, &__wait_chan); + if (queue_empty (lt)) + schedule (); + remove_wait_queue (<->send_wchan, &__wait_chan); + current->state = TASK_RUNNING; + } while (0); + if (!lt->running) + break; + + /* check to make sure socket is ok. */ + down (<->sender); + + /* pop next item to be sent + * (it will get pushed back if there was problems.) + */ + spin_lock (<->queue_sender); + if (list_empty (<->to_be_sent)) { + spin_unlock (<->queue_sender); + up (<->sender); + continue; + } + tmp = (<->to_be_sent)->prev; + list_del (tmp); + spin_unlock (<->queue_sender); + sr = list_entry (tmp, send_req_t, sr_list); + + /* send. */ + if (sr->type == sr_lock) { + err = send_lock_req (sr->who); + if (err == 0) { + sr->who->in_to_be_sent = FALSE; + unmark_and_release_lock (sr->who); + } + } else if (sr->type == sr_act) { + err = send_lock_action (sr->who, sr->who->action); + if (err == 0) { + sr->who->in_to_be_sent = FALSE; + unmark_and_release_lock (sr->who); + } + } else if (sr->type == sr_cancel) { + err = + lg_lock_cancel_req (gulm_cm.hookup, sr->who->key, + sr->who->keylen); + if (err == 0) + unmark_and_release_lock (sr->who); + } else if (sr->type == sr_drop) { + /* XXX sr->lt isn't really needed. + * just lt should be fine. + * look into it someday. + */ + err = send_drop_exp_inter (sr->fs, sr->lt, sr->name); + } else { + log_err ("Unknown send_req type! %d\n", sr->type); + } + up (<->sender); + + /* if no errors, remove from queue. */ + if (err == 0) { + if (sr->type == sr_drop && sr->name != NULL) + kfree (sr->name); + kfree (sr); + sr = NULL; + } else { + /* if errors, re-queue. + * the send_* funcs already reported the error, so we won't + * repeat that. + * */ + spin_lock (<->queue_sender); + /* reset the pointers. otherwise things get weird. */ + INIT_LIST_HEAD (&sr->sr_list); + list_add_tail (&sr->sr_list, <->to_be_sent); + spin_unlock (<->queue_sender); + + current->state = TASK_INTERRUPTIBLE; + schedule_timeout (3 * HZ); + + /* gotta break shit up. + * else this loops hard and fast. + */ + } + } /* while( lt->running ) */ + + complete (<->startup); + return 0; +} + +/** + * cancel_pending_sender - + * @lck: + * + * want to cancel a lock request that we haven't sent to the server yet. + * + * this must skip over unlock requests. (never cancel unlocks) + * + * Returns: int + */ +int +cancel_pending_sender (gulm_lock_t * lck) +{ + lock_table_t *lt = &gulm_cm.ltpx; + struct list_head *tmp, *nxt; + send_req_t *sr; + int found = FALSE; + + spin_lock (<->queue_sender); + + list_for_each_safe (tmp, nxt, <->to_be_sent) { + sr = list_entry (tmp, send_req_t, sr_list); + if (sr->who == lck) { /* good enough? */ + if (lck->req_type == sr_cancel) + continue; + if (lck->req_state == LM_ST_UNLOCKED) + continue; /*donot cancel unlocks */ + list_del (tmp); + kfree (sr); + found = TRUE; + lck->in_to_be_sent = FALSE; + + /* Now we need to tell the waiting lock req that it got canceled. + * basically, we need to fake a lg_err_Canceled return.... 
+ */ + lck->result = LM_OUT_CANCELED | lck->cur_state; + lck->actuallypending = FALSE; + lck->req_type = glck_nothing; + atomic_dec (<->locks_pending); +#ifndef USE_SYNC_LOCKING + send_async_reply (lck); +#else + complete (&lck->actsleep); +#endif + unmark_and_release_lock (lck); + break; + } + } + + spin_unlock (<->queue_sender); + return found; +} + +/** + * gulm_lt_login_reply - + * @misc: + * @error: + * @which: + * + * + * Returns: int + */ +int +gulm_lt_login_reply (void *misc, uint32_t error, uint8_t which) +{ + if (error != 0) { + gulm_cm.ltpx.running = FALSE; + log_err ("LTPX: Got a %d from the login request.\n", error); + } else { + log_msg (lgm_Network2, "Logged into local LTPX.\n"); + } + return error; +} + +/** + * gulm_lt_logout_reply - + * @misc: + * + * + * Returns: int + */ +int +gulm_lt_logout_reply (void *misc) +{ + gulm_cm.ltpx.running = FALSE; + log_msg (lgm_Network2, "Logged out of local LTPX.\n"); + return 0; +} + +/** + * gulm_lt_lock_state - + * @misc: + * @key: + * @keylen: + * @state: + * @flags: + * @error: + * @LVB: + * @LVBlen: + * + * + * Returns: int + */ +int +gulm_lt_lock_state (void *misc, uint8_t * key, uint16_t keylen, + uint8_t state, uint32_t flags, uint32_t error, + uint8_t * LVB, uint16_t LVBlen) +{ + gulm_lock_t *lck; + + if (key[0] == 'J') { + jid_state_reply (key, keylen, LVB, LVBlen); + return 0; + } + + if (!find_and_mark_lock (key, keylen, &lck)) { + log_err_lk (key, keylen, "Got a lock state reply for a lock " + "that we don't know of. state:%#x flags:%#x error:%#x\n", + state, flags, error); + return 0; + } + + lck->result = calc_lock_result (lck, state, error, flags); + + if ((lck->result & LM_OUT_ST_MASK) != LM_ST_UNLOCKED && + lck->lvb != NULL) { + memcpy (lck->lvb, LVB, MIN (lck->fs->lvb_size, LVBlen)); + } + + lck->actuallypending = FALSE; + lck->req_type = glck_nothing; + atomic_dec (&gulm_cm.ltpx.locks_pending); +#ifndef USE_SYNC_LOCKING + send_async_reply (lck); +#else + complete (&lck->actsleep); +#endif + + if (error != 0 && error != lg_err_TryFailed && error != lg_err_Canceled) + log_msg_lck (lck, "Error: %d:%s (req:%#x rpl:%#x lss:%#x)\n", + error, gio_Err_to_str (error), + lck->req_state, state, lck->last_suc_state); + + unmark_and_release_lock (lck); + return 0; +} + +/** + * gulm_lt_lock_action - + * @misc: + * @key: + * @keylen: + * @action: + * @error: + * + * + * Returns: int + */ +int +gulm_lt_lock_action (void *misc, uint8_t * key, uint16_t keylen, + uint8_t action, uint32_t error) +{ + gulm_lock_t *lck; + + if (key[0] == 'J') { + jid_action_reply (key, keylen); + return 0; + } + + if (!find_and_mark_lock (key, keylen, &lck)) { + log_err_lk (key, keylen, "Got a lock action reply for a lock " + "that we don't know of. 
action:%#x error:%#x\n", + action, error); + return 0; + } + + if (action == lg_lock_act_HoldLVB || + action == lg_lock_act_UnHoldLVB || action == lg_lock_act_SyncLVB) { + /* */ + lck->result = error; + if (error != lg_err_Ok) { + log_err ("on action reply act:%d err:%d\n", action, + error); + } + lck->req_type = glck_nothing; + lck->actuallypending = FALSE; + complete (&lck->actsleep); + } else { + log_err_lck (lck, "Got strange Action %#x\n", action); + } + unmark_and_release_lock (lck); + return 0; +} + +/** + * gulm_lt_drop_lock_req - + * @misc: + * @key: + * @keylen: + * @state: + * + * + * Returns: int + */ +int +gulm_lt_drop_lock_req (void *misc, uint8_t * key, uint16_t keylen, + uint8_t state) +{ + gulm_lock_t *lck; + + if (key[0] == 'J') { + jid_header_lock_drop (key, keylen); + return 0; + } + + if (!find_and_mark_lock (key, keylen, &lck)) { + log_err_lk (key, keylen, "Got a drop lcok request for a lock " + "that we don't know of. state:%#x\n", state); + return 0; + } + + do_drop_lock_req (lck->fs, state, key); + + unmark_and_release_lock (lck); + return 0; +} + +/** + * gulm_lt_drop_all - + * @misc: + * + * + * Returns: int + */ +int +gulm_lt_drop_all (void *misc) +{ + passup_droplocks (); + return 0; +} + +/** + * gulm_lt_error - + * @misc: + * @err: + * + * + * Returns: int + */ +int +gulm_lt_error (void *misc, uint32_t err) +{ + log_err ("LTPX: RANDOM ERROR %d\n", err); + return err; +} + +static lg_lockspace_callbacks_t lock_cb = { + login_reply:gulm_lt_login_reply, + logout_reply:gulm_lt_logout_reply, + lock_state:gulm_lt_lock_state, + lock_action:gulm_lt_lock_action, + drop_lock_req:gulm_lt_drop_lock_req, + drop_all:gulm_lt_drop_all, + error:gulm_lt_error +}; + +/** + * lt_io_recving_thread - + * @data: + * + * + * Returns: int + */ +int +lt_io_recving_thread (void *data) +{ + lock_table_t *lt = &gulm_cm.ltpx; + int err; + + daemonize ("gulm_LT_recver"); + lt->recver_task = current; + complete (<->startup); + + while (lt->running) { + err = lg_lock_handle_messages (gulm_cm.hookup, &lock_cb, NULL); + if (err != 0) { + log_err ("gulm_LT_recver err %d\n", err); + lt->running = FALSE; /* should stop the sender thread. */ + wake_up (<->send_wchan); + break; + } + } /* while( lt->running ) */ + + complete (<->startup); + return 0; +} + +/** + * lt_logout - log out of all of the lock tables + */ +void +lt_logout (void) +{ + lock_table_t *lt = &gulm_cm.ltpx; + int err; + + if (lt->running) { + lt->running = FALSE; + + /* stop sender thread */ + wake_up (<->send_wchan); + wait_for_completion (<->startup); + + /* stop recver thread */ + down (<->sender); + err = lg_lock_logout (gulm_cm.hookup); + up (<->sender); + + /* wait for thread to finish */ + wait_for_completion (<->startup); + } + +} + +/** + * lt_login - login to lock tables. + * + * Returns: int + */ +int +lt_login (void) +{ + int err; + lock_table_t *lt = &gulm_cm.ltpx; + + if (lt->running) + log_err + ("Trying to log into LTPX when it appears to be logged in!\n"); + + err = lg_lock_login (gulm_cm.hookup, "GFS "); + if (err != 0) { + log_err ("Failed to send login request. %d\n", err); + goto fail; + } + + /* start recver thread. */ + lt->running = TRUE; + err = kernel_thread (lt_io_recving_thread, lt, 0); + if (err < 0) { + log_err ("Failed to start gulm_lt_IOd. (%d)\n", err); + goto fail; + } + wait_for_completion (<->startup); + + /* start sender thread */ + err = kernel_thread (lt_io_sender_thread, lt, 0); + if (err < 0) { + log_err ("Failed to start gulm_LT_sender. 
(%d)\n", err); + goto fail; + } + wait_for_completion (<->startup); + + return 0; + fail: + lt_logout (); + log_msg (lgm_Always, "Exiting lt_login. err:%d\n", err); + return err; +} + +/****************************************************************************/ + +/** + * internal_gulm_get_lock - + * @fs: + * @key: + * @keylen: + * @lockp: + * + * + * Returns: 0 on success, -EXXX on failure + */ +int +internal_gulm_get_lock (gulm_fs_t * fs, uint8_t * key, uint8_t keylen, + gulm_lock_t ** lockp) +{ + int found = FALSE; + uint32_t bkt; + gulm_lock_t *lck = NULL; + + found = find_and_mark_lock (key, keylen, &lck); + + /* malloc space */ + if (found) { + GULM_ASSERT (lck->magic_one == 0xAAAAAAAA,); + } else { + lck = kmalloc (sizeof (gulm_lock_t), GFP_KERNEL); + if (lck == NULL) { + log_err + ("fsid=%s: Out of memory for lock struct in get_lock!\n", + fs->fs_name); + return -ENOMEM; + } + memset (lck, 0, sizeof (gulm_lock_t)); + INIT_LIST_HEAD (&lck->gl_list); + atomic_set (&lck->count, 1); + lck->magic_one = 0xAAAAAAAA; + lck->fs = fs; + memcpy (lck->key, key, keylen); + lck->keylen = keylen; + lck->lvb = NULL; + init_completion (&lck->actsleep); + lck->actuallypending = FALSE; + lck->in_to_be_sent = FALSE; + lck->result = 0; + lck->action = -1; + lck->req_type = glck_nothing; + lck->last_suc_state = LM_ST_UNLOCKED; + + gulm_cm.ltpx.locks_total++; + gulm_cm.ltpx.locks_unl++; + + bkt = hash_lock_key (key, keylen); + bkt %= gulm_cm.ltpx.hashbuckets; + + spin_lock (&gulm_cm.ltpx.hshlk[bkt]); + list_add (&lck->gl_list, &gulm_cm.ltpx.lkhsh[bkt]); + spin_unlock (&gulm_cm.ltpx.hshlk[bkt]); + } + + *lockp = lck; + + return 0; +} + +/** + * gulm_get_lock - + * @lockspace: + * @name: + * @lockp: + * + * Returns: 0 on success, -EXXX on failure + */ +int +gulm_get_lock (lm_lockspace_t * lockspace, struct lm_lockname *name, + lm_lock_t ** lockp) +{ + int err, len; + gulm_fs_t *fs = (gulm_fs_t *) lockspace; + uint8_t key[GIO_KEY_SIZE]; + + /* i could add a per fs lock to force only one gulm_get_lock at a time. + */ + down (&fs->get_lock); + + memset (key, 0, GIO_KEY_SIZE); + /* pack lockname */ + key[0] = 'G'; /* G: fs lock, F: First mounter, J: JID mapping lock */ + key[1] = name->ln_type & 0xff; + key[2] = (name->ln_number >> 56) & 0xff; + key[3] = (name->ln_number >> 48) & 0xff; + key[4] = (name->ln_number >> 40) & 0xff; + key[5] = (name->ln_number >> 32) & 0xff; + key[6] = (name->ln_number >> 24) & 0xff; + key[7] = (name->ln_number >> 16) & 0xff; + key[8] = (name->ln_number >> 8) & 0xff; + key[9] = (name->ln_number >> 0) & 0xff; + + /* Now stick the fsname into the remaining space. */ + len = strlen (fs->fs_name); + strncpy (&key[10], fs->fs_name, GIO_KEY_SIZE - 16); + + len = MIN (len, GIO_KEY_SIZE - 16); + len += 11; /* 10 for the encoded buf, 1 for the '\0' after the fs name */ + err = internal_gulm_get_lock (fs, key, len, (gulm_lock_t **) lockp); + + up (&fs->get_lock); + + return err; +} + +/** + * gulm_put_lock - + * @lock: + * + * + * Returns: void + */ +void +gulm_put_lock (lm_lock_t * lock) +{ + gulm_lock_t *lck = (gulm_lock_t *) lock; + lock_table_t *lt = &gulm_cm.ltpx; + gulm_fs_t *fs = lck->fs; + + down (&fs->get_lock); + + GULM_ASSERT (lt != NULL,); + + if (lck->last_suc_state != LM_ST_UNLOCKED) { + log_err_lck (lck, + "fsid=%s: gulm_put_lock called on a lock that is not unlocked!" + " Current state:%#x\n", lck->fs->fs_name, + lck->last_suc_state); + /* I'm still not sure about this one. We should never see it, so I + * don't think it is that big of a deal, but i duno. 
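+ *
+ * (For reference, a hedged sketch of the intended pairing: each
+ * gulm_get_lock() takes one reference, mark/unmark take more, and
+ * the struct is only freed when the count drains to zero.  Reply
+ * waiting is elided:
+ *
+ *	gulm_get_lock (ls, &name, &l);			-- count = 1
+ *	gulm_lock (l, LM_ST_UNLOCKED, LM_ST_SHARED, 0);
+ *	gulm_unlock (l, LM_ST_SHARED);
+ *	gulm_put_lock (l);				-- count = 0, freed
+ *
+ * so reaching here with a held state means that pairing was broken.)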
+ * + * Maybe should just make it an assertion. + * + * with the mark/unmark code, is it even a concern? + */ + } + + unmark_and_release_lock (lck); + /* lck = NULL; */ + + up (&fs->get_lock); + +} + +static int +valid_trasition (unsigned int cur, unsigned int req) +{ + int lock_state_changes[16] = { /* unl exl def shr */ + FALSE, TRUE, TRUE, TRUE, /* unl */ + TRUE, FALSE, TRUE, TRUE, /* exl */ + TRUE, TRUE, FALSE, TRUE, /* def */ + TRUE, TRUE, TRUE, FALSE /* shr */ + }; + GULM_ASSERT (cur < 4 + && req < 4, log_err ("cur:%d req:%d\n", cur, req);); + + return (lock_state_changes[4 * cur + req]); +} + +/** + * verify_gulm_lock_t - + * @lck: + * + * wonder if I should add some other checks. + * + * Returns: int + */ +int +verify_gulm_lock_t (gulm_lock_t * lck) +{ + if (lck == NULL) { + log_err ("Lock pointer was NULL!\n"); + return -1; + } + if (lck->fs == NULL) { + log_err ("This lock has no filesystem!!!\n"); + return -1; + } + return 0; +} + +/** + * gulm_lock - + * @lock: + * @cur_state: + * @req_state: + * @flags: + * + * + * Returns: int + */ +unsigned int +gulm_lock (lm_lock_t * lock, unsigned int cur_state, + unsigned int req_state, unsigned int flags) +{ + gulm_lock_t *lck = NULL; + gulm_fs_t *fs; + lock_table_t *lt; + + /* verify vars. */ + lck = (gulm_lock_t *) lock; + if (verify_gulm_lock_t (lck) != 0) { + return -EINVAL; + } + lt = &gulm_cm.ltpx; + fs = lck->fs; + + GULM_ASSERT (valid_trasition (cur_state, req_state), + log_err_lck (lck, "want %d with %s thinks:%d\n", req_state, + (LM_FLAG_TRY & flags) ? "try" : (LM_FLAG_NOEXP + & flags) ? + "noexp" : "no flags", cur_state); + ); + + GULM_ASSERT (lck->actuallypending == FALSE, dump_gulm_lock_t (lck);); + + /* save the details of this request. */ + lck->req_type = glck_state; + lck->result = 0; + lck->cur_state = cur_state; + lck->req_state = req_state; + lck->flags = flags; + + /* moving these here fixes a race on the s390 that ben found. + * basically, the request was sent to the server, the server receives + * it, the server processes, the server sends a reply, the client + * receives the reply, and the client tries to processe the reply before + * this thread could mark it as actuallypending. + * */ + lck->actuallypending = TRUE; + atomic_inc (<->locks_pending); + add_lock_to_send_req_queue (lt, lck, sr_lock); + + lt->lops++; +#ifdef USE_SYNC_LOCKING + wait_for_completion (&lck->actsleep); +#endif + +#ifdef USE_SYNC_LOCKING + return lck->result; +#else + return LM_OUT_ASYNC; +#endif +} + +/** + * gulm_unlock - + * @lock: + * @cur_state: + * + * + * Returns: int + */ +unsigned int +gulm_unlock (lm_lock_t * lock, unsigned int cur_state) +{ + int e; + e = gulm_lock (lock, cur_state, LM_ST_UNLOCKED, 0); + return e; +} + +/** + * gulm_cancel - + * @lock: + * + */ +void +gulm_cancel (lm_lock_t * lock) +{ + gulm_lock_t *lck; + gulm_fs_t *fs; + lock_table_t *lt; + + /* verify vars. */ + lck = (gulm_lock_t *) lock; + if (verify_gulm_lock_t (lck) != 0) { + return; + } + lt = &gulm_cm.ltpx; + fs = lck->fs; + + if (lck->actuallypending) { + if (lck->in_to_be_sent) { + /* this should pull the req out of the send queue and have it + * return with a cancel code without going to the server. 
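+ *
+ * Hedged note: the faked completion below matches what a real server
+ * round trip would produce; calc_lock_result() maps lg_err_Canceled
+ * to LM_OUT_CANCELED | cur_state too, so gfs cannot tell the
+ * difference.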
+ */ + cancel_pending_sender (lck); + } else { + add_lock_to_send_req_queue (lt, lck, sr_cancel); + } + } else { + log_msg_lck (lck, "Cancel called with no pending request.\n"); + } + +} + +/** + * gulm_hold_lvb - + * @lock: + * @lvbp: + * + * + * Returns: 0 on success, -EXXX on failure + */ +int +gulm_hold_lvb (lm_lock_t * lock, char **lvbp) +{ + gulm_lock_t *lck; + gulm_fs_t *fs; + lock_table_t *lt; + int err = -1; + + /* verify vars. */ + lck = (gulm_lock_t *) lock; + if (verify_gulm_lock_t (lck) != 0) { + return -EINVAL; + } + lt = &gulm_cm.ltpx; + fs = lck->fs; + + /* what where these for? */ + GULM_ASSERT (lck->magic_one == 0xAAAAAAAA, + log_msg_lck (lck, "Bad gulm_lock magic.\n");); + GULM_ASSERT (lt->magic_one == 0xAAAAAAAA, + log_msg_lck (lck, "Bad lock_table magic.\n");); + + lvb_log_msg_lk (lck->key, "Entering gulm_hold_lvb\n"); + + GULM_ASSERT (lck->lvb == NULL, + log_msg_lck (lck, + "fsid=%s: Lvb data wasn't null! must be held " + "already.\n", fs->fs_name); + ); + + GULM_ASSERT (lck->actuallypending == FALSE, dump_gulm_lock_t (lck);); + + lck->lvb = kmalloc (fs->lvb_size, GFP_KERNEL); + if (lck->lvb == NULL) { + err = -ENOMEM; + goto fail; + } + memset (lck->lvb, 0, fs->lvb_size); + + lck->req_type = glck_action; + lck->action = lg_lock_act_HoldLVB; + lck->result = 0; + lck->actuallypending = TRUE; + add_lock_to_send_req_queue (lt, lck, sr_act); + + wait_for_completion (&lck->actsleep); + + if (lck->result != lg_err_Ok) { + log_err ("fsid=%s: Got error %d on hold lvb request.\n", + fs->fs_name, lck->result); + kfree (lck->lvb); + lck->lvb = NULL; + goto fail; + } + + lt->locks_lvbs++; + + *lvbp = lck->lvb; + + lvb_log_msg_lk (lck->key, "fsid=%s: Exiting gulm_hold_lvb\n", + fs->fs_name); + return 0; + fail: + if (err != 0) + log_msg (lgm_Always, + "fsid=%s: Exiting gulm_hold_lvb with errors (%d)\n", + fs->fs_name, err); + return err; +} + +/** + * gulm_unhold_lvb - + * @lock: + * @lvb: + * + * + * Returns: void + */ +void +gulm_unhold_lvb (lm_lock_t * lock, char *lvb) +{ + gulm_lock_t *lck = NULL; + gulm_fs_t *fs; + lock_table_t *lt; + + /* verify vars. */ + lck = (gulm_lock_t *) lock; + if (verify_gulm_lock_t (lck) != 0) { + return; + } + lt = &gulm_cm.ltpx; + fs = lck->fs; + + GULM_ASSERT (lck->actuallypending == FALSE, dump_gulm_lock_t (lck);); + + if (lck->lvb != lvb) { + log_err ("fsid=%s: AH! LVB pointer missmatch! %p != %p\n", + fs->fs_name, lck->lvb, lvb); + goto exit; + } + + lvb_log_msg_lk (lck->key, "Entering gulm_unhold_lvb\n"); + + lck->req_type = glck_action; + lck->action = lg_lock_act_UnHoldLVB; + lck->result = 0; + lck->actuallypending = TRUE; + add_lock_to_send_req_queue (lt, lck, sr_act); + + wait_for_completion (&lck->actsleep); + + /* XXX ummm, is it sane to not free the memory if the command fails? + * gfs will still think that the lvb was dropped sucessfully.... + * (it assumes it is always sucessful) + * Maybe I should retry the drop request then? + */ + if (lck->result != lg_err_Ok) { + log_err ("fsid=%s: Got error %d on unhold LVB request.\n", + lck->fs->fs_name, lck->result); + } else { + if (lck->lvb != NULL) + kfree (lck->lvb); + lck->lvb = NULL; + lt->locks_lvbs--; + } + exit: + lvb_log_msg ("Exiting gulm_unhold_lvb\n"); +} + +/** + * gulm_sync_lvb - + * @lock: + * @lvb: + * + * umm, is this even used anymore? yes. + * + * Returns: void + */ +void +gulm_sync_lvb (lm_lock_t * lock, char *lvb) +{ + gulm_lock_t *lck = NULL; + gulm_fs_t *fs; + lock_table_t *lt; + + /* verify vars. 
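+ *
+ * LVB life cycle as implemented above, a hedged sketch (errors
+ * elided; sync requires holding the lock Exclusive):
+ *
+ *	char *lvb;
+ *	gulm_hold_lvb (lock, &lvb);	-- alloc + HoldLVB action
+ *	memcpy (lvb, data, fs->lvb_size);
+ *	gulm_sync_lvb (lock, lvb);	-- SyncLVB action
+ *	gulm_unhold_lvb (lock, lvb);	-- UnHoldLVB action + free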
*/ + lck = (gulm_lock_t *) lock; + if (verify_gulm_lock_t (lck) != 0) { + return; + } + lt = &gulm_cm.ltpx; + fs = lck->fs; + + GULM_ASSERT (lck->actuallypending == FALSE, dump_gulm_lock_t (lck);); + + /* this check is also in the server, so it isn't really needed here. */ + if (lck->last_suc_state != LM_ST_EXCLUSIVE) { + log_err ("sync_lvb: You must hold the lock Exclusive first.\n"); + goto exit; /*cannot do anything */ + } + if (lck->lvb == NULL) { + log_err ("sync_lvb: You forgot to call hold lvb first.\n"); + goto exit; + } + if (lck->lvb != lvb) { + log_err ("fsid=%s: AH! LVB pointer missmatch! %p != %p\n", + fs->fs_name, lck->lvb, lvb); + goto exit; + } + + lvb_log_msg_lk (lck->key, "Entering gulm_sync_lvb\n"); + + lck->req_type = glck_action; + lck->action = lg_lock_act_SyncLVB; + lck->result = 0; + lck->actuallypending = TRUE; + add_lock_to_send_req_queue (lt, lck, sr_act); + + wait_for_completion (&lck->actsleep); + + /* XXX? retry if I get an error? */ + if (lck->result != lg_err_Ok) { + log_err_lck (lck, + "fsid=%s: Got error %d:%s on Sync LVB request.\n", + fs->fs_name, lck->result, + gio_Err_to_str (lck->result)); + } + exit: + lvb_log_msg ("Exiting gulm_sync_lvb\n"); +} + +/*****************************************************************************/ +static int +gulm_plock_get (lm_lockspace_t * lockspace, + struct lm_lockname *name, unsigned long owner, + uint64_t * start, uint64_t * end, int *exclusive, + unsigned long *rowner) +{ + return -ENOSYS; +} + +static int +gulm_plock (lm_lockspace_t * lockspace, + struct lm_lockname *name, unsigned long owner, + int wait, int exclusive, uint64_t start, uint64_t end) +{ + return -ENOSYS; +} + +static int +gulm_punlock (lm_lockspace_t * lockspace, + struct lm_lockname *name, unsigned long owner, + uint64_t start, uint64_t end) +{ + return -ENOSYS; +} + +/****************************************************************************/ +/****************************************************************************/ +/****************************************************************************/ +/* should move the firstmounter lock stuff into its own file perhaps? */ +/** + * get_special_lock - + * @fs: <> filesystem we're getting special lock for + * + * Returns: gulm_lock_t + */ +STATIC gulm_lock_t * +get_special_lock (gulm_fs_t * fs) +{ + int err, len; + gulm_lock_t *lck = NULL; + uint8_t key[GIO_KEY_SIZE]; + + /* pack lockname */ + memset (key, 0, GIO_KEY_SIZE); + /* The F at the beginning doesn't mash with the G that prefixes every fs + * lock. + */ + memcpy (key, "FirstMount", 10); + len = strlen (fs->fs_name); + strncpy (&key[10], fs->fs_name, GIO_KEY_SIZE - 21); + len = MIN (len, GIO_KEY_SIZE - 21); + len += 11; + + err = internal_gulm_get_lock (fs, key, len, &lck); + + /* return pointer */ + return lck; +} + +/** + * do_lock_time_out - + * @d: + * + * after timeout, set cancel request on the handler queue. (since we cannot + * call it from within the timer code. + * + */ +static void +do_lock_time_out (unsigned long d) +{ + gulm_lock_t *lck = (gulm_lock_t *) d; + qu_function_call (&lck->fs->cq, gulm_cancel, lck); +} + +/** + * get_mount_lock - + * @fs: + * @first: + * + * Get the Firstmount lock. + * We try to grab it Exl. IF we get that, then we are the first client + * mounting this fs. Otherwise we grab it shared to show that there are + * clients using this fs. 
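+ *
+ * In outline (see the body below):
+ *
+ *	try EXL with LM_FLAG_TRY | LM_FLAG_NOEXP
+ *	  got it    -> *first = TRUE, we are the first mounter
+ *	  otherwise -> request SHARED, guarded by a 120 second timer
+ *	               that cancels the request and loops back to the
+ *	               EXL try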
+ *
+ * Returns: int
+ */
+int
+get_mount_lock (gulm_fs_t * fs, int *first)
+{
+        int err;
+        struct timer_list locktimeout;
+        gulm_lock_t *lck = NULL;
+        /*
+         * first we need to get the lock into the hash.
+         * then we can try to get it Exl with try and noexp.
+         * if the try fails, grab it shared.
+         */
+
+        lck = get_special_lock (fs);    /* there is only a mount lock. */
+        if (lck == NULL) {
+                err = -ENOMEM;
+                goto fail;
+        }
+
+        fs->mountlock = lck;
+ try_it_again:
+        *first = FALSE;         /* assume we're not first */
+
+        err = gulm_lock (lck, LM_ST_UNLOCKED, LM_ST_EXCLUSIVE,
+                         LM_FLAG_TRY | LM_FLAG_NOEXP);
+#ifndef USE_SYNC_LOCKING
+        wait_for_completion (&fs->sleep);
+#endif
+
+        if ((lck->result & LM_OUT_ST_MASK) == LM_ST_EXCLUSIVE) {
+                /* we got the lock, we're the first mounter. */
+                *first = TRUE;
+                log_msg (lgm_locking, "fsid=%s: Got mount lock Exclusive.\n",
+                         fs->fs_name);
+                return 0;
+        } else if ((lck->result & LM_OUT_ST_MASK) == LM_ST_UNLOCKED) {
+                log_msg (lgm_locking,
+                         "fsid=%s: Didn't get mount lock Exl, someone else "
+                         "was first, trying for shared.\n", fs->fs_name);
+
+                /* The try failed, so pick it up shared. */
+                /* There was a case (bug #220) where we could hang here.
+                 *
+                 * To handle this, we put up a timer for a couple of
+                 * minutes.  If it trips, it cancels our shared request,
+                 * which we then see, so we go back and try the EXL
+                 * again.  If the Firstmounter is fine and is just
+                 * taking a long time to do its work, this just ends
+                 * back here, no worse for the wear.
+                 *
+                 * Another way to do this is to wait for a killed
+                 * message for the master.  When we get that, and we're
+                 * pending shared here, send the gulm_cancel for the
+                 * mounter lock.
+                 * (too bad we are not in the fs list yet at this point.
+                 * (well, maybe that *isn't* a bad thing))
+                 */
+                init_timer (&locktimeout);
+                locktimeout.function = do_lock_time_out;
+                locktimeout.data = (unsigned long) lck;
+                mod_timer (&locktimeout, jiffies + (120 * HZ));
+                err = gulm_lock (lck, LM_ST_UNLOCKED, LM_ST_SHARED, 0);
+#ifndef USE_SYNC_LOCKING
+                wait_for_completion (&fs->sleep);
+#endif
+                del_timer (&locktimeout);
+
+                if ((lck->result & LM_OUT_ST_MASK) == LM_ST_SHARED) {
+                        /* kewl, we got it. */
+                        log_msg (lgm_locking,
+                                 "fsid=%s: Got mount lock shared.\n",
+                                 fs->fs_name);
+                        return 0;
+                }
+
+                log_msg (lgm_locking,
+                         "fsid=%s: Shared req timed out, trying Exl again.\n",
+                         fs->fs_name);
+                goto try_it_again;
+        }
+ fail:
+        log_err ("Exit get_mount_lock err=%d\n", err);
+        return err;
+}
+
+/**
+ * downgrade_mount_lock -
+ * @fs:
+ *
+ * drop the Firstmount lock down to shared.  This lets others mount.
+ *
+ * Returns: int
+ */
+int
+downgrade_mount_lock (gulm_fs_t * fs)
+{
+        int err;
+        gulm_lock_t *lck = (gulm_lock_t *) fs->mountlock;
+        /* we were first, so we have it exl.
+         * shift it to shared so others may mount.
+         */
+        err = gulm_lock (lck, LM_ST_EXCLUSIVE, LM_ST_SHARED, LM_FLAG_NOEXP);
+#ifndef USE_SYNC_LOCKING
+        wait_for_completion (&fs->sleep);
+#endif
+
+        if ((lck->result & LM_OUT_ST_MASK) != LM_ST_SHARED) {
+                log_err
+                    ("fsid=%s: Couldn't downgrade mount lock to shared!!!!!\n",
+                     fs->fs_name);
+        }
+        return 0;
+}
+
+/**
+ * drop_mount_lock - drop our hold on the firstmount lock.
+ * @fs: <> the filesystem pointer.
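+ *
+ * (Hedged: the likely callers are gulm_others_may_mount() for the
+ * downgrade above, once first-mount recovery is done, and the
+ * unmount path for this drop; neither caller appears in this hunk.)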
+ * + * Returns: int + */ +int +drop_mount_lock (gulm_fs_t * fs) +{ + int err; + gulm_lock_t *lck = (gulm_lock_t *) fs->mountlock; + + if (fs->mountlock == NULL) { + log_err ("fsid=%s: There's no Mount lock!!!!!\n", fs->fs_name); + return -1; + } + err = gulm_unlock (lck, LM_ST_SHARED); +#ifndef USE_SYNC_LOCKING + wait_for_completion (&fs->sleep); +#endif + + if (lck->result != LM_ST_UNLOCKED) + log_err ("fsid=%s: Couldn't unlock mount lock!!!!!!\n", + fs->fs_name); + gulm_put_lock (fs->mountlock); + fs->mountlock = NULL; + return 0; +} + +/*****************************************************************************/ +struct lm_lockops gulm_ops = { + lm_proto_name:PROTO_NAME, + lm_mount:gulm_mount, + lm_others_may_mount:gulm_others_may_mount, + lm_unmount:gulm_unmount, + lm_get_lock:gulm_get_lock, + lm_put_lock:gulm_put_lock, + lm_lock:gulm_lock, + lm_unlock:gulm_unlock, + lm_cancel:gulm_cancel, + lm_hold_lvb:gulm_hold_lvb, + lm_unhold_lvb:gulm_unhold_lvb, + lm_sync_lvb:gulm_sync_lvb, + lm_plock_get:gulm_plock_get, + lm_plock:gulm_plock, + lm_punlock:gulm_punlock, + lm_recovery_done:gulm_recovery_done, + lm_owner:THIS_MODULE, +}; +/* vim: set ai cin noet sw=8 ts=8 : */ diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_prints.h linux-patched/fs/gfs_locking/lock_gulm/gulm_prints.h --- linux-orig/fs/gfs_locking/lock_gulm/gulm_prints.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm_prints.h 2004-06-16 12:03:21.957894998 -0500 @@ -0,0 +1,45 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __gulm_prints_h__ +#define __gulm_prints_h__ +#include "gulm_log_msg_bits.h" + +#define PROTO_NAME "lock_gulm" + +#ifdef GULM_ASSERT +#undef GULM_ASSERT +#endif +#define GULM_ASSERT(x, do) \ +{ \ + if (!(x)) \ + { \ + printk("\n"PROTO_NAME": Assertion failed on line %d of file %s\n" \ + PROTO_NAME": assertion: \"%s\"\n", \ + __LINE__, __FILE__, #x ); \ + {do} \ + panic("\n"PROTO_NAME": Record message above and reboot.\n"); \ + } \ +} + +#define log_msg(v, fmt, args...) if(((v)&gulm_cm.verbosity)==(v)||(v)==lgm_Always) {\ + printk(PROTO_NAME ": " fmt, ## args); \ +} +#define log_err(fmt, args...) {\ + printk(KERN_ERR PROTO_NAME ": ERROR " fmt, ## args); \ +} + +#define log_nop(fmt, args...) +#define TICK printk("TICK==>" PROTO_NAME ": [%s:%d] pid:%ld\n",__FILE__,__LINE__,osi_pid()) + +#endif /*__gulm_prints_h__*/ diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_procinfo.c linux-patched/fs/gfs_locking/lock_gulm/gulm_procinfo.c --- linux-orig/fs/gfs_locking/lock_gulm/gulm_procinfo.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm_procinfo.c 2004-06-16 12:03:21.957894998 -0500 @@ -0,0 +1,165 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 
+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+**
+** This copyrighted material is made available to anyone wishing to use,
+** modify, copy, or redistribute it subject to the terms and conditions
+** of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#include "gulm.h"
+#include
+#include
+#include "util.h"
+
+extern gulm_cm_t gulm_cm;
+
+struct proc_dir_entry *gulm_proc_dir;
+struct proc_dir_entry *gulm_fs_proc_dir;
+
+/* the read operating function. */
+int
+gulm_fs_proc_read (char *buf, char **start, off_t off, int count, int *eof,
+                   void *data)
+{
+        gulm_fs_t *fs = (gulm_fs_t *) data;
+        count = 0;              /* ignore how much it wants */
+
+        count += sprintf (buf + count, "Filesystem: %s\nJID: %d\n"
+                          "handler_queue_cur: %d\n"
+                          "handler_queue_max: %d\n",
+                          fs->fs_name, fs->fsJID,
+                          fs->cq.task_count, fs->cq.task_max);
+
+        *eof = TRUE;
+        if (off >= count)
+                return 0;
+        *start = buf + off;
+        return (count - off);
+}
+
+/* read the stuff for all */
+int
+gulm_core_proc_read (char *buf, char **start, off_t off, int count,
+                     int *eof, void *data)
+{
+        count = 0;              /* ignore how much it wants */
+
+        count = sprintf (buf,
+                         "cluster id: %s\n"
+                         "my name: %s\n", gulm_cm.clusterID, gulm_cm.myName);
+
+        *eof = TRUE;
+        if (off >= count)
+                return 0;
+        *start = buf + off;
+        return (count - off);
+}
+
+int
+gulm_lt_proc_read (char *buf, char **start, off_t off, int count,
+                   int *eof, void *data)
+{
+        lock_table_t *lt = (lock_table_t *) data;
+        count = 0;              /* ignore how much it wants */
+
+        count += sprintf (buf + count, "\n"
+                          "lock counts:\n"
+                          "  total: %d\n"
+                          "    unl: %d\n"
+                          "    exl: %d\n"
+                          "    shd: %d\n"
+                          "    dfr: %d\n"
+                          "pending: %d\n"
+                          "   lvbs: %d\n"
+                          "   lops: %d\n\n",
+                          lt->locks_total,
+                          lt->locks_unl,
+                          lt->locks_exl,
+                          lt->locks_shd,
+                          lt->locks_dfr,
+                          atomic_read (&lt->locks_pending),
+                          lt->locks_lvbs, lt->lops);
+
+        *eof = TRUE;
+        if (off >= count)
+                return 0;
+        *start = buf + off;
+        return (count - off);
+}
+
+/* add entry to our proc folder
+ * call this on mount.
+ * */
+int
+add_to_proc (gulm_fs_t * fs)
+{
+        if (!(create_proc_read_entry (fs->fs_name, S_IFREG | S_IRUGO,
+                                      gulm_fs_proc_dir, gulm_fs_proc_read,
+                                      (void *) fs))) {
+                log_err ("couldn't register proc entry for %s\n",
+                         fs->fs_name);
+                return -EINVAL;
+        }
+        return 0;
+}
+
+/* get rid of it
+ * this on umount.
+ * */
+void
+remove_from_proc (gulm_fs_t * fs)
+{
+        remove_proc_entry (fs->fs_name, gulm_fs_proc_dir);
+}
+
+ /* create our own root dir.
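+ *
+ * For example (hedged; "gfs1" stands in for a real fs name), after
+ * one mount the tree built here looks like:
+ *
+ *	/proc/gulm/core
+ *	/proc/gulm/lockspace
+ *	/proc/gulm/filesystems/gfs1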
+ * initmodule + * */ +int +init_proc_dir (void) +{ + if ((gulm_proc_dir = proc_mkdir ("gulm", &proc_root)) == NULL) { + log_err ("cannot create the gulm directory in /proc\n"); + return -EINVAL; + } + if (!(create_proc_read_entry ("core", S_IFREG | S_IRUGO, gulm_proc_dir, + gulm_core_proc_read, NULL))) { + log_err ("couldn't register proc entry for core\n"); + remove_proc_entry ("gulm", &proc_root); + return -EINVAL; + } + if ((gulm_fs_proc_dir = + proc_mkdir ("filesystems", gulm_proc_dir)) == NULL) { + log_err + ("cannot create the filesystems directory in /proc/gulm\n"); + remove_proc_entry ("core", gulm_proc_dir); + remove_proc_entry ("gulm", &proc_root); + return -EINVAL; + } + if (!(create_proc_read_entry ("lockspace", S_IFREG | S_IRUGO, + gulm_proc_dir, gulm_lt_proc_read, + (void *) &gulm_cm.ltpx))) { + remove_proc_entry ("filesystems", gulm_proc_dir); + remove_proc_entry ("core", gulm_proc_dir); + remove_proc_entry ("gulm", &proc_root); + return -EINVAL; + } + + return 0; +} + +/* destroy it + * close module + * */ +void +remove_proc_dir (void) +{ + remove_proc_entry ("lockspace", gulm_proc_dir); + remove_proc_entry ("filesystems", gulm_proc_dir); + remove_proc_entry ("core", gulm_proc_dir); + remove_proc_entry ("gulm", &proc_root); +} diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_procinfo.h linux-patched/fs/gfs_locking/lock_gulm/gulm_procinfo.h --- linux-orig/fs/gfs_locking/lock_gulm/gulm_procinfo.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm_procinfo.h 2004-06-16 12:03:21.958894765 -0500 @@ -0,0 +1,22 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __procinfo_h__ +#define __procinfo_h__ +int add_to_proc (gulm_fs_t * fs); +void remove_from_proc (gulm_fs_t * fs); +void remove_locktables_from_proc (void); +void add_locktables_to_proc (void); +int init_proc_dir (void); +void remove_proc_dir (void); +#endif /*__procinfo_h__*/ diff -urN linux-orig/fs/gfs_locking/lock_gulm/handler.c linux-patched/fs/gfs_locking/lock_gulm/handler.c --- linux-orig/fs/gfs_locking/lock_gulm/handler.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_gulm/handler.c 2004-06-16 12:03:21.958894765 -0500 @@ -0,0 +1,343 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
+** +******************************************************************************* +******************************************************************************/ + +#include "gulm.h" + +#include +#include +#include +#include +#define __KERNEL_SYSCALLS__ +#include + +#include "handler.h" + +/* things about myself + * mostly just for verbosity here. + * */ +extern gulm_cm_t gulm_cm; + +/* the task struct */ +typedef struct runtask_s { + struct list_head rt_list; + + gulm_fn fn; + lm_callback_t cb; + lm_fsdata_t *fsdata; + int type; + uint64_t lmnum; + unsigned int lmtype; + int result; + +} runtask_t; +/* ooo crufty. */ +#define LM_CB_GULM_FN 169 +#if LM_CB_GULM_FN == LM_CB_NEED_E || \ + LM_CB_GULM_FN == LM_CB_NEED_D || \ + LM_CB_GULM_FN == LM_CB_NEED_S || \ + LM_CB_GULM_FN == LM_CB_NEED_RECOVERY || \ + LM_CB_GULM_FN == LM_CB_DROPLOCKS || \ + LM_CB_GULM_FN == LM_CB_ASYNC +#error "LM_CB_GULM_FN collision with other LM_CB_*" +#endif + +static __inline__ int +queue_empty (callback_qu_t * cq) +{ + int ret; + spin_lock (&cq->list_lock); + ret = list_empty (&cq->run_tasks); + spin_unlock (&cq->list_lock); + return ret; +} + +/** + * handler - + * @d: + * + * + * Returns: int + */ +int +handler (void *d) +{ + callback_qu_t *cq = (callback_qu_t *) d; + runtask_t *rt; + struct list_head *tmp; + struct lm_lockname lockname; + struct lm_async_cb acb; + + daemonize ("gulm_Cb_Handler"); + atomic_inc (&cq->num_threads); + complete (&cq->startup); + + while (cq->running) { + do { + DECLARE_WAITQUEUE (__wait_chan, current); + current->state = TASK_INTERRUPTIBLE; + add_wait_queue (&cq->waiter, &__wait_chan); + if (queue_empty (cq)) + schedule (); + remove_wait_queue (&cq->waiter, &__wait_chan); + current->state = TASK_RUNNING; + } while (0); + + if (!cq->running) + break; + /* remove item from list */ + spin_lock (&cq->list_lock); + if (list_empty (&cq->run_tasks)) { + spin_unlock (&cq->list_lock); + continue; /* nothing here. move on */ + } + /* take items off the end of the list, since we add them to the + * beginning. + */ + tmp = (&cq->run_tasks)->prev; + list_del (tmp); + cq->task_count--; + spin_unlock (&cq->list_lock); + + rt = list_entry (tmp, runtask_t, rt_list); + + if (rt->type == LM_CB_ASYNC) { + acb.lc_name.ln_number = rt->lmnum; + acb.lc_name.ln_type = rt->lmtype; + acb.lc_ret = rt->result; + rt->cb (rt->fsdata, rt->type, &acb); + } else if (rt->type == LM_CB_GULM_FN) { + rt->fn (rt->fsdata); + } else { + lockname.ln_number = rt->lmnum; + lockname.ln_type = rt->lmtype; + rt->cb (rt->fsdata, rt->type, &lockname); + } + + kfree (rt); + + } /*while(running) */ + + atomic_dec (&cq->num_threads); + complete (&cq->startup); + return 0; +} + +/** + * display_handler_queue - + * @cq: + * + * remember, items are added to the head, and removed from the tail. + * So the last item listed, is the next item to be handled. + * + */ +void +display_handler_queue (callback_qu_t * cq) +{ + struct list_head *lltmp; + runtask_t *rt; + int i = 0; + log_msg (lgm_Always, "Dumping Handler queue with %d items, max %d\n", + cq->task_count, cq->task_max); + spin_lock (&cq->list_lock); + list_for_each (lltmp, &cq->run_tasks) { + rt = list_entry (lltmp, runtask_t, rt_list); + if (rt->type == LM_CB_ASYNC) { + log_msg (lgm_Always, + "%4d ASYNC (%" PRIu64 ", %u) result:%#x\n", + i, rt->lmnum, rt->lmtype, rt->result); + } else if (rt->type == LM_CB_GULM_FN) { + log_msg (lgm_Always, "%4d GULM FN func:%p data:%p\n", + i, rt->fn, rt->fsdata); + } else { /* callback. 
*/ + log_msg (lgm_Always, + "%4d CALLBACK req:%u (%" PRIu64 ", %u)\n", i, + rt->type, rt->lmnum, rt->lmtype); + } + i++; + } + spin_unlock (&cq->list_lock); +} + +/** + * alloc_runtask - + * Returns: runtask_t + */ +runtask_t * +alloc_runtask (void) +{ + runtask_t *rt; + rt = kmalloc (sizeof (runtask_t), GFP_KERNEL); + return rt; +} + +/** + * qu_function_call - + * @cq: + * @fn: + * @data: + * + * Generic function execing on the handler thread. Mostly so I can add + * single things quick without having to build all the details into the + * handler queues. + * + * Returns: int + */ +int +qu_function_call (callback_qu_t * cq, gulm_fn fn, void *data) +{ + runtask_t *rt; + rt = alloc_runtask (); + if (rt == NULL) + return -ENOMEM; + rt->cb = NULL; + rt->fn = fn; + rt->fsdata = data; + rt->type = LM_CB_GULM_FN; + rt->lmtype = 0; + rt->lmnum = 0; + rt->result = 0; + INIT_LIST_HEAD (&rt->rt_list); + spin_lock (&cq->list_lock); + list_add (&rt->rt_list, &cq->run_tasks); + cq->task_count++; + if (cq->task_count > cq->task_max) + cq->task_max = cq->task_count; + spin_unlock (&cq->list_lock); + wake_up (&cq->waiter); + return 0; +} + +/** + * qu_async_rpl - + * @cq: + * @cb: + * @fsdata: + * @lockname: + * @result: + * + * + * Returns: int + */ +int +qu_async_rpl (callback_qu_t * cq, lm_callback_t cb, lm_fsdata_t * fsdata, + struct lm_lockname *lockname, int result) +{ + runtask_t *rt; + rt = alloc_runtask (); + if (rt == NULL) + return -ENOMEM; + rt->cb = cb; + rt->fsdata = fsdata; + rt->type = LM_CB_ASYNC; + rt->lmtype = lockname->ln_type; + rt->lmnum = lockname->ln_number; + rt->result = result; + INIT_LIST_HEAD (&rt->rt_list); + spin_lock (&cq->list_lock); + list_add (&rt->rt_list, &cq->run_tasks); + cq->task_count++; + if (cq->task_count > cq->task_max) + cq->task_max = cq->task_count; + spin_unlock (&cq->list_lock); + wake_up (&cq->waiter); + return 0; +} + +/** + * qu_drop_req - + * + * Returns: <0:Error; =0:Ok + */ +int +qu_drop_req (callback_qu_t * cq, lm_callback_t cb, lm_fsdata_t * fsdata, + int type, uint8_t lmtype, uint64_t lmnum) +{ + runtask_t *rt; + rt = alloc_runtask (); + if (rt == NULL) + return -ENOMEM; + rt->cb = cb; + rt->fsdata = fsdata; + rt->type = type; + rt->lmtype = lmtype; + rt->lmnum = lmnum; + rt->result = 0; + INIT_LIST_HEAD (&rt->rt_list); + spin_lock (&cq->list_lock); + list_add (&rt->rt_list, &cq->run_tasks); + cq->task_count++; + if (cq->task_count > cq->task_max) + cq->task_max = cq->task_count; + spin_unlock (&cq->list_lock); + wake_up (&cq->waiter); + return 0; +} + +/** + * stop_callback_qu - stop the handler thread + */ +void +stop_callback_qu (callback_qu_t * cq) +{ + struct list_head *lltmp, *tmp; + runtask_t *rt; + + if (cq->running) { + cq->running = FALSE; + /* make sure all thread stop. + * */ + while (atomic_read (&cq->num_threads) > 0) { + wake_up (&cq->waiter); + wait_for_completion (&cq->startup); + } + /* clear out any left overs. 
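+ *
+ * Usage sketch of the queue as a whole, hedged (the mount path that
+ * does this is not in this hunk):
+ *
+ *	start_callback_qu (&fs->cq, 2);	-- two handler threads
+ *	qu_drop_req (&fs->cq, fs->cb, fs->fsdata,
+ *		     LM_CB_NEED_E, ln_type, ln_number);
+ *	stop_callback_qu (&fs->cq);	-- at unmount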
*/ + list_for_each_safe (tmp, lltmp, &cq->run_tasks) { + rt = list_entry (tmp, runtask_t, rt_list); + list_del (tmp); + kfree (rt); + } + } +} + +/** + * start_callback_qu - + * + * Returns: <0:Error, >=0:Ok + */ +int +start_callback_qu (callback_qu_t * cq, int cnt) +{ + int err; + INIT_LIST_HEAD (&cq->run_tasks); + spin_lock_init (&cq->list_lock); + init_completion (&cq->startup); + init_waitqueue_head (&cq->waiter); + atomic_set (&cq->num_threads, 0); + cq->running = TRUE; + cq->task_count = 0; + cq->task_max = 0; + if (cnt <= 0) + cnt = 2; + for (; cnt > 0; cnt--) { + err = kernel_thread (handler, cq, 0); /* XXX linux part */ + if (err < 0) { + stop_callback_qu (cq); + /* calling stop here might not behave correctly in all error + * cases. + */ + return err; + } + wait_for_completion (&cq->startup); + } + return 0; +} diff -urN linux-orig/fs/gfs_locking/lock_gulm/handler.h linux-patched/fs/gfs_locking/lock_gulm/handler.h --- linux-orig/fs/gfs_locking/lock_gulm/handler.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_gulm/handler.h 2004-06-16 12:03:21.958894765 -0500 @@ -0,0 +1,42 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __handler_c__ +#define __handler_c__ +#include + +struct callback_qu_s { + struct completion startup; + int running; + int task_count; + int task_max; + struct list_head run_tasks; + spinlock_t list_lock; + wait_queue_head_t waiter; + atomic_t num_threads; +}; +typedef struct callback_qu_s callback_qu_t; + +/* kinda an excess overloading */ +typedef void (*gulm_fn) (void *); +int qu_function_call (callback_qu_t * cq, gulm_fn fn, void *data); + +int qu_async_rpl (callback_qu_t * cq, lm_callback_t cb, lm_fsdata_t * fsdata, + struct lm_lockname *lockname, int result); +int qu_drop_req (callback_qu_t * cq, lm_callback_t cb, lm_fsdata_t * fsdata, + int type, uint8_t lmtype, uint64_t lmnum); +int start_callback_qu (callback_qu_t * cq, int cnt); +void stop_callback_qu (callback_qu_t * cq); +void display_handler_queue (callback_qu_t * cq); + +#endif /*__handler_c__*/ diff -urN linux-orig/fs/gfs_locking/lock_gulm/lg_core.c linux-patched/fs/gfs_locking/lock_gulm/lg_core.c --- linux-orig/fs/gfs_locking/lock_gulm/lg_core.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_gulm/lg_core.c 2004-06-16 12:03:21.958894765 -0500 @@ -0,0 +1,724 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
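
An illustrative sketch, not part of the patch, of how the callback-queue interface above is meant to be driven; the my_* names are hypothetical. start_callback_qu() spawns the handler threads, qu_function_call() feeds them, and stop_callback_qu() drains and joins:

#include "handler.h"

static callback_qu_t my_cq;

static void my_cleanup (void *data)
{
	kfree (data);		/* runs later, on a gulm_Cb_Handler thread */
}

static int my_start (void)
{
	return start_callback_qu (&my_cq, 2);	/* two handler threads */
}

static int my_defer (void *data)
{
	/* tasks are added at the head of run_tasks and popped from the
	 * tail, so they still execute in FIFO order */
	return qu_function_call (&my_cq, my_cleanup, data);
}

static void my_stop (void)
{
	stop_callback_qu (&my_cq);	/* wakes threads, frees leftovers */
}
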
+** +******************************************************************************* +******************************************************************************/ + +/* All of the core related functions for services are here. */ + +#include "lg_priv.h" + +/** + * lg_core_selector - + * @ulm_interface_p: + * + * + * Returns: int + */ +xdr_socket +lg_core_selector (gulm_interface_p lgp) +{ + gulm_interface_t *lg = (gulm_interface_t *) lgp; + /* make sure it is a gulm_interface_p. */ + if (lg == NULL || lg->first_magic != LGMAGIC + || lg->last_magic != LGMAGIC) +#ifdef __KERNEL__ + return NULL; +#else + return -EINVAL; +#endif + + return lg->core_fd; +} + +/** + * lg_core_handle_messages - + * @ulm_interface_p: + * @lg_core_callbacks_t: + * + * + * Returns: int + */ +int +lg_core_handle_messages (gulm_interface_p lgp, lg_core_callbacks_t * ccbp, + void *misc) +{ + gulm_interface_t *lg = (gulm_interface_t *) lgp; + xdr_dec_t *dec; + int err = 0; + uint64_t x_gen; + uint32_t x_code, x_error, x_rank; + struct in6_addr x_ip; + uint8_t x_state, x_mode; + + /* make sure it is a gulm_interface_p. */ + if (lg == NULL) + return -EINVAL; + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC) + return -EINVAL; + + if (lg->core_enc == NULL || lg->core_dec == NULL) + return -EBADR; + + down (&lg->core_recver); + if (lg->in_core_hm) + return -EDEADLK; + lg->in_core_hm = TRUE; + up (&lg->core_recver); + + dec = lg->core_dec; + + err = xdr_dec_uint32 (dec, &x_code); + if (err != 0) + goto exit; + + if (gulm_core_login_rpl == x_code) { + do { + if ((err = xdr_dec_uint64 (dec, &x_gen)) < 0) + break; + if ((err = xdr_dec_uint32 (dec, &x_error)) < 0) + break; + if ((err = xdr_dec_uint32 (dec, &x_rank)) < 0) + break; + if ((err = xdr_dec_uint8 (dec, &x_state)) < 0) + break; + } while (0); + if (err != 0) + goto exit; + if (ccbp->login_reply == NULL) { + err = 0; + goto exit; + } + err = ccbp->login_reply (misc, x_gen, x_error, x_rank, x_state); + goto exit; + } else if (gulm_core_logout_rpl == x_code) { + if ((err = xdr_dec_uint32 (dec, &x_error)) != 0) + goto exit; + if (ccbp->logout_reply != NULL) { + err = ccbp->logout_reply (misc); + } + + xdr_close (&lg->core_fd); + xdr_enc_release (lg->core_enc); + lg->core_enc = NULL; + xdr_dec_release (lg->core_dec); + lg->core_dec = NULL; + + goto exit; + } else if (gulm_core_mbr_lstrpl == x_code) { + if (ccbp->nodelist != NULL) { + err = ccbp->nodelist (misc, lglcb_start, NULL, 0, 0); + if (err != 0) + goto exit; + } + do { + if ((err = xdr_dec_list_start (dec)) != 0) + break; + while (xdr_dec_list_stop (dec) != 0) { + if ((err = + xdr_dec_string_ag (dec, &lg->cfba, + &lg->cfba_len)) != 0) + break; + if ((err = xdr_dec_ipv6 (dec, &x_ip)) != 0) + break; + if ((err = xdr_dec_uint8 (dec, &x_state)) != 0) + break; + if ((err = xdr_dec_uint8 (dec, &x_mode)) != 0) + break; + if ((err = xdr_dec_uint8 (dec, &x_mode)) != 0) + break; + if ((err = xdr_dec_uint32 (dec, &x_rank)) != 0) + break; + if ((err = xdr_dec_uint64 (dec, &x_gen)) != 0) + break; + if ((err = xdr_dec_uint64 (dec, &x_gen)) != 0) + break; + if ((err = xdr_dec_uint64 (dec, &x_gen)) != 0) + break; + + if (ccbp->nodelist != NULL) { + err = + ccbp->nodelist (misc, lglcb_item, + lg->cfba, &x_ip, + x_state); + if (err != 0) + goto exit; + } + + } + } while (0); + if (err != 0) { + goto exit; + } + if (ccbp->nodelist == NULL) { + err = 0; + goto exit; + } + err = ccbp->nodelist (misc, lglcb_stop, NULL, 0, 0); + goto exit; + } else if (gulm_core_state_chgs == x_code) { + do { + if ((err = xdr_dec_uint8 (dec, 
&x_state)) != 0) + break; + if (x_state == gio_Mbr_ama_Slave) { + if ((err = xdr_dec_ipv6 (dec, &x_ip)) != 0) + break; + if ((err = + xdr_dec_string_ag (dec, &lg->cfba, + &lg->cfba_len)) != 0) + break; + } + } while (0); + if (err != 0) { + goto exit; + } + if (ccbp->statechange == NULL) { + err = 0; + goto exit; + } + err = ccbp->statechange (misc, x_state, &x_ip, lg->cfba); + goto exit; + } else if (gulm_core_mbr_updt == x_code) { + do { + if ((err = + xdr_dec_string_ag (dec, &lg->cfba, + &lg->cfba_len)) != 0) + break; + if ((err = xdr_dec_ipv6 (dec, &x_ip)) != 0) + break; + if ((err = xdr_dec_uint8 (dec, &x_state)) != 0) + break; + } while (0); + if (err != 0) { + goto exit; + } + if (ccbp->nodechange == NULL) { + err = 0; + goto exit; + } + err = ccbp->nodechange (misc, lg->cfba, &x_ip, x_state); + goto exit; + } else if (gulm_core_res_list == x_code) { + if (ccbp->service_list != NULL) { + if ((err = + ccbp->service_list (misc, lglcb_start, NULL)) != 0) + goto exit; + } + do { + if ((err = xdr_dec_list_start (dec)) != 0) + break; + while (xdr_dec_list_stop (dec)) { + if ((err = + xdr_dec_string_ag (dec, &lg->cfba, + &lg->cfba_len)) != 0) + break; + if (ccbp->service_list != NULL) { + if ((err = + ccbp->service_list (misc, + lglcb_item, + lg->cfba)) != + 0) { + goto exit; + } + } + } + } while (0); + if (err != 0) { + goto exit; + } + if (ccbp->service_list == NULL) { + err = 0; + goto exit; + } + err = ccbp->service_list (misc, lglcb_stop, NULL); + goto exit; + } else if (gulm_info_stats_rpl == x_code) { + if (ccbp->status != NULL) { + if ((err = + ccbp->status (misc, lglcb_start, NULL, NULL)) != 0) + goto exit; + } + do { + if ((err = xdr_dec_list_start (dec)) != 0) + break; + while (xdr_dec_list_stop (dec) != 0) { + if ((err = + xdr_dec_string_ag (dec, &lg->cfba, + &lg->cfba_len)) != 0) + break; + if ((err = + xdr_dec_string_ag (dec, &lg->cfbb, + &lg->cfbb_len)) != 0) + break; + if (ccbp->status != NULL) { + if ((err = + ccbp->status (misc, lglcb_item, + lg->cfba, + lg->cfbb)) != 0) { + goto exit; + } + } + } + } while (0); + if (err != 0) { + goto exit; + } + if (ccbp->status == NULL) { + err = 0; + goto exit; + } + err = ccbp->status (misc, lglcb_stop, NULL, NULL); + goto exit; + } else if (gulm_err_reply == x_code) { + if ((err = xdr_dec_uint32 (dec, &x_code)) != 0) + goto exit; + if ((err = xdr_dec_uint32 (dec, &x_error)) != 0) + goto exit; + if (ccbp->error == NULL) { + err = 0; + goto exit; + } + err = ccbp->error (misc, x_error); + goto exit; + } else { + /* unknown code. what to do? */ + err = -EPROTO; + goto exit; + } + + exit: + lg->in_core_hm = FALSE; + return err; +} + +/** + * lg_core_login - + * @lgp: + * @important: + * + * On any error, things are closed and released to the state of things + * before you called login. + * + * Returns: int + */ +int +lg_core_login (gulm_interface_p lgp, int important) +{ + gulm_interface_t *lg = (gulm_interface_t *) lgp; + struct sockaddr_in6 adr; + int err; + xdr_socket cfd; + xdr_enc_t *enc; + xdr_dec_t *dec; + + /* make sure it is a gulm_interface_p. 
*/
+	if (lg == NULL)
+		return -EINVAL;
+	if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
+		return -EINVAL;
+
+	adr.sin6_family = AF_INET6;
+	adr.sin6_addr = in6addr_loopback;
+	adr.sin6_port = htons (lg->core_port);
+
+	if ((err = xdr_open (&cfd)) < 0) {
+		return err;
+	}
+
+	if ((err = xdr_connect (&adr, cfd)) < 0) {
+		xdr_close (&cfd);
+		return err;
+	}
+
+	enc = xdr_enc_init (cfd, 128);
+	if (enc == NULL) {
+		xdr_close (&cfd);
+		return -ENOMEM;
+	}
+
+	dec = xdr_dec_init (cfd, 128);
+	if (dec == NULL) {
+		xdr_enc_release (enc);
+		xdr_close (&cfd);
+		return -ENOMEM;
+	}
+
+	do {
+		if ((err = xdr_enc_uint32 (enc, gulm_core_reslgn_req)) < 0)
+			break;
+		if ((err = xdr_enc_uint32 (enc, GIO_WIREPROT_VERS)) < 0)
+			break;
+		if ((err = xdr_enc_string (enc, lg->clusterID)) < 0)
+			break;
+		if ((err = xdr_enc_string (enc, lg->service_name)) < 0)
+			break;
+		if ((err =
+		     xdr_enc_uint32 (enc,
+				     important ? gulm_svc_opt_important : 0)) != 0)
+			break;
+		if ((err = xdr_enc_flush (enc)) < 0)
+			break;
+	} while (0);
+	if (err != 0) {
+		xdr_dec_release (dec);
+		xdr_enc_release (enc);
+		xdr_close (&cfd);
+		return err;
+	}
+
+	down (&lg->core_sender);
+	lg->core_fd = cfd;
+	lg->core_enc = enc;
+	lg->core_dec = dec;
+	up (&lg->core_sender);
+
+	return 0;
+}
+
+/**
+ * lg_core_logout -
+ * @lgp:
+ *
+ *
+ * Returns: int
+ */
+int
+lg_core_logout (gulm_interface_p lgp)
+{
+	gulm_interface_t *lg = (gulm_interface_t *) lgp;
+	xdr_enc_t *enc;
+	int err;
+
+	/* make sure it is a gulm_interface_p. */
+	if (lg == NULL)
+		return -EINVAL;
+	if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
+		return -EINVAL;
+
+	if (lg->core_fd < 0 || lg->core_enc == NULL || lg->core_dec == NULL)
+		return -EINVAL;
+
+	enc = lg->core_enc;
+
+	down (&lg->core_sender);
+	do {
+		if ((err = xdr_enc_uint32 (enc, gulm_core_logout_req)) != 0)
+			break;
+		if ((err = xdr_enc_string (enc, lg->service_name)) != 0)
+			break;
+		if ((err = xdr_enc_uint8 (enc, gio_Mbr_ama_Resource)) != 0)
+			break;
+		if ((err = xdr_enc_flush (enc)) != 0)
+			break;
+	} while (0);
+	up (&lg->core_sender);
+	return err;
+}
+
+/**
+ * lg_core_nodeinfo -
+ * @lgp:
+ * @nodename:
+ *
+ *
+ * Returns: int
+ */
+int
+lg_core_nodeinfo (gulm_interface_p lgp, char *nodename)
+{
+	gulm_interface_t *lg = (gulm_interface_t *) lgp;
+	xdr_enc_t *enc;
+	int err;
+
+	/* make sure it is a gulm_interface_p. */
+	if (lg == NULL)
+		return -EINVAL;
+	if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
+		return -EINVAL;
+
+	if (lg->core_fd < 0 || lg->core_enc == NULL || lg->core_dec == NULL)
+		return -EINVAL;
+
+	if (nodename == NULL)
+		return -EINVAL;
+
+	enc = lg->core_enc;
+
+	down (&lg->core_sender);
+	do {
+		if ((err = xdr_enc_uint32 (enc, gulm_core_mbr_req)) != 0)
+			break;
+		if ((err = xdr_enc_string (enc, nodename)) != 0)
+			break;
+		if ((err = xdr_enc_flush (enc)) != 0)
+			break;
+	} while (0);
+	up (&lg->core_sender);
+	return err;
+}
+
+/**
+ * lg_core_nodelist -
+ * @lgp:
+ *
+ *
+ * Returns: int
+ */
+int
+lg_core_nodelist (gulm_interface_p lgp)
+{
+	gulm_interface_t *lg = (gulm_interface_t *) lgp;
+	xdr_enc_t *enc;
+	int err;
+
+	/* make sure it is a gulm_interface_p.
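
The login and request routines above only queue bytes on the socket; every reply comes back through lg_core_handle_messages(). A minimal sketch of the receive side, assuming a dedicated thread that may block in the call; the my_* names are hypothetical:

static int my_login_reply (void *misc, uint64_t gen, uint32_t error,
			   uint32_t rank, uint8_t corestate)
{
	/* error is one of the lg_err_* codes from libgulm.h */
	return error ? -ECONNABORTED : 0;
}

static lg_core_callbacks_t my_core_cb = {
	.login_reply = my_login_reply,
	/* message types with NULL callbacks are simply skipped */
};

static int my_core_recv_loop (gulm_interface_p lg)
{
	int err;

	do {
		/* handles exactly one core message per call */
		err = lg_core_handle_messages (lg, &my_core_cb, NULL);
	} while (err == 0);
	return err;
}
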
*/ + if (lg == NULL) + return -EINVAL; + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC) + return -EINVAL; + + if (lg->core_fd < 0 || lg->core_enc == NULL || lg->core_dec == NULL) + return -EINVAL; + + enc = lg->core_enc; + + down (&lg->core_sender); + do { + if ((err = xdr_enc_uint32 (enc, gulm_core_mbr_lstreq)) != 0) + break; + if ((err = xdr_enc_flush (enc)) != 0) + break; + } while (0); + up (&lg->core_sender); + return err; +} + +/** + * lg_core_servicelist - + * @lgp: + * + * + * Returns: int + */ +int +lg_core_servicelist (gulm_interface_p lgp) +{ + gulm_interface_t *lg = (gulm_interface_t *) lgp; + xdr_enc_t *enc; + int err; + + /* make sure it is a gulm_interface_p. */ + if (lg == NULL) + return -EINVAL; + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC) + return -EINVAL; + + if (lg->core_fd < 0 || lg->core_enc == NULL || lg->core_dec == NULL) + return -EINVAL; + + enc = lg->core_enc; + + down (&lg->core_sender); + do { + if ((err = xdr_enc_uint32 (enc, gulm_core_res_req)) != 0) + break; + if ((err = xdr_enc_flush (enc)) != 0) + break; + } while (0); + up (&lg->core_sender); + return err; +} + +/** + * lg_core_corestate - + * @lgp: + * + * + * Returns: int + */ +int +lg_core_corestate (gulm_interface_p lgp) +{ + gulm_interface_t *lg = (gulm_interface_t *) lgp; + xdr_enc_t *enc; + int err; + + /* make sure it is a gulm_interface_p. */ + if (lg == NULL) + return -EINVAL; + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC) + return -EINVAL; + + if (lg->core_fd < 0 || lg->core_enc == NULL || lg->core_dec == NULL) + return -EINVAL; + + enc = lg->core_enc; + + down (&lg->core_sender); + do { + if ((err = xdr_enc_uint32 (enc, gulm_core_state_req)) != 0) + break; + if ((err = xdr_enc_flush (enc)) != 0) + break; + } while (0); + up (&lg->core_sender); + return err; +} + +/** + * lg_core_shutdown - + * @lgp: + * + * + * Returns: int + */ +int +lg_core_shutdown (gulm_interface_p lgp) +{ + gulm_interface_t *lg = (gulm_interface_t *) lgp; + xdr_enc_t *enc; + int err; + + /* make sure it is a gulm_interface_p. */ + if (lg == NULL) + return -EINVAL; + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC) + return -EINVAL; + + if (lg->core_fd < 0 || lg->core_enc == NULL || lg->core_dec == NULL) + return -EINVAL; + + enc = lg->core_enc; + + down (&lg->core_sender); + do { + if ((err = xdr_enc_uint32 (enc, gulm_core_shutdown)) != 0) + break; + if ((err = xdr_enc_flush (enc)) != 0) + break; + } while (0); + up (&lg->core_sender); + return err; +} + +/** + * lg_core_forceexpire - + * @lgp: + * @node_name: + * + * + * Returns: int + */ +int +lg_core_forceexpire (gulm_interface_p lgp, char *nodename) +{ + gulm_interface_t *lg = (gulm_interface_t *) lgp; + xdr_enc_t *enc; + int err; + + /* make sure it is a gulm_interface_p. 
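
Each of the small request routines here repeats the same send pattern. A condensed sketch of that pattern, for illustration only; my_send_simple is not a helper the patch defines:

static int my_send_simple (gulm_interface_t *lg, uint32_t opcode)
{
	int err;

	down (&lg->core_sender);	/* one writer on the wire at a time */
	do {
		if ((err = xdr_enc_uint32 (lg->core_enc, opcode)) != 0)
			break;
		if ((err = xdr_enc_flush (lg->core_enc)) != 0)
			break;
	} while (0);
	up (&lg->core_sender);
	return err;
}

/* e.g. my_send_simple (lg, gulm_core_state_req); */
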
*/ + if (lg == NULL) + return -EINVAL; + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC) + return -EINVAL; + + if (lg->core_fd < 0 || lg->core_enc == NULL || lg->core_dec == NULL) + return -EINVAL; + + if (nodename == NULL) + return -EINVAL; + + enc = lg->core_enc; + + down (&lg->core_sender); + do { + if ((err = xdr_enc_uint32 (enc, gulm_core_mbr_force)) != 0) + break; + if ((err = xdr_enc_string (enc, nodename)) != 0) + break; + if ((err = xdr_enc_flush (enc)) != 0) + break; + } while (0); + up (&lg->core_sender); + return err; +} + +/** + * lg_core_forcepending - + * @lgp: + * + * + * Returns: int + */ +int +lg_core_forcepending (gulm_interface_p lgp) +{ + gulm_interface_t *lg = (gulm_interface_t *) lgp; + xdr_enc_t *enc; + int err; + + /* make sure it is a gulm_interface_p. */ + if (lg == NULL) + return -EINVAL; + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC) + return -EINVAL; + + if (lg->core_fd < 0 || lg->core_enc == NULL || lg->core_dec == NULL) + return -EINVAL; + + enc = lg->core_enc; + + down (&lg->core_sender); + do { + if ((err = xdr_enc_uint32 (enc, gulm_core_forcepend)) != 0) + break; + if ((err = xdr_enc_flush (enc)) != 0) + break; + } while (0); + up (&lg->core_sender); + return err; +} + +/** + * lg_core_status - + * @lgp: + * + * + * Returns: int + */ +int +lg_core_status (gulm_interface_p lgp) +{ + gulm_interface_t *lg = (gulm_interface_t *) lgp; + xdr_enc_t *enc; + int err; + + /* make sure it is a gulm_interface_p. */ + if (lg == NULL) + return -EINVAL; + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC) + return -EINVAL; + + if (lg->core_fd < 0 || lg->core_enc == NULL || lg->core_dec == NULL) + return -EINVAL; + + enc = lg->core_enc; + + down (&lg->core_sender); + do { + if ((err = xdr_enc_uint32 (enc, gulm_info_stats_req)) != 0) + break; + if ((err = xdr_enc_flush (enc)) != 0) + break; + } while (0); + up (&lg->core_sender); + return err; +} diff -urN linux-orig/fs/gfs_locking/lock_gulm/lg_lock.c linux-patched/fs/gfs_locking/lock_gulm/lg_lock.c --- linux-orig/fs/gfs_locking/lock_gulm/lg_lock.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_gulm/lg_lock.c 2004-06-16 12:03:21.958894765 -0500 @@ -0,0 +1,667 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +/* all of the lock related fucntion are here. */ +#include "lg_priv.h" + +/** + * lg_lock_selector - + * @ulm_interface_p: + * + * + * Returns: int + */ +xdr_socket +lg_lock_selector (gulm_interface_p lgp) +{ + gulm_interface_t *lg = (gulm_interface_t *) lgp; + /* make sure it is a gulm_interface_p. 
*/ + if (lg == NULL || lg->first_magic != LGMAGIC + || lg->last_magic != LGMAGIC) +#ifdef __KERNEL__ + return NULL; +#else + return -EINVAL; +#endif + + return lg->lock_fd; +} + +/** + * lg_lock_handle_messages - + * @ulm_interface_p: + * @lg_lockspace_callbacks_t: + * + * Returns: int + */ +int +lg_lock_handle_messages (gulm_interface_p lgp, lg_lockspace_callbacks_t * cbp, + void *misc) +{ + gulm_interface_t *lg = (gulm_interface_t *) lgp; + xdr_dec_t *dec; + int err = 0; + uint32_t x_code, x_error, x_flags; + uint16_t x_keylen, x_lvblen = 0; + uint8_t x_state; + + /* make sure it is a gulm_interface_p. */ + if (lg == NULL) + return -EINVAL; + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC) + return -EINVAL; + + if (lg->core_enc == NULL || lg->core_dec == NULL) + return -EBADR; + + down (&lg->lock_recver); + if (lg->in_lock_hm) + return -EDEADLK; + lg->in_lock_hm = TRUE; + up (&lg->lock_recver); + + dec = lg->lock_dec; + + err = xdr_dec_uint32 (dec, &x_code); + if (err != 0) + goto exit; + + if (gulm_lock_login_rpl == x_code) { + do { + if ((err = xdr_dec_uint32 (dec, &x_error)) != 0) + break; + if ((err = xdr_dec_uint8 (dec, &x_state)) != 0) + break; + } while (0); + if (err != 0) + goto exit; + if (cbp->login_reply == NULL) { + err = 0; + goto exit; + } + err = cbp->login_reply (misc, x_error, x_state); + goto exit; + } else if (gulm_lock_logout_rpl == x_code) { + if (cbp->logout_reply != NULL) { + err = cbp->logout_reply (misc); + } + + xdr_close (&lg->lock_fd); + xdr_enc_release (lg->lock_enc); + lg->lock_enc = NULL; + xdr_dec_release (lg->lock_dec); + lg->lock_dec = NULL; + + goto exit; + } else if (gulm_lock_state_rpl == x_code) { + do { + if ((err = + xdr_dec_raw_ag (dec, (void **) &lg->lfba, + &lg->lfba_len, &x_keylen)) != 0) + break; + if ((err = xdr_dec_uint8 (dec, &x_state)) != 0) + break; + if ((err = xdr_dec_uint32 (dec, &x_flags)) != 0) + break; + if ((err = xdr_dec_uint32 (dec, &x_error)) != 0) + break; + if (x_flags & gio_lck_fg_hasLVB) { + if ((err = + xdr_dec_raw_ag (dec, (void **) &lg->lfbb, + &lg->lfbb_len, + &x_lvblen)) != 0) + break; + } + } while (0); + if (err != 0) { + goto exit; + } + if (x_keylen <= 4) { + err = -EPROTO; /* or something */ + goto exit; + } + if (cbp->lock_state == NULL) { + err = 0; + goto exit; + } + err = cbp->lock_state (misc, &lg->lfba[4], x_keylen - 4, + x_state, x_flags, x_error, + lg->lfbb, x_lvblen); + goto exit; + } else if (gulm_lock_action_rpl == x_code) { + do { + if ((err = + xdr_dec_raw_ag (dec, (void **) &lg->lfba, + &lg->lfba_len, &x_keylen)) != 0) + break; + if ((err = xdr_dec_uint8 (dec, &x_state)) != 0) + break; + if ((err = xdr_dec_uint32 (dec, &x_error)) != 0) + break; + } while (0); + if (err != 0) { + goto exit; + } + if (x_keylen <= 4) { + err = -EPROTO; /* or something */ + goto exit; + } + if (cbp->lock_action == NULL) { + err = 0; + goto exit; + } + err = + cbp->lock_action (misc, &lg->lfba[4], x_keylen - 4, x_state, + x_error); + goto exit; + } else if (gulm_lock_cb_state == x_code) { + do { + if ((err = + xdr_dec_raw_ag (dec, (void **) &lg->lfba, + &lg->lfba_len, &x_keylen)) != 0) + break; + if ((err = xdr_dec_uint8 (dec, &x_state)) != 0) + break; + } while (0); + if (err != 0) { + goto exit; + } + if (cbp->drop_lock_req == NULL) { + err = 0; + goto exit; + } + err = + cbp->drop_lock_req (misc, &lg->lfba[4], x_keylen - 4, + x_state); + goto exit; + } else if (gulm_lock_cb_dropall == x_code) { + if (cbp->drop_all == NULL) { + err = 0; + goto exit; + } + err = cbp->drop_all (misc); + goto exit; + } else if 
(gulm_info_stats_rpl == x_code) {
+		if (cbp->status != NULL) {
+			if ((err =
+			     cbp->status (misc, lglcb_start, NULL, NULL)) != 0)
+				goto exit;
+		}
+		do {
+			if ((err = xdr_dec_list_start (dec)) != 0)
+				break;
+			while (xdr_dec_list_stop (dec) != 0) {
+				if ((err =
+				     xdr_dec_string_ag (dec, &lg->lfba,
+							&lg->lfba_len)) != 0)
+					break;
+				if ((err =
+				     xdr_dec_string_ag (dec, &lg->lfbb,
+							&lg->lfbb_len)) != 0)
+					break;
+				if (cbp->status != NULL) {
+					if ((err =
+					     cbp->status (misc, lglcb_item,
+							  lg->lfba,
+							  lg->lfbb)) != 0) {
+						break;
+					}
+				}
+			}
+		} while (0);
+		if (err != 0) {
+			goto exit;
+		}
+		if (cbp->status == NULL) {
+			err = 0;
+			goto exit;
+		}
+		err = cbp->status (misc, lglcb_stop, NULL, NULL);
+		goto exit;
+	} else if (gulm_err_reply == x_code) {
+		do {
+			if ((err = xdr_dec_uint32 (dec, &x_code)) != 0)
+				break;
+			if ((err = xdr_dec_uint32 (dec, &x_error)) != 0)
+				break;
+		} while (0);
+		if (err != 0)
+			goto exit;
+		if (cbp->error == NULL) {
+			err = 0;
+			goto exit;
+		}
+		err = cbp->error (misc, x_error);
+		goto exit;
+	} else {
+		err = -EPROTO;
+		goto exit;
+	}
+
+      exit:
+	lg->in_lock_hm = FALSE;
+	return err;
+}
+
+/**
+ * lg_lock_login -
+ * @lgp:
+ * @lockspace:
+ *
+ *
+ * Returns: int
+ */
+int
+lg_lock_login (gulm_interface_p lgp, uint8_t lockspace[4])
+{
+	gulm_interface_t *lg = (gulm_interface_t *) lgp;
+	struct sockaddr_in6 adr;
+	int err;
+	xdr_socket cfd;
+	xdr_enc_t *enc;
+	xdr_dec_t *dec;
+
+	/* make sure it is a gulm_interface_p. */
+	if (lg == NULL)
+		return -EINVAL;
+	if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
+		return -EINVAL;
+
+	adr.sin6_family = AF_INET6;
+	adr.sin6_addr = in6addr_loopback;
+	adr.sin6_port = htons (lg->lock_port);
+
+	if ((err = xdr_open (&cfd)) < 0) {
+		return err;
+	}
+
+	if ((err = xdr_connect (&adr, cfd)) < 0) {
+		xdr_close (&cfd);
+		return err;
+	}
+
+	enc = xdr_enc_init (cfd, 512);
+	if (enc == NULL) {
+		xdr_close (&cfd);
+		return -ENOMEM;
+	}
+
+	dec = xdr_dec_init (cfd, 512);
+	if (dec == NULL) {
+		xdr_enc_release (enc);
+		xdr_close (&cfd);
+		return -ENOMEM;
+	}
+
+	do {
+		if ((err = xdr_enc_uint32 (enc, gulm_lock_login_req)) < 0)
+			break;
+		if ((err = xdr_enc_uint32 (enc, GIO_WIREPROT_VERS)) < 0)
+			break;
+		if ((err = xdr_enc_string (enc, lg->service_name)) < 0)
+			break;
+		if ((err = xdr_enc_uint8 (enc, gio_lck_st_Client)) < 0)
+			break;
+		if ((err = xdr_enc_flush (enc)) < 0)
+			break;
+
+		if ((err = xdr_enc_uint32 (enc, gulm_lock_sel_lckspc)) < 0)
+			break;
+		if ((err = xdr_enc_raw (enc, lockspace, 4)) < 0)
+			break;
+		/* don't flush here.
+		 * dumb programmer stunt. This way, the lockspace selection won't
+		 * happen until the next thing the user of this lib sends. Which
+		 * means it will be after we have received the login reply.
+		 *
+		 * Is there really a good reason not to flush here?
+		 */
+	} while (0);
+	if (err != 0) {
+		xdr_dec_release (dec);
+		xdr_enc_release (enc);
+		xdr_close (&cfd);
+		return err;
+	}
+
+	down (&lg->lock_sender);
+	lg->lock_fd = cfd;
+	lg->lock_enc = enc;
+	lg->lock_dec = dec;
+
+	memcpy (lg->lockspace, lockspace, 4);
+	up (&lg->lock_sender);
+
+	return 0;
+}
+
+/**
+ * lg_lock_logout -
+ * @lgp:
+ *
+ *
+ * Returns: int
+ */
+int
+lg_lock_logout (gulm_interface_p lgp)
+{
+	gulm_interface_t *lg = (gulm_interface_t *) lgp;
+	xdr_enc_t *enc;
+	int err;
+
+	/* make sure it is a gulm_interface_p.
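
A sketch of a caller joining a lockspace with lg_lock_login() above; the four-byte name "demo" is made up, and the login reply arrives later through the lockspace callbacks:

static int my_join_lockspace (gulm_interface_p lg)
{
	uint8_t ls[4] = { 'd', 'e', 'm', 'o' };

	/* queues the login; the lockspace selection is deliberately
	 * left unflushed, as the comment in lg_lock_login() explains */
	return lg_lock_login (lg, ls);
}
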
*/ + if (lg == NULL) + return -EINVAL; + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC) + return -EINVAL; + + if (lg->lock_fd < 0 || lg->lock_enc == NULL || lg->lock_dec == NULL) + return -EINVAL; + + enc = lg->lock_enc; + + down (&lg->lock_sender); + do { + if ((err = xdr_enc_uint32 (enc, gulm_lock_logout_req)) != 0) + break; + if ((err = xdr_enc_flush (enc)) != 0) + break; + } while (0); + up (&lg->lock_sender); + return err; +} + +/** + * lg_lock_state_req - + * @lgp: + * @key: + * @keylen: + * @state: + * @flags: + * @LVB: + * @LVBlen: + * + * + * Returns: int + */ +int +lg_lock_state_req (gulm_interface_p lgp, uint8_t * key, uint16_t keylen, + uint8_t state, uint32_t flags, uint8_t * LVB, + uint16_t LVBlen) +{ + gulm_interface_t *lg = (gulm_interface_t *) lgp; + struct iovec iov[2]; + xdr_enc_t *enc; + uint32_t iflgs = 0; + int err; + + /* make sure it is a gulm_interface_p. */ + if (lg == NULL) + return -EINVAL; + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC) + return -EINVAL; + + if (lg->lock_fd < 0 || lg->lock_enc == NULL || lg->lock_dec == NULL) + return -EINVAL; + + if (state != lg_lock_state_Unlock && + state != lg_lock_state_Exclusive && + state != lg_lock_state_Deferred && state != lg_lock_state_Shared) + return -EINVAL; + + /* make sure only the accepted flags get passed through. */ + if ((flags & lg_lock_flag_DoCB) == lg_lock_flag_DoCB) + iflgs |= lg_lock_flag_DoCB; + if ((flags & lg_lock_flag_Try) == lg_lock_flag_Try) + iflgs |= lg_lock_flag_Try; + if ((flags & lg_lock_flag_Any) == lg_lock_flag_Any) + iflgs |= lg_lock_flag_Any; + if ((flags & lg_lock_flag_IgnoreExp) == lg_lock_flag_IgnoreExp) + iflgs |= lg_lock_flag_IgnoreExp; + if ((flags & lg_lock_flag_Piority) == lg_lock_flag_Piority) + iflgs |= lg_lock_flag_Piority; + + enc = lg->lock_enc; + + if (LVB != NULL && LVBlen > 0) + iflgs |= gio_lck_fg_hasLVB; + + iov[0].iov_base = lg->lockspace; + iov[0].iov_len = 4; + iov[1].iov_base = key; + iov[1].iov_len = keylen; + + down (&lg->lock_sender); + do { + if ((err = xdr_enc_uint32 (enc, gulm_lock_state_req)) != 0) + break; + if ((err = xdr_enc_raw_iov (enc, 2, iov)) != 0) + break; + if ((err = xdr_enc_uint8 (enc, state)) != 0) + break; + if ((err = xdr_enc_uint32 (enc, iflgs)) != 0) + break; + if (iflgs & gio_lck_fg_hasLVB) + if ((err = xdr_enc_raw (enc, LVB, LVBlen)) != 0) + break; + if ((err = xdr_enc_flush (enc)) != 0) + break; + } while (0); + up (&lg->lock_sender); + return err; +} + +/** + * lg_lock_cancel_req - + * @lgp: + * @key: + * @keylen: + * + * + * Returns: int + */ +int +lg_lock_cancel_req (gulm_interface_p lgp, uint8_t * key, uint16_t keylen) +{ + gulm_interface_t *lg = (gulm_interface_t *) lgp; + struct iovec iov[2]; + xdr_enc_t *enc; + int err; + + /* make sure it is a gulm_interface_p. 
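
A sketch of requesting a lock with lg_lock_state_req() above. The key layout is entirely the caller's business, and my_try_exclusive is a hypothetical wrapper:

static int my_try_exclusive (gulm_interface_p lg,
			     uint8_t *key, uint16_t keylen)
{
	/* the grant (or lg_err_TryFailed) is reported through the
	 * lock_state() callback, not through this return value */
	return lg_lock_state_req (lg, key, keylen,
				  lg_lock_state_Exclusive,
				  lg_lock_flag_Try, NULL, 0);
}
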
*/ + if (lg == NULL) + return -EINVAL; + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC) + return -EINVAL; + + if (lg->lock_fd < 0 || lg->lock_enc == NULL || lg->lock_dec == NULL) + return -EINVAL; + + enc = lg->lock_enc; + + iov[0].iov_base = lg->lockspace; + iov[0].iov_len = 4; + iov[1].iov_base = key; + iov[1].iov_len = keylen; + + down (&lg->lock_sender); + do { + if ((err = xdr_enc_uint32 (enc, gulm_lock_action_req)) != 0) + break; + if ((err = xdr_enc_raw_iov (enc, 2, iov)) != 0) + break; + if ((err = xdr_enc_uint8 (enc, gio_lck_st_Cancel)) != 0) + break; + if ((err = xdr_enc_flush (enc)) != 0) + break; + } while (0); + up (&lg->lock_sender); + return err; +} + +/** + * lg_lock_action_req - + * @lgp: + * @key: + * @keylen: + * @action: + * @LVB: + * @LVBlen: + * + * XXX + * I wonder if I should actually break this into three seperate calls for + * the lvb stuff. Does it really matter? + * + * Returns: int + */ +int +lg_lock_action_req (gulm_interface_p lgp, uint8_t * key, uint16_t keylen, + uint8_t action, uint8_t * LVB, uint16_t LVBlen) +{ + gulm_interface_t *lg = (gulm_interface_t *) lgp; + struct iovec iov[2]; + xdr_enc_t *enc; + int err; + + /* make sure it is a gulm_interface_p. */ + if (lg == NULL) + return -EINVAL; + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC) + return -EINVAL; + + if (lg->lock_fd < 0 || lg->lock_enc == NULL || lg->lock_dec == NULL) + return -EINVAL; + + if (action != lg_lock_act_HoldLVB && + action != lg_lock_act_UnHoldLVB && action != lg_lock_act_SyncLVB) + return -EINVAL; + + enc = lg->lock_enc; + + iov[0].iov_base = lg->lockspace; + iov[0].iov_len = 4; + iov[1].iov_base = key; + iov[1].iov_len = keylen; + + down (&lg->lock_sender); + do { + if ((err = xdr_enc_uint32 (enc, gulm_lock_action_req)) != 0) + break; + if ((err = xdr_enc_raw_iov (enc, 2, iov)) != 0) + break; + if ((err = xdr_enc_uint8 (enc, action)) != 0) + break; + if (action == gio_lck_st_SyncLVB) + if ((err = xdr_enc_raw (enc, LVB, LVBlen)) != 0) + break; + if ((err = xdr_enc_flush (enc)) != 0) + break; + } while (0); + up (&lg->lock_sender); + return err; +} + +/** + * lg_lock_drop_exp - + * @ulm_interface_p: + * @holder: + * @keymask: + * @kmlen: + * + * holder is the node name of the expired holder that you want to clear. + * Only locks matching the keymask will be looked at. (most of the time you + * will just set key to a bunch of 0xff to match all) The keymask lets you + * basically subdivide your lockspace into smaller seperate parts. + * (example, there is one gfs lockspace, but each filesystem gets its own + * subpart of that larger space) + * + * If holder is NULL, all expired holders in your lockspace will get + * dropped. + * + * Returns: int + */ +int +lg_lock_drop_exp (gulm_interface_p lgp, uint8_t * holder, uint8_t * key, + uint16_t keylen) +{ + gulm_interface_t *lg = (gulm_interface_t *) lgp; + struct iovec iov[2]; + xdr_enc_t *enc; + int err; + + /* make sure it is a gulm_interface_p. */ + if (lg == NULL) + return -EINVAL; + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC) + return -EINVAL; + + if (lg->lock_fd < 0 || lg->lock_enc == NULL || lg->lock_dec == NULL) + return -EINVAL; + + enc = lg->lock_enc; + + iov[0].iov_base = lg->lockspace; + iov[0].iov_len = 4; + iov[1].iov_base = key; + iov[1].iov_len = (key != NULL) ? 
keylen : 0; + + down (&lg->lock_sender); + do { + if ((err = xdr_enc_uint32 (enc, gulm_lock_drop_exp)) != 0) + break; + if ((err = xdr_enc_string (enc, holder)) != 0) + break; + if ((err = xdr_enc_raw_iov (enc, 2, iov)) != 0) + break; + if ((err = xdr_enc_flush (enc)) != 0) + break; + } while (0); + up (&lg->lock_sender); + return err; +} + +/** + * lg_lock_status - + * @lgp: + * + * + * Returns: int + */ +int +lg_lock_status (gulm_interface_p lgp) +{ + gulm_interface_t *lg = (gulm_interface_t *) lgp; + xdr_enc_t *enc; + int err; + + /* make sure it is a gulm_interface_p. */ + if (lg == NULL) + return -EINVAL; + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC) + return -EINVAL; + + if (lg->lock_fd < 0 || lg->lock_enc == NULL || lg->lock_dec == NULL) + return -EINVAL; + + enc = lg->lock_enc; + + down (&lg->lock_sender); + do { + if ((err = xdr_enc_uint32 (enc, gulm_info_stats_req)) != 0) + break; + if ((err = xdr_enc_flush (enc)) != 0) + break; + } while (0); + up (&lg->lock_sender); + return err; +} diff -urN linux-orig/fs/gfs_locking/lock_gulm/lg_main.c linux-patched/fs/gfs_locking/lock_gulm/lg_main.c --- linux-orig/fs/gfs_locking/lock_gulm/lg_main.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_gulm/lg_main.c 2004-06-16 12:03:21.958894765 -0500 @@ -0,0 +1,209 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +/* This is where all of the library specific functions exist. + * Not many, but keeps things clean. + */ + +#include "lg_priv.h" +#include "gulm.h" +extern gulm_cm_t gulm_cm; + +/** + * lg_initialize - + * @gulm_interface_p: + * @cluster_name: + * @service_name: + * + * if returning an error, nothing was done to the value of gulm_interface_p + * + * Returns: gulm_interface_p + */ +int +lg_initialize (gulm_interface_p * ret, char *cluster_name, char *service_name) +{ + gulm_interface_t *lg; + int err, len; + + lg = kmalloc (sizeof (gulm_interface_t), GFP_KERNEL); + if (lg == NULL) + return -ENOMEM; + + memset (lg, 0, sizeof (gulm_interface_t)); + lg->first_magic = LGMAGIC; + lg->last_magic = LGMAGIC; + + if (cluster_name == NULL) + cluster_name = "cluster"; + len = strlen (cluster_name) + 1; + lg->clusterID = kmalloc (len, GFP_KERNEL); + if (lg->clusterID == NULL) { + err = -ENOMEM; + goto fail_nomem; + } + memcpy (lg->clusterID, cluster_name, len); + + len = strlen (service_name) + 1; + lg->service_name = kmalloc (len, GFP_KERNEL); + if (lg->service_name == NULL) { + err = -ENOMEM; + goto fail_nomem; + } + memcpy (lg->service_name, service_name, len); + + /* set up flutter bufs. 
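
A sketch of lg_lock_drop_exp() as its comment above describes it: an all-0xff mask matches every key, so this would clear everything the expired node "nodeA" (a hypothetical name, as is the mask width) still held:

static int my_drop_expired (gulm_interface_p lg)
{
	uint8_t mask[8];

	memset (mask, 0xff, sizeof (mask));	/* match all keys */
	return lg_lock_drop_exp (lg, (uint8_t *) "nodeA",
				 mask, sizeof (mask));
}
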
*/ + lg->cfba_len = 64; + lg->cfba = kmalloc (lg->cfba_len, GFP_KERNEL); + if (lg->cfba == NULL) { + err = -ENOMEM; + goto fail_nomem; + } + + lg->cfbb_len = 64; + lg->cfbb = kmalloc (lg->cfbb_len, GFP_KERNEL); + if (lg->cfbb == NULL) { + err = -ENOMEM; + goto fail_nomem; + } + + lg->lfba_len = 128; + lg->lfba = kmalloc (lg->lfba_len, GFP_KERNEL); + if (lg->lfba == NULL) { + err = -ENOMEM; + goto fail_nomem; + } + + lg->lfbb_len = 128; + lg->lfbb = kmalloc (lg->lfbb_len, GFP_KERNEL); + if (lg->lfbb == NULL) { + err = -ENOMEM; + goto fail_nomem; + } + + /* setup mutexes */ + init_MUTEX (&lg->core_sender); + init_MUTEX (&lg->core_recver); + init_MUTEX (&lg->lock_sender); + init_MUTEX (&lg->lock_recver); + + lg->core_port = 40040; + lg->lock_port = 40042; + + *ret = lg; + return 0; + fail_nomem: + if (lg->clusterID != NULL) + kfree (lg->clusterID); + if (lg->service_name != NULL) + kfree (lg->service_name); + if (lg->cfba != NULL) + kfree (lg->cfba); + if (lg->cfbb != NULL) + kfree (lg->cfbb); + if (lg->lfba != NULL) + kfree (lg->lfba); + if (lg->lfbb != NULL) + kfree (lg->lfbb); + kfree (lg); + return err; +} + +/** + * lg_release - + * @lg: + * + */ +void +lg_release (gulm_interface_p lgp) +{ + gulm_interface_t *lg = (gulm_interface_t *) lgp; + if (lgp == NULL) + return; + /* make sure it is a gulm_interface_p. */ + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC) + return; + + if (lg->service_name != NULL) + kfree (lg->service_name); + if (lg->clusterID != NULL) + kfree (lg->clusterID); + + /* wonder if I should send a logout packet? */ + if (lg->core_enc != NULL) + xdr_enc_release (lg->core_enc); + if (lg->core_dec != NULL) + xdr_dec_release (lg->core_dec); + xdr_close (&lg->core_fd); + + if (lg->lock_enc != NULL) + xdr_enc_release (lg->lock_enc); + if (lg->lock_dec != NULL) + xdr_dec_release (lg->lock_dec); + xdr_close (&lg->lock_fd); + + if (lg->cfba != NULL) + kfree (lg->cfba); + if (lg->cfbb != NULL) + kfree (lg->cfbb); + if (lg->lfba != NULL) + kfree (lg->lfba); + if (lg->lfbb != NULL) + kfree (lg->lfbb); + + kfree (lg); +} + +/** + * lg_set_core_port - + * @lgp: + * @new: + * + * + * Returns: int + */ +int +lg_set_core_port (gulm_interface_p lgp, uint16_t new) +{ + gulm_interface_t *lg = (gulm_interface_t *) lgp; + if (lgp == NULL) + return -EINVAL; + /* make sure it is a gulm_interface_p. */ + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC) + return -EINVAL; + + lg->core_port = new; + return 0; +} + +/** + * lg_set_ltpx_port - + * @lgp: + * @new: + * + * + * Returns: int + */ +int +lg_set_lock_port (gulm_interface_p lgp, uint16_t new) +{ + gulm_interface_t *lg = (gulm_interface_t *) lgp; + if (lgp == NULL) + return -EINVAL; + /* make sure it is a gulm_interface_p. */ + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC) + return -EINVAL; + + lg->lock_port = new; + + return 0; +} diff -urN linux-orig/fs/gfs_locking/lock_gulm/lg_priv.h linux-patched/fs/gfs_locking/lock_gulm/lg_priv.h --- linux-orig/fs/gfs_locking/lock_gulm/lg_priv.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_gulm/lg_priv.h 2004-06-16 12:03:21.958894765 -0500 @@ -0,0 +1,86 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. 
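
A sketch of the whole-interface lifecycle around lg_initialize()/lg_release() above, assuming the default ports (40040 core, 40042 lock) are right for the local daemons; my_bringup is hypothetical:

static int my_bringup (gulm_interface_p *out)
{
	int err;

	err = lg_initialize (out, "mycluster", "my_service");
	if (err)
		return err;	/* *out is untouched on failure */

	err = lg_core_login (*out, 0);	/* not "important" */
	if (err) {
		lg_release (*out);
		return err;
	}
	return 0;
}
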
+**
+** This copyrighted material is made available to anyone wishing to use,
+** modify, copy, or redistribute it subject to the terms and conditions
+** of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#ifndef __lg_priv_h__
+#define __lg_priv_h__
+/* private details that we don't want to give the users of this lib access
+ * to go here.
+ */
+
+#ifdef __linux__
+#include
+#include
+#define __KERNEL_SYSCALLS__
+#include
+#endif /*__linux__*/
+
+#include "xdr.h"
+#include "gio_wiretypes.h"
+#include "libgulm.h"
+
+#define LGMAGIC (0x474d4354)
+
+struct gulm_interface_s {
+	/* since we've masked this to a void* to the users, it is a nice safety
+	 * net to put a little magic in here so we know things stay good.
+	 */
+	uint32_t first_magic;
+
+	/* WHAT IS YOUR NAME?!? */
+	char *service_name;
+
+	char *clusterID;
+
+	uint16_t core_port;
+	xdr_socket core_fd;
+	xdr_enc_t *core_enc;
+	xdr_dec_t *core_dec;
+	struct semaphore core_sender;
+	struct semaphore core_recver;
+	int in_core_hm;
+
+	uint16_t lock_port;
+	xdr_socket lock_fd;
+	xdr_enc_t *lock_enc;
+	xdr_dec_t *lock_dec;
+	struct semaphore lock_sender;
+	struct semaphore lock_recver;
+	int in_lock_hm;
+	uint8_t lockspace[4];
+
+	/* in the message recver func, we read data into these buffers and pass
+	 * them to the callback function. This way we avoid doing mallocs and
+	 * frees on every callback.
+	 */
+	uint16_t cfba_len;
+	uint8_t *cfba;
+	uint16_t cfbb_len;
+	uint8_t *cfbb;
+	uint16_t lfba_len;
+	uint8_t *lfba;
+	uint16_t lfbb_len;
+	uint8_t *lfbb;
+
+	uint32_t last_magic;
+};
+typedef struct gulm_interface_s gulm_interface_t;
+
+#ifndef TRUE
+#define TRUE (1)
+#endif
+
+#ifndef FALSE
+#define FALSE (0)
+#endif
+
+#endif /*__lg_priv_h__*/
diff -urN linux-orig/fs/gfs_locking/lock_gulm/libgulm.h linux-patched/fs/gfs_locking/lock_gulm/libgulm.h
--- linux-orig/fs/gfs_locking/lock_gulm/libgulm.h	1969-12-31 18:00:00.000000000 -0600
+++ linux-patched/fs/gfs_locking/lock_gulm/libgulm.h	2004-06-16 12:03:21.958894765 -0500
@@ -0,0 +1,191 @@
+/******************************************************************************
+*******************************************************************************
+**
+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+**
+** This copyrighted material is made available to anyone wishing to use,
+** modify, copy, or redistribute it subject to the terms and conditions
+** of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#ifndef __libgulm_h__
+#define __libgulm_h__
+
+/* bit messy, but we need this to be rather seamless in both kernel and
+ * userspace. and this seems the easiest way to do it.
+ */
+
+#ifdef __linux__
+#include
+typedef struct socket *lg_socket;
+#endif /*__linux__*/
+
+typedef void *gulm_interface_p;
+
+/* mallocs the interface structure.
+ */
+int lg_initialize (gulm_interface_p *, char *cluster_name, char *service_name);
+/* frees struct.
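
The double-sentinel test that every entry point open-codes could be read as a single predicate; a sketch of it for clarity (the library itself repeats the check inline):

static inline int my_lg_valid (gulm_interface_t *lg)
{
	/* LGMAGIC guards both ends of the struct, catching stray
	 * overwrites as well as bogus pointers from callers */
	return lg != NULL &&
	       lg->first_magic == LGMAGIC &&
	       lg->last_magic == LGMAGIC;
}
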
+ */ +void lg_release (gulm_interface_p); + +/* Determins where we are with a itemlist callback */ +typedef enum { lglcb_start, lglcb_item, lglcb_stop } lglcb_t; + +/****** Core specifics ******/ + +/* leaving a callback pointer as NULL, will cause that message type to + * be ignored. */ +typedef struct lg_core_callbacks_s { + int (*login_reply) (void *misc, uint64_t gen, uint32_t error, + uint32_t rank, uint8_t corestate); + int (*logout_reply) (void *misc); + int (*nodelist) (void *misc, lglcb_t type, char *name, + struct in6_addr * ip, uint8_t state); + int (*statechange) (void *misc, uint8_t corestate, + struct in6_addr * masterip, char *mastername); + int (*nodechange) (void *misc, char *nodename, + struct in6_addr * nodeip, uint8_t nodestate); + int (*service_list) (void *misc, lglcb_t type, char *service); + int (*status) (void *misc, lglcb_t type, char *key, char *value); + int (*error) (void *misc, uint32_t err); +} lg_core_callbacks_t; + +/* this will trigger a callback from gulm_core_callbacks_t + * handles one message! Either stick this inside of a thread, + * or in a poll()/select() loop using the function below. + * This will block until there is a message sent from core. + */ +int lg_core_handle_messages (gulm_interface_p, lg_core_callbacks_t *, + void *misc); + +/* this returns the filedescriptor that the library is using to + * communicate with the core. This is only for using in a poll() + * or select() call to avoid having the gulm_core_handle_messages() + * call block. + */ +lg_socket lg_core_selector (gulm_interface_p); + +/* Queue requests. */ +int lg_core_login (gulm_interface_p, int important); +int lg_core_logout (gulm_interface_p); +int lg_core_nodeinfo (gulm_interface_p, char *nodename); +int lg_core_nodelist (gulm_interface_p); +int lg_core_servicelist (gulm_interface_p); +int lg_core_corestate (gulm_interface_p); + +/* for completeness mostly. */ +int lg_core_shutdown (gulm_interface_p); +int lg_core_forceexpire (gulm_interface_p, char *node_name); +int lg_core_forcepending (gulm_interface_p); + +int lg_core_status (gulm_interface_p); + +/* Node states + * First three are actual states, as well as changes. Last is only a node + * change message. + * */ +#define lg_core_Logged_in (0x05) +#define lg_core_Logged_out (0x06) +#define lg_core_Expired (0x07) +#define lg_core_Fenced (0x08) +/* Core states */ +#define lg_core_Slave (0x01) +#define lg_core_Master (0x02) +#define lg_core_Pending (0x03) +#define lg_core_Arbitrating (0x04) +#define lg_core_Client (0x06) + +/****** lock space specifics *****/ +/* note that this library masks out the lock table seperation. + */ + +typedef struct lg_lockspace_callbacks_s { + int (*login_reply) (void *misc, uint32_t error, uint8_t which); + int (*logout_reply) (void *misc); + int (*lock_state) (void *misc, uint8_t * key, uint16_t keylen, + uint8_t state, uint32_t flags, uint32_t error, + uint8_t * LVB, uint16_t LVBlen); + int (*lock_action) (void *misc, uint8_t * key, uint16_t keylen, + uint8_t action, uint32_t error); + int (*cancel_reply) (void *misc, uint8_t * key, uint16_t keylen, + uint32_t error); + int (*drop_lock_req) (void *misc, uint8_t * key, uint16_t keylen, + uint8_t state); + int (*drop_all) (void *misc); + int (*status) (void *misc, lglcb_t type, char *key, char *value); + int (*error) (void *misc, uint32_t err); +} lg_lockspace_callbacks_t; + +/* Like the core handle messages function, but for the lockspace. + * Handles one message, blocks. 
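
A sketch of consuming one of the lglcb_t-driven lists declared above, here the node list; the my_ name and messages are hypothetical:

static int my_nodelist (void *misc, lglcb_t type, char *name,
			struct in6_addr *ip, uint8_t state)
{
	switch (type) {
	case lglcb_start:
		printk ("gulm: node list start\n");
		break;
	case lglcb_item:
		printk ("gulm: node %s state %u\n", name, state);
		break;
	case lglcb_stop:
		printk ("gulm: node list done\n");
		break;
	}
	return 0;	/* non-zero aborts the surrounding dispatch */
}
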
+ */ + +int lg_lock_handle_messages (gulm_interface_p, lg_lockspace_callbacks_t *, + void *misc); + +/* this returns the filedescriptor that the library is using to + * communicate with the ltpx. This is only for using in a poll() + * or select() call to avoid having the gulm_lock_handle_messages() + * call block. + */ +lg_socket lg_lock_selector (gulm_interface_p); + +/* Lockspace request calls */ +int lg_lock_login (gulm_interface_p, uint8_t lockspace[4]); +int lg_lock_logout (gulm_interface_p); +int lg_lock_state_req (gulm_interface_p, uint8_t * key, uint16_t keylen, + uint8_t state, uint32_t flags, uint8_t * LVB, + uint16_t LVBlen); +int lg_lock_cancel_req (gulm_interface_p, uint8_t * key, uint16_t keylen); +int lg_lock_action_req (gulm_interface_p, uint8_t * key, + uint16_t keylen, uint8_t action, + uint8_t * LVB, uint16_t LVBlen); +int lg_lock_drop_exp (gulm_interface_p, uint8_t * holder, + uint8_t * keymask, uint16_t kmlen); +int lg_lock_status (gulm_interface_p); + +/* state requests */ +#define lg_lock_state_Unlock (0x00) +#define lg_lock_state_Exclusive (0x01) +#define lg_lock_state_Deferred (0x02) +#define lg_lock_state_Shared (0x03) + +/* actions */ +#define lg_lock_act_HoldLVB (0x0b) +#define lg_lock_act_UnHoldLVB (0x0c) +#define lg_lock_act_SyncLVB (0x0d) + +/* flags */ +#define lg_lock_flag_DoCB (0x00000001) +#define lg_lock_flag_Try (0x00000002) +#define lg_lock_flag_Any (0x00000004) +#define lg_lock_flag_IgnoreExp (0x00000008) +#define lg_lock_flag_Cachable (0x00000020) +#define lg_lock_flag_Piority (0x00000040) + +/* These are the possible values that can be in the error fields. */ +#define lg_err_Ok (0) +#define lg_err_BadLogin (1001) +#define lg_err_BadCluster (1003) +#define lg_err_BadConfig (1004) +#define lg_err_BadGeneration (1005) +#define lg_err_BadWireProto (1019) + +#define lg_err_NotAllowed (1006) +#define lg_err_Unknown_Cs (1007) +#define lg_err_BadStateChg (1008) +#define lg_err_MemoryIssues (1009) + +#define lg_err_TryFailed (1011) +#define lg_err_AlreadyPend (1013) +#define lg_err_Canceled (1015) + +#define lg_err_NoSuchFS (1016) +#define lg_err_NoSuchJID (1017) +#define lg_err_NoSuchName (1018) + +#endif /*__libgulm_h__*/ diff -urN linux-orig/fs/gfs_locking/lock_gulm/linux_gulm_main.c linux-patched/fs/gfs_locking/lock_gulm/linux_gulm_main.c --- linux-orig/fs/gfs_locking/lock_gulm/linux_gulm_main.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_gulm/linux_gulm_main.c 2004-06-16 12:03:21.958894765 -0500 @@ -0,0 +1,109 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
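
A minimal lockspace callback table for the declarations above, as a sketch; note the dispatcher hands lock_state() a key with the 4-byte lockspace prefix already stripped:

static int my_lock_state (void *misc, uint8_t *key, uint16_t keylen,
			  uint8_t state, uint32_t flags, uint32_t error,
			  uint8_t *LVB, uint16_t LVBlen)
{
	if (error == lg_err_TryFailed)
		return 0;	/* a Try request lost the race */
	/* record the grant for (key, keylen) here */
	return 0;
}

static lg_lockspace_callbacks_t my_ls_cb = {
	.lock_state = my_lock_state,
};
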
+** +******************************************************************************* +******************************************************************************/ + +#define EXPORT_SYMTAB +#define WANT_DEBUG_NAMES +#define WANT_GMALLOC_NAMES +#define EXTERN +#include "gulm.h" + +#include + +#include "util.h" +#include "gulm_procinfo.h" + +MODULE_DESCRIPTION ("Grand Unified Locking Module " GULM_RELEASE_NAME); +MODULE_AUTHOR ("Red Hat, Inc."); +MODULE_LICENSE ("GPL"); + +extern gulm_cm_t gulm_cm; + +/** + * init_gulm - Initialize the gulm module + * + * Returns: 0 on success, -EXXX on failure + */ +int __init +init_gulm (void) +{ + int error; + + memset (&gulm_cm, 0, sizeof (gulm_cm_t)); + gulm_cm.loaded = FALSE; + gulm_cm.hookup = NULL; + + /* register with the lm layers. */ + error = lm_register_proto (&gulm_ops); + if (error) + goto fail; + + error = init_proc_dir (); + if (error != 0) { + goto fail_lm; + } + + init_gulm_fs (); + + printk ("Gulm %s (built %s %s) installed\n", + GULM_RELEASE_NAME, __DATE__, __TIME__); + + return 0; + + fail_lm: + lm_unregister_proto (&gulm_ops); + + fail: + return error; +} + +/** + * exit_gulm - cleanup the gulm module + * + */ + +void __exit +exit_gulm (void) +{ + remove_proc_dir (); + lm_unregister_proto (&gulm_ops); +} + +module_init (init_gulm); +module_exit (exit_gulm); + +/* the libgulm.h interface. */ +EXPORT_SYMBOL (lg_initialize); +EXPORT_SYMBOL (lg_release); + +EXPORT_SYMBOL (lg_core_handle_messages); +EXPORT_SYMBOL (lg_core_selector); +EXPORT_SYMBOL (lg_core_login); +EXPORT_SYMBOL (lg_core_logout); +EXPORT_SYMBOL (lg_core_nodeinfo); +EXPORT_SYMBOL (lg_core_nodelist); +EXPORT_SYMBOL (lg_core_servicelist); +EXPORT_SYMBOL (lg_core_corestate); +EXPORT_SYMBOL (lg_core_shutdown); +EXPORT_SYMBOL (lg_core_forceexpire); +EXPORT_SYMBOL (lg_core_forcepending); +EXPORT_SYMBOL (lg_core_status); + +EXPORT_SYMBOL (lg_lock_handle_messages); +EXPORT_SYMBOL (lg_lock_selector); +EXPORT_SYMBOL (lg_lock_login); +EXPORT_SYMBOL (lg_lock_logout); +EXPORT_SYMBOL (lg_lock_state_req); +EXPORT_SYMBOL (lg_lock_cancel_req); +EXPORT_SYMBOL (lg_lock_action_req); +EXPORT_SYMBOL (lg_lock_drop_exp); +EXPORT_SYMBOL (lg_lock_status); diff -urN linux-orig/fs/gfs_locking/lock_gulm/load_info.c linux-patched/fs/gfs_locking/lock_gulm/load_info.c --- linux-orig/fs/gfs_locking/lock_gulm/load_info.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_gulm/load_info.c 2004-06-16 12:03:21.958894765 -0500 @@ -0,0 +1,96 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
+** +******************************************************************************* +******************************************************************************/ + +#include "gulm.h" + +#include +#include +#define __KERNEL_SYSCALLS__ +#include + +#include /* for extern system_utsname */ + +#include "util.h" +#include "utils_verb_flags.h" + +gulm_cm_t gulm_cm; + +/** + * init_ltpx - + */ +int +init_ltpx (void) +{ + int j; + lock_table_t *lt = &gulm_cm.ltpx; + + INIT_LIST_HEAD (<->to_be_sent); + spin_lock_init (<->queue_sender); + init_waitqueue_head (<->send_wchan); + lt->magic_one = 0xAAAAAAAA; + init_MUTEX (<->sender); + init_completion (<->startup); + atomic_set (<->locks_pending, 0); + lt->hashbuckets = 8191; + lt->hshlk = kmalloc (sizeof (spinlock_t) * lt->hashbuckets, GFP_KERNEL); + if (lt->hshlk == NULL) + return -ENOMEM; + lt->lkhsh = + kmalloc (sizeof (struct list_head) * lt->hashbuckets, GFP_KERNEL); + if (lt->lkhsh == NULL) { + kfree (lt->hshlk); + return -ENOMEM; + } + for (j = 0; j < lt->hashbuckets; j++) { + spin_lock_init (<->hshlk[j]); + INIT_LIST_HEAD (<->lkhsh[j]); + } + return 0; +} + +/** + * load_info - + * @hostdata: < optionally override the name of this node. + * + * Returns: int + */ +int +load_info (char *hostdata) +{ + int err = 0; + + if (gulm_cm.loaded) + goto exit; + + gulm_cm.verbosity = 0; + if (hostdata != NULL && strlen (hostdata) > 0) { + strncpy (gulm_cm.myName, hostdata, 64); + } else { + strncpy (gulm_cm.myName, system_utsname.nodename, 64); + } + gulm_cm.myName[63] = '\0'; + + /* breaking away from ccs. just hardcoding defaults here. + * Noone really used these anyways and if ppl want them badly, we'll + * find another way to set them. (modprobe options for example.) + * */ + gulm_cm.handler_threads = 2; + set_verbosity ("Default", &gulm_cm.verbosity); + + init_ltpx (); + + gulm_cm.loaded = TRUE; + exit: + return err; +} +/* vim: set ai cin noet sw=8 ts=8 : */ diff -urN linux-orig/fs/gfs_locking/lock_gulm/load_info.h linux-patched/fs/gfs_locking/lock_gulm/load_info.h --- linux-orig/fs/gfs_locking/lock_gulm/load_info.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_gulm/load_info.h 2004-06-16 12:03:21.958894765 -0500 @@ -0,0 +1,17 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __load_info_h__ +#define __load_info_h__ +int load_info (char *); +#endif /*__load_info_h__*/ diff -urN linux-orig/fs/gfs_locking/lock_gulm/util.c linux-patched/fs/gfs_locking/lock_gulm/util.c --- linux-orig/fs/gfs_locking/lock_gulm/util.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_gulm/util.c 2004-06-16 12:03:21.958894765 -0500 @@ -0,0 +1,109 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 
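
A sketch of how a lock key would map onto the 8191-bucket hash table that init_ltpx() above sets up; lock_table_t itself comes from gulm.h, which is outside this hunk, so the field use here is inferred from init_ltpx():

static struct list_head *my_key_bucket (lock_table_t *lt,
					uint8_t *key, uint8_t keylen)
{
	uint32_t h = hash_lock_key (key, keylen);

	/* each chain lt->lkhsh[n] has its own spinlock lt->hshlk[n] */
	return &lt->lkhsh[h % lt->hashbuckets];
}
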
+** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include "utils_crc.h" + +/** + * atoi + * + * @c: + * + */ + +int +atoi (char *c) +{ + int x = 0; + + while ('0' <= *c && *c <= '9') { + x = x * 10 + (*c - '0'); + c++; + } + + return (x); +} + +/** + * inet_aton + * + * @ascii: + * @ip: + * + */ + +int +inet_aton (char *ascii, uint32_t * ip) +{ + uint32_t value; + int x; + + *ip = 0; + + for (x = 0; x < 4; x++) { + value = atoi (ascii); + if (value > 255) + return (-1); + + *ip = (*ip << 8) | value; + + if (x != 3) { + for (; *ascii != '.' && *ascii != '\0'; ascii++) { + if (*ascii < '0' || *ascii > '9') { + /* not a number. stop */ + return -1; + } + } + if (*ascii == '\0') + return (-1); + + ascii++; + } + } + + return (0); +} + +/** + * inet_ntoa + * + * @ascii: + * @ip: + * + */ +void +inet_ntoa (uint32_t ip, char *buf) +{ + int i; + char *p; + + p = buf; + + for (i = 3; i >= 0; i--) { + p += sprintf (p, "%d", (ip >> (8 * i)) & 0xFF); + if (i > 0) + *(p++) = '.'; + } + +} + +/* public functions */ +#define hash_init_val 0x6d696b65 + +uint32_t __inline__ +hash_lock_key (uint8_t * in, uint8_t len) +{ /* other hash function was to variable */ + return crc32 (in, len, hash_init_val); +} diff -urN linux-orig/fs/gfs_locking/lock_gulm/util.h linux-patched/fs/gfs_locking/lock_gulm/util.h --- linux-orig/fs/gfs_locking/lock_gulm/util.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_gulm/util.h 2004-06-16 12:03:21.959894533 -0500 @@ -0,0 +1,29 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
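
A sketch round-tripping an address through the local helpers above (which deliberately shadow the userspace names); buf needs room for the worst case, 16 bytes:

static void my_ip_roundtrip (void)
{
	uint32_t ip;
	char buf[16];

	if (inet_aton ("10.0.0.1", &ip) == 0) {	/* 0 means success */
		/* ip is 0x0a000001 in host order */
		inet_ntoa (ip, buf);		/* buf: "10.0.0.1" */
	}
}
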
+** +******************************************************************************* +******************************************************************************/ + +#ifndef __UTIL_DOT_H__ +#define __UTIL_DOT_H__ + +int atoi (char *c); +int inet_aton (char *ascii, uint32_t * ip); +void inet_ntoa (uint32_t ip, char *buf); +void dump_buffer (void *buf, int len); + +uint32_t __inline__ hash_lock_key (uint8_t * in, uint8_t len); +uint8_t __inline__ fourtoone (uint32_t); + +__inline__ int testbit (uint16_t bit, uint8_t * set); +__inline__ void setbit (uint16_t bit, uint8_t * set); +__inline__ void clearbit (uint16_t bit, uint8_t * set); + +#endif /* __UTIL_DOT_H__ */ diff -urN linux-orig/fs/gfs_locking/lock_gulm/utils_crc.c linux-patched/fs/gfs_locking/lock_gulm/utils_crc.c --- linux-orig/fs/gfs_locking/lock_gulm/utils_crc.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_gulm/utils_crc.c 2004-06-16 12:03:21.959894533 -0500 @@ -0,0 +1,92 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include + +static const uint32_t crc_32_tab[] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, + 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2, + 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c, + 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, + 0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106, + 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, + 0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, + 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, + 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, + 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84, + 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 
0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e, + 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, + 0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28, + 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, + 0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, + 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, + 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, + 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d +}; + +/** + * crc32 - hash an array of data + * @data: the data to be hashed + * @len: the length of data to be hashed + * + * completely copied from GFS/src/fs.c + * + * Take some data and convert it to a 32-bit hash. + * + * The hash function is a 32-bit CRC of the data. The algorithm uses + * the crc_32_tab table above. + * + * This may not be the fastest hash function, but it does a fair bit better + * at providing uniform results than the others I've looked at. That's + * really important for efficient directories. + * + * Returns: the hash + */ + +uint32_t +crc32 (const char *data, int len, uint32_t init) +{ + uint32_t hash = init; + + for (; len--; data++) + hash = crc_32_tab[(hash ^ *data) & 0xFF] ^ (hash >> 8); + + hash = ~hash; + + return hash; +} diff -urN linux-orig/fs/gfs_locking/lock_gulm/utils_crc.h linux-patched/fs/gfs_locking/lock_gulm/utils_crc.h --- linux-orig/fs/gfs_locking/lock_gulm/utils_crc.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_gulm/utils_crc.h 2004-06-16 12:03:21.959894533 -0500 @@ -0,0 +1,17 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
+** +******************************************************************************* +******************************************************************************/ + +#ifndef __utils_crc_h__ +#define __utils_crc_h__ +uint32_t crc32 (const char *data, int len, uint32_t init); +#endif /*__utils_crc_h__*/ diff -urN linux-orig/fs/gfs_locking/lock_gulm/utils_tostr.c linux-patched/fs/gfs_locking/lock_gulm/utils_tostr.c --- linux-orig/fs/gfs_locking/lock_gulm/utils_tostr.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_gulm/utils_tostr.c 2004-06-16 12:03:21.959894533 -0500 @@ -0,0 +1,207 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include "gio_wiretypes.h" + +char * +gio_Err_to_str (int x) +{ + char *t = "Unknown GULM Err"; + switch (x) { + case gio_Err_Ok: + t = "Ok"; + break; + + case gio_Err_BadLogin: + t = "Bad Login"; + break; + case gio_Err_BadCluster: + t = "Bad Cluster ID"; + break; + case gio_Err_BadConfig: + t = "Incompatible configurations"; + break; + case gio_Err_BadGeneration: + t = "Bad Generation ID"; + break; + case gio_Err_BadWireProto: + t = "Bad Wire Protocol Version"; + break; + + case gio_Err_NotAllowed: + t = "Not Allowed"; + break; + case gio_Err_Unknown_Cs: + t = "Uknown Client"; + break; + case gio_Err_BadStateChg: + t = "Bad State Change"; + break; + case gio_Err_MemoryIssues: + t = "Memory Problems"; + break; + + case gio_Err_PushQu: + t = "Push Queue"; + break; + case gio_Err_TryFailed: + t = "Try Failed"; + break; + case gio_Err_AlreadyPend: + t = "Request Already Pending"; + break; + case gio_Err_Canceled: + t = "Request Canceled"; + break; + + case gio_Err_NoSuchFS: + t = "No Such Filesystem"; + break; + case gio_Err_NoSuchJID: + t = "No Such JID"; + break; + case gio_Err_NoSuchName: + t = "No Such Node"; + break; + } + return t; +} + +char * +gio_mbrupdate_to_str (int x) +{ + char *t = "Unknown Membership Update"; + switch (x) { + case gio_Mbr_Logged_in: + t = "Logged in"; + break; + case gio_Mbr_Logged_out: + t = "Logged out"; + break; + case gio_Mbr_Expired: + t = "Expired"; + break; + case gio_Mbr_Killed: + t = "Fenced"; + break; + case gio_Mbr_OM_lgin: + t = "Was Logged in"; + break; + } + return t; +} + +char * +gio_I_am_to_str (int x) +{ + switch (x) { + case gio_Mbr_ama_Slave: + return "Slave"; + break; + case gio_Mbr_ama_Pending: + return "Pending"; + break; + case gio_Mbr_ama_Arbitrating: + return "Arbitrating"; + break; + case gio_Mbr_ama_Master: + return "Master"; + break; + case gio_Mbr_ama_Resource: + return "Service"; + break; + case gio_Mbr_ama_Client: + return "Client"; + break; + default: + return "Unknown I_am state"; + break; + } +} + +char * +gio_license_states (int x) +{ + switch (x) { + case 0: + return "valid"; + break; + case 1: + return "expired"; + break; + case 2: + return "invalid"; + break; + default: + return "unknown"; + break; + } +} + +char * +gio_opcodes (int x) +{ + switch (x) { +#define CP(x) 
case (x): return #x ; break + CP (gulm_err_reply); + + CP (gulm_core_login_req); + CP (gulm_core_login_rpl); + CP (gulm_core_logout_req); + CP (gulm_core_logout_rpl); + CP (gulm_core_reslgn_req); + CP (gulm_core_beat_req); + CP (gulm_core_beat_rpl); + CP (gulm_core_mbr_req); + CP (gulm_core_mbr_updt); + CP (gulm_core_mbr_lstreq); + CP (gulm_core_mbr_lstrpl); + CP (gulm_core_mbr_force); + CP (gulm_core_res_req); + CP (gulm_core_res_list); + CP (gulm_core_state_req); + CP (gulm_core_state_chgs); + CP (gulm_core_shutdown); + CP (gulm_core_forcepend); + + CP (gulm_info_stats_req); + CP (gulm_info_stats_rpl); + CP (gulm_info_set_verbosity); + CP (gulm_socket_close); + CP (gulm_info_slave_list_req); + CP (gulm_info_slave_list_rpl); + + CP (gulm_lock_login_req); + CP (gulm_lock_login_rpl); + CP (gulm_lock_logout_req); + CP (gulm_lock_logout_rpl); + CP (gulm_lock_state_req); + CP (gulm_lock_state_rpl); + CP (gulm_lock_state_updt); + CP (gulm_lock_action_req); + CP (gulm_lock_action_rpl); + CP (gulm_lock_action_updt); + CP (gulm_lock_update_rpl); + CP (gulm_lock_cb_state); + CP (gulm_lock_cb_dropall); + CP (gulm_lock_drop_exp); + CP (gulm_lock_dump_req); + CP (gulm_lock_dump_rpl); + CP (gulm_lock_rerunqueues); + +#undef CP + default: + return "Unknown Op Code"; + break; + } +} diff -urN linux-orig/fs/gfs_locking/lock_gulm/utils_tostr.h linux-patched/fs/gfs_locking/lock_gulm/utils_tostr.h --- linux-orig/fs/gfs_locking/lock_gulm/utils_tostr.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_gulm/utils_tostr.h 2004-06-16 12:03:21.959894533 -0500 @@ -0,0 +1,22 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __utils_tostr_h__ +#define __utils_tostr_h__ +char *gio_Err_to_str (int x); +char *gio_mbrupdate_to_str (int x); +char *gio_mbrama_to_str (int x); +char *gio_I_am_to_str (int x); +char *gio_license_states (int x); +char *gio_opcodes (int x); +#endif /*__utils_tostr_h__*/ diff -urN linux-orig/fs/gfs_locking/lock_gulm/utils_verb_flags.c linux-patched/fs/gfs_locking/lock_gulm/utils_verb_flags.c --- linux-orig/fs/gfs_locking/lock_gulm/utils_verb_flags.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_gulm/utils_verb_flags.c 2004-06-16 12:03:21.959894533 -0500 @@ -0,0 +1,271 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. 
+**
+*******************************************************************************
+******************************************************************************/
+
+#ifdef __linux__
+#include <linux/types.h>
+#include <linux/string.h>
+#define __KERNEL_SYSCALLS__
+#include <linux/unistd.h>
+#endif /*__linux__*/
+
+#include "gulm_log_msg_bits.h"
+
+static __inline__ int
+strncasecmp (const char *s1, const char *s2, size_t l)
+{
+	char c1 = '\0', c2 = '\0';
+
+	while (*s1 && *s2 && l-- > 0) {
+		c1 = *s1++;
+		c2 = *s2++;
+
+		if (c1 >= 'A' && c1 <= 'Z')
+			c1 += 'a' - 'A';
+
+		if (c2 >= 'A' && c2 <= 'Z')
+			c2 += 'a' - 'A';
+
+		if (c1 != c2)
+			break;
+	}
+	return (c1 - c2);
+}
+
+static int bit_array[16] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
+
+#define BITCOUNT(x) (bit_array[x & 0x000F] + \
+		     bit_array[(x >> 4) & 0x000F] + \
+		     bit_array[(x >> 8) & 0x000F] + \
+		     bit_array[(x >> 12) & 0x000F] + \
+		     bit_array[(x >> 16) & 0x000F] + \
+		     bit_array[(x >> 20) & 0x000F] + \
+		     bit_array[(x >> 24) & 0x000F] + \
+		     bit_array[(x >> 28) & 0x000F])
+
+struct {
+	char *name;
+	uint32_t val;
+} verbose_flags[] = {
+	{ "Network", lgm_Network },
+	{ "Network2", lgm_Network2 },
+	{ "Network3", lgm_Network3 },
+	{ "Fencing", lgm_Stomith },
+	{ "Heartbeat", lgm_Heartbeat },
+	{ "Locking", lgm_locking },
+	{ "Forking", lgm_Forking },
+	{ "JIDMap", lgm_JIDMap },
+	{ "JIDUpdates", lgm_JIDUpdates },
+	{ "Subscribers", lgm_Subscribers },
+	{ "LockUpdates", lgm_LockUpdates },
+	{ "LoginLoops", lgm_LoginLoops },
+	{ "ServerState", lgm_ServerState },
+	{ "Default", lgm_Network | lgm_Stomith | lgm_Forking },
+/* Since I really don't want people doing *all* flags with "All",
+ * there is AlmostAll, which is what users really get, and ReallyAll,
+ * which is all bits on.
+ * This is mostly due to Network3, which dumps messages on nearly
+ * every packet. (should actually be every packet.)
+ * Also drop the slave updates, since that is on every packet as well.
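+ *
+ * For example, "Default" is exactly lgm_Network | lgm_Stomith | lgm_Forking,
+ * so a verbosity of just those three bits is reported by
+ * get_verbosity_string() below as the single combo name "Default" rather
+ * than as three separate flag names.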
+ */
+	{ "All",
+	  (lgm_ReallyAll &
+	   ~(lgm_Network3 | lgm_JIDUpdates | lgm_LockUpdates)) },
+	{ "AlmostAll",
+	  lgm_ReallyAll & ~(lgm_Network3 | lgm_JIDUpdates | lgm_LockUpdates) },
+	{ "ReallyAll", lgm_ReallyAll }
+};
+
+static int
+add_string (char *name, size_t * cur, char *str, size_t slen)
+{
+	size_t nl;
+
+	nl = strlen (name);
+	if (*cur + nl > slen) {
+		memcpy (str + *cur, "...", 3);
+		*cur += 3;
+		str[*cur] = '\0';
+		return -1;
+	}
+	memcpy (str + *cur, name, nl);
+	*cur += nl;
+	str[*cur] = ',';
+	*cur += 1;
+
+	return 0;
+}
+
+/**
+ * get_verbosity_string -
+ * @str:
+ * @verb:
+ *
+ *
+ * Returns: int
+ */
+int
+get_verbosity_string (char *str, size_t slen, uint32_t verb)
+{
+	int i, vlen = sizeof (verbose_flags) / sizeof (verbose_flags[0]);
+	size_t cur = 0;
+	int combo_match = -1, error = 0;
+
+	memset (str, 0, slen);
+	slen -= 4;		/* leave room for dots and null */
+
+	if (verb == 0) {
+		error = add_string ("Quiet", &cur, str, slen);
+		goto end;
+	}
+
+	/* Combo verb flag phase */
+	for (i = 0; i < vlen; i++) {
+		if (BITCOUNT (verbose_flags[i].val) > 1) {
+			/* check to see if this flag matches exclusively */
+			if ((verbose_flags[i].val ^ verb) == 0) {
+				error = add_string (verbose_flags[i].name,
+						    &cur, str, slen);
+				goto end;
+			}
+
+			if ((verbose_flags[i].val & verb) ==
+			    verbose_flags[i].val) {
+				if (combo_match < 0) {
+					combo_match = i;
+				} else {
+					/* Compare this combo with the one in
+					 * combo_match */
+					if (BITCOUNT (verbose_flags[i].val) >
+					    BITCOUNT (verbose_flags
+						      [combo_match].val)) {
+						combo_match = i;
+					}
+				}
+			}
+		}
+	}
+	/* Add the best combo to the string */
+	if (combo_match > -1) {
+		if (add_string
+		    (verbose_flags[combo_match].name, &cur, str, slen) == -1) {
+			error = -1;
+			goto end;
+		}
+	}
+
+	/* Single verb flag phase */
+	for (i = 0; i < vlen; i++) {
+		if (BITCOUNT (verbose_flags[i].val) == 1) {
+			if (combo_match > -1) {
+				if ((verbose_flags[combo_match].val &
+				     verbose_flags[i].val) ==
+				    verbose_flags[i].val) {
+					continue;
+				}
+			}
+
+			if ((verbose_flags[i].val & verb) ==
+			    verbose_flags[i].val) {
+				if (add_string
+				    (verbose_flags[i].name, &cur, str,
+				     slen) == -1) {
+					error = -1;
+					goto end;
+				}
+			}
+		}
+	}
+      end:
+	/* Clear trailing ',' */
+	if (str[cur - 1] == ',') {
+		str[cur - 1] = '\0';
+	}
+	return error;
+}
+
+/**
+ * set_verbosity -
+ * @str:
+ * @verb:
+ *
+ * toggle bits according to the `rules' in the str.
+ * str is a list of verb flags. can be prefixed with '+' or '-'
+ * No prefix is the same as '+' prefix
+ * '+' sets bits
+ * '-' unsets bits.
+ * special 'clear' unsets all.
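+ *
+ * Example (illustrative): set_verbosity ("clear,+Default,-Fencing", &v)
+ * first zeroes v, then sets the Default bits, then clears the Fencing bit.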
+ */ +void +set_verbosity (char *str, uint32_t * verb) +{ + char *token, *next; + int i, wl, tl, len = sizeof (verbose_flags) / sizeof (verbose_flags[0]); + + if (str == NULL) + return; + + wl = strlen (str); + if (wl == 0) + return; + for (token = str, tl = 0; tl < wl && + token[tl] != ',' && + token[tl] != ' ' && token[tl] != '|' && token[tl] != '\0'; tl++) ; + next = token + tl + 1; + + for (;;) { + if (token[0] == '-') { + token++; + for (i = 0; i < len; i++) { + if (strncasecmp + (token, verbose_flags[i].name, tl) == 0) { + (*verb) &= ~(verbose_flags[i].val); + } + } + } else if (token[0] == '+') { + token++; + for (i = 0; i < len; i++) { + if (strncasecmp + (token, verbose_flags[i].name, tl) == 0) { + (*verb) |= verbose_flags[i].val; + } + } + } else { + if (strncasecmp (token, "clear", tl) == 0) { + (*verb) = 0; + } else { + for (i = 0; i < len; i++) { + if (strncasecmp + (token, verbose_flags[i].name, + tl) == 0) { + (*verb) |= verbose_flags[i].val; + } + } + } + } + + if (next >= str + wl) + return; + for (token = next, tl = 0; + tl < wl && + token[tl] != ',' && + token[tl] != ' ' && + token[tl] != '|' && token[tl] != '\0'; tl++) ; + next = token + tl + 1; + + } +} diff -urN linux-orig/fs/gfs_locking/lock_gulm/utils_verb_flags.h linux-patched/fs/gfs_locking/lock_gulm/utils_verb_flags.h --- linux-orig/fs/gfs_locking/lock_gulm/utils_verb_flags.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_gulm/utils_verb_flags.h 2004-06-16 12:03:21.959894533 -0500 @@ -0,0 +1,18 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __utils_verb_flags_h__ +#define __utils_verb_flags_h__ +int get_verbosity_string (char *str, size_t slen, uint32_t verb); +void set_verbosity (char *str, uint32_t * verb); +#endif /*__utils_verb_flags_h__*/ diff -urN linux-orig/fs/gfs_locking/lock_gulm/xdr.h linux-patched/fs/gfs_locking/lock_gulm/xdr.h --- linux-orig/fs/gfs_locking/lock_gulm/xdr.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/fs/gfs_locking/lock_gulm/xdr.h 2004-06-16 12:03:21.959894533 -0500 @@ -0,0 +1,98 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __gulm_xdr_h__ +#define __gulm_xdr_h__ +typedef struct xdr_enc_s xdr_enc_t; +typedef struct xdr_dec_s xdr_dec_t; + +/* sockets in kernel space are done a bit different than socket in + * userspace. 
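+ * (in the kernel an xdr_socket is a struct socket pointer, while in
+ * userspace it is a plain file descriptor; the typedefs below pick one.)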
+ * But we need to have them appear to be the same.
+ */
+#ifdef __KERNEL__
+
+#ifdef __linux__
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/uio.h>
+
+typedef struct socket *xdr_socket;
+#endif /*__linux__*/
+#else /*__KERNEL__*/
+#include <stdint.h>
+#include <stddef.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/uio.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+typedef int xdr_socket;
+#endif /*__KERNEL__*/
+
+/* start things up */
+int xdr_open (xdr_socket * sk);
+int xdr_connect (struct sockaddr_in6 *adr, xdr_socket sk);
+void xdr_close (xdr_socket * sk);
+
+/* deep, basic io */
+#ifdef __KERNEL__
+#ifdef __linux__
+size_t xdr_send (struct socket *sock, void *buf, size_t size);
+size_t xdr_recv (struct socket *sock, void *buf, size_t size);
+#endif /*__linux__*/
+#else /*__KERNEL__*/
+ssize_t xdr_recv (int fd, void *buf, size_t len);
+ssize_t xdr_send (int fd, void *buf, size_t len);
+#endif /*__KERNEL__*/
+
+xdr_enc_t *xdr_enc_init (xdr_socket sk, int buffer_size);
+xdr_dec_t *xdr_dec_init (xdr_socket sk, int buffer_size);
+int xdr_enc_flush (xdr_enc_t * xdr);
+int xdr_enc_release (xdr_enc_t * xdr);	/* calls xdr_enc_flush() */
+void xdr_enc_force_release (xdr_enc_t * xdr);	/* doesn't call xdr_enc_flush() */
+void xdr_dec_release (xdr_dec_t * xdr);
+/* xdr_enc_force_release() is for when you get an error sending and you
+ * want to free that stuff up right away.  If you use the regular release
+ * for enc, it will fail if it cannot send data over the file descriptor.
+ */
+
+/* encoders add to a stream */
+int __inline__ xdr_enc_uint64 (xdr_enc_t * xdr, uint64_t i);
+int __inline__ xdr_enc_uint32 (xdr_enc_t * xdr, uint32_t i);
+int __inline__ xdr_enc_uint16 (xdr_enc_t * xdr, uint16_t i);
+int __inline__ xdr_enc_uint8 (xdr_enc_t * xdr, uint8_t i);
+int __inline__ xdr_enc_ipv6 (xdr_enc_t * enc, struct in6_addr *ip);
+int xdr_enc_raw (xdr_enc_t * xdr, void *pointer, uint16_t len);
+int xdr_enc_raw_iov (xdr_enc_t * xdr, int count, struct iovec *iov);
+int xdr_enc_string (xdr_enc_t * xdr, uint8_t * s);
+int xdr_enc_list_start (xdr_enc_t * xdr);
+int xdr_enc_list_stop (xdr_enc_t * xdr);
+
+/* decoders remove from stream */
+int xdr_dec_uint64 (xdr_dec_t * xdr, uint64_t * i);
+int xdr_dec_uint32 (xdr_dec_t * xdr, uint32_t * i);
+int xdr_dec_uint16 (xdr_dec_t * xdr, uint16_t * i);
+int xdr_dec_uint8 (xdr_dec_t * xdr, uint8_t * i);
+int xdr_dec_ipv6 (xdr_dec_t * xdr, struct in6_addr *ip);
+int xdr_dec_raw (xdr_dec_t * xdr, void *p, uint16_t * l);	/* no malloc */
+int xdr_dec_raw_m (xdr_dec_t * xdr, void **p, uint16_t * l);	/* mallocs p */
+int xdr_dec_raw_ag (xdr_dec_t * xdr, void **p, uint16_t * bl, uint16_t * rl);
+int xdr_dec_string (xdr_dec_t * xdr, uint8_t ** strp);	/* mallocs s */
+int xdr_dec_string_nm (xdr_dec_t * xdr, uint8_t * strp, size_t l);	/* no malloc */
+int xdr_dec_string_ag (xdr_dec_t * xdr, uint8_t ** s, uint16_t * bl);
+int xdr_dec_list_start (xdr_dec_t * xdr);
+int xdr_dec_list_stop (xdr_dec_t * xdr);
+
+#endif /*__gulm_xdr_h__*/
diff -urN linux-orig/fs/gfs_locking/lock_gulm/xdr_base.c linux-patched/fs/gfs_locking/lock_gulm/xdr_base.c
--- linux-orig/fs/gfs_locking/lock_gulm/xdr_base.c	1969-12-31 18:00:00.000000000 -0600
+++ linux-patched/fs/gfs_locking/lock_gulm/xdr_base.c	2004-06-16 12:03:21.959894533 -0500
@@ -0,0 +1,904 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+/*
+ * This is a bit of an abstraction layer to get this working in both kernel
+ * and userspace.
+ */
+#define TRUE (1)
+#define FALSE (0)
+#define MIN(a,b) (((a) < (b)) ? (a) : (b))
+
+#ifdef __linux__
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/in6.h>
+#define __KERNEL_SYSCALLS__
+#include <linux/unistd.h>
+#endif /*__linux__*/
+
+#include "xdr.h"
+
+/**
+ * xdr_realloc - a realloc for kernel space.
+ * @a: < pointer to realloc
+ * @nl: < desired new size
+ * @ol: < current old size
+ *
+ * Not as good as the real realloc, since it always moves memory.  But good
+ * enough for as little as it will get used here.
+ *
+ * XXX this is broken.
+ *
+ * Returns: void*
+ */
+static void *
+xdr_realloc (void *a, size_t nl, size_t ol)
+{
+	if (nl == ol) {
+		return a;
+	} else if (nl == 0) {
+		kfree (a);
+		return NULL;
+	} else if (a == NULL && nl > 0) {
+		return kmalloc (nl, GFP_KERNEL);
+	} else {
+		void *tmp;
+		tmp = kmalloc (nl, GFP_KERNEL);
+		if (tmp == NULL)
+			return NULL;
+		memcpy (tmp, a, MIN (nl, ol));
+		kfree (a);
+		return tmp;
+	}
+}
+
+typedef enum { xdr_enc, xdr_dec } xdr_type;
+
+/* encoders have this sorta non-blocking, growing buffering stunt.
+ * makes them a bit different from the decoders now.
+ */
+struct xdr_enc_s {
+	size_t default_buf_size;
+	xdr_socket fd;
+	xdr_type type;
+	size_t length;
+	size_t curloc;
+	uint8_t *stream;
+};
+
+/* decoders only pull a single item off of the socket at a time.
+ * so this is all they need.
+ */
+struct xdr_dec_s {
+	size_t length;		/* total byte length of the stream */
+	size_t curloc;		/* current byte offset from start */
+	uint8_t *stream;	/* start of the encoded stream. */
+	xdr_socket fd;
+	xdr_type type;
+};
+
+/* the types of data we support. */
+
+#define XDR_NULL       0x00	/* NOT A VALID TAG!!! used in dec code. */
+#define XDR_LIST_START 0x01
+#define XDR_LIST_STOP  0x02
+/* list is a variable length device.  It is a start tag, some number of
+ * xdr_enc_*, then a stop tag.  Its main purpose is to provide a method
+ * of encasing data.
+ * */
+#define XDR_STRING     0x04
+/* string tag is followed by a uint16 which is the byte length */
+#define XDR_RAW        0x05
+/* raw tag is followed by a uint16 which is the byte length
+ * if 65535 bytes isn't enough, split your data and put multiples of these
+ * back to back. (idea of xdr is to avoid this twit.)
+ * */
+
+/* note, if the size of these should vary, I'm screwed.  Should consider
+ * changing this all to the bit shift and array access to be more concrete.
+ * later.
+ */
+#define XDR_UINT64     0x06
+#define XDR_UINT32     0x07
+#define XDR_UINT16     0x08
+#define XDR_UINT8      0x09
+/* should add signed ints */
+
+#define XDR_IPv6       0x0a	/* 16 bytes, IPv6 address */
+
+/* any other base types?
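+ *
+ * For illustration of the wire format: a uint32 of 0x01020304 goes out as
+ * five bytes, the tag 0x07 (XDR_UINT32) followed by the value big-endian:
+ *	07 01 02 03 04
+ * and the 3-byte raw blob "abc" goes out as tag, be16 length, then data:
+ *	05 00 03 61 62 63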
+ */ + +#define XDR_DEFAULT_BUFFER_SIZE 4096 +/*****************************************************************************/ + +/** + * xdr_enc_init - + * @fd: + * @buffer_size: + * + * + * Returns: xdr_enc_t* + */ +xdr_enc_t * +xdr_enc_init (xdr_socket fd, int buffer_size) +{ + xdr_enc_t *xdr; + + if (buffer_size <= 0) + buffer_size = XDR_DEFAULT_BUFFER_SIZE; + + xdr = kmalloc (sizeof (xdr_enc_t), GFP_KERNEL); + if (xdr == NULL) + return NULL; + xdr->stream = kmalloc (buffer_size, GFP_KERNEL); + if (xdr->stream == NULL) { + kfree (xdr); + return NULL; + } + xdr->fd = fd; + xdr->type = xdr_enc; + xdr->default_buf_size = buffer_size; + xdr->length = buffer_size; + xdr->curloc = 0; + + return xdr; +} + +/** + * xdr_dec_init - + * @fd: + * @buffer_size: + * + * + * Returns: xdr_dec_t* + */ +xdr_dec_t * +xdr_dec_init (xdr_socket fd, int buffer_size) +{ + xdr_dec_t *xdr; + + if (buffer_size <= 0) + buffer_size = XDR_DEFAULT_BUFFER_SIZE; + + xdr = kmalloc (sizeof (xdr_dec_t), GFP_KERNEL); + if (xdr == NULL) + return NULL; + xdr->length = buffer_size; + xdr->curloc = 0; + xdr->stream = kmalloc (buffer_size, GFP_KERNEL); + xdr->fd = fd; + xdr->type = xdr_dec; + if (xdr->stream == NULL) { + kfree (xdr); + return NULL; + } + *(xdr->stream) = XDR_NULL; /* so the first dec_call will call get_next */ + return xdr; +} + +/*****************************************************************************/ +/** + * xdr_enc_flush - + * @xdr: + * + * Returns: int + */ +int +xdr_enc_flush (xdr_enc_t * xdr) +{ + int err; + if (xdr == NULL) + return -EINVAL; + if (xdr->type != xdr_enc) + return -EINVAL; + if (xdr->curloc == 0) + return 0; + + err = xdr_send (xdr->fd, xdr->stream, xdr->curloc); + if (err < 0) + return err; + if (err == 0) + return -EPROTO; /* why? */ + xdr->curloc = 0; + + return 0; +} + +/** + * xdr_release - + * @xdr: + * + * Free the memory, losing whatever may be there. + */ +void +xdr_dec_release (xdr_dec_t * xdr) +{ + if (xdr == NULL) + return; + kfree (xdr->stream); + kfree (xdr); +} + +/** + * xdr_enc_force_release - + * @xdr: + * + * Free the memory, losing whatever may be there. + */ +void +xdr_enc_force_release (xdr_enc_t * xdr) +{ + if (xdr == NULL) + return; + if (xdr->stream != NULL) + kfree (xdr->stream); + kfree (xdr); +} + +/** + * xdr_enc_release - + * @xdr: + * + * Free things up, trying to send any possible leftover data first. + * + * Returns: int + */ +int +xdr_enc_release (xdr_enc_t * xdr) +{ + int e; + if (xdr == NULL) + return -EINVAL; + if ((e = xdr_enc_flush (xdr)) != 0) + return e; + xdr_enc_force_release (xdr); + return 0; +} + +/*****************************************************************************/ +/** + * grow_stream - + * @xdr: + * @len: + * + * each single encoded call needs to fit within a buffer. So we make sure + * the buffer is big enough. + * + * If the buffer is big enough, but just doesn't have room, we send the + * data in the buffer, emptying it, first. + * + * Returns: int + */ +static int +grow_stream (xdr_enc_t * enc, size_t len) +{ + int err; + uint8_t *c; + + /* buffer must be big enough for one type entry. */ + if (len > enc->length) { + c = xdr_realloc (enc->stream, len, enc->length); + if (c == NULL) + return -ENOMEM; + enc->stream = c; + enc->length = len; + } + + /* if there isn't room on the end of this chunk, + * try sending what we've got. + */ + if (enc->curloc + len > enc->length) { + err = xdr_enc_flush (enc); + if (err != 0) { + /* error, better pass this up. 
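+			 * (partial data stays buffered; the caller just sees
+			 * the send error from xdr_enc_flush() unchanged.)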
*/ + return err; + } + } + + return 0; +} + +/** + * append_bytes - + * @xdr: + * @xdr_type: + * @bytes: + * @len: + * + * + * Returns: int + */ +static int +append_bytes (xdr_enc_t * xdr, uint8_t xdr_type, void *bytes, size_t len) +{ + int e; + if (xdr == NULL) + return -EINVAL; + if (xdr->type != xdr_enc) + return -EINVAL; + + /* len + 1; need the one byte for the type code. */ + if ((e = grow_stream (xdr, len + 1)) != 0) + return e; + *(xdr->stream + xdr->curloc) = xdr_type; + xdr->curloc += 1; + memcpy ((xdr->stream + xdr->curloc), bytes, len); + xdr->curloc += len; + + return 0; +} + +int __inline__ +xdr_enc_uint64 (xdr_enc_t * xdr, uint64_t i) +{ + uint64_t b = cpu_to_be64 (i); + return append_bytes (xdr, XDR_UINT64, &b, sizeof (uint64_t)); +} + +int __inline__ +xdr_enc_uint32 (xdr_enc_t * xdr, uint32_t i) +{ + uint32_t b = cpu_to_be32 (i); + return append_bytes (xdr, XDR_UINT32, &b, sizeof (uint32_t)); +} + +int __inline__ +xdr_enc_uint16 (xdr_enc_t * xdr, uint16_t i) +{ + uint16_t b = cpu_to_be16 (i); + return append_bytes (xdr, XDR_UINT16, &b, sizeof (uint16_t)); +} + +int __inline__ +xdr_enc_uint8 (xdr_enc_t * xdr, uint8_t i) +{ + return append_bytes (xdr, XDR_UINT8, &i, sizeof (uint8_t)); +} + +int __inline__ +xdr_enc_ipv6 (xdr_enc_t * xdr, struct in6_addr *ip) +{ /* bytes should already be in the right order. */ + return append_bytes (xdr, XDR_IPv6, ip->s6_addr, 16); +} + +int +xdr_enc_raw (xdr_enc_t * xdr, void *p, uint16_t len) +{ + int e; + if (xdr == NULL) + return -EINVAL; + if ((e = grow_stream (xdr, len + 3)) != 0) + return e; + *(xdr->stream + xdr->curloc) = XDR_RAW; + xdr->curloc += 1; + (uint16_t) * ((uint16_t *) (xdr->stream + xdr->curloc)) = + cpu_to_be16 (len); + xdr->curloc += 2; + memcpy ((xdr->stream + xdr->curloc), p, len); + xdr->curloc += len; + return 0; +} + +int +xdr_enc_raw_iov (xdr_enc_t * xdr, int count, struct iovec *iov) +{ + size_t total = 0; + int i, err; + if (xdr == NULL || count < 1 || iov == NULL) + return -EINVAL; + for (i = 0; i < count; i++) + total += iov[i].iov_len; + /* make sure it fits in a uint16_t */ + if (total > 0xffff) + return -EFBIG; + /* grow to fit */ + if ((err = grow_stream (xdr, total + 3)) != 0) + return err; + /* copy in header and size */ + *(xdr->stream + xdr->curloc) = XDR_RAW; + xdr->curloc += 1; + (uint16_t) * ((uint16_t *) (xdr->stream + xdr->curloc)) = + cpu_to_be16 (total); + xdr->curloc += 2; + /* copy in all iovbufs */ + for (i = 0; i < count; i++) { + if (iov[i].iov_base == NULL) + continue; + memcpy ((xdr->stream + xdr->curloc), iov[i].iov_base, + iov[i].iov_len); + xdr->curloc += iov[i].iov_len; + } + return 0; +} + +int +xdr_enc_string (xdr_enc_t * xdr, uint8_t * s) +{ + int len, e; + if (xdr == NULL) + return -EINVAL; + if (s == NULL) + len = 0; + else + len = strlen (s); + if ((e = grow_stream (xdr, len + 3)) != 0) + return e; + *(xdr->stream + xdr->curloc) = XDR_STRING; + xdr->curloc += 1; + (uint16_t) * ((uint16_t *) (xdr->stream + xdr->curloc)) = + cpu_to_be16 (len); + xdr->curloc += 2; + if (len > 0) { + memcpy ((xdr->stream + xdr->curloc), s, len); + xdr->curloc += len; + } + return 0; +} + +int +xdr_enc_list_start (xdr_enc_t * xdr) +{ + int e; + if (xdr == NULL) + return -EINVAL; + if ((e = grow_stream (xdr, 1)) != 0) + return e; + *(xdr->stream + xdr->curloc) = XDR_LIST_START; + xdr->curloc += 1; + return 0; +} + +int +xdr_enc_list_stop (xdr_enc_t * xdr) +{ + int e; + if (xdr == NULL) + return -EINVAL; + if ((e = grow_stream (xdr, 1)) != 0) + return e; + *(xdr->stream + xdr->curloc) = XDR_LIST_STOP; + 
xdr->curloc += 1; + return 0; +} + +/*****************************************************************************/ + +/** + * get_next - + * @xdr: + * + * get what ever may be next, and put it into the buffer. + * + * Returns: int + */ +static int +get_next (xdr_dec_t * xdr) +{ + int err; + uint16_t len; + if ((err = xdr_recv (xdr->fd, xdr->stream, 1)) < 0) + return err; + if (err == 0) + return -EPROTO; + xdr->curloc = 1; + if (*(xdr->stream) == XDR_UINT64) { + len = sizeof (uint64_t); + } else if (*(xdr->stream) == XDR_UINT32) { + len = sizeof (uint32_t); + } else if (*(xdr->stream) == XDR_UINT16) { + len = sizeof (uint16_t); + } else if (*(xdr->stream) == XDR_UINT8) { + len = sizeof (uint8_t); + } else if (*(xdr->stream) == XDR_IPv6) { + len = 16; + } else if (*(xdr->stream) == XDR_STRING) { + if ((err = xdr_recv (xdr->fd, (xdr->stream + 1), 2)) < 0) + return err; + if (err == 0) + return -EPROTO; + len = be16_to_cpu (*((uint16_t *) (xdr->stream + xdr->curloc))); + xdr->curloc += 2; + } else if (*(xdr->stream) == XDR_RAW) { + if ((err = xdr_recv (xdr->fd, (xdr->stream + 1), 2)) < 0) + return err; + if (err == 0) + return -EPROTO; + len = be16_to_cpu (*((uint16_t *) (xdr->stream + xdr->curloc))); + xdr->curloc += 2; + } else if (*(xdr->stream) == XDR_LIST_START) { + xdr->curloc = 0; + return 0; + } else if (*(xdr->stream) == XDR_LIST_STOP) { + xdr->curloc = 0; + return 0; + } else { + return -1; + } + + /* grow buffer if need be. */ + if (xdr->curloc + len > xdr->length) { + uint8_t *c; + c = xdr_realloc (xdr->stream, xdr->curloc + len, xdr->length); + if (c == NULL) + return -ENOMEM; + xdr->stream = c; + xdr->length = xdr->curloc + len; + } + + if (len > 0) { + if ((err = + xdr_recv (xdr->fd, (xdr->stream + xdr->curloc), len)) < 0) + return err; + if (err == 0) + return -EPROTO; + } + xdr->curloc = 0; + return 0; +} + +int +xdr_dec_uint64 (xdr_dec_t * xdr, uint64_t * i) +{ + int err; + if (xdr == NULL || i == NULL) + return -EINVAL; + if (*(xdr->stream) == XDR_NULL) { + if ((err = get_next (xdr)) != 0) + return err; + } + if (*(xdr->stream) != XDR_UINT64) + return -ENOMSG; + *i = be64_to_cpu (*((uint64_t *) (xdr->stream + 1))); + /* read the item out, mark that */ + *(xdr->stream) = XDR_NULL; + return 0; +} + +int +xdr_dec_uint32 (xdr_dec_t * xdr, uint32_t * i) +{ + int err; + if (xdr == NULL || i == NULL) + return -EINVAL; + if (*(xdr->stream) == XDR_NULL) { + if ((err = get_next (xdr)) != 0) + return err; + } + if (*(xdr->stream) != XDR_UINT32) + return -ENOMSG; + *i = be32_to_cpu (*((uint32_t *) (xdr->stream + 1))); + /* read the item out, mark that */ + *(xdr->stream) = XDR_NULL; + return 0; +} + +int +xdr_dec_uint16 (xdr_dec_t * xdr, uint16_t * i) +{ + int err; + if (xdr == NULL || i == NULL) + return -EINVAL; + if (*(xdr->stream) == XDR_NULL) { + if ((err = get_next (xdr)) != 0) + return err; + } + if (*(xdr->stream) != XDR_UINT16) + return -ENOMSG; + *i = be16_to_cpu (*((uint16_t *) (xdr->stream + 1))); + /* read the item out, mark that */ + *(xdr->stream) = XDR_NULL; + return 0; +} + +int +xdr_dec_uint8 (xdr_dec_t * xdr, uint8_t * i) +{ + int err; + if (xdr == NULL || i == NULL) + return -EINVAL; + + if (*(xdr->stream) == XDR_NULL) { + if ((err = get_next (xdr)) != 0) + return err; + } + if (*(xdr->stream) != XDR_UINT8) + return -ENOMSG; + *i = *((uint8_t *) (xdr->stream + 1)); + /* read the item out, mark that */ + *(xdr->stream) = XDR_NULL; + return 0; +} + +int +xdr_dec_ipv6 (xdr_dec_t * xdr, struct in6_addr *ip) +{ + int err; + if (xdr == NULL || ip == NULL) + return 
-EINVAL; + if (*(xdr->stream) == XDR_NULL) { + if ((err = get_next (xdr)) != 0) + return err; + } + if (*(xdr->stream) != XDR_IPv6) + return -ENOMSG; + memcpy (ip, xdr->stream + 1, 16); + /* read the item out, mark that */ + *(xdr->stream) = XDR_NULL; + return 0; +} + +/* mallocing version */ +int +xdr_dec_raw_m (xdr_dec_t * xdr, void **p, uint16_t * l) +{ + int len; + void *str; + int err; + + if (xdr == NULL || p == NULL || l == NULL) + return -EINVAL; + if (*(xdr->stream) == XDR_NULL) { + if ((err = get_next (xdr)) != 0) + return err; + } + if (*(xdr->stream) != XDR_RAW) + return -ENOMSG; + xdr->curloc = 1; + + len = be16_to_cpu (*((uint16_t *) (xdr->stream + xdr->curloc))); + xdr->curloc += 2; + + str = kmalloc (len, GFP_KERNEL); + if (str == NULL) + return -ENOMEM; + memcpy (str, (xdr->stream + xdr->curloc), len); + xdr->curloc += len; + + *p = str; + *l = len; + /* read the item out, mark that */ + *(xdr->stream) = XDR_NULL; + return 0; +} + +/* non-mallocing version */ +int +xdr_dec_raw (xdr_dec_t * xdr, void *p, uint16_t * l) +{ + int len; + int err; + + if (xdr == NULL || p == NULL || l == NULL) + return -EINVAL; + if (*(xdr->stream) == XDR_NULL) { + if ((err = get_next (xdr)) != 0) + return err; + } + if (*(xdr->stream) != XDR_RAW) + return -ENOMSG; + xdr->curloc = 1; + + len = be16_to_cpu (*((uint16_t *) (xdr->stream + xdr->curloc))); + xdr->curloc += 2; + + if (len > *l) + return -1; + + memcpy (p, (xdr->stream + xdr->curloc), len); + xdr->curloc += len; + + *l = len; + + /* read the item out, mark that */ + *(xdr->stream) = XDR_NULL; + return 0; +} + +/** + * xdr_dec_raw_ag - auto-growing version + * @xdr: + * @p: <> pointer to buffer + * @bl: <> size of the buffer + * @rl: > size of data read from stream + * + * This form of xdr_dec_raw will increase the size of a pre-malloced buffer + * to fit the data it is reading. It is kind of a merger of the + * non-mallocing and mallocing versions. 
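+ *
+ * Illustrative use (hypothetical caller; consume() is a stand-in), reusing
+ * one buffer across calls:
+ *
+ *	void *buf = NULL;
+ *	uint16_t bl = 0, rl;
+ *	if (xdr_dec_raw_ag (dec, &buf, &bl, &rl) == 0)
+ *		consume (buf, rl);
+ *
+ * buf and bl can then be passed in again; the buffer is only re-malloced
+ * when an incoming blob is larger than bl.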
+ * + * Returns: int + */ +int +xdr_dec_raw_ag (xdr_dec_t * xdr, void **p, uint16_t * bl, uint16_t * rl) +{ + int len; + int err; + + if (xdr == NULL || p == NULL || bl == NULL || rl == NULL) + return -EINVAL; + if (*(xdr->stream) == XDR_NULL) { + if ((err = get_next (xdr)) != 0) + return err; + } + if (*(xdr->stream) != XDR_RAW) + return -ENOMSG; + xdr->curloc = 1; + + len = be16_to_cpu (*((uint16_t *) (xdr->stream + xdr->curloc))); + xdr->curloc += 2; + + if (len > *bl) { /* grow p */ + void *temp; + temp = xdr_realloc (*p, len, *bl); + if (temp == NULL) + return -ENOMEM; + *bl = len; + *p = temp; + } + + memcpy (*p, (xdr->stream + xdr->curloc), len); + xdr->curloc += len; + + *rl = len; + + *(xdr->stream) = XDR_NULL; + return 0; +} + +/* mallocing version */ +int +xdr_dec_string (xdr_dec_t * xdr, uint8_t ** strp) +{ + int len; + char *str; + int err; + if (xdr == NULL || strp == NULL) + return -EINVAL; + if (*(xdr->stream) == XDR_NULL) { + if ((err = get_next (xdr)) != 0) + return err; + } + if (*(xdr->stream) != XDR_STRING) + return -ENOMSG; + xdr->curloc = 1; + + len = be16_to_cpu (*((uint16_t *) (xdr->stream + xdr->curloc))); + xdr->curloc += 2; + + if (len > 0) { + str = kmalloc (len + 1, GFP_KERNEL); + if (str == NULL) + return -ENOMEM; + str[len] = '\0'; + memcpy (str, (xdr->stream + xdr->curloc), len); + xdr->curloc += len; + + *strp = str; + } else { + *strp = NULL; + } + + /* read the item out, mark that */ + *(xdr->stream) = XDR_NULL; + return 0; +} + +/* non-mallocing version */ +int +xdr_dec_string_nm (xdr_dec_t * xdr, uint8_t * string, size_t l) +{ + int len; + int err; + if (xdr == NULL || string == NULL) + return -EINVAL; + if (*(xdr->stream) == XDR_NULL) { + if ((err = get_next (xdr)) != 0) + return err; + } + if (*(xdr->stream) != XDR_STRING) + return -ENOMSG; + xdr->curloc = 1; + + len = be16_to_cpu (*((uint16_t *) (xdr->stream + xdr->curloc))); + xdr->curloc += 2; + + if (len > 0) { + memcpy (string, (xdr->stream + xdr->curloc), MIN (len, l)); + if (l > len) { + string[len] = '\0'; + } + string[l - 1] = '\0'; + } else { + string[0] = '\0'; + } + + /* read the item out, mark that */ + *(xdr->stream) = XDR_NULL; + return 0; +} + +int +xdr_dec_string_ag (xdr_dec_t * xdr, uint8_t ** s, uint16_t * bl) +{ + int len; + int err; + if (xdr == NULL || s == NULL || bl == NULL) + return -EINVAL; + if (*(xdr->stream) == XDR_NULL) { + if ((err = get_next (xdr)) != 0) + return err; + } + if (*(xdr->stream) != XDR_STRING) + return -ENOMSG; + xdr->curloc = 1; + + len = be16_to_cpu (*((uint16_t *) (xdr->stream + xdr->curloc))); + xdr->curloc += 2; + + if (len == 0) { /* empty string */ + **s = '\0'; + *(xdr->stream) = XDR_NULL; + return 0; + } + + if (len >= *bl) { /* grow s */ + void *temp; + temp = xdr_realloc (*s, len + 1, *bl); + if (temp == NULL) + return -ENOMEM; + *bl = len + 1; + *s = temp; + } + + memcpy (*s, (xdr->stream + xdr->curloc), len); + (*s)[len] = '\0'; + + *(xdr->stream) = XDR_NULL; + return 0; +} + +int +xdr_dec_list_start (xdr_dec_t * xdr) +{ + int err; + if (xdr == NULL) + return -EINVAL; + if (*(xdr->stream) == XDR_NULL) { + if ((err = get_next (xdr)) != 0) + return err; + } + if (*(xdr->stream) != XDR_LIST_START) + return -ENOMSG; + /* read the item out, mark that */ + *(xdr->stream) = XDR_NULL; + return 0; +} + +int +xdr_dec_list_stop (xdr_dec_t * xdr) +{ + int err; + if (xdr == NULL) + return -EINVAL; + if (*(xdr->stream) == XDR_NULL) { + if ((err = get_next (xdr)) != 0) + return err; + } + if (*(xdr->stream) != XDR_LIST_STOP) + return -ENOMSG; + /* read the 
item out, mark that */
+	*(xdr->stream) = XDR_NULL;
+	return 0;
+}
diff -urN linux-orig/fs/gfs_locking/lock_gulm/xdr_io.c linux-patched/fs/gfs_locking/lock_gulm/xdr_io.c
--- linux-orig/fs/gfs_locking/lock_gulm/xdr_io.c	1969-12-31 18:00:00.000000000 -0600
+++ linux-patched/fs/gfs_locking/lock_gulm/xdr_io.c	2004-06-16 12:03:21.959894533 -0500
@@ -0,0 +1,169 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+/*
+ * does the lowest level of reads and writes.
+ * In kernel and/or userspace.
+ */
+
+#include "xdr.h"
+
+#ifdef __KERNEL__
+#ifdef __linux__
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/socket.h>
+#include "asm/uaccess.h"
+
+/**
+ * do_tfer - transfers data over a socket
+ * @sock: < socket
+ * @iov: <> iovec of buffers
+ * @n: < how many iovecs
+ * @size: < total data size to send/recv
+ * @dir: < send or recv
+ *
+ * Returns: <0: Error
+ *         >=0: Bytes transferred
+ */
+static int
+do_tfer (struct socket *sock, struct iovec *iov, int n, int size, int dir)
+{
+	unsigned long flags;
+	sigset_t oldset;
+	struct msghdr m;
+	mm_segment_t fs;
+	int rv, moved = 0;
+
+	fs = get_fs ();
+	set_fs (get_ds ());
+
+	/* XXX do I still want the signal stuff?
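+	 * While blocked in sock_sendmsg()/sock_recvmsg() everything except
+	 * SIGKILL and SIGTERM is masked below, and the old mask is restored
+	 * on the way out.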
+	 */
+	spin_lock_irqsave (&current->sighand->siglock, flags);
+	oldset = current->blocked;
+	siginitsetinv (&current->blocked,
+		       sigmask (SIGKILL) | sigmask (SIGTERM));
+	recalc_sigpending ();
+	spin_unlock_irqrestore (&current->sighand->siglock, flags);
+
+	memset (&m, 0, sizeof (struct msghdr));
+	for (;;) {
+		m.msg_iov = iov;
+		m.msg_iovlen = n;
+		m.msg_flags = MSG_NOSIGNAL;
+
+		if (dir)
+			rv = sock_sendmsg (sock, &m, size - moved);
+		else
+			rv = sock_recvmsg (sock, &m, size - moved, 0);
+
+		if (rv <= 0)
+			goto out_err;
+		moved += rv;
+
+		if (moved >= size)
+			break;
+
+		/* adjust iov's for next transfer */
+		while (iov->iov_len == 0) {
+			iov++;
+			n--;
+		}
+
+	}
+	rv = moved;
+      out_err:
+	spin_lock_irqsave (&current->sighand->siglock, flags);
+	current->blocked = oldset;
+	recalc_sigpending ();
+	spin_unlock_irqrestore (&current->sighand->siglock, flags);
+
+	set_fs (fs);
+
+	return rv;
+}
+
+size_t
+xdr_send (struct socket * sock, void *buf, size_t size)
+{
+	struct iovec iov;
+	int res;
+
+	iov.iov_base = buf;
+	iov.iov_len = size;
+
+	res = do_tfer (sock, &iov, 1, size, 1);
+
+	return res;
+}
+
+size_t
+xdr_recv (struct socket * sock, void *buf, size_t size)
+{
+	struct iovec iov;
+	int res;
+
+	iov.iov_base = buf;
+	iov.iov_len = size;
+
+	res = do_tfer (sock, &iov, 1, size, 0);
+
+	return res;
+}
+
+#endif /*__linux__*/
+#else /*__KERNEL__*/
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <errno.h>
+
+ssize_t
+xdr_recv (int fd, void *buf, size_t len)
+{
+	ssize_t cnt = 0;
+	size_t ttl = 0;
+	while (len > 0) {
+		cnt = recv (fd, buf, len, 0);
+		if (cnt == 0)
+			return 0;
+		if (cnt < 0)
+			return -errno;
+		len -= cnt;
+		buf += cnt;
+		ttl += cnt;
+	}
+	return ttl;
+}
+
+ssize_t
+xdr_send (int fd, void *buf, size_t len)
+{
+	ssize_t cnt = 0;
+	size_t ttl = 0;
+	while (len > 0) {
+		cnt = send (fd, buf, len, 0);
+		if (cnt == 0)
+			return 0;
+		if (cnt < 0)
+			return -errno;
+		len -= cnt;
+		buf += cnt;
+		ttl += cnt;
+	}
+	return ttl;
+}
+
+#endif /*__KERNEL__*/
diff -urN linux-orig/fs/gfs_locking/lock_gulm/xdr_socket.c linux-patched/fs/gfs_locking/lock_gulm/xdr_socket.c
--- linux-orig/fs/gfs_locking/lock_gulm/xdr_socket.c	1969-12-31 18:00:00.000000000 -0600
+++ linux-patched/fs/gfs_locking/lock_gulm/xdr_socket.c	2004-06-16 12:03:21.959894533 -0500
@@ -0,0 +1,82 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+/*
+ * This file opens and closes a socket.
+ * In kernel and/or userspace.
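+ *
+ * Typical sequence (illustrative only; error handling elided; adr is a
+ * struct sockaddr_in6 the caller filled in):
+ *
+ *	xdr_socket sk;
+ *	if (xdr_open (&sk) == 0 && xdr_connect (&adr, sk) == 0) {
+ *		enc = xdr_enc_init (sk, 0);
+ *		dec = xdr_dec_init (sk, 0);
+ *	}
+ *	xdr_close (&sk);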
+ */
+
+#include "xdr.h"
+
+#ifdef __KERNEL__
+#ifdef __linux__
+
+int
+xdr_open (xdr_socket * xsk)
+{
+	return sock_create (AF_INET6, SOCK_STREAM, 0, xsk);
+}
+
+int
+xdr_connect (struct sockaddr_in6 *adr, xdr_socket xsk)
+{
+	return xsk->ops->connect (xsk,
+				  (struct sockaddr *) adr,
+				  sizeof (struct sockaddr_in6), 0);
+}
+
+void
+xdr_close (xdr_socket * xsk)
+{
+	if (*xsk == NULL)
+		return;
+	sock_release (*xsk);
+	*xsk = NULL;
+}
+
+#endif /*__linux__*/
+#else /*__KERNEL__*/
+
+int
+xdr_open (xdr_socket * xsk)
+{
+	int sk;
+	sk = socket (AF_INET6, SOCK_STREAM, 0);
+	if (sk < 0)
+		return -errno;
+	*xsk = sk;
+	return 0;
+}
+
+int
+xdr_connect (struct sockaddr_in6 *adr, xdr_socket xsk)
+{
+	int err;
+	err =
+	    connect (xsk, (struct sockaddr *) adr,
+		     sizeof (struct sockaddr_in6));
+	if (err < 0)
+		return -errno;
+	return 0;
+}
+
+void
+xdr_close (xdr_socket * xsk)
+{
+	if (*xsk < 0)
+		return;
+	close (*xsk);
+	*xsk = -1;
+}
+
+#endif /*__KERNEL__*/
diff -urN linux-orig/fs/gfs_locking/lock_harness/main.c linux-patched/fs/gfs_locking/lock_harness/main.c
--- linux-orig/fs/gfs_locking/lock_harness/main.c	1969-12-31 18:00:00.000000000 -0600
+++ linux-patched/fs/gfs_locking/lock_harness/main.c	2004-06-16 12:03:10.006671787 -0500
@@ -0,0 +1,226 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/sched.h>
+#include <linux/kmod.h>
+#include <linux/lm_interface.h>
+
+#define RELEASE_NAME ""
+
+struct lmh_wrapper {
+	struct list_head lw_list;
+	struct lm_lockops *lw_ops;
+};
+
+static struct semaphore lmh_lock;
+static struct list_head lmh_list;
+
+/**
+ * lm_register_proto - Register a low-level locking protocol
+ * @proto: the protocol definition
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+int
+lm_register_proto(struct lm_lockops *proto)
+{
+	struct list_head *tmp, *head;
+	struct lmh_wrapper *lw;
+
+	down(&lmh_lock);
+
+	for (head = &lmh_list, tmp = head->next; tmp != head; tmp = tmp->next) {
+		lw = list_entry(tmp, struct lmh_wrapper, lw_list);
+
+		if (strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name) == 0) {
+			up(&lmh_lock);
+			printk("lock_harness: protocol %s already exists\n",
+			       proto->lm_proto_name);
+			return -EEXIST;
+		}
+	}
+
+	lw = kmalloc(sizeof (struct lmh_wrapper), GFP_KERNEL);
+	if (!lw) {
+		up(&lmh_lock);
+		return -ENOMEM;
+	}
+	memset(lw, 0, sizeof (struct lmh_wrapper));
+
+	lw->lw_ops = proto;
+	list_add(&lw->lw_list, &lmh_list);
+
+	up(&lmh_lock);
+
+	return 0;
+}
+
+/**
+ * lm_unregister_proto - Unregister a low-level locking protocol
+ * @proto: the protocol definition
+ *
+ */
+
+void
+lm_unregister_proto(struct lm_lockops *proto)
+{
+	struct list_head *tmp, *head;
+	struct lmh_wrapper *lw = NULL;
+
+	down(&lmh_lock);
+
+	for (head = &lmh_list, tmp = head->next; tmp != head; tmp = tmp->next) {
+		lw = list_entry(tmp, struct lmh_wrapper, lw_list);
+
+		if (strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name) == 0) {
+			list_del(&lw->lw_list);
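+			/* off the list now; release lmh_lock below before
+			 * freeing the wrapper */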
up(&lmh_lock); + kfree(lw); + return; + } + } + + up(&lmh_lock); + + printk("lock_harness: can't unregister lock protocol %s\n", + proto->lm_proto_name); +} + +/** + * lm_mount - Mount a lock protocol + * @proto_name - the name of the protocol + * @table_name - the name of the lock space + * @host_data - data specific to this host + * @cb - the callback to the code using the lock module + * @fsdata - data to pass back with the callback + * @min_lvb_size - the mininum LVB size that the caller can deal with + * @lockstruct - a structure returned describing the mount + * + * Returns: 0 on success, -EXXX on failure + */ + +int +lm_mount(char *proto_name, char *table_name, char *host_data, + lm_callback_t cb, lm_fsdata_t * fsdata, + unsigned int min_lvb_size, struct lm_lockstruct *lockstruct) +{ + struct list_head *tmp; + struct lmh_wrapper *lw = NULL; + int try = 0; + int error; + + retry: + down(&lmh_lock); + + for (tmp = lmh_list.next; tmp != &lmh_list; tmp = tmp->next) { + lw = list_entry(tmp, struct lmh_wrapper, lw_list); + + if (strcmp(lw->lw_ops->lm_proto_name, proto_name) == 0) + break; + else + lw = NULL; + } + + if (!lw) { + if (!try && capable(CAP_SYS_MODULE)) { + try = 1; + up(&lmh_lock); + request_module(proto_name); + goto retry; + } + printk("lock_harness: can't find protocol %s\n", proto_name); + error = -ENOENT; + goto out; + } + + if (!try_module_get(lw->lw_ops->lm_owner)) { + try = 0; + up(&lmh_lock); + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(HZ); + goto retry; + } + + error = lw->lw_ops->lm_mount(table_name, host_data, + cb, fsdata, min_lvb_size, lockstruct); + if (error) + module_put(lw->lw_ops->lm_owner); + + out: + up(&lmh_lock); + + return error; +} + +/** + * lm_unmount - unmount a lock module + * @lockstruct: the lockstruct passed into mount + * + */ + +void +lm_unmount(struct lm_lockstruct *lockstruct) +{ + down(&lmh_lock); + lockstruct->ls_ops->lm_unmount(lockstruct->ls_lockspace); + if (lockstruct->ls_ops->lm_owner) + module_put(lockstruct->ls_ops->lm_owner); + up(&lmh_lock); +} + +/** + * init_lmh - Initialize the lock module harness + * + * Returns: 0 on success, -EXXX on failure + */ + +int __init +init_lmh(void) +{ + init_MUTEX(&lmh_lock); + INIT_LIST_HEAD(&lmh_list); + + printk("Lock_Harness %s (built %s %s) installed\n", + RELEASE_NAME, __DATE__, __TIME__); + + return 0; +} + +/** + * exit_lmh - cleanup the Lock Module Harness + * + * Returns: 0 on success, -EXXX on failure + */ + +void __exit +exit_lmh(void) +{ +} + +module_init(init_lmh); +module_exit(exit_lmh); + +MODULE_DESCRIPTION("GFS Lock Module Harness " RELEASE_NAME); +MODULE_AUTHOR("Red Hat, Inc."); +MODULE_LICENSE("GPL"); + +EXPORT_SYMBOL_GPL(lm_register_proto); +EXPORT_SYMBOL_GPL(lm_unregister_proto); +EXPORT_SYMBOL_GPL(lm_mount); +EXPORT_SYMBOL_GPL(lm_unmount); diff -urN linux-orig/include/linux/lm_interface.h linux-patched/include/linux/lm_interface.h --- linux-orig/include/linux/lm_interface.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-patched/include/linux/lm_interface.h 2004-06-16 12:03:10.005672019 -0500 @@ -0,0 +1,193 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. 
+** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +/* + + Sooner or later, I need to put all the documentation back into this file. + In the mean time, here are some notes. + + - The lock module is now responsible for STOMITHing the an expired + client before calling the callback with type LM_CB_NEED_RECOVERY. + + - If mount() operation returns first == TRUE, GFS will check all the + journals. GFS itself can't/shouldn't stomith the machines, so the lock module + needs to make sure that there are no zombie machines on any of the + journals. (i.e. this should probably be on the first mount of the lock + space where all mounts by other machines are blocked.) GFS will call + others_may_mount() when the filesystem is in a consistent state. + + - GFS can issue multiple simultaneous get_lock()s for the same lockname. + The lock module needs to deal with it, either by 1) building a hash table + to lookup the structures and keeping a reference count so there is only + on lm_lock_t for a given lockname. or 2) just dealing with multiple + lm_lock_t structures for a given lockname. + +*/ + +#ifndef __LM_INTERFACE_DOT_H__ +#define __LM_INTERFACE_DOT_H__ + +typedef void lm_lockspace_t; +typedef void lm_lock_t; +typedef void lm_fsdata_t; +typedef void (*lm_callback_t) (lm_fsdata_t *fsdata, unsigned int type, + void *data); + +/* Flags for the struct lm_lockstruct->ls_flags field */ + +#define LM_LSFLAG_LOCAL (0x00000001) +#define LM_LSFLAG_ASYNC (0x00000002) + +/* Lock types */ + +#define LM_TYPE_RESERVED (0x00) +#define LM_TYPE_NONDISK (0x01) +#define LM_TYPE_INODE (0x02) +#define LM_TYPE_RGRP (0x03) +#define LM_TYPE_META (0x04) +#define LM_TYPE_IOPEN (0x05) +#define LM_TYPE_FLOCK (0x06) +#define LM_TYPE_PLOCK (0x07) +#define LM_TYPE_QUOTA (0x08) + +/* States passed to lock() */ + +#define LM_ST_UNLOCKED (0) +#define LM_ST_EXCLUSIVE (1) +#define LM_ST_DEFERRED (2) +#define LM_ST_SHARED (3) + +/* Flags passed to lock() */ + +#define LM_FLAG_TRY (0x00000001) +#define LM_FLAG_TRY_1CB (0x00000002) +#define LM_FLAG_NOEXP (0x00000004) +#define LM_FLAG_ANY (0x00000008) +#define LM_FLAG_PRIORITY (0x00000010) + +/* Flags returned by lock() */ + +#define LM_OUT_ST_MASK (0x00000003) +#define LM_OUT_CACHEABLE (0x00000004) +#define LM_OUT_CANCELED (0x00000008) +#define LM_OUT_NEED_E (0x00000010) +#define LM_OUT_NEED_D (0x00000020) +#define LM_OUT_NEED_S (0x00000040) +#define LM_OUT_ASYNC (0x00000080) +#define LM_OUT_LVB_INVALID (0x00000100) + +/* Callback types */ + +#define LM_CB_NEED_E (257) +#define LM_CB_NEED_D (258) +#define LM_CB_NEED_S (259) +#define LM_CB_NEED_RECOVERY (260) +#define LM_CB_DROPLOCKS (261) +#define LM_CB_ASYNC (262) + +/* Reset_exp messages */ + +#define LM_RD_GAVEUP (308) +#define LM_RD_SUCCESS (309) + +struct lm_lockname { + uint64_t ln_number; + unsigned int ln_type; +}; + +#define lm_name_equal(name1, name2) \ +(((name1)->ln_number == (name2)->ln_number) && \ + ((name1)->ln_type == (name2)->ln_type)) \ + +struct lm_async_cb { + struct lm_lockname lc_name; + int lc_ret; +}; + +struct lm_lockstruct; + +struct lm_lockops { + char lm_proto_name[256]; + + /* Mount/Unmount */ + + int (*lm_mount) (char *table_name, char *host_data, + lm_callback_t cb, lm_fsdata_t *fsdata, + unsigned int 
+			 struct lm_lockstruct *lockstruct);
+	void (*lm_others_may_mount) (lm_lockspace_t *lockspace);
+	void (*lm_unmount) (lm_lockspace_t *lockspace);
+
+	/* Lock oriented operations */
+
+	int (*lm_get_lock) (lm_lockspace_t *lockspace,
+			    struct lm_lockname *name, lm_lock_t **lockp);
+	void (*lm_put_lock) (lm_lock_t *lock);
+
+	unsigned int (*lm_lock) (lm_lock_t *lock, unsigned int cur_state,
+				 unsigned int req_state, unsigned int flags);
+	unsigned int (*lm_unlock) (lm_lock_t *lock, unsigned int cur_state);
+
+	void (*lm_cancel) (lm_lock_t *lock);
+
+	int (*lm_hold_lvb) (lm_lock_t *lock, char **lvbp);
+	void (*lm_unhold_lvb) (lm_lock_t *lock, char *lvb);
+	void (*lm_sync_lvb) (lm_lock_t *lock, char *lvb);
+
+	/* Posix Lock oriented operations */
+
+	int (*lm_plock_get) (lm_lockspace_t *lockspace,
+			     struct lm_lockname *name, unsigned long owner,
+			     uint64_t *start, uint64_t *end, int *exclusive,
+			     unsigned long *rowner);
+
+	int (*lm_plock) (lm_lockspace_t *lockspace,
+			 struct lm_lockname *name, unsigned long owner,
+			 int wait, int exclusive, uint64_t start,
+			 uint64_t end);
+
+	int (*lm_punlock) (lm_lockspace_t *lockspace,
+			   struct lm_lockname *name, unsigned long owner,
+			   uint64_t start, uint64_t end);
+
+	/* Client oriented operations */
+
+	void (*lm_recovery_done) (lm_lockspace_t *lockspace, unsigned int jid,
+				  unsigned int message);
+
+	struct module *lm_owner;
+};
+
+struct lm_lockstruct {
+	unsigned int ls_jid;
+	unsigned int ls_first;
+	unsigned int ls_lvb_size;
+	lm_lockspace_t *ls_lockspace;
+	struct lm_lockops *ls_ops;
+	int ls_flags;
+};
+
+/* Bottom interface */
+
+int lm_register_proto(struct lm_lockops *proto);
+void lm_unregister_proto(struct lm_lockops *proto);
+
+/* Top interface */
+
+int lm_mount(char *proto_name,
+	     char *table_name, char *host_data,
+	     lm_callback_t cb, lm_fsdata_t *fsdata,
+	     unsigned int min_lvb_size, struct lm_lockstruct *lockstruct);
+void lm_unmount(struct lm_lockstruct *lockstruct);
+
+#endif /* __LM_INTERFACE_DOT_H__ */
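# Example: refcounting get_lock() requests. This is an illustrative editor's
# sketch, not part of the patch series. The notes at the top of
# lm_interface.h leave it to each lock module to cope with multiple
# simultaneous get_lock() calls for one lockname; option 1) -- one
# refcounted structure per lockname -- might look like this. All mylm_*
# names are hypothetical, and a real module would use a hash table rather
# than a single list.

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/lm_interface.h>
#include <asm/atomic.h>

struct mylm_lock {
	struct list_head ml_list;	/* chain in mylm_locks */
	struct lm_lockname ml_name;
	atomic_t ml_count;		/* outstanding get_lock() references */
};

static LIST_HEAD(mylm_locks);
static spinlock_t mylm_locks_lock = SPIN_LOCK_UNLOCKED;

static int
mylm_get_lock(lm_lockspace_t *lockspace, struct lm_lockname *name,
	      lm_lock_t **lockp)
{
	struct mylm_lock *ml, *new;

	/* Allocate up front so the list search can stay under the lock. */
	new = kmalloc(sizeof(struct mylm_lock), GFP_KERNEL);
	if (!new)
		return -ENOMEM;
	memset(new, 0, sizeof(struct mylm_lock));
	new->ml_name = *name;
	atomic_set(&new->ml_count, 1);

	spin_lock(&mylm_locks_lock);
	list_for_each_entry(ml, &mylm_locks, ml_list) {
		if (lm_name_equal(&ml->ml_name, name)) {
			/* Existing structure: take a reference, drop ours. */
			atomic_inc(&ml->ml_count);
			spin_unlock(&mylm_locks_lock);
			kfree(new);
			*lockp = (lm_lock_t *)ml;
			return 0;
		}
	}
	list_add(&new->ml_list, &mylm_locks);
	spin_unlock(&mylm_locks_lock);

	*lockp = (lm_lock_t *)new;
	return 0;
}

static void
mylm_put_lock(lm_lock_t *lock)
{
	struct mylm_lock *ml = (struct mylm_lock *)lock;

	spin_lock(&mylm_locks_lock);
	if (atomic_dec_and_test(&ml->ml_count)) {
		list_del(&ml->ml_list);
		spin_unlock(&mylm_locks_lock);
		kfree(ml);
		return;
	}
	spin_unlock(&mylm_locks_lock);
}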
diff -urN linux-orig/fs/gfs_locking/lock_nolock/main.c linux-patched/fs/gfs_locking/lock_nolock/main.c
--- linux-orig/fs/gfs_locking/lock_nolock/main.c	1969-12-31 18:00:00.000000000 -0600
+++ linux-patched/fs/gfs_locking/lock_nolock/main.c	2004-06-16 12:03:13.918762838 -0500
@@ -0,0 +1,350 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/lm_interface.h>
+
+#define RELEASE_NAME ""
+
+struct nolock_lockspace {
+	unsigned int nl_lvb_size;
+};
+
+struct lm_lockops nolock_ops;
+
+/**
+ * nolock_mount - mount a nolock lockspace
+ * @table_name: the name of the space to mount
+ * @host_data: host specific data
+ * @cb: the callback
+ * @fsdata: data to pass back with the callback
+ * @min_lvb_size: the minimum LVB size the caller needs
+ * @lockstruct: the lm_lockstruct to fill in
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+static int
+nolock_mount(char *table_name, char *host_data,
+	     lm_callback_t cb, lm_fsdata_t *fsdata,
+	     unsigned int min_lvb_size, struct lm_lockstruct *lockstruct)
+{
+	char *c;
+	unsigned int jid;
+	struct nolock_lockspace *nl;
+
+	/* If there is a "jid=" in the hostdata, use that jid.
+	   Otherwise, use zero. */
+
+	c = strstr(host_data, "jid=");
+	if (!c)
+		jid = 0;
+	else {
+		c += 4;
+		sscanf(c, "%u", &jid);
+	}
+
+	nl = kmalloc(sizeof(struct nolock_lockspace), GFP_KERNEL);
+	if (!nl)
+		return -ENOMEM;
+
+	memset(nl, 0, sizeof(struct nolock_lockspace));
+	nl->nl_lvb_size = min_lvb_size;
+
+	lockstruct->ls_jid = jid;
+	lockstruct->ls_first = 1;
+	lockstruct->ls_lvb_size = min_lvb_size;
+	lockstruct->ls_lockspace = (lm_lockspace_t *)nl;
+	lockstruct->ls_ops = &nolock_ops;
+	lockstruct->ls_flags = LM_LSFLAG_LOCAL | LM_LSFLAG_ASYNC;
+
+	return 0;
+}
+
+/**
+ * nolock_others_may_mount - allow other nodes to mount the lock space
+ * @lockspace: the lockspace in question
+ *
+ */
+
+static void
+nolock_others_may_mount(lm_lockspace_t *lockspace)
+{
+}
+
+/**
+ * nolock_unmount - unmount a lock space
+ * @lockspace: the lockspace to unmount
+ *
+ */
+
+static void
+nolock_unmount(lm_lockspace_t *lockspace)
+{
+	struct nolock_lockspace *nl = (struct nolock_lockspace *)lockspace;
+	kfree(nl);
+}
+
+/**
+ * nolock_get_lock - get a lm_lock_t given a description of the lock
+ * @lockspace: the lockspace the lock lives in
+ * @name: the name of the lock
+ * @lockp: return the lm_lock_t here
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+static int
+nolock_get_lock(lm_lockspace_t *lockspace, struct lm_lockname *name,
+		lm_lock_t **lockp)
+{
+	*lockp = (lm_lock_t *)lockspace;
+	return 0;
+}
+
+/**
+ * nolock_put_lock - get rid of a lock structure
+ * @lock: the lock to throw away
+ *
+ */
+
+static void
+nolock_put_lock(lm_lock_t *lock)
+{
+}
+
+/**
+ * nolock_lock - acquire a lock
+ * @lock: the lock to manipulate
+ * @cur_state: the current state
+ * @req_state: the requested state
+ * @flags: modifier flags
+ *
+ * Returns: A bitmap of LM_OUT_*
+ */
+
+static unsigned int
+nolock_lock(lm_lock_t *lock, unsigned int cur_state, unsigned int req_state,
+	    unsigned int flags)
+{
+	return req_state | LM_OUT_CACHEABLE;
+}
+
+/**
+ * nolock_unlock - unlock a lock
+ * @lock: the lock to manipulate
+ * @cur_state: the current state
+ *
+ * Returns: 0
+ */
+
+static unsigned int
+nolock_unlock(lm_lock_t *lock, unsigned int cur_state)
+{
+	return 0;
+}
+
+/**
+ * nolock_cancel - cancel a request on a lock
+ * @lock: the lock to cancel the request for
+ *
+ */
+
+static void
+nolock_cancel(lm_lock_t *lock)
+{
+}
+
+/**
+ * nolock_hold_lvb - hold on to a lock value block
+ * @lock: the lock the LVB is associated with
+ * @lvbp: return the LVB here
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+static int
+nolock_hold_lvb(lm_lock_t *lock, char **lvbp)
+{
+	struct nolock_lockspace *nl = (struct nolock_lockspace *)lock;
+	int error = 0;
+
+	*lvbp = kmalloc(nl->nl_lvb_size, GFP_KERNEL);
+	if (*lvbp)
+		memset(*lvbp, 0, nl->nl_lvb_size);
+	else
+		error = -ENOMEM;
+
+	return error;
+}
+
+/**
+ * nolock_unhold_lvb - release an LVB
+ * @lock: the lock the LVB is associated with
+ * @lvb: the lock value block
+ *
+ */
+
+static void
+nolock_unhold_lvb(lm_lock_t *lock, char *lvb)
+{
+	kfree(lvb);
+}
+
+/**
+ * nolock_sync_lvb - sync out the value of an LVB
+ * @lock: the lock the LVB is associated with
+ * @lvb: the lock value block
+ *
+ */
+
+static void
+nolock_sync_lvb(lm_lock_t *lock, char *lvb)
+{
+}
+
+/**
+ * nolock_plock_get - look up a posix lock (not supported)
+ * @lockspace: the lockspace
+ * @name: the name of the lock
+ * @owner: the owner doing the lookup
+ * @start: the start of the byte range
+ * @end: the end of the byte range
+ * @exclusive: whether a conflicting lock is exclusive
+ * @rowner: the owner of a conflicting lock
+ *
+ * Returns: -ENOSYS
+ */
+
+static int
+nolock_plock_get(lm_lockspace_t *lockspace,
+		 struct lm_lockname *name, unsigned long owner,
+		 uint64_t *start, uint64_t *end, int *exclusive,
+		 unsigned long *rowner)
+{
+	return -ENOSYS;
+}
+
+/**
+ * nolock_plock - acquire a posix lock (not supported)
+ * @lockspace: the lockspace
+ * @name: the name of the lock
+ * @owner: the owner of the lock
+ * @wait: whether to block waiting for the lock
+ * @exclusive: whether the lock is exclusive
+ * @start: the start of the byte range
+ * @end: the end of the byte range
+ *
+ * Returns: -ENOSYS
+ */
+
+static int
+nolock_plock(lm_lockspace_t *lockspace,
+	     struct lm_lockname *name, unsigned long owner,
+	     int wait, int exclusive, uint64_t start,
+	     uint64_t end)
+{
+	return -ENOSYS;
+}
+
+/**
+ * nolock_punlock - release a posix lock (not supported)
+ * @lockspace: the lockspace
+ * @name: the name of the lock
+ * @owner: the owner of the lock
+ * @start: the start of the byte range
+ * @end: the end of the byte range
+ *
+ * Returns: -ENOSYS
+ */
+
+static int
+nolock_punlock(lm_lockspace_t *lockspace,
+	       struct lm_lockname *name, unsigned long owner,
+	       uint64_t start, uint64_t end)
+{
+	return -ENOSYS;
+}
+
+/**
+ * nolock_recovery_done - reset the expired locks for a given jid
+ * @lockspace: the lockspace
+ * @jid: the jid
+ * @message: an LM_RD_* result code
+ *
+ */
+
+static void
+nolock_recovery_done(lm_lockspace_t *lockspace, unsigned int jid,
+		     unsigned int message)
+{
+}
+
+struct lm_lockops nolock_ops = {
+	.lm_proto_name = "lock_nolock",
+	.lm_mount = nolock_mount,
+	.lm_others_may_mount = nolock_others_may_mount,
+	.lm_unmount = nolock_unmount,
+	.lm_get_lock = nolock_get_lock,
+	.lm_put_lock = nolock_put_lock,
+	.lm_lock = nolock_lock,
+	.lm_unlock = nolock_unlock,
+	.lm_cancel = nolock_cancel,
+	.lm_hold_lvb = nolock_hold_lvb,
+	.lm_unhold_lvb = nolock_unhold_lvb,
+	.lm_sync_lvb = nolock_sync_lvb,
+	.lm_plock_get = nolock_plock_get,
+	.lm_plock = nolock_plock,
+	.lm_punlock = nolock_punlock,
+	.lm_recovery_done = nolock_recovery_done,
+	.lm_owner = THIS_MODULE,
+};
+
+/**
+ * init_nolock - Initialize the nolock module
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+int __init
+init_nolock(void)
+{
+	int error;
+
+	error = lm_register_proto(&nolock_ops);
+	if (error) {
+		printk("lock_nolock: can't register protocol: %d\n", error);
+		return error;
+	}
+
+	printk("Lock_Nolock %s (built %s %s) installed\n",
+	       RELEASE_NAME, __DATE__, __TIME__);
+
+	return 0;
+}
+
+/**
+ * exit_nolock - cleanup the nolock module
+ *
+ */
+
+void __exit
+exit_nolock(void)
+{
+	lm_unregister_proto(&nolock_ops);
+}
+
+module_init(init_nolock);
+module_exit(exit_nolock);
+
+MODULE_DESCRIPTION("GFS Nolock Locking Module " RELEASE_NAME);
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
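# Example: the lock-op call sequence a filesystem would use once a mount has
# succeeded. This is an illustrative editor's sketch, not part of the patch
# series: myfs_take_inode_lock() and the LM_FLAG_TRY policy are hypothetical,
# and a module that returned LM_OUT_ASYNC (as the interface allows) would
# instead complete the request through an LM_CB_ASYNC callback, which this
# sketch does not handle.

#include <linux/types.h>
#include <linux/errno.h>
#include <linux/lm_interface.h>

static int
myfs_take_inode_lock(struct lm_lockstruct *ls, uint64_t blkno)
{
	struct lm_lockname name = { .ln_number = blkno,
				    .ln_type = LM_TYPE_INODE };
	lm_lock_t *lock;
	unsigned int ret;
	int error;

	error = ls->ls_ops->lm_get_lock(ls->ls_lockspace, &name, &lock);
	if (error)
		return error;

	ret = ls->ls_ops->lm_lock(lock, LM_ST_UNLOCKED, LM_ST_SHARED,
				  LM_FLAG_TRY);
	if ((ret & LM_OUT_ST_MASK) != LM_ST_SHARED) {
		/* The LM_FLAG_TRY request was not granted; give the
		   lock structure back and let the caller retry. */
		ls->ls_ops->lm_put_lock(lock);
		return -EAGAIN;
	}

	/* ... use the protected resource, then release: */
	ls->ls_ops->lm_unlock(lock, LM_ST_SHARED);
	ls->ls_ops->lm_put_lock(lock);

	return 0;
}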